aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/tile/Kconfig12
-rw-r--r--arch/tile/include/asm/cacheflush.h52
-rw-r--r--arch/tile/include/asm/io.h15
-rw-r--r--arch/tile/include/asm/pci-bridge.h117
-rw-r--r--arch/tile/include/asm/pci.h107
-rw-r--r--arch/tile/include/asm/processor.h10
-rw-r--r--arch/tile/include/hv/drv_xgbe_impl.h300
-rw-r--r--arch/tile/include/hv/drv_xgbe_intf.h615
-rw-r--r--arch/tile/include/hv/netio_errors.h122
-rw-r--r--arch/tile/include/hv/netio_intf.h2975
-rw-r--r--arch/tile/kernel/Makefile1
-rw-r--r--arch/tile/kernel/pci.c621
-rw-r--r--drivers/net/Kconfig12
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/tile/Makefile10
-rw-r--r--drivers/net/tile/tilepro.c2406
-rw-r--r--drivers/pci/Makefile1
-rw-r--r--drivers/pci/quirks.c18
19 files changed, 7210 insertions, 187 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 98fc0450d7e1..b3be8b3d0437 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5839,6 +5839,8 @@ M: Chris Metcalf <cmetcalf@tilera.com>
5839W: http://www.tilera.com/scm/ 5839W: http://www.tilera.com/scm/
5840S: Supported 5840S: Supported
5841F: arch/tile/ 5841F: arch/tile/
5842F: drivers/char/hvc_tile.c
5843F: drivers/net/tile/
5842 5844
5843TLAN NETWORK DRIVER 5845TLAN NETWORK DRIVER
5844M: Samuel Chessman <chessman@tux.org> 5846M: Samuel Chessman <chessman@tux.org>
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 07ec8a865c1d..e11b5fcb70eb 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration
329 329
330menu "Bus options" 330menu "Bus options"
331 331
332config PCI
333 bool "PCI support"
334 default y
335 select PCI_DOMAINS
336 ---help---
337 Enable PCI root complex support, so PCIe endpoint devices can
338 be attached to the Tile chip. Many, but not all, PCI devices
339 are supported under Tilera's root complex driver.
340
341config PCI_DOMAINS
342 bool
343
332config NO_IOMEM 344config NO_IOMEM
333 def_bool !PCI 345 def_bool !PCI
334 346
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index c5741da4eeac..14a3f8556ace 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size)
137 mb_incoherent(); 137 mb_incoherent();
138} 138}
139 139
140/*
141 * Flush & invalidate a VA range that is homed remotely on a single core,
142 * waiting until the memory controller holds the flushed values.
143 */
144static inline void finv_buffer_remote(void *buffer, size_t size)
145{
146 char *p;
147 int i;
148
149 /*
150 * Flush and invalidate the buffer out of the local L1/L2
151 * and request the home cache to flush and invalidate as well.
152 */
153 __finv_buffer(buffer, size);
154
155 /*
156 * Wait for the home cache to acknowledge that it has processed
157 * all the flush-and-invalidate requests. This does not mean
158 * that the flushed data has reached the memory controller yet,
159 * but it does mean the home cache is processing the flushes.
160 */
161 __insn_mf();
162
163 /*
164 * Issue a load to the last cache line, which can't complete
165 * until all the previously-issued flushes to the same memory
166 * controller have also completed. If we weren't striping
167 * memory, that one load would be sufficient, but since we may
168 * be, we also need to back up to the last load issued to
169 * another memory controller, which would be the point where
170 * we crossed an 8KB boundary (the granularity of striping
171 * across memory controllers). Keep backing up and doing this
172 * until we are before the beginning of the buffer, or have
173 * hit all the controllers.
174 */
175 for (i = 0, p = (char *)buffer + size - 1;
176 i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
177 ++i) {
178 const unsigned long STRIPE_WIDTH = 8192;
179
180 /* Force a load instruction to issue. */
181 *(volatile char *)p;
182
183 /* Jump to end of previous stripe. */
184 p -= STRIPE_WIDTH;
185 p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
186 }
187
188 /* Wait for the loads (and thus flushes) to have completed. */
189 __insn_mf();
190}
191
140#endif /* _ASM_TILE_CACHEFLUSH_H */ 192#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index ee43328713ab..d3cbb9b14cbe 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr);
55#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) 55#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size)
56#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) 56#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
57 57
58void __iomem *ioport_map(unsigned long port, unsigned int len);
59extern inline void ioport_unmap(void __iomem *addr) {}
60
61#define mmiowb() 58#define mmiowb()
62 59
63/* Conversion between virtual and physical mappings. */ 60/* Conversion between virtual and physical mappings. */
@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
189 * we never run, uses them unconditionally. 186 * we never run, uses them unconditionally.
190 */ 187 */
191 188
192static inline int ioport_panic(void) 189static inline long ioport_panic(void)
193{ 190{
194 panic("inb/outb and friends do not exist on tile"); 191 panic("inb/outb and friends do not exist on tile");
195 return 0; 192 return 0;
196} 193}
197 194
195static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
196{
197 return (void __iomem *) ioport_panic();
198}
199
200static inline void ioport_unmap(void __iomem *addr)
201{
202 ioport_panic();
203}
204
198static inline u8 inb(unsigned long addr) 205static inline u8 inb(unsigned long addr)
199{ 206{
200 return ioport_panic(); 207 return ioport_panic();
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h
deleted file mode 100644
index e853b0e2793b..000000000000
--- a/arch/tile/include/asm/pci-bridge.h
+++ /dev/null
@@ -1,117 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#ifndef _ASM_TILE_PCI_BRIDGE_H
16#define _ASM_TILE_PCI_BRIDGE_H
17
18#include <linux/ioport.h>
19#include <linux/pci.h>
20
21struct device_node;
22struct pci_controller;
23
24/*
25 * pci_io_base returns the memory address at which you can access
26 * the I/O space for PCI bus number `bus' (or NULL on error).
27 */
28extern void __iomem *pci_bus_io_base(unsigned int bus);
29extern unsigned long pci_bus_io_base_phys(unsigned int bus);
30extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
31
32/* Allocate a new PCI host bridge structure */
33extern struct pci_controller *pcibios_alloc_controller(void);
34
35/* Helper function for setting up resources */
36extern void pci_init_resource(struct resource *res, unsigned long start,
37 unsigned long end, int flags, char *name);
38
39/* Get the PCI host controller for a bus */
40extern struct pci_controller *pci_bus_to_hose(int bus);
41
42/*
43 * Structure of a PCI controller (host bridge)
44 */
45struct pci_controller {
46 int index; /* PCI domain number */
47 struct pci_bus *root_bus;
48
49 int first_busno;
50 int last_busno;
51
52 int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
53 int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
54
55 struct pci_ops *ops;
56
57 int irq_base; /* Base IRQ from the Hypervisor */
58 int plx_gen1; /* flag for PLX Gen 1 configuration */
59
60 /* Address ranges that are routed to this controller/bridge. */
61 struct resource mem_resources[3];
62};
63
64static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
65{
66 return bus->sysdata;
67}
68
69extern void setup_indirect_pci_nomap(struct pci_controller *hose,
70 void __iomem *cfg_addr, void __iomem *cfg_data);
71extern void setup_indirect_pci(struct pci_controller *hose,
72 u32 cfg_addr, u32 cfg_data);
73extern void setup_grackle(struct pci_controller *hose);
74
75extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
76
77/*
78 * The following code swizzles for exactly one bridge. The routine
79 * common_swizzle below handles multiple bridges. But there are a
80 * some boards that don't follow the PCI spec's suggestion so we
81 * break this piece out separately.
82 */
83static inline unsigned char bridge_swizzle(unsigned char pin,
84 unsigned char idsel)
85{
86 return (((pin-1) + idsel) % 4) + 1;
87}
88
89/*
90 * The following macro is used to lookup irqs in a standard table
91 * format for those PPC systems that do not already have PCI
92 * interrupts properly routed.
93 */
94/* FIXME - double check this */
95#define PCI_IRQ_TABLE_LOOKUP ({ \
96 long _ctl_ = -1; \
97 if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \
98 _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
99 _ctl_; \
100})
101
102/*
103 * Scan the buses below a given PCI host bridge and assign suitable
104 * resources to all devices found.
105 */
106extern int pciauto_bus_scan(struct pci_controller *, int);
107
108#ifdef CONFIG_PCI
109extern unsigned long pci_address_to_pio(phys_addr_t address);
110#else
111static inline unsigned long pci_address_to_pio(phys_addr_t address)
112{
113 return (unsigned long)-1;
114}
115#endif
116
117#endif /* _ASM_TILE_PCI_BRIDGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index b0c15da2d5d5..c3fc458a0d32 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,7 +15,29 @@
15#ifndef _ASM_TILE_PCI_H 15#ifndef _ASM_TILE_PCI_H
16#define _ASM_TILE_PCI_H 16#define _ASM_TILE_PCI_H
17 17
18#include <asm/pci-bridge.h> 18#include <linux/pci.h>
19
20/*
21 * Structure of a PCI controller (host bridge)
22 */
23struct pci_controller {
24 int index; /* PCI domain number */
25 struct pci_bus *root_bus;
26
27 int first_busno;
28 int last_busno;
29
30 int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
31 int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
32
33 struct pci_ops *ops;
34
35 int irq_base; /* Base IRQ from the Hypervisor */
36 int plx_gen1; /* flag for PLX Gen 1 configuration */
37
38 /* Address ranges that are routed to this controller/bridge. */
39 struct resource mem_resources[3];
40};
19 41
20/* 42/*
21 * The hypervisor maps the entirety of CPA-space as bus addresses, so 43 * The hypervisor maps the entirety of CPA-space as bus addresses, so
@@ -24,56 +46,12 @@
24 */ 46 */
25#define PCI_DMA_BUS_IS_PHYS 1 47#define PCI_DMA_BUS_IS_PHYS 1
26 48
27struct pci_controller *pci_bus_to_hose(int bus);
28unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
29int __init tile_pci_init(void); 49int __init tile_pci_init(void);
30void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
31void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
32void __devinit pcibios_fixup_bus(struct pci_bus *bus);
33 50
34int __devinit _tile_cfg_read(struct pci_controller *hose, 51void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
35 int bus, 52static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
36 int slot,
37 int function,
38 int offset,
39 int size,
40 u32 *val);
41int __devinit _tile_cfg_write(struct pci_controller *hose,
42 int bus,
43 int slot,
44 int function,
45 int offset,
46 int size,
47 u32 val);
48 53
49/* 54void __devinit pcibios_fixup_bus(struct pci_bus *bus);
50 * These are used to to config reads and writes in the early stages of
51 * setup before the driver infrastructure has been set up enough to be
52 * able to do config reads and writes.
53 */
54#define early_cfg_read(where, size, value) \
55 _tile_cfg_read(controller, \
56 current_bus, \
57 pci_slot, \
58 pci_fn, \
59 where, \
60 size, \
61 value)
62
63#define early_cfg_write(where, size, value) \
64 _tile_cfg_write(controller, \
65 current_bus, \
66 pci_slot, \
67 pci_fn, \
68 where, \
69 size, \
70 value)
71
72
73
74#define PCICFG_BYTE 1
75#define PCICFG_WORD 2
76#define PCICFG_DWORD 4
77 55
78#define TILE_NUM_PCIE 2 56#define TILE_NUM_PCIE 2
79 57
@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus)
88} 66}
89 67
90/* 68/*
91 * I/O space is currently not supported. 69 * pcibios_assign_all_busses() tells whether or not the bus numbers
70 * should be reassigned, in case the BIOS didn't do it correctly, or
71 * in case we don't have a BIOS and we want to let Linux do it.
92 */ 72 */
73static inline int pcibios_assign_all_busses(void)
74{
75 return 1;
76}
93 77
94#define TILE_PCIE_LOWER_IO 0x0 78/*
95#define TILE_PCIE_UPPER_IO 0x10000 79 * No special bus mastering setup handling.
96#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF 80 */
97
98#define _PAGE_NO_CACHE 0
99#define _PAGE_GUARDED 0
100
101
102#define pcibios_assign_all_busses() pci_assign_all_buses
103extern int pci_assign_all_buses;
104
105static inline void pcibios_set_master(struct pci_dev *dev) 81static inline void pcibios_set_master(struct pci_dev *dev)
106{ 82{
107 /* No special bus mastering setup handling */
108} 83}
109 84
110#define PCIBIOS_MIN_MEM 0 85#define PCIBIOS_MIN_MEM 0
111#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO 86#define PCIBIOS_MIN_IO 0
112 87
113/* 88/*
114 * This flag tells if the platform is TILEmpower that needs 89 * This flag tells if the platform is TILEmpower that needs
115 * special configuration for the PLX switch chip. 90 * special configuration for the PLX switch chip.
116 */ 91 */
117extern int blade_pci; 92extern int tile_plx_gen1;
93
94/* Use any cpu for PCI. */
95#define cpumask_of_pcibus(bus) cpu_online_mask
118 96
119/* implement the pci_ DMA API in terms of the generic device dma_ one */ 97/* implement the pci_ DMA API in terms of the generic device dma_ one */
120#include <asm-generic/pci-dma-compat.h> 98#include <asm-generic/pci-dma-compat.h>
@@ -122,7 +100,4 @@ extern int blade_pci;
122/* generic pci stuff */ 100/* generic pci stuff */
123#include <asm-generic/pci.h> 101#include <asm-generic/pci.h>
124 102
125/* Use any cpu for PCI. */
126#define cpumask_of_pcibus(bus) cpu_online_mask
127
128#endif /* _ASM_TILE_PCI_H */ 103#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 1747ff3946b2..a9e7c8760334 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -292,8 +292,18 @@ extern int kstack_hash;
292/* Are we using huge pages in the TLB for kernel data? */ 292/* Are we using huge pages in the TLB for kernel data? */
293extern int kdata_huge; 293extern int kdata_huge;
294 294
295/* Support standard Linux prefetching. */
296#define ARCH_HAS_PREFETCH
297#define prefetch(x) __builtin_prefetch(x)
295#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() 298#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
296 299
300/* Bring a value into the L1D, faulting the TLB if necessary. */
301#ifdef __tilegx__
302#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
303#else
304#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
305#endif
306
297#else /* __ASSEMBLY__ */ 307#else /* __ASSEMBLY__ */
298 308
299/* Do some slow action (e.g. read a slow SPR). */ 309/* Do some slow action (e.g. read a slow SPR). */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 000000000000..3a73b2b44913
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * @file drivers/xgbe/impl.h
17 * Implementation details for the NetIO library.
18 */
19
20#ifndef __DRV_XGBE_IMPL_H__
21#define __DRV_XGBE_IMPL_H__
22
23#include <hv/netio_errors.h>
24#include <hv/netio_intf.h>
25#include <hv/drv_xgbe_intf.h>
26
27
28/** How many groups we have (log2). */
29#define LOG2_NUM_GROUPS (12)
30/** How many groups we have. */
31#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
32
33/** Number of output requests we'll buffer per tile. */
34#define EPP_REQS_PER_TILE (32)
35
36/** Words used in an eDMA command without checksum acceleration. */
37#define EDMA_WDS_NO_CSUM 8
38/** Words used in an eDMA command with checksum acceleration. */
39#define EDMA_WDS_CSUM 10
40/** Total available words in the eDMA command FIFO. */
41#define EDMA_WDS_TOTAL 128
42
43
44/*
45 * FIXME: These definitions are internal and should have underscores!
46 * NOTE: The actual numeric values here are intentional and allow us to
47 * optimize the concept "if small ... else if large ... else ...", by
48 * checking for the low bit being set, and then for non-zero.
49 * These are used as array indices, so they must have the values (0, 1, 2)
50 * in some order.
51 */
52#define SIZE_SMALL (1) /**< Small packet queue. */
53#define SIZE_LARGE (2) /**< Large packet queue. */
54#define SIZE_JUMBO (0) /**< Jumbo packet queue. */
55
56/** The number of "SIZE_xxx" values. */
57#define NETIO_NUM_SIZES 3
58
59
60/*
61 * Default numbers of packets for IPP drivers. These values are chosen
62 * such that CIPP1 will not overflow its L2 cache.
63 */
64
65/** The default number of small packets. */
66#define NETIO_DEFAULT_SMALL_PACKETS 2750
67/** The default number of large packets. */
68#define NETIO_DEFAULT_LARGE_PACKETS 2500
69/** The default number of jumbo packets. */
70#define NETIO_DEFAULT_JUMBO_PACKETS 250
71
72
73/** Log2 of the size of a memory arena. */
74#define NETIO_ARENA_SHIFT 24 /* 16 MB */
75/** Size of a memory arena. */
76#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT)
77
78
79/** A queue of packets.
80 *
81 * This structure partially defines a queue of packets waiting to be
82 * processed. The queue as a whole is written to by an interrupt handler and
83 * read by non-interrupt code; this data structure is what's touched by the
84 * interrupt handler. The other part of the queue state, the read offset, is
85 * kept in user space, not in hypervisor space, so it is in a separate data
86 * structure.
87 *
88 * The read offset (__packet_receive_read in the user part of the queue
89 * structure) points to the next packet to be read. When the read offset is
90 * equal to the write offset, the queue is empty; therefore the queue must
91 * contain one more slot than the required maximum queue size.
92 *
93 * Here's an example of all 3 state variables and what they mean. All
94 * pointers move left to right.
95 *
96 * @code
97 * I I V V V V I I I I
98 * 0 1 2 3 4 5 6 7 8 9 10
99 * ^ ^ ^ ^
100 * | | |
101 * | | __last_packet_plus_one
102 * | __buffer_write
103 * __packet_receive_read
104 * @endcode
105 *
106 * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one
107 * = 10). The read pointer is at 2, and the write pointer is at 6; thus,
108 * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining
109 * slots are invalid (do not contain a packet).
110 */
111typedef struct {
112 /** Byte offset of the next notify packet to be written: zero for the first
113 * packet on the queue, sizeof (netio_pkt_t) for the second packet on the
114 * queue, etc. */
115 volatile uint32_t __packet_write;
116
117 /** Offset of the packet after the last valid packet (i.e., when any
118 * pointer is incremented to this value, it wraps back to zero). */
119 uint32_t __last_packet_plus_one;
120}
121__netio_packet_queue_t;
122
123
124/** A queue of buffers.
125 *
126 * This structure partially defines a queue of empty buffers which have been
127 * obtained via requests to the IPP. (The elements of the queue are packet
128 * handles, which are transformed into a full netio_pkt_t when the buffer is
129 * retrieved.) The queue as a whole is written to by an interrupt handler and
130 * read by non-interrupt code; this data structure is what's touched by the
131 * interrupt handler. The other parts of the queue state, the read offset and
132 * requested write offset, are kept in user space, not in hypervisor space, so
133 * they are in a separate data structure.
134 *
135 * The read offset (__buffer_read in the user part of the queue structure)
136 * points to the next buffer to be read. When the read offset is equal to the
137 * write offset, the queue is empty; therefore the queue must contain one more
138 * slot than the required maximum queue size.
139 *
140 * The requested write offset (__buffer_requested_write in the user part of
141 * the queue structure) points to the slot which will hold the next buffer we
142 * request from the IPP, once we get around to sending such a request. When
143 * the requested write offset is equal to the write offset, no requests for
144 * new buffers are outstanding; when the requested write offset is one greater
145 * than the read offset, no more requests may be sent.
146 *
147 * Note that, unlike the packet_queue, the buffer_queue places incoming
148 * buffers at decreasing addresses. This makes the check for "is it time to
149 * wrap the buffer pointer" cheaper in the assembly code which receives new
150 * buffers, and means that the value which defines the queue size,
151 * __last_buffer, is different than in the packet queue. Also, the offset
152 * used in the packet_queue is already scaled by the size of a packet; here we
153 * use unscaled slot indices for the offsets. (These differences are
154 * historical, and in the future it's possible that the packet_queue will look
155 * more like this queue.)
156 *
157 * @code
158 * Here's an example of all 4 state variables and what they mean. Remember:
159 * all pointers move right to left.
160 *
161 * V V V I I R R V V V
162 * 0 1 2 3 4 5 6 7 8 9
163 * ^ ^ ^ ^
164 * | | | |
165 * | | | __last_buffer
166 * | | __buffer_write
167 * | __buffer_requested_write
168 * __buffer_read
169 * @endcode
170 *
171 * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9).
172 * The read pointer is at 2, and the write pointer is at 6; thus, there are
173 * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write
174 * pointer is at 4; thus, requests have been made to the IPP for buffers which
175 * will be placed in slots 6 and 5 when they arrive. Finally, the remaining
176 * slots are invalid (do not contain a buffer).
177 */
178typedef struct
179{
180 /** Ordinal number of the next buffer to be written: 0 for the first slot in
181 * the queue, 1 for the second slot in the queue, etc. */
182 volatile uint32_t __buffer_write;
183
184 /** Ordinal number of the last buffer (i.e., when any pointer is decremented
185 * below zero, it is reloaded with this value). */
186 uint32_t __last_buffer;
187}
188__netio_buffer_queue_t;
189
190
191/**
192 * An object for providing Ethernet packets to a process.
193 */
194typedef struct __netio_queue_impl_t
195{
196 /** The queue of packets waiting to be received. */
197 __netio_packet_queue_t __packet_receive_queue;
198 /** The intr bit mask that IDs this device. */
199 unsigned int __intr_id;
200 /** Offset to queues of empty buffers, one per size. */
201 uint32_t __buffer_queue[NETIO_NUM_SIZES];
202 /** The address of the first EPP tile, or -1 if no EPP. */
203 /* ISSUE: Actually this is always "0" or "~0". */
204 uint32_t __epp_location;
205 /** The queue ID that this queue represents. */
206 unsigned int __queue_id;
207 /** Number of acknowledgements received. */
208 volatile uint32_t __acks_received;
209 /** Last completion number received for packet_sendv. */
210 volatile uint32_t __last_completion_rcv;
211 /** Number of packets allowed to be outstanding. */
212 uint32_t __max_outstanding;
213 /** First VA available for packets. */
214 void* __va_0;
215 /** First VA in second range available for packets. */
216 void* __va_1;
217 /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
218 uint32_t __padding[3];
219 /** The packets themselves. */
220 netio_pkt_t __packets[0];
221}
222netio_queue_impl_t;
223
224
225/**
226 * An object for managing the user end of a NetIO queue.
227 */
228typedef struct __netio_queue_user_impl_t
229{
230 /** The next incoming packet to be read. */
231 uint32_t __packet_receive_read;
232 /** The next empty buffers to be read, one index per size. */
233 uint8_t __buffer_read[NETIO_NUM_SIZES];
234 /** Where the empty buffer we next request from the IPP will go, one index
235 * per size. */
236 uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
237 /** PCIe interface flag. */
238 uint8_t __pcie;
239 /** Number of packets left to be received before we send a credit update. */
240 uint32_t __receive_credit_remaining;
241 /** Value placed in __receive_credit_remaining when it reaches zero. */
242 uint32_t __receive_credit_interval;
243 /** First fast I/O routine index. */
244 uint32_t __fastio_index;
245 /** Number of acknowledgements expected. */
246 uint32_t __acks_outstanding;
247 /** Last completion number requested. */
248 uint32_t __last_completion_req;
249 /** File descriptor for driver. */
250 int __fd;
251}
252netio_queue_user_impl_t;
253
254
255#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */
256#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */
257
258
259/** Internal structure used to convey packet send information to the
260 * hypervisor. FIXME: Actually, it's not used for that anymore, but
261 * netio_packet_send() still uses it internally.
262 */
263typedef struct
264{
265 uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */
266 uint16_t transfer_size; /**< Size of packet */
267 uint32_t va; /**< VA of start of packet */
268 __netio_pkt_handle_t handle; /**< Packet handle */
269 uint32_t csum0; /**< First checksum word */
270 uint32_t csum1; /**< Second checksum word */
271}
272__netio_send_cmd_t;
273
274
275/** Flags used in two contexts:
276 * - As the "flags" member in the __netio_send_cmd_t, above; used only
277 * for netio_pkt_send_{prepare,commit}.
278 * - As part of the flags passed to the various send packet fast I/O calls.
279 */
280
281/** Need acknowledgement on this packet. Note that some code in the
282 * normal send_pkt fast I/O handler assumes that this is equal to 1. */
283#define __NETIO_SEND_FLG_ACK 0x1
284
285/** Do checksum on this packet. (Only used with the __netio_send_cmd_t;
286 * normal packet sends use a special fast I/O index to denote checksumming,
287 * and multi-segment sends test the checksum descriptor.) */
288#define __NETIO_SEND_FLG_CSUM 0x2
289
290/** Get a completion on this packet. Only used with multi-segment sends. */
291#define __NETIO_SEND_FLG_COMPLETION 0x4
292
293/** Position of the number-of-extra-segments value in the flags word.
294 Only used with multi-segment sends. */
295#define __NETIO_SEND_FLG_XSEG_SHIFT 3
296
297/** Width of the number-of-extra-segments value in the flags word. */
298#define __NETIO_SEND_FLG_XSEG_WIDTH 2
299
300#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 000000000000..146e47d5334b
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * @file drv_xgbe_intf.h
17 * Interface to the hypervisor XGBE driver.
18 */
19
20#ifndef __DRV_XGBE_INTF_H__
21#define __DRV_XGBE_INTF_H__
22
23/**
24 * An object for forwarding VAs and PAs to the hypervisor.
25 * @ingroup types
26 *
27 * This allows the supervisor to specify a number of areas of memory to
28 * store packet buffers.
29 */
30typedef struct
31{
32 /** The physical address of the memory. */
33 HV_PhysAddr pa;
34 /** Page table entry for the memory. This is only used to derive the
35 * memory's caching mode; the PA bits are ignored. */
36 HV_PTE pte;
37 /** The virtual address of the memory. */
38 HV_VirtAddr va;
39 /** Size (in bytes) of the memory area. */
40 int size;
41
42}
43netio_ipp_address_t;
44
45/** The various pread/pwrite offsets into the hypervisor-level driver.
46 * @ingroup types
47 */
48typedef enum
49{
50 /** Inform the Linux driver of the address of the NetIO arena memory.
51 * This offset is actually only used to convey information from netio
52 * to the Linux driver; it never makes it from there to the hypervisor.
53 * Write-only; takes a uint32_t specifying the VA address. */
54 NETIO_FIXED_ADDR = 0x5000000000000000ULL,
55
56 /** Inform the Linux driver of the size of the NetIO arena memory.
57 * This offset is actually only used to convey information from netio
58 * to the Linux driver; it never makes it from there to the hypervisor.
59 * Write-only; takes a uint32_t specifying the VA size. */
60 NETIO_FIXED_SIZE = 0x5100000000000000ULL,
61
62 /** Register current tile with IPP. Write then read: write, takes a
63 * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
64 NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL,
65
66 /** Unregister current tile from IPP. Write-only, takes a dummy argument. */
67 NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
68
69 /** Start packets flowing. Write-only, takes a dummy argument. */
70 NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL,
71
72 /** Stop packets flowing. Write-only, takes a dummy argument. */
73 NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL,
74
75 /** Configure group (typically we group on VLAN). Write-only: takes an
76 * array of netio_group_t's, low 24 bits of the offset is the base group
77 * number times the size of a netio_group_t. */
78 NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL,
79
80 /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low
81 * 24 bits of the offset is the base bucket number times the size of a
82 * netio_bucket_t. */
83 NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
84
85 /** Get/set a parameter. Read or write: read or write data is the parameter
86 * value, low 32 bits of the offset is a __netio_getset_offset_t. */
87 NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL,
88
89 /** Get fast I/O index. Read-only; returns a 4-byte base index value. */
90 NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL,
91
92 /** Configure hijack IP address. Packets with this IPv4 dest address
93 * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address
94 * in some standard form. FIXME: Define the form! */
95 NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL,
96
97 /**
98 * Offsets beyond this point are reserved for the supervisor (although that
99 * enforcement must be done by the supervisor driver itself).
100 */
101 NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL,
102
103 /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */
104 NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL,
105
106 /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */
107 NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
108
109 /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux
110 * userspace code due to limitations in the pread/pwrite syscalls. */
111
112 /** Drain LIPP buffers. */
113 NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL,
114
115 /** Supply a netio_ipp_address_t to be used as shared memory for the
116 * LEPP command queue. */
117 NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL,
118
119 /* 0xFC... is currently unused. */
120
121 /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */
122 NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL,
123
124 /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */
125 NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL,
126
127 /** Supply packet arena. Write-only, takes an array of
128 * netio_ipp_address_t values. */
129 NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL,
130} netio_hv_offset_t;
131
132/** Extract the base offset from an offset */
133#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL)
134/** Extract the local offset from an offset */
135#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL)
136
137
138/**
139 * Get/set offset.
140 */
141typedef union
142{
143 struct
144 {
145 uint64_t addr:48; /**< Class-specific address */
146 unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */
147 unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */
148 }
149 bits; /**< Bitfields */
150 uint64_t word; /**< Aggregated value to use as the offset */
151}
152__netio_getset_offset_t;
153
154/**
155 * Fast I/O index offsets (must be contiguous).
156 */
157typedef enum
158{
159 NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */
160 NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */
161 NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */
162 NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */
163 NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */
164 NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */
165 NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */
166 NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */
167} netio_fastio_index_t;
168
169/** 3-word return type for Fast I/O call. */
170typedef struct
171{
172 int err; /**< Error code. */
173 uint32_t val0; /**< Value. Meaning depends upon the specific call. */
174 uint32_t val1; /**< Value. Meaning depends upon the specific call. */
175} netio_fastio_rv3_t;
176
177/** 0-argument fast I/O call */
178int __netio_fastio0(uint32_t fastio_index);
179/** 1-argument fast I/O call */
180int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
181/** 3-argument fast I/O call, 2-word return value */
182netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
183 uint32_t arg1, uint32_t arg2);
184/** 4-argument fast I/O call */
185int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
186 uint32_t arg2, uint32_t arg3);
187/** 6-argument fast I/O call */
188int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
189 uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
190/** 9-argument fast I/O call */
191int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
192 uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
193 uint32_t arg6, uint32_t arg7, uint32_t arg8);
194
195/** Allocate an empty packet.
196 * @param fastio_index Fast I/O index.
197 * @param size Size of the packet to allocate.
198 */
199#define __netio_fastio_allocate(fastio_index, size) \
200 __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
201
202/** Free a buffer.
203 * @param fastio_index Fast I/O index.
204 * @param handle Handle for the packet to free.
205 */
206#define __netio_fastio_free_buffer(fastio_index, handle) \
207 __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
208
209/** Increment our receive credits.
210 * @param fastio_index Fast I/O index.
211 * @param credits Number of credits to add.
212 */
213#define __netio_fastio_return_credits(fastio_index, credits) \
214 __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
215
216/** Send packet, no checksum.
217 * @param fastio_index Fast I/O index.
218 * @param ackflag Nonzero if we want an ack.
219 * @param size Size of the packet.
220 * @param va Virtual address of start of packet.
221 * @param handle Packet handle.
222 */
223#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
224 __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
225 size, va, handle)
226
227/** Send packet, calculate checksum.
228 * @param fastio_index Fast I/O index.
229 * @param ackflag Nonzero if we want an ack.
230 * @param size Size of the packet.
231 * @param va Virtual address of start of packet.
232 * @param handle Packet handle.
233 * @param csum0 Shim checksum header.
234 * @param csum1 Checksum seed.
235 */
236#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
237 csum0, csum1) \
238 __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
239 size, va, handle, csum0, csum1)
240
241
242/** Format for the "csum0" argument to the __netio_fastio_send routines
243 * and LEPP. Note that this is currently exactly identical to the
244 * ShimProtocolOffloadHeader.
245 */
246typedef union
247{
248 struct
249 {
250 unsigned int start_byte:7; /**< The first byte to be checksummed */
251 unsigned int count:14; /**< Number of bytes to be checksummed. */
252 unsigned int destination_byte:7; /**< The byte to write the checksum to. */
253 unsigned int reserved:4; /**< Reserved. */
254 } bits; /**< Decomposed method of access. */
255 unsigned int word; /**< To send out the IDN. */
256} __netio_checksum_header_t;
257
258
259/** Sendv packet with 1 or 2 segments.
260 * @param fastio_index Fast I/O index.
261 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
262 * 1 in next 2 bits; expected checksum in high 16 bits.
263 * @param confno Confirmation number to request, if notify flag set.
264 * @param csum0 Checksum descriptor; if zero, no checksum.
265 * @param va_F Virtual address of first segment.
266 * @param va_L Virtual address of last segment, if 2 segments.
267 * @param len_F_L Length of first segment in low 16 bits; length of last
268 * segment, if 2 segments, in high 16 bits.
269 */
270#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
271 va_F, va_L, len_F_L) \
272 __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
273 csum0, va_F, va_L, len_F_L)
274
275/** Send packet on PCIe interface.
276 * @param fastio_index Fast I/O index.
277 * @param flags Ack/csum/notify flags in low 3 bits.
278 * @param confno Confirmation number to request, if notify flag set.
279 * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
280 * @param va_F Virtual address of the packet buffer.
281 * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
282 * @param len_F_L Length of the packet buffer in low 16 bits.
283 */
284#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
285 va_F, va_L, len_F_L) \
286 __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
287 csum0, va_F, va_L, len_F_L)
288
289/** Sendv packet with 3 or 4 segments.
290 * @param fastio_index Fast I/O index.
291 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
292 * 1 in next 2 bits; expected checksum in high 16 bits.
293 * @param confno Confirmation number to request, if notify flag set.
294 * @param csum0 Checksum descriptor; if zero, no checksum.
295 * @param va_F Virtual address of first segment.
296 * @param va_L Virtual address of last segment (third segment if 3 segments,
297 * fourth segment if 4 segments).
298 * @param len_F_L Length of first segment in low 16 bits; length of last
299 * segment in high 16 bits.
300 * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
301 * second when there are three segments, and third if there are four.
302 * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
303 * second when there are four segments.
304 * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
305 * 1 segment, if 4 segments, in high 16 bits.
306 */
307#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
308 va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
309 __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
310 csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
311
312/** Send vector of packets.
313 * @param fastio_index Fast I/O index.
314 * @param seqno Number of packets transmitted so far on this interface;
315 * used to decide which packets should be acknowledged.
316 * @param nentries Number of entries in vector.
317 * @param va Virtual address of start of vector entry array.
318 * @return 3-word netio_fastio_rv3_t structure. The structure's err member
319 * is an error code, or zero if no error. The val0 member is the
320 * updated value of seqno; it has been incremented by 1 for each
321 * packet sent. That increment may be less than nentries if an
322 * error occured, or if some of the entries in the vector contain
323 * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the
324 * updated value of nentries; it has been decremented by 1 for each
325 * vector entry processed. Again, that decrement may be less than
326 * nentries (leaving the returned value positive) if an error
327 * occurred.
328 */
329#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
330 __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
331 nentries, va)
332
333
334/** An egress DMA command for LEPP. */
335typedef struct
336{
337 /** Is this a TSO transfer?
338 *
339 * NOTE: This field is always 0, to distinguish it from
340 * lepp_tso_cmd_t. It must come first!
341 */
342 uint8_t tso : 1;
343
344 /** Unused padding bits. */
345 uint8_t _unused : 3;
346
347 /** Should this packet be sent directly from caches instead of DRAM,
348 * using hash-for-home to locate the packet data?
349 */
350 uint8_t hash_for_home : 1;
351
352 /** Should we compute a checksum? */
353 uint8_t compute_checksum : 1;
354
355 /** Is this the final buffer for this packet?
356 *
357 * A single packet can be split over several input buffers (a "gather"
358 * operation). This flag indicates that this is the last buffer
359 * in a packet.
360 */
361 uint8_t end_of_packet : 1;
362
363 /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
364 uint8_t send_completion : 1;
365
366 /** High bits of Client Physical Address of the start of the buffer
367 * to be egressed.
368 *
369 * NOTE: Only 6 bits are actually needed here, as CPAs are
370 * currently 38 bits. So two bits could be scavenged from this.
371 */
372 uint8_t cpa_hi;
373
374 /** The number of bytes to be egressed. */
375 uint16_t length;
376
377 /** Low 32 bits of Client Physical Address of the start of the buffer
378 * to be egressed.
379 */
380 uint32_t cpa_lo;
381
382 /** Checksum information (only used if 'compute_checksum'). */
383 __netio_checksum_header_t checksum_data;
384
385} lepp_cmd_t;
386
387
388/** A chunk of physical memory for a TSO egress. */
389typedef struct
390{
391 /** The low bits of the CPA. */
392 uint32_t cpa_lo;
393 /** The high bits of the CPA. */
394 uint16_t cpa_hi : 15;
395 /** Should this packet be sent directly from caches instead of DRAM,
396 * using hash-for-home to locate the packet data?
397 */
398 uint16_t hash_for_home : 1;
399 /** The length in bytes. */
400 uint16_t length;
401} lepp_frag_t;
402
403
404/** An LEPP command that handles TSO. */
405typedef struct
406{
407 /** Is this a TSO transfer?
408 *
409 * NOTE: This field is always 1, to distinguish it from
410 * lepp_cmd_t. It must come first!
411 */
412 uint8_t tso : 1;
413
414 /** Unused padding bits. */
415 uint8_t _unused : 7;
416
417 /** Size of the header[] array in bytes. It must be in the range
418 * [40, 127], which are the smallest header for a TCP packet over
419 * Ethernet and the maximum possible prepend size supported by
420 * hardware, respectively. Note that the array storage must be
421 * padded out to a multiple of four bytes so that the following
422 * LEPP command is aligned properly.
423 */
424 uint8_t header_size;
425
426 /** Byte offset of the IP header in header[]. */
427 uint8_t ip_offset;
428
429 /** Byte offset of the TCP header in header[]. */
430 uint8_t tcp_offset;
431
432 /** The number of bytes to use for the payload of each packet,
433 * except of course the last one, which may not have enough bytes.
434 * This means that each Ethernet packet except the last will have a
435 * size of header_size + payload_size.
436 */
437 uint16_t payload_size;
438
439 /** The length of the 'frags' array that follows this struct. */
440 uint16_t num_frags;
441
442 /** The actual frags. */
443 lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
444
445 /*
446 * The packet header template logically follows frags[],
447 * but you can't declare that in C.
448 *
449 * uint32_t header[header_size_in_words_rounded_up];
450 */
451
452} lepp_tso_cmd_t;
453
454
455/** An LEPP completion ring entry. */
456typedef void* lepp_comp_t;
457
458
459/** Maximum number of frags for one TSO command. This is adapted from
460 * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
461 * our page size of exactly 65536. We add one for a "body" fragment.
462 */
463#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
464
465/** Total number of bytes needed for an lepp_tso_cmd_t. */
466#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
467 (sizeof(lepp_tso_cmd_t) + \
468 (num_frags) * sizeof(lepp_frag_t) + \
469 (((header_size) + 3) & -4))
470
471/** The size of the lepp "cmd" queue. */
472#define LEPP_CMD_QUEUE_BYTES \
473 (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
474 (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
475
476/** The largest possible command that can go in lepp_queue_t::cmds[]. */
477#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
478
479/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
480 */
481#define LEPP_CMD_LIMIT \
482 (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
483
484/** The maximum number of completions in an LEPP queue. */
485#define LEPP_COMP_QUEUE_SIZE \
486 ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
487
488/** Increment an index modulo the queue size. */
489#define LEPP_QINC(var) \
490 (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1))
491
492/** A queue used to convey egress commands from the client to LEPP. */
493typedef struct
494{
495 /** Index of first completion not yet processed by user code.
496 * If this is equal to comp_busy, there are no such completions.
497 *
498 * NOTE: This is only read/written by the user.
499 */
500 unsigned int comp_head;
501
502 /** Index of first completion record not yet completed.
503 * If this is equal to comp_tail, there are no such completions.
504 * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever
505 * a command with the 'completion' bit set is finished.
506 *
507 * NOTE: This is only written by LEPP, only read by the user.
508 */
509 volatile unsigned int comp_busy;
510
511 /** Index of the first empty slot in the completion ring.
512 * Entries from this up to but not including comp_head (in ring order)
513 * can be filled in with completion data.
514 *
515 * NOTE: This is only read/written by the user.
516 */
517 unsigned int comp_tail;
518
519 /** Byte index of first command enqueued for LEPP but not yet processed.
520 *
521 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
522 *
523 * NOTE: LEPP advances this counter as soon as it no longer needs
524 * the cmds[] storage for this entry, but the transfer is not actually
525 * complete (i.e. the buffer pointed to by the command is no longer
526 * needed) until comp_busy advances.
527 *
528 * If this is equal to cmd_tail, the ring is empty.
529 *
530 * NOTE: This is only written by LEPP, only read by the user.
531 */
532 volatile unsigned int cmd_head;
533
534 /** Byte index of first empty slot in the command ring. This field can
535 * be incremented up to but not equal to cmd_head (because that would
536 * mean the ring is empty).
537 *
538 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
539 *
540 * NOTE: This is read/written by the user, only read by LEPP.
541 */
542 volatile unsigned int cmd_tail;
543
544 /** A ring of variable-sized egress DMA commands.
545 *
546 * NOTE: Only written by the user, only read by LEPP.
547 */
548 char cmds[LEPP_CMD_QUEUE_BYTES]
549 __attribute__((aligned(CHIP_L2_LINE_SIZE())));
550
551 /** A ring of user completion data.
552 * NOTE: Only read/written by the user.
553 */
554 lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
555 __attribute__((aligned(CHIP_L2_LINE_SIZE())));
556} lepp_queue_t;
557
558
559/** An internal helper function for determining the number of entries
560 * available in a ring buffer, given that there is one sentinel.
561 */
562static inline unsigned int
563_lepp_num_free_slots(unsigned int head, unsigned int tail)
564{
565 /*
566 * One entry is reserved for use as a sentinel, to distinguish
567 * "empty" from "full". So we compute
568 * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation.
569 */
570 return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0);
571}
572
573
574/** Returns how many new comp entries can be enqueued. */
575static inline unsigned int
576lepp_num_free_comp_slots(const lepp_queue_t* q)
577{
578 return _lepp_num_free_slots(q->comp_head, q->comp_tail);
579}
580
581static inline int
582lepp_qsub(int v1, int v2)
583{
584 int delta = v1 - v2;
585 return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
586}
587
588
589/** FIXME: Check this from linux, via a new "pwrite()" call. */
590#define LIPP_VERSION 1
591
592
593/** We use exactly two bytes of alignment padding. */
594#define LIPP_PACKET_PADDING 2
595
596/** The minimum size of a "small" buffer (including the padding). */
597#define LIPP_SMALL_PACKET_SIZE 128
598
599/*
600 * NOTE: The following two values should total to less than around
601 * 13582, to keep the total size used for "lipp_state_t" below 64K.
602 */
603
604/** The maximum number of "small" buffers.
605 * This is enough for 53 network cpus with 128 credits. Note that
606 * if these are exhausted, we will fall back to using large buffers.
607 */
608#define LIPP_SMALL_BUFFERS 6785
609
610/** The maximum number of "large" buffers.
611 * This is enough for 53 network cpus with 128 credits.
612 */
613#define LIPP_LARGE_BUFFERS 6785
614
615#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 000000000000..e1591bff61b5
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * Error codes returned from NetIO routines.
17 */
18
19#ifndef __NETIO_ERRORS_H__
20#define __NETIO_ERRORS_H__
21
22/**
23 * @addtogroup error
24 *
25 * @brief The error codes returned by NetIO functions.
26 *
27 * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
28 * a negative value if an error occurs.
29 *
30 * In cases where a NetIO function failed due to a error reported by
31 * system libraries, the error code will be the negation of the
32 * system errno at the time of failure. The @ref netio_strerror()
33 * function will deliver error strings for both NetIO and system error
34 * codes.
35 *
36 * @{
37 */
38
39/** The set of all NetIO errors. */
40typedef enum
41{
42 /** Operation successfully completed. */
43 NETIO_NO_ERROR = 0,
44
45 /** A packet was successfully retrieved from an input queue. */
46 NETIO_PKT = 0,
47
48 /** Largest NetIO error number. */
49 NETIO_ERR_MAX = -701,
50
51 /** The tile is not registered with the IPP. */
52 NETIO_NOT_REGISTERED = -701,
53
54 /** No packet was available to retrieve from the input queue. */
55 NETIO_NOPKT = -702,
56
57 /** The requested function is not implemented. */
58 NETIO_NOT_IMPLEMENTED = -703,
59
60 /** On a registration operation, the target queue already has the maximum
61 * number of tiles registered for it, and no more may be added. On a
62 * packet send operation, the output queue is full and nothing more can
63 * be queued until some of the queued packets are actually transmitted. */
64 NETIO_QUEUE_FULL = -704,
65
66 /** The calling process or thread is not bound to exactly one CPU. */
67 NETIO_BAD_AFFINITY = -705,
68
69 /** Cannot allocate memory on requested controllers. */
70 NETIO_CANNOT_HOME = -706,
71
72 /** On a registration operation, the IPP specified is not configured
73 * to support the options requested; for instance, the application
74 * wants a specific type of tagged headers which the configured IPP
75 * doesn't support. Or, the supplied configuration information is
76 * not self-consistent, or is out of range; for instance, specifying
77 * both NETIO_RECV and NETIO_NO_RECV, or asking for more than
78 * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket
79 * configure operation, the number of items, or the base item, was
80 * out of range.
81 */
82 NETIO_BAD_CONFIG = -707,
83
84 /** Too many tiles have registered to transmit packets. */
85 NETIO_TOOMANY_XMIT = -708,
86
87 /** Packet transmission was attempted on a queue which was registered
88 with transmit disabled. */
89 NETIO_UNREG_XMIT = -709,
90
91 /** This tile is already registered with the IPP. */
92 NETIO_ALREADY_REGISTERED = -710,
93
94 /** The Ethernet link is down. The application should try again later. */
95 NETIO_LINK_DOWN = -711,
96
97 /** An invalid memory buffer has been specified. This may be an unmapped
98 * virtual address, or one which does not meet alignment requirements.
99 * For netio_input_register(), this error may be returned when multiple
100 * processes specify different memory regions to be used for NetIO
101 * buffers. That can happen if these processes specify explicit memory
102 * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
103 * has not been called by a common ancestor of the processes.
104 */
105 NETIO_FAULT = -712,
106
107 /** Cannot combine user-managed shared memory and cache coherence. */
108 NETIO_BAD_CACHE_CONFIG = -713,
109
110 /** Smallest NetIO error number. */
111 NETIO_ERR_MIN = -713,
112
113#ifndef __DOXYGEN__
114 /** Used internally to mean that no response is needed; never returned to
115 * an application. */
116 NETIO_NO_RESPONSE = 1
117#endif
118} netio_error_t;
119
120/** @} */
121
122#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 000000000000..8d20972aba2c
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * NetIO interface structures and macros.
17 */
18
19#ifndef __NETIO_INTF_H__
20#define __NETIO_INTF_H__
21
22#include <hv/netio_errors.h>
23
24#ifdef __KERNEL__
25#include <linux/types.h>
26#else
27#include <stdint.h>
28#endif
29
30#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
31#include <assert.h>
32#define netio_assert assert /**< Enable assertions from macros */
33#else
34#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */
35#endif
36
37/*
38 * If none of these symbols are defined, we're building libnetio in an
39 * environment where we have pthreads, so we'll enable locking.
40 */
41#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
42 !defined(__NEWLIB__)
43#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */
44
45/*
46 * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on
47 * per-packet NetIO operations. We still do pthread locking on things
48 * like netio_input_register, though. This is used for building
49 * libnetio_unlocked.
50 */
51#ifndef NETIO_UNLOCKED
52
53/* Avoid PLT overhead by using our own inlined per-cpu lock. */
54#include <sched.h>
55typedef int _netio_percpu_mutex_t;
56
57static __inline int
58_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
59{
60 *lock = 0;
61 return 0;
62}
63
64static __inline int
65_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
66{
67 while (__builtin_expect(__insn_tns(lock), 0))
68 sched_yield();
69 return 0;
70}
71
72static __inline int
73_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
74{
75 *lock = 0;
76 return 0;
77}
78
79#else /* NETIO_UNLOCKED */
80
81/* Don't do any locking for per-packet NetIO operations. */
82typedef int _netio_percpu_mutex_t;
83#define _netio_percpu_mutex_init(L)
84#define _netio_percpu_mutex_lock(L)
85#define _netio_percpu_mutex_unlock(L)
86
87#endif /* NETIO_UNLOCKED */
88#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */
89
90/** How many tiles can register for a given queue.
91 * @ingroup setup */
92#define NETIO_MAX_TILES_PER_QUEUE 64
93
94
95/** Largest permissible queue identifier.
96 * @ingroup setup */
97#define NETIO_MAX_QUEUE_ID 255
98
99
100#ifndef __DOXYGEN__
101
102/* Metadata packet checksum/ethertype flags. */
103
104/** The L4 checksum has not been calculated. */
105#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0
106#define _NETIO_PKT_NO_L4_CSUM_RMASK 1
107#define _NETIO_PKT_NO_L4_CSUM_MASK \
108 (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
109
110/** The L3 checksum has not been calculated. */
111#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1
112#define _NETIO_PKT_NO_L3_CSUM_RMASK 1
113#define _NETIO_PKT_NO_L3_CSUM_MASK \
114 (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
115
116/** The L3 checksum is incorrect (or perhaps has not been calculated). */
117#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2
118#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1
119#define _NETIO_PKT_BAD_L3_CSUM_MASK \
120 (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
121
122/** The Ethernet packet type is unrecognized. */
123#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3
124#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1
125#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
126 (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
127 _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
128
129/* Metadata packet type flags. */
130
131/** Where the packet type bits are; this field is the index into
132 * _netio_pkt_info. */
133#define _NETIO_PKT_TYPE_SHIFT 4
134#define _NETIO_PKT_TYPE_RMASK 0x3F
135
136/** How many VLAN tags the packet has, and, if we have two, which one we
137 * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom)
138 * tag is counted here. */
139#define _NETIO_PKT_VLAN_SHIFT 4
140#define _NETIO_PKT_VLAN_RMASK 0x3
141#define _NETIO_PKT_VLAN_MASK \
142 (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
143#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */
144#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */
145#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */
146#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */
147
148/** Which proprietary tags the packet has. */
149#define _NETIO_PKT_TAG_SHIFT 6
150#define _NETIO_PKT_TAG_RMASK 0x3
151#define _NETIO_PKT_TAG_MASK \
152 (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
153#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */
154#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */
155#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */
156#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */
157
158/** Whether a packet has an LLC + SNAP header. */
159#define _NETIO_PKT_SNAP_SHIFT 8
160#define _NETIO_PKT_SNAP_RMASK 0x1
161#define _NETIO_PKT_SNAP_MASK \
162 (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
163
164/* NOTE: Bits 9 and 10 are unused. */
165
166/** Length of any custom data before the L2 header, in words. */
167#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11
168#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F
169#define _NETIO_PKT_CUSTOM_LEN_MASK \
170 (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
171
172/** The L4 checksum is incorrect (or perhaps has not been calculated). */
173#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
174#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
175#define _NETIO_PKT_BAD_L4_CSUM_MASK \
176 (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
177
178/** Length of the L2 header, in words. */
179#define _NETIO_PKT_L2_LEN_SHIFT 17
180#define _NETIO_PKT_L2_LEN_RMASK 0x1F
181#define _NETIO_PKT_L2_LEN_MASK \
182 (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
183
184
185/* Flags in minimal packet metadata. */
186
187/** We need an eDMA checksum on this packet. */
188#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0
189#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1
190#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
191 (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
192
193/* Data within the packet information table. */
194
195/* Note that, for efficiency, code which uses these fields assumes that none
196 * of the shift values below are zero. See uses below for an explanation. */
197
198/** Offset within the L2 header of the innermost ethertype (in halfwords). */
199#define _NETIO_PKT_INFO_ETYPE_SHIFT 6
200#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F
201
202/** Offset within the L2 header of the VLAN tag (in halfwords). */
203#define _NETIO_PKT_INFO_VLAN_SHIFT 11
204#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F
205
206#endif
207
208
209/** The size of a memory buffer representing a small packet.
210 * @ingroup egress */
211#define SMALL_PACKET_SIZE 256
212
213/** The size of a memory buffer representing a large packet.
214 * @ingroup egress */
215#define LARGE_PACKET_SIZE 2048
216
217/** The size of a memory buffer representing a jumbo packet.
218 * @ingroup egress */
219#define JUMBO_PACKET_SIZE (12 * 1024)
220
221
222/* Common ethertypes.
223 * @ingroup ingress */
224/** @{ */
225/** The ethertype of IPv4. */
226#define ETHERTYPE_IPv4 (0x0800)
227/** The ethertype of ARP. */
228#define ETHERTYPE_ARP (0x0806)
229/** The ethertype of VLANs. */
230#define ETHERTYPE_VLAN (0x8100)
231/** The ethertype of a Q-in-Q header. */
232#define ETHERTYPE_Q_IN_Q (0x9100)
233/** The ethertype of IPv6. */
234#define ETHERTYPE_IPv6 (0x86DD)
235/** The ethertype of MPLS. */
236#define ETHERTYPE_MPLS (0x8847)
237/** @} */
238
239
240/** The possible return values of NETIO_PKT_STATUS.
241 * @ingroup ingress
242 */
243typedef enum
244{
245 /** No problems were detected with this packet. */
246 NETIO_PKT_STATUS_OK,
247 /** The packet is undersized; this is expected behavior if the packet's
248 * ethertype is unrecognized, but otherwise the packet is likely corrupt. */
249 NETIO_PKT_STATUS_UNDERSIZE,
250 /** The packet is oversized and some trailing bytes have been discarded.
251 This is expected behavior for short packets, since it's impossible to
252 precisely determine the amount of padding which may have been added to
253 them to make them meet the minimum Ethernet packet size. */
254 NETIO_PKT_STATUS_OVERSIZE,
255 /** The packet was judged to be corrupt by hardware (for instance, it had
256 a bad CRC, or part of it was discarded due to lack of buffer space in
257 the I/O shim) and should be discarded. */
258 NETIO_PKT_STATUS_BAD
259} netio_pkt_status_t;
260
261
262/** Log2 of how many buckets we have. */
263#define NETIO_LOG2_NUM_BUCKETS (10)
264
265/** How many buckets we have.
266 * @ingroup ingress */
267#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
268
269
270/**
271 * @brief A group-to-bucket identifier.
272 *
273 * @ingroup setup
274 *
275 * This tells us what to do with a given group.
276 */
277typedef union {
278 /** The header broken down into bits. */
279 struct {
280 /** Whether we should balance on L4, if available */
281 unsigned int __balance_on_l4:1;
282 /** Whether we should balance on L3, if available */
283 unsigned int __balance_on_l3:1;
284 /** Whether we should balance on L2, if available */
285 unsigned int __balance_on_l2:1;
286 /** Reserved for future use */
287 unsigned int __reserved:1;
288 /** The base bucket to use to send traffic */
289 unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
290 /** The mask to apply to the balancing value. This must be one less
291 * than a power of two, e.g. 0x3 or 0xFF.
292 */
293 unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
294 /** Pad to 32 bits */
295 unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
296 } bits;
297 /** To send out the IDN. */
298 unsigned int word;
299}
300netio_group_t;
301
302
303/**
304 * @brief A VLAN-to-bucket identifier.
305 *
306 * @ingroup setup
307 *
308 * This tells us what to do with a given VLAN.
309 */
310typedef netio_group_t netio_vlan_t;
311
312
313/**
314 * A bucket-to-queue mapping.
315 * @ingroup setup
316 */
317typedef unsigned char netio_bucket_t;
318
319
320/**
321 * A packet size can always fit in a netio_size_t.
322 * @ingroup setup
323 */
324typedef unsigned int netio_size_t;
325
326
327/**
328 * @brief Ethernet standard (ingress) packet metadata.
329 *
330 * @ingroup ingress
331 *
332 * This is additional data associated with each packet.
333 * This structure is opaque and accessed through the @ref ingress.
334 *
335 * Also, the buffer population operation currently assumes that standard
336 * metadata is at least as large as minimal metadata, and will need to be
337 * modified if that is no longer the case.
338 */
339typedef struct
340{
341#ifdef __DOXYGEN__
342 /** This structure is opaque. */
343 unsigned char opaque[24];
344#else
345 /** The overall ordinal of the packet */
346 unsigned int __packet_ordinal;
347 /** The ordinal of the packet within the group */
348 unsigned int __group_ordinal;
349 /** The best flow hash IPP could compute. */
350 unsigned int __flow_hash;
351 /** Flags pertaining to checksum calculation, packet type, etc. */
352 unsigned int __flags;
353 /** The first word of "user data". */
354 unsigned int __user_data_0;
355 /** The second word of "user data". */
356 unsigned int __user_data_1;
357#endif
358}
359netio_pkt_metadata_t;
360
361
362/** To ensure that the L3 header is aligned mod 4, the L2 header should be
363 * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
364 * long. The standard way to do this is to simply add 2 bytes of padding
365 * before the L2 header.
366 */
367#define NETIO_PACKET_PADDING 2
368
369
370
371/**
372 * @brief Ethernet minimal (egress) packet metadata.
373 *
374 * @ingroup egress
375 *
376 * This structure represents information about packets which have
377 * been processed by @ref netio_populate_buffer() or
378 * @ref netio_populate_prepend_buffer(). This structure is opaque
379 * and accessed through the @ref egress.
380 *
381 * @internal This structure is actually copied into the memory used by
382 * standard metadata, which is assumed to be large enough.
383 */
384typedef struct
385{
386#ifdef __DOXYGEN__
387 /** This structure is opaque. */
388 unsigned char opaque[14];
389#else
390 /** The offset of the L2 header from the start of the packet data. */
391 unsigned short l2_offset;
392 /** The offset of the L3 header from the start of the packet data. */
393 unsigned short l3_offset;
394 /** Where to write the checksum. */
395 unsigned char csum_location;
396 /** Where to start checksumming from. */
397 unsigned char csum_start;
398 /** Flags pertaining to checksum calculation etc. */
399 unsigned short flags;
400 /** The L2 length of the packet. */
401 unsigned short l2_length;
402 /** The checksum with which to seed the checksum generator. */
403 unsigned short csum_seed;
404 /** How much to checksum. */
405 unsigned short csum_length;
406#endif
407}
408netio_pkt_minimal_metadata_t;
409
410
411#ifndef __DOXYGEN__
412
413/**
414 * @brief An I/O notification header.
415 *
416 * This is the first word of data received from an I/O shim in a notification
417 * packet. It contains framing and status information.
418 */
419typedef union
420{
421 unsigned int word; /**< The whole word. */
422 /** The various fields. */
423 struct
424 {
425 unsigned int __channel:7; /**< Resource channel. */
426 unsigned int __type:4; /**< Type. */
427 unsigned int __ack:1; /**< Whether an acknowledgement is needed. */
428 unsigned int __reserved:1; /**< Reserved. */
429 unsigned int __protocol:1; /**< A protocol-specific word is added. */
430 unsigned int __status:2; /**< Status of the transfer. */
431 unsigned int __framing:2; /**< Framing of the transfer. */
432 unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
433 } bits;
434}
435__netio_pkt_notif_t;
436
437
438/**
439 * Returns the base address of the packet.
440 */
441#define _NETIO_PKT_HANDLE_BASE(p) \
442 ((unsigned char*)((p).word & 0xFFFFFFC0))
443
444/**
445 * Returns the base address of the packet.
446 */
447#define _NETIO_PKT_BASE(p) \
448 _NETIO_PKT_HANDLE_BASE(p->__packet)
449
450/**
451 * @brief An I/O notification packet (second word)
452 *
453 * This is the second word of data received from an I/O shim in a notification
454 * packet. This is the virtual address of the packet buffer, plus some flag
455 * bits. (The virtual address of the packet is always 256-byte aligned so we
456 * have room for 8 bits' worth of flags in the low 8 bits.)
457 *
458 * @internal
459 * NOTE: The low two bits must contain "__queue", so the "packet size"
460 * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
461 *
462 * If __addr or __offset are moved, _NETIO_PKT_BASE
463 * (defined right below this) must be changed.
464 */
465typedef union
466{
467 unsigned int word; /**< The whole word. */
468 /** The various fields. */
469 struct
470 {
471 /** Which queue the packet will be returned to once it is sent back to
472 the IPP. This is one of the SIZE_xxx values. */
473 unsigned int __queue:2;
474
475 /** The IPP handle of the sending IPP. */
476 unsigned int __ipp_handle:2;
477
478 /** Reserved for future use. */
479 unsigned int __reserved:1;
480
481 /** If 1, this packet has minimal (egress) metadata; otherwise, it
482 has standard (ingress) metadata. */
483 unsigned int __minimal:1;
484
485 /** Offset of the metadata within the packet. This value is multiplied
486 * by 64 and added to the base packet address to get the metadata
487 * address. Note that this field is aligned within the word such that
488 * you can easily extract the metadata address with a 26-bit mask. */
489 unsigned int __offset:2;
490
491 /** The top 24 bits of the packet's virtual address. */
492 unsigned int __addr:24;
493 } bits;
494}
495__netio_pkt_handle_t;
496
497#endif /* !__DOXYGEN__ */
498
499
500/**
501 * @brief A handle for an I/O packet's storage.
502 * @ingroup ingress
503 *
504 * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
505 * packet metadata removed. It is a much smaller type that exists to
506 * facilitate applications where the full ::netio_pkt_t type is too
507 * large, such as those that cache enormous numbers of packets or wish
508 * to transmit packet descriptors over the UDN.
509 *
510 * Because there is no metadata, most ::netio_pkt_t operations cannot be
511 * performed on a netio_pkt_handle_t. It supports only
512 * netio_free_handle() (to free the buffer) and
513 * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
514 * The application must acquire any additional metadata it wants from the
515 * original ::netio_pkt_t and record it separately.
516 *
517 * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
518 * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be
519 * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
520 * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
521 */
522typedef struct
523{
524 unsigned int word; /**< Opaque bits. */
525} netio_pkt_handle_t;
526
527/**
528 * @brief A packet descriptor.
529 *
530 * @ingroup ingress
531 * @ingroup egress
532 *
533 * This data structure represents a packet. The structure is manipulated
534 * through the @ref ingress and the @ref egress.
535 *
536 * While the contents of a netio_pkt_t are opaque, the structure itself is
537 * portable. This means that it may be shared between all tiles which have
538 * done a netio_input_register() call for the interface on which the pkt_t
539 * was initially received (via netio_get_packet()) or retrieved (via
540 * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to
541 * another tile via shared memory, or via a UDN message, or by other means.
542 * The destination tile may then use the pkt_t as if it had originally been
543 * received locally; it may read or write the packet's data, read its
544 * metadata, free the packet, send the packet, transfer the netio_pkt_t to
545 * yet another tile, and so forth.
546 *
547 * Once a netio_pkt_t has been transferred to a second tile, the first tile
548 * should not reference the original copy; in particular, if more than one
549 * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
550 * become corrupted. Note also that each tile which reads or modifies
551 * packet data must obey the memory coherency rules outlined in @ref input.
552 */
553typedef struct
554{
555#ifdef __DOXYGEN__
556 /** This structure is opaque. */
557 unsigned char opaque[32];
558#else
559 /** For an ingress packet (one with standard metadata), this is the
560 * notification header we got from the I/O shim. For an egress packet
561 * (one with minimal metadata), this word is zero if the packet has not
562 * been populated, and nonzero if it has. */
563 __netio_pkt_notif_t __notif_header;
564
565 /** Virtual address of the packet buffer, plus state flags. */
566 __netio_pkt_handle_t __packet;
567
568 /** Metadata associated with the packet. */
569 netio_pkt_metadata_t __metadata;
570#endif
571}
572netio_pkt_t;
573
574
575#ifndef __DOXYGEN__
576
577#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
578#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
579#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
580#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
581#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
582#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
583#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
584#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
585
586/* Packet information table, used by the attribute access functions below. */
587extern const uint16_t _netio_pkt_info[];
588
589#endif /* __DOXYGEN__ */
590
591
592#ifndef __DOXYGEN__
593/* These macros are deprecated and will disappear in a future MDE release. */
594#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
595 NETIO_PKT_L4_CSUM_CORRECT(pkt)
596#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
597 NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
598#endif /* __DOXYGEN__ */
599
600
601/* Packet attribute access functions. */
602
603/** Return a pointer to the metadata for a packet.
604 * @ingroup ingress
605 *
606 * Calling this function once and passing the result to other retrieval
607 * functions with a "_M" suffix usually improves performance. This
608 * function must be called on an 'ingress' packet (i.e. one retrieved
609 * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
610 * @ref netio_populate_prepend_buffer have not been called). Use of this
611 * function on an 'egress' packet will cause an assertion failure.
612 *
613 * @param[in] pkt Packet on which to operate.
614 * @return A pointer to the packet's standard metadata.
615 */
616static __inline netio_pkt_metadata_t*
617NETIO_PKT_METADATA(netio_pkt_t* pkt)
618{
619 netio_assert(!pkt->__packet.bits.__minimal);
620 return &pkt->__metadata;
621}
622
623
624/** Return a pointer to the minimal metadata for a packet.
625 * @ingroup egress
626 *
627 * Calling this function once and passing the result to other retrieval
628 * functions with a "_MM" suffix usually improves performance. This
629 * function must be called on an 'egress' packet (i.e. one on which
630 * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
631 * have been called, or one retrieved by @ref netio_get_buffer()). Use of
632 * this function on an 'ingress' packet will cause an assertion failure.
633 *
634 * @param[in] pkt Packet on which to operate.
635 * @return A pointer to the packet's standard metadata.
636 */
637static __inline netio_pkt_minimal_metadata_t*
638NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
639{
640 netio_assert(pkt->__packet.bits.__minimal);
641 return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
642}
643
644
645/** Determine whether a packet has 'minimal' metadata.
646 * @ingroup pktfuncs
647 *
648 * This function will return nonzero if the packet is an 'egress'
649 * packet (i.e. one on which @ref netio_populate_buffer() or
650 * @ref netio_populate_prepend_buffer() have been called, or one
651 * retrieved by @ref netio_get_buffer()), and zero if the packet
652 * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
653 * which has not been converted into an 'egress' packet).
654 *
655 * @param[in] pkt Packet on which to operate.
656 * @return Nonzero if the packet has minimal metadata.
657 */
658static __inline unsigned int
659NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
660{
661 return pkt->__packet.bits.__minimal;
662}
663
664
665/** Return a handle for a packet's storage.
666 * @ingroup pktfuncs
667 *
668 * @param[in] pkt Packet on which to operate.
669 * @return A handle for the packet's storage.
670 */
671static __inline netio_pkt_handle_t
672NETIO_PKT_HANDLE(netio_pkt_t* pkt)
673{
674 netio_pkt_handle_t h;
675 h.word = pkt->__packet.word;
676 return h;
677}
678
679
680/** A special reserved value indicating the absence of a packet handle.
681 *
682 * @ingroup pktfuncs
683 */
684#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
685
686
687/** Test whether a packet handle is valid.
688 *
689 * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
690 * to indicate no packet at all. This function tests to see if a packet
691 * handle is a real handle, not this special reserved value.
692 *
693 * @ingroup pktfuncs
694 *
695 * @param[in] handle Handle on which to operate.
696 * @return One if the packet handle is valid, else zero.
697 */
698static __inline unsigned int
699NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
700{
701 return handle.word != 0;
702}
703
704
705
706/** Return a pointer to the start of the packet's custom header.
707 * A custom header may or may not be present, depending upon the IPP; its
708 * contents and alignment are also IPP-dependent. Currently, none of the
709 * standard IPPs supplied by Tilera produce a custom header. If present,
710 * the custom header precedes the L2 header in the packet buffer.
711 * @ingroup ingress
712 *
713 * @param[in] handle Handle on which to operate.
714 * @return A pointer to start of the packet.
715 */
716static __inline unsigned char*
717NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
718{
719 return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING;
720}
721
722
723/** Return the length of the packet's custom header.
724 * A custom header may or may not be present, depending upon the IPP; its
725 * contents and alignment are also IPP-dependent. Currently, none of the
726 * standard IPPs supplied by Tilera produce a custom header. If present,
727 * the custom header precedes the L2 header in the packet buffer.
728 *
729 * @ingroup ingress
730 *
731 * @param[in] mda Pointer to packet's standard metadata.
732 * @param[in] pkt Packet on which to operate.
733 * @return The length of the packet's custom header, in bytes.
734 */
735static __inline netio_size_t
736NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
737{
738 /*
739 * Note that we effectively need to extract a quantity from the flags word
740 * which is measured in words, and then turn it into bytes by shifting
741 * it left by 2. We do this all at once by just shifting right two less
742 * bits, and shifting the mask up two bits.
743 */
744 return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) &
745 (_NETIO_PKT_CUSTOM_LEN_RMASK << 2));
746}
747
748
749/** Return the length of the packet, starting with the custom header.
750 * A custom header may or may not be present, depending upon the IPP; its
751 * contents and alignment are also IPP-dependent. Currently, none of the
752 * standard IPPs supplied by Tilera produce a custom header. If present,
753 * the custom header precedes the L2 header in the packet buffer.
754 * @ingroup ingress
755 *
756 * @param[in] mda Pointer to packet's standard metadata.
757 * @param[in] pkt Packet on which to operate.
758 * @return The length of the packet, in bytes.
759 */
760static __inline netio_size_t
761NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
762{
763 return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size -
764 NETIO_PACKET_PADDING);
765}
766
767
768/** Return a pointer to the start of the packet's custom header.
769 * A custom header may or may not be present, depending upon the IPP; its
770 * contents and alignment are also IPP-dependent. Currently, none of the
771 * standard IPPs supplied by Tilera produce a custom header. If present,
772 * the custom header precedes the L2 header in the packet buffer.
773 * @ingroup ingress
774 *
775 * @param[in] mda Pointer to packet's standard metadata.
776 * @param[in] pkt Packet on which to operate.
777 * @return A pointer to start of the packet.
778 */
779static __inline unsigned char*
780NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
781{
782 return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt));
783}
784
785
786/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
787 * @ingroup ingress
788 *
789 * @param[in] mda Pointer to packet's standard metadata.
790 * @param[in] pkt Packet on which to operate.
791 * @return The length of the packet's L2 header, in bytes.
792 */
793static __inline netio_size_t
794NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
795{
796 /*
797 * Note that we effectively need to extract a quantity from the flags word
798 * which is measured in words, and then turn it into bytes by shifting
799 * it left by 2. We do this all at once by just shifting right two less
800 * bits, and shifting the mask up two bits. We then add two bytes.
801 */
802 return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) &
803 (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2;
804}
805
806
807/** Return the length of the packet, starting with the L2 (Ethernet) header.
808 * @ingroup ingress
809 *
810 * @param[in] mda Pointer to packet's standard metadata.
811 * @param[in] pkt Packet on which to operate.
812 * @return The length of the packet, in bytes.
813 */
814static __inline netio_size_t
815NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
816{
817 return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) -
818 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt));
819}
820
821
822/** Return a pointer to the start of the packet's L2 (Ethernet) header.
823 * @ingroup ingress
824 *
825 * @param[in] mda Pointer to packet's standard metadata.
826 * @param[in] pkt Packet on which to operate.
827 * @return A pointer to start of the packet.
828 */
829static __inline unsigned char*
830NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
831{
832 return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) +
833 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt));
834}
835
836
837/** Retrieve the length of the packet, starting with the L3 (generally,
838 * the IP) header.
839 * @ingroup ingress
840 *
841 * @param[in] mda Pointer to packet's standard metadata.
842 * @param[in] pkt Packet on which to operate.
843 * @return Length of the packet's L3 header and data, in bytes.
844 */
845static __inline netio_size_t
846NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
847{
848 return (NETIO_PKT_L2_LENGTH_M(mda, pkt) -
849 NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt));
850}
851
852
853/** Return a pointer to the packet's L3 (generally, the IP) header.
854 * @ingroup ingress
855 *
856 * Note that we guarantee word alignment of the L3 header.
857 *
858 * @param[in] mda Pointer to packet's standard metadata.
859 * @param[in] pkt Packet on which to operate.
860 * @return A pointer to the packet's L3 header.
861 */
862static __inline unsigned char*
863NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
864{
865 return (NETIO_PKT_L2_DATA_M(mda, pkt) +
866 NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt));
867}
868
869
870/** Return the ordinal of the packet.
871 * @ingroup ingress
872 *
873 * Each packet is given an ordinal number when it is delivered by the IPP.
874 * In the medium term, the ordinal is unique and monotonically increasing,
875 * being incremented by 1 for each packet; the ordinal of the first packet
876 * delivered after the IPP starts is zero. (Since the ordinal is of finite
877 * size, given enough input packets, it will eventually wrap around to zero;
878 * in the long term, therefore, ordinals are not unique.) The ordinals
879 * handed out by different IPPs are not disjoint, so two packets from
880 * different IPPs may have identical ordinals. Packets dropped by the
881 * IPP or by the I/O shim are not assigned ordinals.
882 *
883 * @param[in] mda Pointer to packet's standard metadata.
884 * @param[in] pkt Packet on which to operate.
885 * @return The packet's per-IPP packet ordinal.
886 */
887static __inline unsigned int
888NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
889{
890 return mda->__packet_ordinal;
891}
892
893
894/** Return the per-group ordinal of the packet.
895 * @ingroup ingress
896 *
897 * Each packet is given a per-group ordinal number when it is
898 * delivered by the IPP. By default, the group is the packet's VLAN,
899 * although IPP can be recompiled to use different values. In
900 * the medium term, the ordinal is unique and monotonically
901 * increasing, being incremented by 1 for each packet; the ordinal of
902 * the first packet distributed to a particular group is zero.
903 * (Since the ordinal is of finite size, given enough input packets,
904 * it will eventually wrap around to zero; in the long term,
905 * therefore, ordinals are not unique.) The ordinals handed out by
906 * different IPPs are not disjoint, so two packets from different IPPs
907 * may have identical ordinals; similarly, packets distributed to
908 * different groups may have identical ordinals. Packets dropped by
909 * the IPP or by the I/O shim are not assigned ordinals.
910 *
911 * @param[in] mda Pointer to packet's standard metadata.
912 * @param[in] pkt Packet on which to operate.
913 * @return The packet's per-IPP, per-group ordinal.
914 */
915static __inline unsigned int
916NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
917{
918 return mda->__group_ordinal;
919}
920
921
922/** Return the VLAN ID assigned to the packet.
923 * @ingroup ingress
924 *
925 * This value is usually contained within the packet header.
926 *
927 * This value will be zero if the packet does not have a VLAN tag, or if
928 * this value was not extracted from the packet.
929 *
930 * @param[in] mda Pointer to packet's standard metadata.
931 * @param[in] pkt Packet on which to operate.
932 * @return The packet's VLAN ID.
933 */
934static __inline unsigned short
935NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
936{
937 int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
938 unsigned short* pkt_p;
939 int index;
940 unsigned short val;
941
942 if (vl == _NETIO_PKT_VLAN_NONE)
943 return 0;
944
945 pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
946 index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
947
948 val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
949 _NETIO_PKT_INFO_VLAN_RMASK];
950
951#ifdef __TILECC__
952 return (__insn_bytex(val) >> 16) & 0xFFF;
953#else
954 return (__builtin_bswap32(val) >> 16) & 0xFFF;
955#endif
956}
957
958
959/** Return the ethertype of the packet.
960 * @ingroup ingress
961 *
962 * This value is usually contained within the packet header.
963 *
964 * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
965 * returns true, and otherwise, may not be well defined.
966 *
967 * @param[in] mda Pointer to packet's standard metadata.
968 * @param[in] pkt Packet on which to operate.
969 * @return The packet's ethertype.
970 */
971static __inline unsigned short
972NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
973{
974 unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
975 int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
976
977 unsigned short val =
978 pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
979 _NETIO_PKT_INFO_ETYPE_RMASK];
980
981 return __builtin_bswap32(val) >> 16;
982}
983
984
985/** Return the flow hash computed on the packet.
986 * @ingroup ingress
987 *
988 * For TCP and UDP packets, this hash is calculated by hashing together
989 * the "5-tuple" values, specifically the source IP address, destination
990 * IP address, protocol type, source port and destination port.
991 * The hash value is intended to be helpful for millions of distinct
992 * flows.
993 *
994 * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
995 * derived by hashing together the source and destination IP addresses.
996 *
997 * For MPLS-encapsulated packets, the flow hash is derived by hashing
998 * the first MPLS label.
999 *
1000 * For all other packets the flow hash is computed from the source
1001 * and destination Ethernet addresses.
1002 *
1003 * The hash is symmetric, meaning it produces the same value if the
1004 * source and destination are swapped. The only exceptions are
1005 * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
1006 * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
1007 * (Encap Security Payload), which use only the destination address
1008 * since the source address is not meaningful.
1009 *
1010 * @param[in] mda Pointer to packet's standard metadata.
1011 * @param[in] pkt Packet on which to operate.
1012 * @return The packet's 32-bit flow hash.
1013 */
1014static __inline unsigned int
1015NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1016{
1017 return mda->__flow_hash;
1018}
1019
1020
1021/** Return the first word of "user data" for the packet.
1022 *
1023 * The contents of the user data words depend on the IPP.
1024 *
1025 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
1026 * word of user data contains the least significant bits of the 64-bit
1027 * arrival cycle count (see @c get_cycle_count_low()).
1028 *
1029 * See the <em>System Programmer's Guide</em> for details.
1030 *
1031 * @ingroup ingress
1032 *
1033 * @param[in] mda Pointer to packet's standard metadata.
1034 * @param[in] pkt Packet on which to operate.
1035 * @return The packet's first word of "user data".
1036 */
1037static __inline unsigned int
1038NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1039{
1040 return mda->__user_data_0;
1041}
1042
1043
1044/** Return the second word of "user data" for the packet.
1045 *
1046 * The contents of the user data words depend on the IPP.
1047 *
1048 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
1049 * word of user data contains the most significant bits of the 64-bit
1050 * arrival cycle count (see @c get_cycle_count_high()).
1051 *
1052 * See the <em>System Programmer's Guide</em> for details.
1053 *
1054 * @ingroup ingress
1055 *
1056 * @param[in] mda Pointer to packet's standard metadata.
1057 * @param[in] pkt Packet on which to operate.
1058 * @return The packet's second word of "user data".
1059 */
1060static __inline unsigned int
1061NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1062{
1063 return mda->__user_data_1;
1064}
1065
1066
1067/** Determine whether the L4 (TCP/UDP) checksum was calculated.
1068 * @ingroup ingress
1069 *
1070 * @param[in] mda Pointer to packet's standard metadata.
1071 * @param[in] pkt Packet on which to operate.
1072 * @return Nonzero if the L4 checksum was calculated.
1073 */
1074static __inline unsigned int
1075NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1076{
1077 return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK);
1078}
1079
1080
1081/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
1082 * be correct.
1083 * @ingroup ingress
1084 *
1085 * @param[in] mda Pointer to packet's standard metadata.
1086 * @param[in] pkt Packet on which to operate.
1087 * @return Nonzero if the checksum was calculated and is correct.
1088 */
1089static __inline unsigned int
1090NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1091{
1092 return !(mda->__flags &
1093 (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK));
1094}
1095
1096
1097/** Determine whether the L3 (IP) checksum was calculated.
1098 * @ingroup ingress
1099 *
1100 * @param[in] mda Pointer to packet's standard metadata.
1101 * @param[in] pkt Packet on which to operate.
1102 * @return Nonzero if the L3 (IP) checksum was calculated.
1103*/
1104static __inline unsigned int
1105NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1106{
1107 return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK);
1108}
1109
1110
1111/** Determine whether the L3 (IP) checksum was calculated and found to be
1112 * correct.
1113 * @ingroup ingress
1114 *
1115 * @param[in] mda Pointer to packet's standard metadata.
1116 * @param[in] pkt Packet on which to operate.
1117 * @return Nonzero if the checksum was calculated and is correct.
1118 */
1119static __inline unsigned int
1120NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1121{
1122 return !(mda->__flags &
1123 (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK));
1124}
1125
1126
1127/** Determine whether the ethertype was recognized and L3 packet data was
1128 * processed.
1129 * @ingroup ingress
1130 *
1131 * @param[in] mda Pointer to packet's standard metadata.
1132 * @param[in] pkt Packet on which to operate.
1133 * @return Nonzero if the ethertype was recognized and L3 packet data was
1134 * processed.
1135 */
1136static __inline unsigned int
1137NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1138{
1139 return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK);
1140}
1141
1142
1143/** Retrieve the status of a packet and any errors that may have occurred
1144 * during ingress processing (length mismatches, CRC errors, etc.).
1145 * @ingroup ingress
1146 *
1147 * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1148 * returns zero are always reported as underlength, as there is no a priori
1149 * means to determine their length. Normally, applications should use
1150 * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
1151 * function.
1152 *
1153 * @param[in] mda Pointer to packet's standard metadata.
1154 * @param[in] pkt Packet on which to operate.
1155 * @return The packet's status.
1156 */
1157static __inline netio_pkt_status_t
1158NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1159{
1160 return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
1161}
1162
1163
1164/** Report whether a packet is bad (i.e., was shorter than expected based on
1165 * its headers, or had a bad CRC).
1166 * @ingroup ingress
1167 *
1168 * Note that this function does not verify L3 or L4 checksums.
1169 *
1170 * @param[in] mda Pointer to packet's standard metadata.
1171 * @param[in] pkt Packet on which to operate.
1172 * @return Nonzero if the packet is bad and should be discarded.
1173 */
1174static __inline unsigned int
1175NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1176{
1177 return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) &&
1178 (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) ||
1179 NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD));
1180}
1181
1182
1183/** Return the length of the packet, starting with the L2 (Ethernet) header.
1184 * @ingroup egress
1185 *
1186 * @param[in] mmd Pointer to packet's minimal metadata.
1187 * @param[in] pkt Packet on which to operate.
1188 * @return The length of the packet, in bytes.
1189 */
1190static __inline netio_size_t
1191NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1192{
1193 return mmd->l2_length;
1194}
1195
1196
1197/** Return the length of the L2 (Ethernet) header.
1198 * @ingroup egress
1199 *
1200 * @param[in] mmd Pointer to packet's minimal metadata.
1201 * @param[in] pkt Packet on which to operate.
1202 * @return The length of the packet's L2 header, in bytes.
1203 */
1204static __inline netio_size_t
1205NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
1206 netio_pkt_t* pkt)
1207{
1208 return mmd->l3_offset - mmd->l2_offset;
1209}
1210
1211
1212/** Return the length of the packet, starting with the L3 (IP) header.
1213 * @ingroup egress
1214 *
1215 * @param[in] mmd Pointer to packet's minimal metadata.
1216 * @param[in] pkt Packet on which to operate.
1217 * @return Length of the packet's L3 header and data, in bytes.
1218 */
1219static __inline netio_size_t
1220NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1221{
1222 return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) -
1223 NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt));
1224}
1225
1226
1227/** Return a pointer to the packet's L3 (generally, the IP) header.
1228 * @ingroup egress
1229 *
1230 * Note that we guarantee word alignment of the L3 header.
1231 *
1232 * @param[in] mmd Pointer to packet's minimal metadata.
1233 * @param[in] pkt Packet on which to operate.
1234 * @return A pointer to the packet's L3 header.
1235 */
1236static __inline unsigned char*
1237NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1238{
1239 return _NETIO_PKT_BASE(pkt) + mmd->l3_offset;
1240}
1241
1242
1243/** Return a pointer to the packet's L2 (Ethernet) header.
1244 * @ingroup egress
1245 *
1246 * @param[in] mmd Pointer to packet's minimal metadata.
1247 * @param[in] pkt Packet on which to operate.
1248 * @return A pointer to start of the packet.
1249 */
1250static __inline unsigned char*
1251NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1252{
1253 return _NETIO_PKT_BASE(pkt) + mmd->l2_offset;
1254}
1255
1256
1257/** Retrieve the status of a packet and any errors that may have occurred
1258 * during ingress processing (length mismatches, CRC errors, etc.).
1259 * @ingroup ingress
1260 *
1261 * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1262 * returns zero are always reported as underlength, as there is no a priori
1263 * means to determine their length. Normally, applications should use
1264 * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
1265 * function.
1266 *
1267 * @param[in] pkt Packet on which to operate.
1268 * @return The packet's status.
1269 */
1270static __inline netio_pkt_status_t
1271NETIO_PKT_STATUS(netio_pkt_t* pkt)
1272{
1273 netio_assert(!pkt->__packet.bits.__minimal);
1274
1275 return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
1276}
1277
1278
1279/** Report whether a packet is bad (i.e., was shorter than expected based on
1280 * its headers, or had a bad CRC).
1281 * @ingroup ingress
1282 *
1283 * Note that this function does not verify L3 or L4 checksums.
1284 *
1285 * @param[in] pkt Packet on which to operate.
1286 * @return Nonzero if the packet is bad and should be discarded.
1287 */
1288static __inline unsigned int
1289NETIO_PKT_BAD(netio_pkt_t* pkt)
1290{
1291 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1292
1293 return NETIO_PKT_BAD_M(mda, pkt);
1294}
1295
1296
1297/** Return the length of the packet's custom header.
1298 * A custom header may or may not be present, depending upon the IPP; its
1299 * contents and alignment are also IPP-dependent. Currently, none of the
1300 * standard IPPs supplied by Tilera produce a custom header. If present,
1301 * the custom header precedes the L2 header in the packet buffer.
1302 * @ingroup pktfuncs
1303 *
1304 * @param[in] pkt Packet on which to operate.
1305 * @return The length of the packet's custom header, in bytes.
1306 */
1307static __inline netio_size_t
1308NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
1309{
1310 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1311
1312 return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
1313}
1314
1315
1316/** Return the length of the packet, starting with the custom header.
1317 * A custom header may or may not be present, depending upon the IPP; its
1318 * contents and alignment are also IPP-dependent. Currently, none of the
1319 * standard IPPs supplied by Tilera produce a custom header. If present,
1320 * the custom header precedes the L2 header in the packet buffer.
1321 * @ingroup pktfuncs
1322 *
1323 * @param[in] pkt Packet on which to operate.
1324 * @return The length of the packet, in bytes.
1325 */
1326static __inline netio_size_t
1327NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
1328{
1329 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1330
1331 return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
1332}
1333
1334
1335/** Return a pointer to the packet's custom header.
1336 * A custom header may or may not be present, depending upon the IPP; its
1337 * contents and alignment are also IPP-dependent. Currently, none of the
1338 * standard IPPs supplied by Tilera produce a custom header. If present,
1339 * the custom header precedes the L2 header in the packet buffer.
1340 * @ingroup pktfuncs
1341 *
1342 * @param[in] pkt Packet on which to operate.
1343 * @return A pointer to start of the packet.
1344 */
1345static __inline unsigned char*
1346NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
1347{
1348 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1349
1350 return NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
1351}
1352
1353
1354/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
1355 * @ingroup pktfuncs
1356 *
1357 * @param[in] pkt Packet on which to operate.
1358 * @return The length of the packet's L2 header, in bytes.
1359 */
1360static __inline netio_size_t
1361NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
1362{
1363 if (NETIO_PKT_IS_MINIMAL(pkt))
1364 {
1365 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1366
1367 return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
1368 }
1369 else
1370 {
1371 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1372
1373 return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
1374 }
1375}
1376
1377
1378/** Return the length of the packet, starting with the L2 (Ethernet) header.
1379 * @ingroup pktfuncs
1380 *
1381 * @param[in] pkt Packet on which to operate.
1382 * @return The length of the packet, in bytes.
1383 */
1384static __inline netio_size_t
1385NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
1386{
1387 if (NETIO_PKT_IS_MINIMAL(pkt))
1388 {
1389 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1390
1391 return NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
1392 }
1393 else
1394 {
1395 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1396
1397 return NETIO_PKT_L2_LENGTH_M(mda, pkt);
1398 }
1399}
1400
1401
1402/** Return a pointer to the packet's L2 (Ethernet) header.
1403 * @ingroup pktfuncs
1404 *
1405 * @param[in] pkt Packet on which to operate.
1406 * @return A pointer to start of the packet.
1407 */
1408static __inline unsigned char*
1409NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
1410{
1411 if (NETIO_PKT_IS_MINIMAL(pkt))
1412 {
1413 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1414
1415 return NETIO_PKT_L2_DATA_MM(mmd, pkt);
1416 }
1417 else
1418 {
1419 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1420
1421 return NETIO_PKT_L2_DATA_M(mda, pkt);
1422 }
1423}
1424
1425
1426/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
1427 * header.
1428 * @ingroup pktfuncs
1429 *
1430 * @param[in] pkt Packet on which to operate.
1431 * @return Length of the packet's L3 header and data, in bytes.
1432 */
1433static __inline netio_size_t
1434NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
1435{
1436 if (NETIO_PKT_IS_MINIMAL(pkt))
1437 {
1438 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1439
1440 return NETIO_PKT_L3_LENGTH_MM(mmd, pkt);
1441 }
1442 else
1443 {
1444 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1445
1446 return NETIO_PKT_L3_LENGTH_M(mda, pkt);
1447 }
1448}
1449
1450
1451/** Return a pointer to the packet's L3 (generally, the IP) header.
1452 * @ingroup pktfuncs
1453 *
1454 * Note that we guarantee word alignment of the L3 header.
1455 *
1456 * @param[in] pkt Packet on which to operate.
1457 * @return A pointer to the packet's L3 header.
1458 */
1459static __inline unsigned char*
1460NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
1461{
1462 if (NETIO_PKT_IS_MINIMAL(pkt))
1463 {
1464 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1465
1466 return NETIO_PKT_L3_DATA_MM(mmd, pkt);
1467 }
1468 else
1469 {
1470 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1471
1472 return NETIO_PKT_L3_DATA_M(mda, pkt);
1473 }
1474}
1475
1476
1477/** Return the ordinal of the packet.
1478 * @ingroup ingress
1479 *
1480 * Each packet is given an ordinal number when it is delivered by the IPP.
1481 * In the medium term, the ordinal is unique and monotonically increasing,
1482 * being incremented by 1 for each packet; the ordinal of the first packet
1483 * delivered after the IPP starts is zero. (Since the ordinal is of finite
1484 * size, given enough input packets, it will eventually wrap around to zero;
1485 * in the long term, therefore, ordinals are not unique.) The ordinals
1486 * handed out by different IPPs are not disjoint, so two packets from
1487 * different IPPs may have identical ordinals. Packets dropped by the
1488 * IPP or by the I/O shim are not assigned ordinals.
1489 *
1490 *
1491 * @param[in] pkt Packet on which to operate.
1492 * @return The packet's per-IPP packet ordinal.
1493 */
1494static __inline unsigned int
1495NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
1496{
1497 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1498
1499 return NETIO_PKT_ORDINAL_M(mda, pkt);
1500}
1501
1502
1503/** Return the per-group ordinal of the packet.
1504 * @ingroup ingress
1505 *
1506 * Each packet is given a per-group ordinal number when it is
1507 * delivered by the IPP. By default, the group is the packet's VLAN,
1508 * although IPP can be recompiled to use different values. In
1509 * the medium term, the ordinal is unique and monotonically
1510 * increasing, being incremented by 1 for each packet; the ordinal of
1511 * the first packet distributed to a particular group is zero.
1512 * (Since the ordinal is of finite size, given enough input packets,
1513 * it will eventually wrap around to zero; in the long term,
1514 * therefore, ordinals are not unique.) The ordinals handed out by
1515 * different IPPs are not disjoint, so two packets from different IPPs
1516 * may have identical ordinals; similarly, packets distributed to
1517 * different groups may have identical ordinals. Packets dropped by
1518 * the IPP or by the I/O shim are not assigned ordinals.
1519 *
1520 * @param[in] pkt Packet on which to operate.
1521 * @return The packet's per-IPP, per-group ordinal.
1522 */
1523static __inline unsigned int
1524NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
1525{
1526 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1527
1528 return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt);
1529}
1530
1531
1532/** Return the VLAN ID assigned to the packet.
1533 * @ingroup ingress
1534 *
1535 * This is usually also contained within the packet header. If the packet
1536 * does not have a VLAN tag, the VLAN ID returned by this function is zero.
1537 *
1538 * @param[in] pkt Packet on which to operate.
1539 * @return The packet's VLAN ID.
1540 */
1541static __inline unsigned short
1542NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
1543{
1544 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1545
1546 return NETIO_PKT_VLAN_ID_M(mda, pkt);
1547}
1548
1549
1550/** Return the ethertype of the packet.
1551 * @ingroup ingress
1552 *
1553 * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1554 * returns true, and otherwise, may not be well defined.
1555 *
1556 * @param[in] pkt Packet on which to operate.
1557 * @return The packet's ethertype.
1558 */
1559static __inline unsigned short
1560NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
1561{
1562 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1563
1564 return NETIO_PKT_ETHERTYPE_M(mda, pkt);
1565}
1566
1567
1568/** Return the flow hash computed on the packet.
1569 * @ingroup ingress
1570 *
1571 * For TCP and UDP packets, this hash is calculated by hashing together
1572 * the "5-tuple" values, specifically the source IP address, destination
1573 * IP address, protocol type, source port and destination port.
1574 * The hash value is intended to be helpful for millions of distinct
1575 * flows.
1576 *
1577 * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
1578 * derived by hashing together the source and destination IP addresses.
1579 *
1580 * For MPLS-encapsulated packets, the flow hash is derived by hashing
1581 * the first MPLS label.
1582 *
1583 * For all other packets the flow hash is computed from the source
1584 * and destination Ethernet addresses.
1585 *
1586 * The hash is symmetric, meaning it produces the same value if the
1587 * source and destination are swapped. The only exceptions are
1588 * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
1589 * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
1590 * (Encap Security Payload), which use only the destination address
1591 * since the source address is not meaningful.
1592 *
1593 * @param[in] pkt Packet on which to operate.
1594 * @return The packet's 32-bit flow hash.
1595 */
1596static __inline unsigned int
1597NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
1598{
1599 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1600
1601 return NETIO_PKT_FLOW_HASH_M(mda, pkt);
1602}
1603
1604
1605/** Return the first word of "user data" for the packet.
1606 *
1607 * The contents of the user data words depend on the IPP.
1608 *
1609 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
1610 * word of user data contains the least significant bits of the 64-bit
1611 * arrival cycle count (see @c get_cycle_count_low()).
1612 *
1613 * See the <em>System Programmer's Guide</em> for details.
1614 *
1615 * @ingroup ingress
1616 *
1617 * @param[in] pkt Packet on which to operate.
1618 * @return The packet's first word of "user data".
1619 */
1620static __inline unsigned int
1621NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
1622{
1623 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1624
1625 return NETIO_PKT_USER_DATA_0_M(mda, pkt);
1626}
1627
1628
1629/** Return the second word of "user data" for the packet.
1630 *
1631 * The contents of the user data words depend on the IPP.
1632 *
1633 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
1634 * word of user data contains the most significant bits of the 64-bit
1635 * arrival cycle count (see @c get_cycle_count_high()).
1636 *
1637 * See the <em>System Programmer's Guide</em> for details.
1638 *
1639 * @ingroup ingress
1640 *
1641 * @param[in] pkt Packet on which to operate.
1642 * @return The packet's second word of "user data".
1643 */
1644static __inline unsigned int
1645NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
1646{
1647 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1648
1649 return NETIO_PKT_USER_DATA_1_M(mda, pkt);
1650}
1651
1652
1653/** Determine whether the L4 (TCP/UDP) checksum was calculated.
1654 * @ingroup ingress
1655 *
1656 * @param[in] pkt Packet on which to operate.
1657 * @return Nonzero if the L4 checksum was calculated.
1658 */
1659static __inline unsigned int
1660NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
1661{
1662 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1663
1664 return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt);
1665}
1666
1667
1668/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
1669 * be correct.
1670 * @ingroup ingress
1671 *
1672 * @param[in] pkt Packet on which to operate.
1673 * @return Nonzero if the checksum was calculated and is correct.
1674 */
1675static __inline unsigned int
1676NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
1677{
1678 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1679
1680 return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt);
1681}
1682
1683
1684/** Determine whether the L3 (IP) checksum was calculated.
1685 * @ingroup ingress
1686 *
1687 * @param[in] pkt Packet on which to operate.
1688 * @return Nonzero if the L3 (IP) checksum was calculated.
1689*/
1690static __inline unsigned int
1691NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
1692{
1693 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1694
1695 return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt);
1696}
1697
1698
1699/** Determine whether the L3 (IP) checksum was calculated and found to be
1700 * correct.
1701 * @ingroup ingress
1702 *
1703 * @param[in] pkt Packet on which to operate.
1704 * @return Nonzero if the checksum was calculated and is correct.
1705 */
1706static __inline unsigned int
1707NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
1708{
1709 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1710
1711 return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt);
1712}
1713
1714
1715/** Determine whether the Ethertype was recognized and L3 packet data was
1716 * processed.
1717 * @ingroup ingress
1718 *
1719 * @param[in] pkt Packet on which to operate.
1720 * @return Nonzero if the Ethertype was recognized and L3 packet data was
1721 * processed.
1722 */
1723static __inline unsigned int
1724NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
1725{
1726 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1727
1728 return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt);
1729}
1730
1731
1732/** Set an egress packet's L2 length, using a metadata pointer to speed the
1733 * computation.
1734 * @ingroup egress
1735 *
1736 * @param[in,out] mmd Pointer to packet's minimal metadata.
1737 * @param[in] pkt Packet on which to operate.
1738 * @param[in] len Packet L2 length, in bytes.
1739 */
1740static __inline void
1741NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
1742 int len)
1743{
1744 mmd->l2_length = len;
1745}
1746
1747
1748/** Set an egress packet's L2 length.
1749 * @ingroup egress
1750 *
1751 * @param[in,out] pkt Packet on which to operate.
1752 * @param[in] len Packet L2 length, in bytes.
1753 */
1754static __inline void
1755NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
1756{
1757 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1758
1759 NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len);
1760}
1761
1762
1763/** Set an egress packet's L2 header length, using a metadata pointer to
1764 * speed the computation.
1765 * @ingroup egress
1766 *
1767 * It is not normally necessary to call this routine; only the L2 length,
1768 * not the header length, is needed to transmit a packet. It may be useful if
1769 * the egress packet will later be processed by code which expects to use
1770 * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
1771 *
1772 * @param[in,out] mmd Pointer to packet's minimal metadata.
1773 * @param[in] pkt Packet on which to operate.
1774 * @param[in] len Packet L2 header length, in bytes.
1775 */
1776static __inline void
1777NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
1778 netio_pkt_t* pkt, int len)
1779{
1780 mmd->l3_offset = mmd->l2_offset + len;
1781}
1782
1783
1784/** Set an egress packet's L2 header length.
1785 * @ingroup egress
1786 *
1787 * It is not normally necessary to call this routine; only the L2 length,
1788 * not the header length, is needed to transmit a packet. It may be useful if
1789 * the egress packet will later be processed by code which expects to use
1790 * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
1791 *
1792 * @param[in,out] pkt Packet on which to operate.
1793 * @param[in] len Packet L2 header length, in bytes.
1794 */
1795static __inline void
1796NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
1797{
1798 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1799
1800 NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len);
1801}
1802
1803
1804/** Set up an egress packet for hardware checksum computation, using a
1805 * metadata pointer to speed the operation.
1806 * @ingroup egress
1807 *
1808 * NetIO provides the ability to automatically calculate a standard
1809 * 16-bit Internet checksum on transmitted packets. The application
1810 * may specify the point in the packet where the checksum starts, the
1811 * number of bytes to be checksummed, and the two bytes in the packet
1812 * which will be replaced with the completed checksum. (If the range
1813 * of bytes to be checksummed includes the bytes to be replaced, the
1814 * initial values of those bytes will be included in the checksum.)
1815 *
1816 * For some protocols, the packet checksum covers data which is not present
1817 * in the packet, or is at least not contiguous to the main data payload.
1818 * For instance, the TCP checksum includes a "pseudo-header" which includes
1819 * the source and destination IP addresses of the packet. To accommodate
1820 * this, the checksum engine may be "seeded" with an initial value, which
1821 * the application would need to compute based on the specific protocol's
1822 * requirements. Note that the seed is given in host byte order (little-
1823 * endian), not network byte order (big-endian); code written to compute a
1824 * pseudo-header checksum in network byte order will need to byte-swap it
1825 * before use as the seed.
1826 *
1827 * Note that the checksum is computed as part of the transmission process,
1828 * so it will not be present in the packet upon completion of this routine.
1829 *
1830 * @param[in,out] mmd Pointer to packet's minimal metadata.
1831 * @param[in] pkt Packet on which to operate.
1832 * @param[in] start Offset within L2 packet of the first byte to include in
1833 * the checksum.
1834 * @param[in] length Number of bytes to include in the checksum.
1835 * the checksum.
1836 * @param[in] location Offset within L2 packet of the first of the two bytes
1837 * to be replaced with the calculated checksum.
1838 * @param[in] seed Initial value of the running checksum before any of the
1839 * packet data is added.
1840 */
1841static __inline void
1842NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
1843 netio_pkt_t* pkt, int start, int length,
1844 int location, uint16_t seed)
1845{
1846 mmd->csum_start = start;
1847 mmd->csum_length = length;
1848 mmd->csum_location = location;
1849 mmd->csum_seed = seed;
1850 mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
1851}
1852
1853
1854/** Set up an egress packet for hardware checksum computation.
1855 * @ingroup egress
1856 *
1857 * NetIO provides the ability to automatically calculate a standard
1858 * 16-bit Internet checksum on transmitted packets. The application
1859 * may specify the point in the packet where the checksum starts, the
1860 * number of bytes to be checksummed, and the two bytes in the packet
1861 * which will be replaced with the completed checksum. (If the range
1862 * of bytes to be checksummed includes the bytes to be replaced, the
1863 * initial values of those bytes will be included in the checksum.)
1864 *
1865 * For some protocols, the packet checksum covers data which is not present
1866 * in the packet, or is at least not contiguous to the main data payload.
1867 * For instance, the TCP checksum includes a "pseudo-header" which includes
1868 * the source and destination IP addresses of the packet. To accommodate
1869 * this, the checksum engine may be "seeded" with an initial value, which
1870 * the application would need to compute based on the specific protocol's
1871 * requirements. Note that the seed is given in host byte order (little-
1872 * endian), not network byte order (big-endian); code written to compute a
1873 * pseudo-header checksum in network byte order will need to byte-swap it
1874 * before use as the seed.
1875 *
1876 * Note that the checksum is computed as part of the transmission process,
1877 * so it will not be present in the packet upon completion of this routine.
1878 *
1879 * @param[in,out] pkt Packet on which to operate.
1880 * @param[in] start Offset within L2 packet of the first byte to include in
1881 * the checksum.
1882 * @param[in] length Number of bytes to include in the checksum.
1883 * the checksum.
1884 * @param[in] location Offset within L2 packet of the first of the two bytes
1885 * to be replaced with the calculated checksum.
1886 * @param[in] seed Initial value of the running checksum before any of the
1887 * packet data is added.
1888 */
1889static __inline void
1890NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
1891 int location, uint16_t seed)
1892{
1893 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1894
1895 NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
1896}
1897
1898
1899/** Return the number of bytes which could be prepended to a packet, using a
1900 * metadata pointer to speed the operation.
1901 * See @ref netio_populate_prepend_buffer() to get a full description of
1902 * prepending.
1903 *
1904 * @param[in,out] mda Pointer to packet's standard metadata.
1905 * @param[in] pkt Packet on which to operate.
1906 */
1907static __inline int
1908NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1909{
1910 return (pkt->__packet.bits.__offset << 6) +
1911 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
1912}
1913
1914
1915/** Return the number of bytes which could be prepended to a packet, using a
1916 * metadata pointer to speed the operation.
1917 * See @ref netio_populate_prepend_buffer() to get a full description of
1918 * prepending.
1919 * @ingroup egress
1920 *
1921 * @param[in,out] mmd Pointer to packet's minimal metadata.
1922 * @param[in] pkt Packet on which to operate.
1923 */
1924static __inline int
1925NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1926{
1927 return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
1928}
1929
1930
1931/** Return the number of bytes which could be prepended to a packet.
1932 * See @ref netio_populate_prepend_buffer() to get a full description of
1933 * prepending.
1934 * @ingroup egress
1935 *
1936 * @param[in] pkt Packet on which to operate.
1937 */
1938static __inline int
1939NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
1940{
1941 if (NETIO_PKT_IS_MINIMAL(pkt))
1942 {
1943 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1944
1945 return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
1946 }
1947 else
1948 {
1949 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1950
1951 return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
1952 }
1953}
1954
1955
1956/** Flush a packet's minimal metadata from the cache, using a metadata pointer
1957 * to speed the operation.
1958 * @ingroup egress
1959 *
1960 * @param[in] mmd Pointer to packet's minimal metadata.
1961 * @param[in] pkt Packet on which to operate.
1962 */
1963static __inline void
1964NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1965 netio_pkt_t* pkt)
1966{
1967}
1968
1969
1970/** Invalidate a packet's minimal metadata from the cache, using a metadata
1971 * pointer to speed the operation.
1972 * @ingroup egress
1973 *
1974 * @param[in] mmd Pointer to packet's minimal metadata.
1975 * @param[in] pkt Packet on which to operate.
1976 */
1977static __inline void
1978NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1979 netio_pkt_t* pkt)
1980{
1981}
1982
1983
1984/** Flush and then invalidate a packet's minimal metadata from the cache,
1985 * using a metadata pointer to speed the operation.
1986 * @ingroup egress
1987 *
1988 * @param[in] mmd Pointer to packet's minimal metadata.
1989 * @param[in] pkt Packet on which to operate.
1990 */
1991static __inline void
1992NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1993 netio_pkt_t* pkt)
1994{
1995}
1996
1997
1998/** Flush a packet's metadata from the cache, using a metadata pointer
1999 * to speed the operation.
2000 * @ingroup ingress
2001 *
2002 * @param[in] mda Pointer to packet's minimal metadata.
2003 * @param[in] pkt Packet on which to operate.
2004 */
2005static __inline void
2006NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2007{
2008}
2009
2010
2011/** Invalidate a packet's metadata from the cache, using a metadata
2012 * pointer to speed the operation.
2013 * @ingroup ingress
2014 *
2015 * @param[in] mda Pointer to packet's metadata.
2016 * @param[in] pkt Packet on which to operate.
2017 */
2018static __inline void
2019NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2020{
2021}
2022
2023
2024/** Flush and then invalidate a packet's metadata from the cache,
2025 * using a metadata pointer to speed the operation.
2026 * @ingroup ingress
2027 *
2028 * @param[in] mda Pointer to packet's metadata.
2029 * @param[in] pkt Packet on which to operate.
2030 */
2031static __inline void
2032NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2033{
2034}
2035
2036
2037/** Flush a packet's minimal metadata from the cache.
2038 * @ingroup egress
2039 *
2040 * @param[in] pkt Packet on which to operate.
2041 */
2042static __inline void
2043NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
2044{
2045}
2046
2047
2048/** Invalidate a packet's minimal metadata from the cache.
2049 * @ingroup egress
2050 *
2051 * @param[in] pkt Packet on which to operate.
2052 */
2053static __inline void
2054NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
2055{
2056}
2057
2058
2059/** Flush and then invalidate a packet's minimal metadata from the cache.
2060 * @ingroup egress
2061 *
2062 * @param[in] pkt Packet on which to operate.
2063 */
2064static __inline void
2065NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
2066{
2067}
2068
2069
2070/** Flush a packet's metadata from the cache.
2071 * @ingroup ingress
2072 *
2073 * @param[in] pkt Packet on which to operate.
2074 */
2075static __inline void
2076NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
2077{
2078}
2079
2080
2081/** Invalidate a packet's metadata from the cache.
2082 * @ingroup ingress
2083 *
2084 * @param[in] pkt Packet on which to operate.
2085 */
2086static __inline void
2087NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
2088{
2089}
2090
2091
2092/** Flush and then invalidate a packet's metadata from the cache.
2093 * @ingroup ingress
2094 *
2095 * @param[in] pkt Packet on which to operate.
2096 */
2097static __inline void
2098NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
2099{
2100}
2101
2102/** Number of NUMA nodes we can distribute buffers to.
2103 * @ingroup setup */
2104#define NETIO_NUM_NODE_WEIGHTS 16
2105
2106/**
2107 * @brief An object for specifying the characteristics of NetIO communication
2108 * endpoint.
2109 *
2110 * @ingroup setup
2111 *
2112 * The @ref netio_input_register() function uses this structure to define
2113 * how an application tile will communicate with an IPP.
2114 *
2115 *
2116 * Future updates to NetIO may add new members to this structure,
2117 * which can affect the success of the registration operation. Thus,
2118 * if dynamically initializing the structure, applications are urged to
2119 * zero it out first, for example:
2120 *
2121 * @code
2122 * netio_input_config_t config;
2123 * memset(&config, 0, sizeof (config));
2124 * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
2125 * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
2126 * config.queue_id = 0;
2127 * .
2128 * .
2129 * .
2130 * @endcode
2131 *
2132 * since that guarantees that any unused structure members, including
2133 * members which did not exist when the application was first developed,
2134 * will not have unexpected values.
2135 *
2136 * If statically initializing the structure, we strongly recommend use of
2137 * C99-style named initializers, for example:
2138 *
2139 * @code
2140 * netio_input_config_t config = {
2141 * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
2142 * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
2143 * .queue_id = 0,
2144 * },
2145 * @endcode
2146 *
2147 * instead of the old-style structure initialization:
2148 *
2149 * @code
2150 * // Bad example! Currently equivalent to the above, but don't do this.
2151 * netio_input_config_t config = {
2152 * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
2153 * },
2154 * @endcode
2155 *
2156 * since the C99 style requires no changes to the code if elements of the
2157 * config structure are rearranged. (It also makes the initialization much
2158 * easier to understand.)
2159 *
2160 * Except for items which address a particular tile's transmit or receive
2161 * characteristics, such as the ::NETIO_RECV flag, applications are advised
2162 * to specify the same set of configuration data on all registrations.
2163 * This prevents differing results if multiple tiles happen to do their
2164 * registration operations in a different order on different invocations of
2165 * the application. This is particularly important for things like link
2166 * management flags, and buffer size and homing specifications.
2167 *
2168 * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
2169 * buffer pool is automatically created and mapped into the application's
2170 * virtual address space at an address chosen by the operating system,
2171 * using the common memory (cmem) facility in the Tilera Multicore
2172 * Components library. The cmem facility allows multiple processes to gain
2173 * access to shared memory which is mapped into each process at an
2174 * identical virtual address. In order for this to work, the processes
2175 * must have a common ancestor, which must create the common memory using
2176 * tmc_cmem_init().
2177 *
2178 * In programs using the iLib process creation API, or in programs which use
2179 * only one process (which include programs using the pthreads library),
2180 * tmc_cmem_init() is called automatically. All other applications
2181 * must call it explicitly, before any child processes which might call
2182 * netio_input_register() are created.
2183 */
2184typedef struct
2185{
2186 /** Registration characteristics.
2187
2188 This value determines several characteristics of the registration;
2189 flags for different types of behavior are ORed together to make the
2190 final flag value. Generally applications should specify exactly
2191 one flag from each of the following categories:
2192
2193 - Whether the application will be receiving packets on this queue
2194 (::NETIO_RECV or ::NETIO_NO_RECV).
2195
2196 - Whether the application will be transmitting packets on this queue,
2197 and if so, whether it will request egress checksum calculation
2198 (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is
2199 legal to call netio_get_buffer() without one of the XMIT flags,
2200 as long as ::NETIO_RECV is specified; in this case, the retrieved
2201 buffers must be passed to another tile for transmission.
2202
2203 - Whether the application expects any vendor-specific tags in
2204 its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
2205 or ::NETIO_TAG_MRVL). This must match the configuration of the
2206 target IPP.
2207
2208 To accommodate applications written to previous versions of the NetIO
2209 interface, none of the flags above are currently required; if omitted,
2210 NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
2211 ::NETIO_TAG_NONE were used. However, explicit specification of
2212 the relevant flags allows NetIO to do a better job of resource
2213 allocation, allows earlier detection of certain configuration errors,
2214 and may enable advanced features or higher performance in the future,
2215 so their use is strongly recommended.
2216
2217 Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
2218 is a special case, intended primarily for use by programs which
2219 retrieve network statistics or do link management operations.
2220 When these flags are both specified, the resulting queue may not
2221 be used with NetIO routines other than netio_get(), netio_set(),
2222 and netio_input_unregister(). See @ref link for more information
2223 on link management.
2224
2225 Other flags are optional; their use is described below.
2226 */
2227 int flags;
2228
2229 /** Interface name. This is a string which identifies the specific
2230 Ethernet controller hardware to be used. The format of the string
2231 is a device type and a device index, separated by a slash; so,
2232 the first 10 Gigabit Ethernet controller is named "xgbe/0", while
2233 the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
2234 */
2235 const char* interface;
2236
2237 /** Receive packet queue size. This specifies the maximum number
2238 of ingress packets that can be received on this queue without
2239 being retrieved by @ref netio_get_packet(). If the IPP's distribution
2240 algorithm calls for a packet to be sent to this queue, and this
2241 number of packets are already pending there, the new packet
2242 will either be discarded, or sent to another tile registered
2243 for the same queue_id (see @ref drops). This value must
2244 be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least
2245 ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
2246 interfaces.
2247 */
2248 int num_receive_packets;
2249
2250 /** The queue ID being requested. Legal values for this range from 0
2251 to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always
2252 greater than or equal to the number of tiles; this allows one queue
2253 for each tile, plus at least one additional queue. Some applications
2254 may wish to use the additional queue as a destination for unwanted
2255 packets, since packets delivered to queues for which no tiles have
2256 registered are discarded.
2257 */
2258 unsigned int queue_id;
2259
2260 /** Maximum number of small send buffers to be held in the local empty
2261 buffer cache. This specifies the size of the area which holds
2262 empty small egress buffers requested from the IPP but not yet
2263 retrieved via @ref netio_get_buffer(). This value must be greater
2264 than zero if the application will ever use @ref netio_get_buffer()
2265 to allocate empty small egress buffers; it may be no larger than
2266 ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty
2267 buffer caching.
2268 */
2269 int num_send_buffers_small_total;
2270
2271 /** Number of small send buffers to be preallocated at registration.
2272 If this value is nonzero, the specified number of empty small egress
2273 buffers will be requested from the IPP during the netio_input_register
2274 operation; this may speed the execution of @ref netio_get_buffer().
2275 This may be no larger than @ref num_send_buffers_small_total. See @ref
2276 epp for more details on empty buffer caching.
2277 */
2278 int num_send_buffers_small_prealloc;
2279
2280 /** Maximum number of large send buffers to be held in the local empty
2281 buffer cache. This specifies the size of the area which holds empty
2282 large egress buffers requested from the IPP but not yet retrieved via
2283 @ref netio_get_buffer(). This value must be greater than zero if the
2284 application will ever use @ref netio_get_buffer() to allocate empty
2285 large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
2286 See @ref epp for more details on empty buffer caching.
2287 */
2288 int num_send_buffers_large_total;
2289
2290 /** Number of large send buffers to be preallocated at registration.
2291 If this value is nonzero, the specified number of empty large egress
2292 buffers will be requested from the IPP during the netio_input_register
2293 operation; this may speed the execution of @ref netio_get_buffer().
2294 This may be no larger than @ref num_send_buffers_large_total. See @ref
2295 epp for more details on empty buffer caching.
2296 */
2297 int num_send_buffers_large_prealloc;
2298
2299 /** Maximum number of jumbo send buffers to be held in the local empty
2300 buffer cache. This specifies the size of the area which holds empty
2301 jumbo egress buffers requested from the IPP but not yet retrieved via
2302 @ref netio_get_buffer(). This value must be greater than zero if the
2303 application will ever use @ref netio_get_buffer() to allocate empty
2304 jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
2305 See @ref epp for more details on empty buffer caching.
2306 */
2307 int num_send_buffers_jumbo_total;
2308
2309 /** Number of jumbo send buffers to be preallocated at registration.
2310 If this value is nonzero, the specified number of empty jumbo egress
2311 buffers will be requested from the IPP during the netio_input_register
2312 operation; this may speed the execution of @ref netio_get_buffer().
2313 This may be no larger than @ref num_send_buffers_jumbo_total. See @ref
2314 epp for more details on empty buffer caching.
2315 */
2316 int num_send_buffers_jumbo_prealloc;
2317
2318 /** Total packet buffer size. This determines the total size, in bytes,
2319 of the NetIO buffer pool. Note that the maximum number of available
2320 buffers of each size is determined during hypervisor configuration
2321 (see the <em>System Programmer's Guide</em> for details); this just
2322 influences how much host memory is allocated for those buffers.
2323
2324 The buffer pool is allocated from common memory, which will be
2325 automatically initialized if needed. If your buffer pool is larger
2326 than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
2327 as described in the Application Libraries Reference Manual (UG227).
2328
2329 Packet buffers are currently allocated in chunks of 16 MB; this
2330 value will be rounded up to the next larger multiple of 16 MB.
2331 If this value is zero, a default of 32 MB will be used; this was
2332 the value used by previous versions of NetIO. Note that taking this
2333 default also affects the placement of buffers on Linux NUMA nodes.
2334 See @ref buffer_node_weights for an explanation of buffer placement.
2335
2336 In order to successfully allocate packet buffers, Linux must have
2337 available huge pages on the relevant Linux NUMA nodes. See the
2338 <em>System Programmer's Guide</em> for information on configuring
2339 huge page support in Linux.
2340 */
2341 uint64_t total_buffer_size;
2342
2343 /** Buffer placement weighting factors.
2344
2345 This array specifies the relative amount of buffering to place
2346 on each of the available Linux NUMA nodes. This array is
2347 indexed by the NUMA node, and the values in the array are
2348 proportional to the amount of buffer space to allocate on that
2349 node.
2350
2351 If memory striping is enabled in the Hypervisor, then there is
2352 only one logical NUMA node (node 0). In that case, NetIO will by
2353 default ignore the suggested buffer node weights, and buffers
2354 will be striped across the physical memory controllers. See
2355 UG209 System Programmer's Guide for a description of the
2356 hypervisor option that controls memory striping.
2357
2358 If memory striping is disabled, then there are up to four NUMA
2359 nodes, corresponding to the four DDRAM controllers in the TILE
2360 processor architecture. See UG100 Tile Processor Architecture
2361 Overview for a diagram showing the location of each of the DDRAM
2362 controllers relative to the tile array.
2363
2364 For instance, if memory striping is disabled, the following
2365 configuration strucure:
2366
2367 @code
2368 netio_input_config_t config = {
2369 .
2370 .
2371 .
2372 .total_buffer_size = 4 * 16 * 1024 * 1024;
2373 .buffer_node_weights = { 1, 0, 1, 0 },
2374 },
2375 @endcode
2376
2377 would result in 32 MB of buffers being placed on controller 0, and
2378 32 MB on controller 2. (Since buffers are allocated in units of
2379 16 MB, some sets of weights will not be able to be matched exactly.)
2380
2381 For the weights to be effective, @ref total_buffer_size must be
2382 nonzero. If @ref total_buffer_size is zero, causing the default
2383 32 MB of buffer space to be used, then any specified weights will
2384 be ignored, and buffers will positioned as they were in previous
2385 versions of NetIO:
2386
2387 - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
2388 and the other 16 MB will be placed on controller 2.
2389
2390 - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
2391 and the other 16 MB will be placed on controller 3.
2392
2393 If @ref total_buffer_size is nonzero, but all weights are zero,
2394 then all buffer space will be allocated on Linux NUMA node zero.
2395
2396 By default, the specified buffer placement is treated as a hint;
2397 if sufficient free memory is not available on the specified
2398 controllers, the buffers will be allocated elsewhere. However,
2399 if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
2400 failure to allocate buffer space exactly as requested will cause the
2401 registration operation to fail with an error of ::NETIO_CANNOT_HOME.
2402
2403 Note that maximal network performance cannot be achieved with
2404 only one memory controller.
2405 */
2406 uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
2407
2408 /** Fixed virtual address for packet buffers. Only valid when
2409 ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
2410 description of that flag for details.
2411 */
2412 void* fixed_buffer_va;
2413
2414 /**
2415 Maximum number of outstanding send packet requests. This value is
2416 only relevant when an EPP is in use; it determines the number of
2417 slots in the EPP's outgoing packet queue which this tile is allowed
2418 to consume, and thus the number of packets which may be sent before
2419 the sending tile must wait for an acknowledgment from the EPP.
2420 Modifying this value is generally only helpful when using @ref
2421 netio_send_packet_vector(), where it can help improve performance by
2422 allowing a single vector send operation to process more packets.
2423 Typically it is not specified, and the default, which divides the
2424 outgoing packet slots evenly between all tiles on the chip, is used.
2425
2426 If a registration asks for more outgoing packet queue slots than are
2427 available, ::NETIO_TOOMANY_XMIT will be returned. The total number
2428 of packet queue slots which are available for all tiles for each EPP
2429 is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
2430
2431
2432 This value is ignored if ::NETIO_XMIT is not specified in flags.
2433 If you want to specify a large value here for a specific tile, you are
2434 advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so
2435 that they do not consume a default number of packet slots. Any tile
2436 transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
2437 slots allocated to it; values less than that will be silently
2438 increased by the NetIO library.
2439 */
2440 int num_sends_outstanding;
2441}
2442netio_input_config_t;
2443
2444
2445/** Registration flags; used in the @ref netio_input_config_t structure.
2446 * @addtogroup setup
2447 */
2448/** @{ */
2449
2450/** Fail a registration request if we can't put packet buffers
2451 on the specified memory controllers. */
2452#define NETIO_STRICT_HOMING 0x00000002
2453
2454/** This application expects no tags on its L2 headers. */
2455#define NETIO_TAG_NONE 0x00000004
2456
2457/** This application expects Marvell extended tags on its L2 headers. */
2458#define NETIO_TAG_MRVL 0x00000008
2459
2460/** This application expects Broadcom tags on its L2 headers. */
2461#define NETIO_TAG_BRCM 0x00000010
2462
2463/** This registration may call routines which receive packets. */
2464#define NETIO_RECV 0x00000020
2465
2466/** This registration may not call routines which receive packets. */
2467#define NETIO_NO_RECV 0x00000040
2468
2469/** This registration may call routines which transmit packets. */
2470#define NETIO_XMIT 0x00000080
2471
2472/** This registration may call routines which transmit packets with
2473 checksum acceleration. */
2474#define NETIO_XMIT_CSUM 0x00000100
2475
2476/** This registration may not call routines which transmit packets. */
2477#define NETIO_NO_XMIT 0x00000200
2478
2479/** This registration wants NetIO buffers mapped at an application-specified
2480 virtual address.
2481
2482 NetIO buffers are by default created by the TMC common memory facility,
2483 which must be configured by a common ancestor of all processes sharing
2484 a network interface. When this flag is specified, NetIO buffers are
2485 instead mapped at an address chosen by the application (and specified
2486 in @ref netio_input_config_t::fixed_buffer_va). This allows multiple
2487 unrelated but cooperating processes to share a NetIO interface.
2488 All processes sharing the same interface must specify this flag,
2489 and all must specify the same fixed virtual address.
2490
2491 @ref netio_input_config_t::fixed_buffer_va must be a
2492 multiple of 16 MB, and the packet buffers will occupy @ref
2493 netio_input_config_t::total_buffer_size bytes of virtual address
2494 space, beginning at that address. If any of those virtual addresses
2495 are currently occupied by other memory objects, like application or
2496 shared library code or data, @ref netio_input_register() will return
2497 ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va
2498 which will work for all applications, a good first guess might be to
2499 use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
2500 If that fails, it might be helpful to consult the running application's
2501 virtual address description file (/proc/<em>pid</em>/maps) to see
2502 which regions of virtual address space are available.
2503 */
2504#define NETIO_FIXED_BUFFER_VA 0x00000400
2505
2506/** This registration call will not complete unless the network link
2507 is up. The process will wait several seconds for this to happen (the
2508 precise interval is link-dependent), but if the link does not come up,
2509 ::NETIO_LINK_DOWN will be returned. This flag is the default if
2510 ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by
2511 itself does not request that the link be brought up; that can be done
2512 with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
2513 latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
2514 or by explicitly setting the link's desired state via netio_set().
2515 If the link is not brought up by one of those methods, and this flag
2516 is specified, the registration operation will return ::NETIO_LINK_DOWN.
2517 This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
2518 ::NETIO_NO_RECV. See @ref link for more information on link
2519 management.
2520 */
2521#define NETIO_REQUIRE_LINK_UP 0x00000800
2522
2523/** This registration call will complete even if the network link is not up.
2524 Whenever the link is not up, packets will not be sent or received:
2525 netio_get_packet() will return ::NETIO_NOPKT once all queued packets
2526 have been drained, and netio_send_packet() and similar routines will
2527 return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
2528 or the I/O shim is full. See @ref link for more information on link
2529 management.
2530 */
2531#define NETIO_NOREQUIRE_LINK_UP 0x00001000
2532
2533#ifndef __DOXYGEN__
2534/*
2535 * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
2536 * but should not be used directly by applications, and are thus not
2537 * documented.
2538 */
2539#define _NETIO_AUTO_UP 0x00002000
2540#define _NETIO_AUTO_DN 0x00004000
2541#define _NETIO_AUTO_PRESENT 0x00008000
2542#endif
2543
2544/** Set the desired state of the link to up, allowing any speeds which are
2545 supported by the link hardware, as part of this registration operation.
2546 Do not take down the link automatically. This is the default if
2547 no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored
2548 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2549 See @ref link for more information on link management.
2550 */
2551#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
2552
2553/** Set the desired state of the link to up, allowing any speeds which are
2554 supported by the link hardware, as part of this registration operation.
2555 Set the desired state of the link to down the next time no tiles are
2556 registered for packet reception or transmission. This flag is ignored
2557 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2558 See @ref link for more information on link management.
2559 */
2560#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
2561 _NETIO_AUTO_DN)
2562
2563/** Set the desired state of the link to down the next time no tiles are
2564 registered for packet reception or transmission. This flag is ignored
2565 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2566 See @ref link for more information on link management.
2567 */
2568#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
2569
2570/** Do not bring up the link automatically as part of this registration
2571 operation. Do not take down the link automatically. This flag
2572 is ignored if it is specified along with ::NETIO_NO_XMIT and
2573 ::NETIO_NO_RECV. See @ref link for more information on link management.
2574 */
2575#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT
2576
2577
2578/** Minimum number of receive packets. */
2579#define NETIO_MIN_RECEIVE_PKTS 16
2580
2581/** Lower bound on the maximum number of receive packets; may be higher
2582 than this on some interfaces. */
2583#define NETIO_MAX_RECEIVE_PKTS 128
2584
2585/** Maximum number of send buffers, per packet size. */
2586#define NETIO_MAX_SEND_BUFFERS 16
2587
2588/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
2589#define NETIO_TOTAL_SENDS_OUTSTANDING 2015
2590
2591/** Minimum number of EPP queue slots, and thus outstanding sends, per
2592 * transmitting tile. */
2593#define NETIO_MIN_SENDS_OUTSTANDING 16
2594
2595
2596/**@}*/
2597
2598#ifndef __DOXYGEN__
2599
2600/**
2601 * An object for providing Ethernet packets to a process.
2602 */
2603struct __netio_queue_impl_t;
2604
2605/**
2606 * An object for managing the user end of a NetIO queue.
2607 */
2608struct __netio_queue_user_impl_t;
2609
2610#endif /* !__DOXYGEN__ */
2611
2612
2613/** A netio_queue_t describes a NetIO communications endpoint.
2614 * @ingroup setup
2615 */
2616typedef struct
2617{
2618#ifdef __DOXYGEN__
2619 uint8_t opaque[8]; /**< This is an opaque structure. */
2620#else
2621 struct __netio_queue_impl_t* __system_part; /**< The system part. */
2622 struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
2623#ifdef _NETIO_PTHREAD
2624 _netio_percpu_mutex_t lock; /**< Queue lock. */
2625#endif
2626#endif
2627}
2628netio_queue_t;
2629
2630
2631/**
2632 * @brief Packet send context.
2633 *
2634 * @ingroup egress
2635 *
2636 * Packet send context for use with netio_send_packet_prepare and _commit.
2637 */
2638typedef struct
2639{
2640#ifdef __DOXYGEN__
2641 uint8_t opaque[44]; /**< This is an opaque structure. */
2642#else
2643 uint8_t flags; /**< Defined below */
2644 uint8_t datalen; /**< Number of valid words pointed to by data. */
2645 uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note
2646 that this is smaller than the 11-word maximum
2647 request size, since some constant values are
2648 not saved in the context. */
2649 uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */
2650#endif
2651}
2652netio_send_pkt_context_t;
2653
2654
2655#ifndef __DOXYGEN__
2656#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */
2657#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */
2658#endif
2659
2660/**
2661 * @brief Packet vector entry.
2662 *
2663 * @ingroup egress
2664 *
2665 * This data structure is used with netio_send_packet_vector() to send multiple
2666 * packets with one NetIO call. The structure should be initialized by
2667 * calling netio_pkt_vector_set(), rather than by setting the fields
2668 * directly.
2669 *
2670 * This structure is guaranteed to be a power of two in size, no
2671 * bigger than one L2 cache line, and to be aligned modulo its size.
2672 */
2673typedef struct
2674#ifndef __DOXYGEN__
2675__attribute__((aligned(8)))
2676#endif
2677{
2678 /** Reserved for use by the user application. When initialized with
2679 * the netio_set_pkt_vector_entry() function, this field is guaranteed
2680 * to be visible to readers only after all other fields are already
2681 * visible. This way it can be used as a valid flag or generation
2682 * counter. */
2683 uint8_t user_data;
2684
2685 /* Structure members below this point should not be accessed directly by
2686 * applications, as they may change in the future. */
2687
2688 /** Low 8 bits of the packet address to send. The high bits are
2689 * acquired from the 'handle' field. */
2690 uint8_t buffer_address_low;
2691
2692 /** Number of bytes to transmit. */
2693 uint16_t size;
2694
2695 /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE,
2696 * this vector entry will be skipped and no packet will be transmitted. */
2697 netio_pkt_handle_t handle;
2698}
2699netio_pkt_vector_entry_t;
2700
2701
2702/**
2703 * @brief Initialize fields in a packet vector entry.
2704 *
2705 * @ingroup egress
2706 *
2707 * @param[out] v Pointer to the vector entry to be initialized.
2708 * @param[in] pkt Packet to be transmitted when the vector entry is passed to
2709 * netio_send_packet_vector(). Note that the packet's attributes
2710 * (e.g., its L2 offset and length) are captured at the time this
2711 * routine is called; subsequent changes in those attributes will not
2712 * be reflected in the packet which is actually transmitted.
2713 * Changes in the packet's contents, however, will be so reflected.
2714 * If this is NULL, no packet will be transmitted.
2715 * @param[in] user_data User data to be set in the vector entry.
2716 * This function guarantees that the "user_data" field will become
2717 * visible to a reader only after all other fields have become visible.
2718 * This allows a structure in a ring buffer to be written and read
2719 * by a polling reader without any locks or other synchronization.
2720 */
2721static __inline void
2722netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
2723 uint8_t user_data)
2724{
2725 if (pkt)
2726 {
2727 if (NETIO_PKT_IS_MINIMAL(pkt))
2728 {
2729 netio_pkt_minimal_metadata_t* mmd =
2730 (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
2731 v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
2732 v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
2733 }
2734 else
2735 {
2736 netio_pkt_metadata_t* mda = &pkt->__metadata;
2737 v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
2738 v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
2739 }
2740 v->handle.word = pkt->__packet.word;
2741 }
2742 else
2743 {
2744 v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */
2745 }
2746
2747 __asm__("" : : : "memory");
2748
2749 v->user_data = user_data;
2750}
2751
2752
2753/**
2754 * Flags and structures for @ref netio_get() and @ref netio_set().
2755 * @ingroup config
2756 */
2757
2758/** @{ */
2759/** Parameter class; addr is a NETIO_PARAM_xxx value. */
2760#define NETIO_PARAM 0
2761/** Interface MAC address. This address is only valid with @ref netio_get().
2762 * The value is a 6-byte MAC address. Depending upon the overall system
2763 * design, a MAC address may or may not be available for each interface. */
2764#define NETIO_PARAM_MAC 0
2765
2766/** Determine whether to suspend output on the receipt of pause frames.
2767 * If the value is nonzero, the I/O shim will suspend output when a pause
2768 * frame is received. If the value is zero, pause frames will be ignored. */
2769#define NETIO_PARAM_PAUSE_IN 1
2770
2771/** Determine whether to send pause frames if the I/O shim packet FIFOs are
2772 * nearly full. If the value is zero, pause frames are not sent. If
2773 * the value is nonzero, it is the delay value which will be sent in any
2774 * pause frames which are output, in units of 512 bit times. */
2775#define NETIO_PARAM_PAUSE_OUT 2
2776
2777/** Jumbo frame support. The value is a 4-byte integer. If the value is
2778 * nonzero, the MAC will accept frames of up to 10240 bytes. If the value
2779 * is zero, the MAC will only accept frames of up to 1544 bytes. */
2780#define NETIO_PARAM_JUMBO 3
2781
2782/** I/O shim's overflow statistics register. The value is two 16-bit integers.
2783 * The first 16-bit value (or the low 16 bits, if the value is treated as a
2784 * 32-bit number) is the count of packets which were completely dropped and
2785 * not delivered by the shim. The second 16-bit value (or the high 16 bits,
2786 * if the value is treated as a 32-bit number) is the count of packets
2787 * which were truncated and thus only partially delivered by the shim. This
2788 * register is automatically reset to zero after it has been read.
2789 */
2790#define NETIO_PARAM_OVERFLOW 4
2791
2792/** IPP statistics. This address is only valid with @ref netio_get(). The
2793 * value is a netio_stat_t structure. Unlike the I/O shim statistics, the
2794 * IPP statistics are not all reset to zero on read; see the description
2795 * of the netio_stat_t for details. */
2796#define NETIO_PARAM_STAT 5
2797
2798/** Possible link state. The value is a combination of "NETIO_LINK_xxx"
2799 * flags. With @ref netio_get(), this will indicate which flags are
2800 * actually supported by the hardware.
2801 *
2802 * For historical reasons, specifying this value to netio_set() will have
2803 * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
2804 * discouraged.
2805 */
2806#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
2807
2808/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
2809 * With @ref netio_set(), this will attempt to immediately bring up the
2810 * link using whichever of the requested flags are supported by the
2811 * hardware, or take down the link if the flags are zero; if this is
2812 * not possible, an error will be returned. Many programs will want
2813 * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
2814 *
2815 * For historical reasons, specifying this value to netio_get() will
2816 * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
2817 * but this usage is discouraged.
2818 */
2819#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
2820
2821/** Current link state. This address is only valid with @ref netio_get().
2822 * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
2823 * If the link is down, the value ANDed with NETIO_LINK_SPEED will be
2824 * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
2825 * result in exactly one of the NETIO_LINK_xxx values, indicating the
2826 * current speed. */
2827#define NETIO_PARAM_LINK_CURRENT_STATE 7
2828
2829/** Variant symbol for current state, retained for compatibility with
2830 * pre-MDE-2.1 programs. */
2831#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
2832
2833/** Packet Coherence protocol. This address is only valid with @ref netio_get().
2834 * The value is nonzero if the interface is configured for cache-coherent DMA.
2835 */
2836#define NETIO_PARAM_COHERENT 8
2837
2838/** Desired link state. The value is a conbination of "NETIO_LINK_xxx"
2839 * flags, which specify the desired state for the link. With @ref
2840 * netio_set(), this will, in the background, attempt to bring up the link
2841 * using whichever of the requested flags are reasonable, or take down the
2842 * link if the flags are zero. The actual link up or down operation may
2843 * happen after this call completes. If the link state changes in the
2844 * future, the system will continue to try to get back to the desired link
2845 * state; for instance, if the link is brought up successfully, and then
2846 * the network cable is disconnected, the link will go down. However, the
2847 * desired state of the link is still up, so if the cable is reconnected,
2848 * the link will be brought up again.
2849 *
2850 * With @ref netio_get(), this will indicate the desired state for the
2851 * link, as set with a previous netio_set() call, or implicitly by a
2852 * netio_input_register() or netio_input_unregister() operation. This may
2853 * not reflect the current state of the link; to get that, use
2854 * ::NETIO_PARAM_LINK_CURRENT_STATE. */
2855#define NETIO_PARAM_LINK_DESIRED_STATE 9
2856
2857/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT
2858 * address passed to @ref netio_get(). */
2859typedef struct
2860{
2861 /** Number of packets which have been received by the IPP and forwarded
2862 * to a tile's receive queue for processing. This value wraps at its
2863 * maximum, and is not cleared upon read. */
2864 uint32_t packets_received;
2865
2866 /** Number of packets which have been dropped by the IPP, because they could
2867 * not be received, or could not be forwarded to a tile. The former happens
2868 * when the IPP does not have a free packet buffer of suitable size for an
2869 * incoming frame. The latter happens when all potential destination tiles
2870 * for a packet, as defined by the group, bucket, and queue configuration,
2871 * have full receive queues. This value wraps at its maximum, and is not
2872 * cleared upon read. */
2873 uint32_t packets_dropped;
2874
2875 /*
2876 * Note: the #defines after each of the following four one-byte values
2877 * denote their location within the third word of the netio_stat_t. They
2878 * are intended for use only by the IPP implementation and are thus omitted
2879 * from the Doxygen output.
2880 */
2881
2882 /** Number of packets dropped because no worker was able to accept a new
2883 * packet. This value saturates at its maximum, and is cleared upon
2884 * read. */
2885 uint8_t drops_no_worker;
2886#ifndef __DOXYGEN__
2887#define NETIO_STAT_DROPS_NO_WORKER 0
2888#endif
2889
2890 /** Number of packets dropped because no small buffers were available.
2891 * This value saturates at its maximum, and is cleared upon read. */
2892 uint8_t drops_no_smallbuf;
2893#ifndef __DOXYGEN__
2894#define NETIO_STAT_DROPS_NO_SMALLBUF 1
2895#endif
2896
2897 /** Number of packets dropped because no large buffers were available.
2898 * This value saturates at its maximum, and is cleared upon read. */
2899 uint8_t drops_no_largebuf;
2900#ifndef __DOXYGEN__
2901#define NETIO_STAT_DROPS_NO_LARGEBUF 2
2902#endif
2903
2904 /** Number of packets dropped because no jumbo buffers were available.
2905 * This value saturates at its maximum, and is cleared upon read. */
2906 uint8_t drops_no_jumbobuf;
2907#ifndef __DOXYGEN__
2908#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
2909#endif
2910}
2911netio_stat_t;
2912
2913
2914/** Link can run, should run, or is running at 10 Mbps. */
2915#define NETIO_LINK_10M 0x01
2916
2917/** Link can run, should run, or is running at 100 Mbps. */
2918#define NETIO_LINK_100M 0x02
2919
2920/** Link can run, should run, or is running at 1 Gbps. */
2921#define NETIO_LINK_1G 0x04
2922
2923/** Link can run, should run, or is running at 10 Gbps. */
2924#define NETIO_LINK_10G 0x08
2925
2926/** Link should run at the highest speed supported by the link and by
2927 * the device connected to the link. Only usable as a value for
2928 * the link's desired state; never returned as a value for the current
2929 * or possible states. */
2930#define NETIO_LINK_ANYSPEED 0x10
2931
2932/** All legal link speeds. */
2933#define NETIO_LINK_SPEED (NETIO_LINK_10M | \
2934 NETIO_LINK_100M | \
2935 NETIO_LINK_1G | \
2936 NETIO_LINK_10G | \
2937 NETIO_LINK_ANYSPEED)
2938
2939
2940/** MAC register class. Addr is a register offset within the MAC.
2941 * Registers within the XGbE and GbE MACs are documented in the Tile
2942 * Processor I/O Device Guide (UG104). MAC registers start at address
2943 * 0x4000, and do not include the MAC_INTERFACE registers. */
2944#define NETIO_MAC 1
2945
2946/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr"
2947 * member of a netio_mdio_addr_t structure. */
2948#define NETIO_MDIO 2
2949
2950/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr"
2951 * member of a netio_mdio_addr_t structure. */
2952#define NETIO_MDIO_CLAUSE45 3
2953
2954/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO
2955 * address passed to @ref netio_get() or @ref netio_set(). */
2956typedef union
2957{
2958 struct
2959 {
2960 unsigned int reg:16; /**< MDIO register offset. For clause 22 access,
2961 must be less than 32. */
2962 unsigned int phy:5; /**< Which MDIO PHY to access. */
2963 unsigned int dev:5; /**< Which MDIO device to access within that PHY.
2964 Applicable for clause 45 access only; ignored
2965 for clause 22 access. */
2966 }
2967 bits; /**< Container for bitfields. */
2968 uint64_t addr; /**< Value to pass to @ref netio_get() or
2969 * @ref netio_set(). */
2970}
2971netio_mdio_addr_t;
2972
2973/** @} */
2974
2975#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 112b1e248f05..b4c8e8ec45dc 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
15obj-$(CONFIG_MODULES) += module.o 15obj-$(CONFIG_MODULES) += module.o
16obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 16obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
17obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 17obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
18obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
new file mode 100644
index 000000000000..a1ee25be9ad9
--- /dev/null
+++ b/arch/tile/kernel/pci.c
@@ -0,0 +1,621 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/kernel.h>
16#include <linux/pci.h>
17#include <linux/delay.h>
18#include <linux/string.h>
19#include <linux/init.h>
20#include <linux/capability.h>
21#include <linux/sched.h>
22#include <linux/errno.h>
23#include <linux/bootmem.h>
24#include <linux/irq.h>
25#include <linux/io.h>
26#include <linux/uaccess.h>
27
28#include <asm/processor.h>
29#include <asm/sections.h>
30#include <asm/byteorder.h>
31#include <asm/hv_driver.h>
32#include <hv/drv_pcie_rc_intf.h>
33
34
35/*
36 * Initialization flow and process
37 * -------------------------------
38 *
39 * This files containes the routines to search for PCI buses,
40 * enumerate the buses, and configure any attached devices.
41 *
42 * There are two entry points here:
43 * 1) tile_pci_init
44 * This sets up the pci_controller structs, and opens the
45 * FDs to the hypervisor. This is called from setup_arch() early
46 * in the boot process.
47 * 2) pcibios_init
48 * This probes the PCI bus(es) for any attached hardware. It's
49 * called by subsys_initcall. All of the real work is done by the
50 * generic Linux PCI layer.
51 *
52 */
53
54/*
55 * This flag tells if the platform is TILEmpower that needs
56 * special configuration for the PLX switch chip.
57 */
58int __write_once tile_plx_gen1;
59
60static struct pci_controller controllers[TILE_NUM_PCIE];
61static int num_controllers;
62
63static struct pci_ops tile_cfg_ops;
64
65
66/*
67 * We don't need to worry about the alignment of resources.
68 */
69resource_size_t pcibios_align_resource(void *data, const struct resource *res,
70 resource_size_t size, resource_size_t align)
71{
72 return res->start;
73}
74EXPORT_SYMBOL(pcibios_align_resource);
75
76/*
77 * Open a FD to the hypervisor PCI device.
78 *
79 * controller_id is the controller number, config type is 0 or 1 for
80 * config0 or config1 operations.
81 */
82static int __init tile_pcie_open(int controller_id, int config_type)
83{
84 char filename[32];
85 int fd;
86
87 sprintf(filename, "pcie/%d/config%d", controller_id, config_type);
88
89 fd = hv_dev_open((HV_VirtAddr)filename, 0);
90
91 return fd;
92}
93
94
95/*
96 * Get the IRQ numbers from the HV and set up the handlers for them.
97 */
98static int __init tile_init_irqs(int controller_id,
99 struct pci_controller *controller)
100{
101 char filename[32];
102 int fd;
103 int ret;
104 int x;
105 struct pcie_rc_config rc_config;
106
107 sprintf(filename, "pcie/%d/ctl", controller_id);
108 fd = hv_dev_open((HV_VirtAddr)filename, 0);
109 if (fd < 0) {
110 pr_err("PCI: hv_dev_open(%s) failed\n", filename);
111 return -1;
112 }
113 ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config),
114 sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF);
115 hv_dev_close(fd);
116 if (ret != sizeof(rc_config)) {
117 pr_err("PCI: wanted %zd bytes, got %d\n",
118 sizeof(rc_config), ret);
119 return -1;
120 }
121 /* Record irq_base so that we can map INTx to IRQ # later. */
122 controller->irq_base = rc_config.intr;
123
124 for (x = 0; x < 4; x++)
125 tile_irq_activate(rc_config.intr + x,
126 TILE_IRQ_HW_CLEAR);
127
128 if (rc_config.plx_gen1)
129 controller->plx_gen1 = 1;
130
131 return 0;
132}
133
134/*
135 * First initialization entry point, called from setup_arch().
136 *
137 * Find valid controllers and fill in pci_controller structs for each
138 * of them.
139 *
140 * Returns the number of controllers discovered.
141 */
142int __init tile_pci_init(void)
143{
144 int i;
145
146 pr_info("PCI: Searching for controllers...\n");
147
148 /* Do any configuration we need before using the PCIe */
149
150 for (i = 0; i < TILE_NUM_PCIE; i++) {
151 int hv_cfg_fd0 = -1;
152 int hv_cfg_fd1 = -1;
153 int hv_mem_fd = -1;
154 char name[32];
155 struct pci_controller *controller;
156
157 /*
158 * Open the fd to the HV. If it fails then this
159 * device doesn't exist.
160 */
161 hv_cfg_fd0 = tile_pcie_open(i, 0);
162 if (hv_cfg_fd0 < 0)
163 continue;
164 hv_cfg_fd1 = tile_pcie_open(i, 1);
165 if (hv_cfg_fd1 < 0) {
166 pr_err("PCI: Couldn't open config fd to HV "
167 "for controller %d\n", i);
168 goto err_cont;
169 }
170
171 sprintf(name, "pcie/%d/mem", i);
172 hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0);
173 if (hv_mem_fd < 0) {
174 pr_err("PCI: Could not open mem fd to HV!\n");
175 goto err_cont;
176 }
177
178 pr_info("PCI: Found PCI controller #%d\n", i);
179
180 controller = &controllers[num_controllers];
181
182 if (tile_init_irqs(i, controller)) {
183 pr_err("PCI: Could not initialize "
184 "IRQs, aborting.\n");
185 goto err_cont;
186 }
187
188 controller->index = num_controllers;
189 controller->hv_cfg_fd[0] = hv_cfg_fd0;
190 controller->hv_cfg_fd[1] = hv_cfg_fd1;
191 controller->hv_mem_fd = hv_mem_fd;
192 controller->first_busno = 0;
193 controller->last_busno = 0xff;
194 controller->ops = &tile_cfg_ops;
195
196 num_controllers++;
197 continue;
198
199err_cont:
200 if (hv_cfg_fd0 >= 0)
201 hv_dev_close(hv_cfg_fd0);
202 if (hv_cfg_fd1 >= 0)
203 hv_dev_close(hv_cfg_fd1);
204 if (hv_mem_fd >= 0)
205 hv_dev_close(hv_mem_fd);
206 continue;
207 }
208
209 /*
210 * Before using the PCIe, see if we need to do any platform-specific
211 * configuration, such as the PLX switch Gen 1 issue on TILEmpower.
212 */
213 for (i = 0; i < num_controllers; i++) {
214 struct pci_controller *controller = &controllers[i];
215
216 if (controller->plx_gen1)
217 tile_plx_gen1 = 1;
218 }
219
220 return num_controllers;
221}
222
223/*
224 * (pin - 1) converts from the PCI standard's [1:4] convention to
225 * a normal [0:3] range.
226 */
227static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
228{
229 struct pci_controller *controller =
230 (struct pci_controller *)dev->sysdata;
231 return (pin - 1) + controller->irq_base;
232}
233
234
235static void __init fixup_read_and_payload_sizes(void)
236{
237 struct pci_dev *dev = NULL;
238 int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
239 int max_read_size = 0x2; /* Limit to 512 byte reads. */
240 u16 new_values;
241
242 /* Scan for the smallest maximum payload size. */
243 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
244 int pcie_caps_offset;
245 u32 devcap;
246 int max_payload;
247
248 pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
249 if (pcie_caps_offset == 0)
250 continue;
251
252 pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP,
253 &devcap);
254 max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
255 if (max_payload < smallest_max_payload)
256 smallest_max_payload = max_payload;
257 }
258
259 /* Now, set the max_payload_size for all devices to that value. */
260 new_values = (max_read_size << 12) | (smallest_max_payload << 5);
261 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
262 int pcie_caps_offset;
263 u16 devctl;
264
265 pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
266 if (pcie_caps_offset == 0)
267 continue;
268
269 pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
270 &devctl);
271 devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
272 devctl |= new_values;
273 pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
274 devctl);
275 }
276}
277
278
279/*
280 * Second PCI initialization entry point, called by subsys_initcall.
281 *
282 * The controllers have been set up by the time we get here, by a call to
283 * tile_pci_init.
284 */
285static int __init pcibios_init(void)
286{
287 int i;
288
289 pr_info("PCI: Probing PCI hardware\n");
290
291 /*
292 * Delay a bit in case devices aren't ready. Some devices are
293 * known to require at least 20ms here, but we use a more
294 * conservative value.
295 */
296 mdelay(250);
297
298 /* Scan all of the recorded PCI controllers. */
299 for (i = 0; i < num_controllers; i++) {
300 struct pci_controller *controller = &controllers[i];
301 struct pci_bus *bus;
302
303 pr_info("PCI: initializing controller #%d\n", i);
304
305 /*
306 * This comes from the generic Linux PCI driver.
307 *
308 * It reads the PCI tree for this bus into the Linux
309 * data structures.
310 *
311 * This is inlined in linux/pci.h and calls into
312 * pci_scan_bus_parented() in probe.c.
313 */
314 bus = pci_scan_bus(0, controller->ops, controller);
315 controller->root_bus = bus;
316 controller->last_busno = bus->subordinate;
317
318 }
319
320 /* Do machine dependent PCI interrupt routing */
321 pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
322
323 /*
324 * This comes from the generic Linux PCI driver.
325 *
326 * It allocates all of the resources (I/O memory, etc)
327 * associated with the devices read in above.
328 */
329
330 pci_assign_unassigned_resources();
331
332 /* Configure the max_read_size and max_payload_size values. */
333 fixup_read_and_payload_sizes();
334
335 /* Record the I/O resources in the PCI controller structure. */
336 for (i = 0; i < num_controllers; i++) {
337 struct pci_bus *root_bus = controllers[i].root_bus;
338 struct pci_bus *next_bus;
339 struct pci_dev *dev;
340
341 list_for_each_entry(dev, &root_bus->devices, bus_list) {
342 /* Find the PCI host controller, ie. the 1st bridge. */
343 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
344 (PCI_SLOT(dev->devfn) == 0)) {
345 next_bus = dev->subordinate;
346 controllers[i].mem_resources[0] =
347 *next_bus->resource[0];
348 controllers[i].mem_resources[1] =
349 *next_bus->resource[1];
350 controllers[i].mem_resources[2] =
351 *next_bus->resource[2];
352
353 break;
354 }
355 }
356
357 }
358
359 return 0;
360}
361subsys_initcall(pcibios_init);
362
363/*
364 * No bus fixups needed.
365 */
366void __devinit pcibios_fixup_bus(struct pci_bus *bus)
367{
368 /* Nothing needs to be done. */
369}
370
371/*
372 * This can be called from the generic PCI layer, but doesn't need to
373 * do anything.
374 */
375char __devinit *pcibios_setup(char *str)
376{
377 /* Nothing needs to be done. */
378 return str;
379}
380
381/*
382 * This is called from the generic Linux layer.
383 */
384void __init pcibios_update_irq(struct pci_dev *dev, int irq)
385{
386 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
387}
388
389/*
390 * Enable memory and/or address decoding, as appropriate, for the
391 * device described by the 'dev' struct.
392 *
393 * This is called from the generic PCI layer, and can be called
394 * for bridges or endpoints.
395 */
396int pcibios_enable_device(struct pci_dev *dev, int mask)
397{
398 u16 cmd, old_cmd;
399 u8 header_type;
400 int i;
401 struct resource *r;
402
403 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
404
405 pci_read_config_word(dev, PCI_COMMAND, &cmd);
406 old_cmd = cmd;
407 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
408 /*
409 * For bridges, we enable both memory and I/O decoding
410 * in call cases.
411 */
412 cmd |= PCI_COMMAND_IO;
413 cmd |= PCI_COMMAND_MEMORY;
414 } else {
415 /*
416 * For endpoints, we enable memory and/or I/O decoding
417 * only if they have a memory resource of that type.
418 */
419 for (i = 0; i < 6; i++) {
420 r = &dev->resource[i];
421 if (r->flags & IORESOURCE_UNSET) {
422 pr_err("PCI: Device %s not available "
423 "because of resource collisions\n",
424 pci_name(dev));
425 return -EINVAL;
426 }
427 if (r->flags & IORESOURCE_IO)
428 cmd |= PCI_COMMAND_IO;
429 if (r->flags & IORESOURCE_MEM)
430 cmd |= PCI_COMMAND_MEMORY;
431 }
432 }
433
434 /*
435 * We only write the command if it changed.
436 */
437 if (cmd != old_cmd)
438 pci_write_config_word(dev, PCI_COMMAND, cmd);
439 return 0;
440}
441
442void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
443{
444 unsigned long start = pci_resource_start(dev, bar);
445 unsigned long len = pci_resource_len(dev, bar);
446 unsigned long flags = pci_resource_flags(dev, bar);
447
448 if (!len)
449 return NULL;
450 if (max && len > max)
451 len = max;
452
453 if (!(flags & IORESOURCE_MEM)) {
454 pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
455 start = 0;
456 }
457
458 return (void __iomem *)start;
459}
460EXPORT_SYMBOL(pci_iomap);
461
462
463/****************************************************************
464 *
465 * Tile PCI config space read/write routines
466 *
467 ****************************************************************/
468
469/*
470 * These are the normal read and write ops
471 * These are expanded with macros from pci_bus_read_config_byte() etc.
472 *
473 * devfn is the combined PCI slot & function.
474 *
475 * offset is in bytes, from the start of config space for the
476 * specified bus & slot.
477 */
478
479static int __devinit tile_cfg_read(struct pci_bus *bus,
480 unsigned int devfn,
481 int offset,
482 int size,
483 u32 *val)
484{
485 struct pci_controller *controller = bus->sysdata;
486 int busnum = bus->number & 0xff;
487 int slot = (devfn >> 3) & 0x1f;
488 int function = devfn & 0x7;
489 u32 addr;
490 int config_mode = 1;
491
492 /*
493 * There is no bridge between the Tile and bus 0, so we
494 * use config0 to talk to bus 0.
495 *
496 * If we're talking to a bus other than zero then we
497 * must have found a bridge.
498 */
499 if (busnum == 0) {
500 /*
501 * We fake an empty slot for (busnum == 0) && (slot > 0),
502 * since there is only one slot on bus 0.
503 */
504 if (slot) {
505 *val = 0xFFFFFFFF;
506 return 0;
507 }
508 config_mode = 0;
509 }
510
511 addr = busnum << 20; /* Bus in 27:20 */
512 addr |= slot << 15; /* Slot (device) in 19:15 */
513 addr |= function << 12; /* Function is in 14:12 */
514 addr |= (offset & 0xFFF); /* byte address in 0:11 */
515
516 return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
517 (HV_VirtAddr)(val), size, addr);
518}
519
520
521/*
522 * See tile_cfg_read() for relevent comments.
523 * Note that "val" is the value to write, not a pointer to that value.
524 */
525static int __devinit tile_cfg_write(struct pci_bus *bus,
526 unsigned int devfn,
527 int offset,
528 int size,
529 u32 val)
530{
531 struct pci_controller *controller = bus->sysdata;
532 int busnum = bus->number & 0xff;
533 int slot = (devfn >> 3) & 0x1f;
534 int function = devfn & 0x7;
535 u32 addr;
536 int config_mode = 1;
537 HV_VirtAddr valp = (HV_VirtAddr)&val;
538
539 /*
540 * For bus 0 slot 0 we use config 0 accesses.
541 */
542 if (busnum == 0) {
543 /*
544 * We fake an empty slot for (busnum == 0) && (slot > 0),
545 * since there is only one slot on bus 0.
546 */
547 if (slot)
548 return 0;
549 config_mode = 0;
550 }
551
552 addr = busnum << 20; /* Bus in 27:20 */
553 addr |= slot << 15; /* Slot (device) in 19:15 */
554 addr |= function << 12; /* Function is in 14:12 */
555 addr |= (offset & 0xFFF); /* byte address in 0:11 */
556
557#ifdef __BIG_ENDIAN
558 /* Point to the correct part of the 32-bit "val". */
559 valp += 4 - size;
560#endif
561
562 return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
563 valp, size, addr);
564}
565
566
567static struct pci_ops tile_cfg_ops = {
568 .read = tile_cfg_read,
569 .write = tile_cfg_write,
570};
571
572
573/*
574 * In the following, each PCI controller's mem_resources[1]
575 * represents its (non-prefetchable) PCI memory resource.
576 * mem_resources[0] and mem_resources[2] refer to its PCI I/O and
577 * prefetchable PCI memory resources, respectively.
578 * For more details, see pci_setup_bridge() in setup-bus.c.
579 * By comparing the target PCI memory address against the
580 * end address of controller 0, we can determine the controller
581 * that should accept the PCI memory access.
582 */
583#define TILE_READ(size, type) \
584type _tile_read##size(unsigned long addr) \
585{ \
586 type val; \
587 int idx = 0; \
588 if (addr > controllers[0].mem_resources[1].end && \
589 addr > controllers[0].mem_resources[2].end) \
590 idx = 1; \
591 if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \
592 (HV_VirtAddr)(&val), sizeof(type), addr)) \
593 pr_err("PCI: read %zd bytes at 0x%lX failed\n", \
594 sizeof(type), addr); \
595 return val; \
596} \
597EXPORT_SYMBOL(_tile_read##size)
598
599TILE_READ(b, u8);
600TILE_READ(w, u16);
601TILE_READ(l, u32);
602TILE_READ(q, u64);
603
604#define TILE_WRITE(size, type) \
605void _tile_write##size(type val, unsigned long addr) \
606{ \
607 int idx = 0; \
608 if (addr > controllers[0].mem_resources[1].end && \
609 addr > controllers[0].mem_resources[2].end) \
610 idx = 1; \
611 if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \
612 (HV_VirtAddr)(&val), sizeof(type), addr)) \
613 pr_err("PCI: write %zd bytes at 0x%lX failed\n", \
614 sizeof(type), addr); \
615} \
616EXPORT_SYMBOL(_tile_write##size)
617
618TILE_WRITE(b, u8);
619TILE_WRITE(w, u16);
620TILE_WRITE(l, u32);
621TILE_WRITE(q, u64);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f6668cdaac85..43db398437b7 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig"
2945 2945
2946source "drivers/net/caif/Kconfig" 2946source "drivers/net/caif/Kconfig"
2947 2947
2948config TILE_NET
2949 tristate "Tilera GBE/XGBE network driver support"
2950 depends on TILE
2951 default y
2952 select CRC32
2953 help
2954 This is a standard Linux network device driver for the
2955 on-chip Tilera Gigabit Ethernet and XAUI interfaces.
2956
2957 To compile this driver as a module, choose M here: the module
2958 will be called tile_net.
2959
2948config XEN_NETDEV_FRONTEND 2960config XEN_NETDEV_FRONTEND
2949 tristate "Xen network device frontend driver" 2961 tristate "Xen network device frontend driver"
2950 depends on XEN 2962 depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 652fc6b98039..b90738d13994 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/
301 301
302obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ 302obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
303obj-$(CONFIG_PCH_GBE) += pch_gbe/ 303obj-$(CONFIG_PCH_GBE) += pch_gbe/
304obj-$(CONFIG_TILE_NET) += tile/
diff --git a/drivers/net/tile/Makefile b/drivers/net/tile/Makefile
new file mode 100644
index 000000000000..f634f142cab4
--- /dev/null
+++ b/drivers/net/tile/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for the TILE on-chip networking support.
3#
4
5obj-$(CONFIG_TILE_NET) += tile_net.o
6ifdef CONFIG_TILEGX
7tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o
8else
9tile_net-objs := tilepro.o
10endif
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
new file mode 100644
index 000000000000..0e6bac5ec65b
--- /dev/null
+++ b/drivers/net/tile/tilepro.c
@@ -0,0 +1,2406 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/moduleparam.h>
18#include <linux/sched.h>
19#include <linux/kernel.h> /* printk() */
20#include <linux/slab.h> /* kmalloc() */
21#include <linux/errno.h> /* error codes */
22#include <linux/types.h> /* size_t */
23#include <linux/interrupt.h>
24#include <linux/in.h>
25#include <linux/netdevice.h> /* struct device, and other headers */
26#include <linux/etherdevice.h> /* eth_type_trans */
27#include <linux/skbuff.h>
28#include <linux/ioctl.h>
29#include <linux/cdev.h>
30#include <linux/hugetlb.h>
31#include <linux/in6.h>
32#include <linux/timer.h>
33#include <linux/io.h>
34#include <asm/checksum.h>
35#include <asm/homecache.h>
36
37#include <hv/drv_xgbe_intf.h>
38#include <hv/drv_xgbe_impl.h>
39#include <hv/hypervisor.h>
40#include <hv/netio_intf.h>
41
42/* For TSO */
43#include <linux/ip.h>
44#include <linux/tcp.h>
45
46
47/* There is no singlethread_cpu, so schedule work on the current cpu. */
48#define singlethread_cpu -1
49
50
51/*
52 * First, "tile_net_init_module()" initializes all four "devices" which
53 * can be used by linux.
54 *
55 * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes
56 * the network cpus, then uses "tile_net_open_aux()" to initialize
57 * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all
58 * the tiles, provide buffers to LIPP, allow ingress to start, and
59 * turn on hypervisor interrupt handling (and NAPI) on all tiles.
60 *
61 * If registration fails due to the link being down, then "retry_work"
62 * is used to keep calling "tile_net_open_inner()" until it succeeds.
63 *
64 * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to
65 * stop egress, drain the LIPP buffers, unregister all the tiles, stop
66 * LIPP/LEPP, and wipe the LEPP queue.
67 *
68 * We start out with the ingress interrupt enabled on each CPU. When
69 * this interrupt fires, we disable it, and call "napi_schedule()".
70 * This will cause "tile_net_poll()" to be called, which will pull
71 * packets from the netio queue, filtering them out, or passing them
72 * to "netif_receive_skb()". If our budget is exhausted, we will
73 * return, knowing we will be called again later. Otherwise, we
74 * reenable the ingress interrupt, and call "napi_complete()".
75 *
76 *
77 * NOTE: The use of "native_driver" ensures that EPP exists, and that
78 * "epp_sendv" is legal, and that "LIPP" is being used.
79 *
80 * NOTE: Failing to free completions for an arbitrarily long time
81 * (which is defined to be illegal) does in fact cause bizarre
82 * problems. The "egress_timer" helps prevent this from happening.
83 *
84 * NOTE: The egress code can be interrupted by the interrupt handler.
85 */
86
87
88/* HACK: Allow use of "jumbo" packets. */
89/* This should be 1500 if "jumbo" is not set in LIPP. */
90/* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
91/* ISSUE: This has not been thoroughly tested (except at 1500). */
92#define TILE_NET_MTU 1500
93
94/* HACK: Define to support GSO. */
95/* ISSUE: This may actually hurt performance of the TCP blaster. */
96/* #define TILE_NET_GSO */
97
98/* Define this to collapse "duplicate" acks. */
99/* #define IGNORE_DUP_ACKS */
100
101/* HACK: Define this to verify incoming packets. */
102/* #define TILE_NET_VERIFY_INGRESS */
103
104/* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
105#define TILE_NET_TX_QUEUE_LEN 0
106
107/* Define to dump packets (prints out the whole packet on tx and rx). */
108/* #define TILE_NET_DUMP_PACKETS */
109
110/* Define to enable debug spew (all PDEBUG's are enabled). */
111/* #define TILE_NET_DEBUG */
112
113
114/* Define to activate paranoia checks. */
115/* #define TILE_NET_PARANOIA */
116
117/* Default transmit lockup timeout period, in jiffies. */
118#define TILE_NET_TIMEOUT (5 * HZ)
119
120/* Default retry interval for bringing up the NetIO interface, in jiffies. */
121#define TILE_NET_RETRY_INTERVAL (5 * HZ)
122
123/* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
124#define TILE_NET_DEVS 4
125
126
127
128/* Paranoia. */
129#if NET_IP_ALIGN != LIPP_PACKET_PADDING
130#error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
131#endif
132
133
134/* Debug print. */
135#ifdef TILE_NET_DEBUG
136#define PDEBUG(fmt, args...) net_printk(fmt, ## args)
137#else
138#define PDEBUG(fmt, args...)
139#endif
140
141
142MODULE_AUTHOR("Tilera");
143MODULE_LICENSE("GPL");
144
145
146#define IS_MULTICAST(mac_addr) \
147 (((u8 *)(mac_addr))[0] & 0x01)
148
149#define IS_BROADCAST(mac_addr) \
150 (((u16 *)(mac_addr))[0] == 0xffff)
151
152
153/*
154 * Queue of incoming packets for a specific cpu and device.
155 *
156 * Includes a pointer to the "system" data, and the actual "user" data.
157 */
158struct tile_netio_queue {
159 netio_queue_impl_t *__system_part;
160 netio_queue_user_impl_t __user_part;
161
162};
163
164
165/*
166 * Statistics counters for a specific cpu and device.
167 */
168struct tile_net_stats_t {
169 u32 rx_packets;
170 u32 rx_bytes;
171 u32 tx_packets;
172 u32 tx_bytes;
173};
174
175
176/*
177 * Info for a specific cpu and device.
178 *
179 * ISSUE: There is a "dev" pointer in "napi" as well.
180 */
181struct tile_net_cpu {
182 /* The NAPI struct. */
183 struct napi_struct napi;
184 /* Packet queue. */
185 struct tile_netio_queue queue;
186 /* Statistics. */
187 struct tile_net_stats_t stats;
188 /* ISSUE: Is this needed? */
189 bool napi_enabled;
190 /* True if this tile has succcessfully registered with the IPP. */
191 bool registered;
192 /* True if the link was down last time we tried to register. */
193 bool link_down;
194 /* True if "egress_timer" is scheduled. */
195 bool egress_timer_scheduled;
196 /* Number of small sk_buffs which must still be provided. */
197 unsigned int num_needed_small_buffers;
198 /* Number of large sk_buffs which must still be provided. */
199 unsigned int num_needed_large_buffers;
200 /* A timer for handling egress completions. */
201 struct timer_list egress_timer;
202};
203
204
205/*
206 * Info for a specific device.
207 */
208struct tile_net_priv {
209 /* Our network device. */
210 struct net_device *dev;
211 /* The actual egress queue. */
212 lepp_queue_t *epp_queue;
213 /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */
214 spinlock_t cmd_lock;
215 /* Protects "epp_queue->comp_head". */
216 spinlock_t comp_lock;
217 /* The hypervisor handle for this interface. */
218 int hv_devhdl;
219 /* The intr bit mask that IDs this device. */
220 u32 intr_id;
221 /* True iff "tile_net_open_aux()" has succeeded. */
222 int partly_opened;
223 /* True iff "tile_net_open_inner()" has succeeded. */
224 int fully_opened;
225 /* Effective network cpus. */
226 struct cpumask network_cpus_map;
227 /* Number of network cpus. */
228 int network_cpus_count;
229 /* Credits per network cpu. */
230 int network_cpus_credits;
231 /* Network stats. */
232 struct net_device_stats stats;
233 /* For NetIO bringup retries. */
234 struct delayed_work retry_work;
235 /* Quick access to per cpu data. */
236 struct tile_net_cpu *cpu[NR_CPUS];
237};
238
239
240/*
241 * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
242 */
243static struct net_device *tile_net_devs[TILE_NET_DEVS];
244
245/*
246 * The "tile_net_cpu" structures for each device.
247 */
248static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
249static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
250static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
251static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
252
253
254/*
255 * True if "network_cpus" was specified.
256 */
257static bool network_cpus_used;
258
259/*
260 * The actual cpus in "network_cpus".
261 */
262static struct cpumask network_cpus_map;
263
264
265
266#ifdef TILE_NET_DEBUG
267/*
268 * printk with extra stuff.
269 *
270 * We print the CPU we're running in brackets.
271 */
272static void net_printk(char *fmt, ...)
273{
274 int i;
275 int len;
276 va_list args;
277 static char buf[256];
278
279 len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
280 va_start(args, fmt);
281 i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
282 va_end(args);
283 buf[255] = '\0';
284 pr_notice(buf);
285}
286#endif
287
288
289#ifdef TILE_NET_DUMP_PACKETS
290/*
291 * Dump a packet.
292 */
293static void dump_packet(unsigned char *data, unsigned long length, char *s)
294{
295 unsigned long i;
296 static unsigned int count;
297
298 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
299 data, length, s, count++);
300
301 pr_info("\n");
302
303 for (i = 0; i < length; i++) {
304 if ((i & 0xf) == 0)
305 sprintf(buf, "%8.8lx:", i);
306 sprintf(buf + strlen(buf), " %2.2x", data[i]);
307 if ((i & 0xf) == 0xf || i == length - 1)
308 pr_info("%s\n", buf);
309 }
310}
311#endif
312
313
314/*
315 * Provide support for the __netio_fastio1() swint
316 * (see <hv/drv_xgbe_intf.h> for how it is used).
317 *
318 * The fastio swint2 call may clobber all the caller-saved registers.
319 * It rarely clobbers memory, but we allow for the possibility in
320 * the signature just to be on the safe side.
321 *
322 * Also, gcc doesn't seem to allow an input operand to be
323 * clobbered, so we fake it with dummy outputs.
324 *
325 * This function can't be static because of the way it is declared
326 * in the netio header.
327 */
328inline int __netio_fastio1(u32 fastio_index, u32 arg0)
329{
330 long result, clobber_r1, clobber_r10;
331 asm volatile("swint2"
332 : "=R00" (result),
333 "=R01" (clobber_r1), "=R10" (clobber_r10)
334 : "R10" (fastio_index), "R01" (arg0)
335 : "memory", "r2", "r3", "r4",
336 "r5", "r6", "r7", "r8", "r9",
337 "r11", "r12", "r13", "r14",
338 "r15", "r16", "r17", "r18", "r19",
339 "r20", "r21", "r22", "r23", "r24",
340 "r25", "r26", "r27", "r28", "r29");
341 return result;
342}
343
344
345/*
346 * Provide a linux buffer to LIPP.
347 */
348static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
349 void *va, bool small)
350{
351 struct tile_netio_queue *queue = &info->queue;
352
353 /* Convert "va" and "small" to "linux_buffer_t". */
354 unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small;
355
356 __netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer);
357}
358
359
360/*
361 * Provide a linux buffer for LIPP.
362 */
363static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
364 bool small)
365{
366 /* ISSUE: What should we use here? */
367 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
368
369 /* Round up to ensure to avoid "false sharing" with last cache line. */
370 unsigned int buffer_size =
371 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
372 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
373
374 /*
375 * ISSUE: Since CPAs are 38 bits, and we can only encode the
376 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
377 * zero, and thus, we must align the actual "va" mod 128.
378 */
379 const unsigned long align = 128;
380
381 struct sk_buff *skb;
382 void *va;
383
384 struct sk_buff **skb_ptr;
385
386 /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */
387 /* and also "reserves" that many bytes. */
388 /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */
389 int len = sizeof(*skb_ptr) + align + buffer_size;
390
391 while (1) {
392
393 /* Allocate (or fail). */
394 skb = dev_alloc_skb(len);
395 if (skb == NULL)
396 return false;
397
398 /* Make room for a back-pointer to 'skb'. */
399 skb_reserve(skb, sizeof(*skb_ptr));
400
401 /* Make sure we are aligned. */
402 skb_reserve(skb, -(long)skb->data & (align - 1));
403
404 /* This address is given to IPP. */
405 va = skb->data;
406
407 if (small)
408 break;
409
410 /* ISSUE: This has never been observed! */
411 /* Large buffers must not span a huge page. */
412 if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0)
413 break;
414 pr_err("Leaking unaligned linux buffer at %p.\n", va);
415 }
416
417 /* Skip two bytes to satisfy LIPP assumptions. */
418 /* Note that this aligns IP on a 16 byte boundary. */
419 /* ISSUE: Do this when the packet arrives? */
420 skb_reserve(skb, NET_IP_ALIGN);
421
422 /* Save a back-pointer to 'skb'. */
423 skb_ptr = va - sizeof(*skb_ptr);
424 *skb_ptr = skb;
425
426 /* Invalidate the packet buffer. */
427 if (!hash_default)
428 __inv_buffer(skb->data, buffer_size);
429
430 /* Make sure "skb_ptr" has been flushed. */
431 __insn_mf();
432
433#ifdef TILE_NET_PARANOIA
434#if CHIP_HAS_CBOX_HOME_MAP()
435 if (hash_default) {
436 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
437 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
438 panic("Non-coherent ingress buffer!");
439 }
440#endif
441#endif
442
443 /* Provide the new buffer. */
444 tile_net_provide_linux_buffer(info, va, small);
445
446 return true;
447}
448
449
450/*
451 * Provide linux buffers for LIPP.
452 */
453static void tile_net_provide_needed_buffers(struct tile_net_cpu *info)
454{
455 while (info->num_needed_small_buffers != 0) {
456 if (!tile_net_provide_needed_buffer(info, true))
457 goto oops;
458 info->num_needed_small_buffers--;
459 }
460
461 while (info->num_needed_large_buffers != 0) {
462 if (!tile_net_provide_needed_buffer(info, false))
463 goto oops;
464 info->num_needed_large_buffers--;
465 }
466
467 return;
468
469oops:
470
471 /* Add a description to the page allocation failure dump. */
472 pr_notice("Could not provide a linux buffer to LIPP.\n");
473}
474
475
476/*
477 * Grab some LEPP completions, and store them in "comps", of size
478 * "comps_size", and return the number of completions which were
479 * stored, so the caller can free them.
480 *
481 * If "pending" is not NULL, it will be set to true if there might
482 * still be some pending completions caused by this tile, else false.
483 */
484static unsigned int tile_net_lepp_grab_comps(struct net_device *dev,
485 struct sk_buff *comps[],
486 unsigned int comps_size,
487 bool *pending)
488{
489 struct tile_net_priv *priv = netdev_priv(dev);
490
491 lepp_queue_t *eq = priv->epp_queue;
492
493 unsigned int n = 0;
494
495 unsigned int comp_head;
496 unsigned int comp_busy;
497 unsigned int comp_tail;
498
499 spin_lock(&priv->comp_lock);
500
501 comp_head = eq->comp_head;
502 comp_busy = eq->comp_busy;
503 comp_tail = eq->comp_tail;
504
505 while (comp_head != comp_busy && n < comps_size) {
506 comps[n++] = eq->comps[comp_head];
507 LEPP_QINC(comp_head);
508 }
509
510 if (pending != NULL)
511 *pending = (comp_head != comp_tail);
512
513 eq->comp_head = comp_head;
514
515 spin_unlock(&priv->comp_lock);
516
517 return n;
518}
519
520
521/*
522 * Make sure the egress timer is scheduled.
523 *
524 * Note that we use "schedule if not scheduled" logic instead of the more
525 * obvious "reschedule" logic, because "reschedule" is fairly expensive.
526 */
527static void tile_net_schedule_egress_timer(struct tile_net_cpu *info)
528{
529 if (!info->egress_timer_scheduled) {
530 mod_timer_pinned(&info->egress_timer, jiffies + 1);
531 info->egress_timer_scheduled = true;
532 }
533}
534
535
536/*
537 * The "function" for "info->egress_timer".
538 *
539 * This timer will reschedule itself as long as there are any pending
540 * completions expected (on behalf of any tile).
541 *
542 * ISSUE: Realistically, will the timer ever stop scheduling itself?
543 *
544 * ISSUE: This timer is almost never actually needed, so just use a global
545 * timer that can run on any tile.
546 *
547 * ISSUE: Maybe instead track number of expected completions, and free
548 * only that many, resetting to zero if "pending" is ever false.
549 */
550static void tile_net_handle_egress_timer(unsigned long arg)
551{
552 struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
553 struct net_device *dev = info->napi.dev;
554
555 struct sk_buff *olds[32];
556 unsigned int wanted = 32;
557 unsigned int i, nolds = 0;
558 bool pending;
559
560 /* The timer is no longer scheduled. */
561 info->egress_timer_scheduled = false;
562
563 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending);
564
565 for (i = 0; i < nolds; i++)
566 kfree_skb(olds[i]);
567
568 /* Reschedule timer if needed. */
569 if (pending)
570 tile_net_schedule_egress_timer(info);
571}
572
573
574#ifdef IGNORE_DUP_ACKS
575
576/*
577 * Help detect "duplicate" ACKs. These are sequential packets (for a
578 * given flow) which are exactly 66 bytes long, sharing everything but
579 * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32,
580 * Tstamps=10@0x38. The ID's are +1, the Hsum's are -1, the Ack's are
581 * +N, and the Tstamps are usually identical.
582 *
583 * NOTE: Apparently truly duplicate acks (with identical "ack" values),
584 * should not be collapsed, as they are used for some kind of flow control.
585 */
586static bool is_dup_ack(char *s1, char *s2, unsigned int len)
587{
588 int i;
589
590 unsigned long long ignorable = 0;
591
592 /* Identification. */
593 ignorable |= (1ULL << 0x12);
594 ignorable |= (1ULL << 0x13);
595
596 /* Header checksum. */
597 ignorable |= (1ULL << 0x18);
598 ignorable |= (1ULL << 0x19);
599
600 /* ACK. */
601 ignorable |= (1ULL << 0x2a);
602 ignorable |= (1ULL << 0x2b);
603 ignorable |= (1ULL << 0x2c);
604 ignorable |= (1ULL << 0x2d);
605
606 /* WinSize. */
607 ignorable |= (1ULL << 0x30);
608 ignorable |= (1ULL << 0x31);
609
610 /* Checksum. */
611 ignorable |= (1ULL << 0x32);
612 ignorable |= (1ULL << 0x33);
613
614 for (i = 0; i < len; i++, ignorable >>= 1) {
615
616 if ((ignorable & 1) || (s1[i] == s2[i]))
617 continue;
618
619#ifdef TILE_NET_DEBUG
620 /* HACK: Mention non-timestamp diffs. */
621 if (i < 0x38 && i != 0x2f &&
622 net_ratelimit())
623 pr_info("Diff at 0x%x\n", i);
624#endif
625
626 return false;
627 }
628
629#ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS
630 /* HACK: Do not suppress truly duplicate ACKs. */
631 /* ISSUE: Is this actually necessary or helpful? */
632 if (s1[0x2a] == s2[0x2a] &&
633 s1[0x2b] == s2[0x2b] &&
634 s1[0x2c] == s2[0x2c] &&
635 s1[0x2d] == s2[0x2d]) {
636 return false;
637 }
638#endif
639
640 return true;
641}
642
643#endif
644
645
646
647/*
648 * Like "tile_net_handle_packets()", but just discard packets.
649 */
650static void tile_net_discard_packets(struct net_device *dev)
651{
652 struct tile_net_priv *priv = netdev_priv(dev);
653 int my_cpu = smp_processor_id();
654 struct tile_net_cpu *info = priv->cpu[my_cpu];
655 struct tile_netio_queue *queue = &info->queue;
656 netio_queue_impl_t *qsp = queue->__system_part;
657 netio_queue_user_impl_t *qup = &queue->__user_part;
658
659 while (qup->__packet_receive_read !=
660 qsp->__packet_receive_queue.__packet_write) {
661
662 int index = qup->__packet_receive_read;
663
664 int index2_aux = index + sizeof(netio_pkt_t);
665 int index2 =
666 ((index2_aux ==
667 qsp->__packet_receive_queue.__last_packet_plus_one) ?
668 0 : index2_aux);
669
670 netio_pkt_t *pkt = (netio_pkt_t *)
671 ((unsigned long) &qsp[1] + index);
672
673 /* Extract the "linux_buffer_t". */
674 unsigned int buffer = pkt->__packet.word;
675
676 /* Convert "linux_buffer_t" to "va". */
677 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
678
679 /* Acquire the associated "skb". */
680 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
681 struct sk_buff *skb = *skb_ptr;
682
683 kfree_skb(skb);
684
685 /* Consume this packet. */
686 qup->__packet_receive_read = index2;
687 }
688}
689
690
691/*
692 * Handle the next packet. Return true if "processed", false if "filtered".
693 */
694static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
695{
696 struct net_device *dev = info->napi.dev;
697
698 struct tile_netio_queue *queue = &info->queue;
699 netio_queue_impl_t *qsp = queue->__system_part;
700 netio_queue_user_impl_t *qup = &queue->__user_part;
701 struct tile_net_stats_t *stats = &info->stats;
702
703 int filter;
704
705 int index2_aux = index + sizeof(netio_pkt_t);
706 int index2 =
707 ((index2_aux ==
708 qsp->__packet_receive_queue.__last_packet_plus_one) ?
709 0 : index2_aux);
710
711 netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
712
713 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
714
715 /* Extract the packet size. */
716 unsigned long len =
717 (NETIO_PKT_CUSTOM_LENGTH(pkt) +
718 NET_IP_ALIGN - NETIO_PACKET_PADDING);
719
720 /* Extract the "linux_buffer_t". */
721 unsigned int buffer = pkt->__packet.word;
722
723 /* Extract "small" (vs "large"). */
724 bool small = ((buffer & 1) != 0);
725
726 /* Convert "linux_buffer_t" to "va". */
727 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
728
729 /* Extract the packet data pointer. */
730 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
731 unsigned char *buf = va + NET_IP_ALIGN;
732
733#ifdef IGNORE_DUP_ACKS
734
735 static int other;
736 static int final;
737 static int keep;
738 static int skip;
739
740#endif
741
742 /* Invalidate the packet buffer. */
743 if (!hash_default)
744 __inv_buffer(buf, len);
745
746 /* ISSUE: Is this needed? */
747 dev->last_rx = jiffies;
748
749#ifdef TILE_NET_DUMP_PACKETS
750 dump_packet(buf, len, "rx");
751#endif /* TILE_NET_DUMP_PACKETS */
752
753#ifdef TILE_NET_VERIFY_INGRESS
754 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
755 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
756 /*
757 * FIXME: This complains about UDP packets
758 * with a "zero" checksum (bug 6624).
759 */
760#ifdef TILE_NET_PANIC_ON_BAD
761 dump_packet(buf, len, "rx");
762 panic("Bad L4 checksum.");
763#else
764 pr_warning("Bad L4 checksum on %d byte packet.\n", len);
765#endif
766 }
767 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
768 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
769 dump_packet(buf, len, "rx");
770 panic("Bad L3 checksum.");
771 }
772 switch (NETIO_PKT_STATUS_M(metadata, pkt)) {
773 case NETIO_PKT_STATUS_OVERSIZE:
774 if (len >= 64) {
775 dump_packet(buf, len, "rx");
776 panic("Unexpected OVERSIZE.");
777 }
778 break;
779 case NETIO_PKT_STATUS_BAD:
780#ifdef TILE_NET_PANIC_ON_BAD
781 dump_packet(buf, len, "rx");
782 panic("Unexpected BAD packet.");
783#else
784 pr_warning("Unexpected BAD %d byte packet.\n", len);
785#endif
786 }
787#endif
788
789 filter = 0;
790
791 if (!(dev->flags & IFF_UP)) {
792 /* Filter packets received before we're up. */
793 filter = 1;
794 } else if (!(dev->flags & IFF_PROMISC)) {
795 /*
796 * FIXME: Implement HW multicast filter.
797 */
798 if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) {
799 /* Filter packets not for our address. */
800 const u8 *mine = dev->dev_addr;
801 filter = compare_ether_addr(mine, buf);
802 }
803 }
804
805#ifdef IGNORE_DUP_ACKS
806
807 if (len != 66) {
808 /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
809
810 other++;
811
812 } else if (index2 ==
813 qsp->__packet_receive_queue.__packet_write) {
814
815 final++;
816
817 } else {
818
819 netio_pkt_t *pkt2 = (netio_pkt_t *)
820 ((unsigned long) &qsp[1] + index2);
821
822 netio_pkt_metadata_t *metadata2 =
823 NETIO_PKT_METADATA(pkt2);
824
825 /* Extract the packet size. */
826 unsigned long len2 =
827 (NETIO_PKT_CUSTOM_LENGTH(pkt2) +
828 NET_IP_ALIGN - NETIO_PACKET_PADDING);
829
830 if (len2 == 66 &&
831 NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
832 NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {
833
834 /* Extract the "linux_buffer_t". */
835 unsigned int buffer2 = pkt2->__packet.word;
836
837 /* Convert "linux_buffer_t" to "va". */
838 void *va2 =
839 __va((phys_addr_t)(buffer2 >> 1) << 7);
840
841 /* Extract the packet data pointer. */
842 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
843 unsigned char *buf2 = va2 + NET_IP_ALIGN;
844
845 /* Invalidate the packet buffer. */
846 if (!hash_default)
847 __inv_buffer(buf2, len2);
848
849 if (is_dup_ack(buf, buf2, len)) {
850 skip++;
851 filter = 1;
852 } else {
853 keep++;
854 }
855 }
856 }
857
858 if (net_ratelimit())
859 pr_info("Other %d Final %d Keep %d Skip %d.\n",
860 other, final, keep, skip);
861
862#endif
863
864 if (filter) {
865
866 /* ISSUE: Update "drop" statistics? */
867
868 tile_net_provide_linux_buffer(info, va, small);
869
870 } else {
871
872 /* Acquire the associated "skb". */
873 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
874 struct sk_buff *skb = *skb_ptr;
875
876 /* Paranoia. */
877 if (skb->data != buf)
878 panic("Corrupt linux buffer from LIPP! "
879 "VA=%p, skb=%p, skb->data=%p\n",
880 va, skb, skb->data);
881
882 /* Encode the actual packet length. */
883 skb_put(skb, len);
884
885 /* NOTE: This call also sets "skb->dev = dev". */
886 skb->protocol = eth_type_trans(skb, dev);
887
888 /* ISSUE: Discard corrupt packets? */
889 /* ISSUE: Discard packets with bad checksums? */
890
891 /* Avoid recomputing TCP/UDP checksums. */
892 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
893 skb->ip_summed = CHECKSUM_UNNECESSARY;
894
895 netif_receive_skb(skb);
896
897 stats->rx_packets++;
898 stats->rx_bytes += len;
899
900 if (small)
901 info->num_needed_small_buffers++;
902 else
903 info->num_needed_large_buffers++;
904 }
905
906 /* Return four credits after every fourth packet. */
907 if (--qup->__receive_credit_remaining == 0) {
908 u32 interval = qup->__receive_credit_interval;
909 qup->__receive_credit_remaining = interval;
910 __netio_fastio_return_credits(qup->__fastio_index, interval);
911 }
912
913 /* Consume this packet. */
914 qup->__packet_receive_read = index2;
915
916 return !filter;
917}
918
919
920/*
921 * Handle some packets for the given device on the current CPU.
922 *
923 * ISSUE: The "rotting packet" race condition occurs if a packet
924 * arrives after the queue appears to be empty, and before the
925 * hypervisor interrupt is re-enabled.
926 */
927static int tile_net_poll(struct napi_struct *napi, int budget)
928{
929 struct net_device *dev = napi->dev;
930 struct tile_net_priv *priv = netdev_priv(dev);
931 int my_cpu = smp_processor_id();
932 struct tile_net_cpu *info = priv->cpu[my_cpu];
933 struct tile_netio_queue *queue = &info->queue;
934 netio_queue_impl_t *qsp = queue->__system_part;
935 netio_queue_user_impl_t *qup = &queue->__user_part;
936
937 unsigned int work = 0;
938
939 while (1) {
940 int index = qup->__packet_receive_read;
941 if (index == qsp->__packet_receive_queue.__packet_write)
942 break;
943
944 if (tile_net_poll_aux(info, index)) {
945 if (++work >= budget)
946 goto done;
947 }
948 }
949
950 napi_complete(&info->napi);
951
952 /* Re-enable hypervisor interrupts. */
953 enable_percpu_irq(priv->intr_id);
954
955 /* HACK: Avoid the "rotting packet" problem. */
956 if (qup->__packet_receive_read !=
957 qsp->__packet_receive_queue.__packet_write)
958 napi_schedule(&info->napi);
959
960 /* ISSUE: Handle completions? */
961
962done:
963
964 tile_net_provide_needed_buffers(info);
965
966 return work;
967}
968
969
970/*
971 * Handle an ingress interrupt for the given device on the current cpu.
972 */
973static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
974{
975 struct net_device *dev = (struct net_device *)dev_ptr;
976 struct tile_net_priv *priv = netdev_priv(dev);
977 int my_cpu = smp_processor_id();
978 struct tile_net_cpu *info = priv->cpu[my_cpu];
979
980 /* Disable hypervisor interrupt. */
981 disable_percpu_irq(priv->intr_id);
982
983 napi_schedule(&info->napi);
984
985 return IRQ_HANDLED;
986}
987
988
989/*
990 * One time initialization per interface.
991 */
992static int tile_net_open_aux(struct net_device *dev)
993{
994 struct tile_net_priv *priv = netdev_priv(dev);
995
996 int ret;
997 int dummy;
998 unsigned int epp_lotar;
999
1000 /*
1001 * Find out where EPP memory should be homed.
1002 */
1003 ret = hv_dev_pread(priv->hv_devhdl, 0,
1004 (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar),
1005 NETIO_EPP_SHM_OFF);
1006 if (ret < 0) {
1007 pr_err("could not read epp_shm_queue lotar.\n");
1008 return -EIO;
1009 }
1010
1011 /*
1012 * Home the page on the EPP.
1013 */
1014 {
1015 int epp_home = hv_lotar_to_cpu(epp_lotar);
1016 struct page *page = virt_to_page(priv->epp_queue);
1017 homecache_change_page_home(page, 0, epp_home);
1018 }
1019
1020 /*
1021 * Register the EPP shared memory queue.
1022 */
1023 {
1024 netio_ipp_address_t ea = {
1025 .va = 0,
1026 .pa = __pa(priv->epp_queue),
1027 .pte = hv_pte(0),
1028 .size = PAGE_SIZE,
1029 };
1030 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
1031 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
1032 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1033 (HV_VirtAddr)&ea,
1034 sizeof(ea),
1035 NETIO_EPP_SHM_OFF);
1036 if (ret < 0)
1037 return -EIO;
1038 }
1039
1040 /*
1041 * Start LIPP/LEPP.
1042 */
1043 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1044 sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) {
1045 pr_warning("Failed to start LIPP/LEPP.\n");
1046 return -EIO;
1047 }
1048
1049 return 0;
1050}
1051
1052
1053/*
1054 * Register with hypervisor on each CPU.
1055 *
1056 * Strangely, this function does important things even if it "fails",
1057 * which is especially common if the link is not up yet. Hopefully
1058 * these things are all "harmless" if done twice!
1059 */
1060static void tile_net_register(void *dev_ptr)
1061{
1062 struct net_device *dev = (struct net_device *)dev_ptr;
1063 struct tile_net_priv *priv = netdev_priv(dev);
1064 int my_cpu = smp_processor_id();
1065 struct tile_net_cpu *info;
1066
1067 struct tile_netio_queue *queue;
1068
1069 /* Only network cpus can receive packets. */
1070 int queue_id =
1071 cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255;
1072
1073 netio_input_config_t config = {
1074 .flags = 0,
1075 .num_receive_packets = priv->network_cpus_credits,
1076 .queue_id = queue_id
1077 };
1078
1079 int ret = 0;
1080 netio_queue_impl_t *queuep;
1081
1082 PDEBUG("tile_net_register(queue_id %d)\n", queue_id);
1083
1084 if (!strcmp(dev->name, "xgbe0"))
1085 info = &__get_cpu_var(hv_xgbe0);
1086 else if (!strcmp(dev->name, "xgbe1"))
1087 info = &__get_cpu_var(hv_xgbe1);
1088 else if (!strcmp(dev->name, "gbe0"))
1089 info = &__get_cpu_var(hv_gbe0);
1090 else if (!strcmp(dev->name, "gbe1"))
1091 info = &__get_cpu_var(hv_gbe1);
1092 else
1093 BUG();
1094
1095 /* Initialize the egress timer. */
1096 init_timer(&info->egress_timer);
1097 info->egress_timer.data = (long)info;
1098 info->egress_timer.function = tile_net_handle_egress_timer;
1099
1100 priv->cpu[my_cpu] = info;
1101
1102 /*
1103 * Register ourselves with the IPP.
1104 */
1105 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1106 (HV_VirtAddr)&config,
1107 sizeof(netio_input_config_t),
1108 NETIO_IPP_INPUT_REGISTER_OFF);
1109 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1110 ret);
1111 if (ret < 0) {
1112 printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF"
1113 " failure %d\n", ret);
1114 info->link_down = (ret == NETIO_LINK_DOWN);
1115 return;
1116 }
1117
1118 /*
1119 * Get the pointer to our queue's system part.
1120 */
1121
1122 ret = hv_dev_pread(priv->hv_devhdl, 0,
1123 (HV_VirtAddr)&queuep,
1124 sizeof(netio_queue_impl_t *),
1125 NETIO_IPP_INPUT_REGISTER_OFF);
1126 PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1127 ret);
1128 PDEBUG("queuep %p\n", queuep);
1129 if (ret <= 0) {
1130 /* ISSUE: Shouldn't this be a fatal error? */
1131 pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n");
1132 return;
1133 }
1134
1135 queue = &info->queue;
1136
1137 queue->__system_part = queuep;
1138
1139 memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t));
1140
1141 /* This is traditionally "config.num_receive_packets / 2". */
1142 queue->__user_part.__receive_credit_interval = 4;
1143 queue->__user_part.__receive_credit_remaining =
1144 queue->__user_part.__receive_credit_interval;
1145
1146 /*
1147 * Get a fastio index from the hypervisor.
1148 * ISSUE: Shouldn't this check the result?
1149 */
1150 ret = hv_dev_pread(priv->hv_devhdl, 0,
1151 (HV_VirtAddr)&queue->__user_part.__fastio_index,
1152 sizeof(queue->__user_part.__fastio_index),
1153 NETIO_IPP_GET_FASTIO_OFF);
1154 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
1155
1156 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1157
1158 /* Now we are registered. */
1159 info->registered = true;
1160}
1161
1162
1163/*
1164 * Unregister with hypervisor on each CPU.
1165 */
1166static void tile_net_unregister(void *dev_ptr)
1167{
1168 struct net_device *dev = (struct net_device *)dev_ptr;
1169 struct tile_net_priv *priv = netdev_priv(dev);
1170 int my_cpu = smp_processor_id();
1171 struct tile_net_cpu *info = priv->cpu[my_cpu];
1172
1173 int ret = 0;
1174 int dummy = 0;
1175
1176 /* Do nothing if never registered. */
1177 if (info == NULL)
1178 return;
1179
1180 /* Do nothing if already unregistered. */
1181 if (!info->registered)
1182 return;
1183
1184 /*
1185 * Unregister ourselves with LIPP.
1186 */
1187 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1188 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
1189 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n",
1190 ret);
1191 if (ret < 0) {
1192 /* FIXME: Just panic? */
1193 pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
1194 " failure %d\n", ret);
1195 }
1196
1197 /*
1198 * Discard all packets still in our NetIO queue. Hopefully,
1199 * once the unregister call is complete, there will be no
1200 * packets still in flight on the IDN.
1201 */
1202 tile_net_discard_packets(dev);
1203
1204 /* Reset state. */
1205 info->num_needed_small_buffers = 0;
1206 info->num_needed_large_buffers = 0;
1207
1208 /* Cancel egress timer. */
1209 del_timer(&info->egress_timer);
1210 info->egress_timer_scheduled = false;
1211
1212 netif_napi_del(&info->napi);
1213
1214 /* Now we are unregistered. */
1215 info->registered = false;
1216}
1217
1218
1219/*
1220 * Helper function for "tile_net_stop()".
1221 *
1222 * Also used to handle registration failure in "tile_net_open_inner()",
1223 * when "fully_opened" is known to be false, and the various extra
1224 * steps in "tile_net_stop()" are not necessary. ISSUE: It might be
1225 * simpler if we could just call "tile_net_stop()" anyway.
1226 */
1227static void tile_net_stop_aux(struct net_device *dev)
1228{
1229 struct tile_net_priv *priv = netdev_priv(dev);
1230
1231 int dummy = 0;
1232
1233 /* Unregister all tiles, so LIPP will stop delivering packets. */
1234 on_each_cpu(tile_net_unregister, (void *)dev, 1);
1235
1236 /* Stop LIPP/LEPP. */
1237 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1238 sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0)
1239 panic("Failed to stop LIPP/LEPP!\n");
1240
1241 priv->partly_opened = 0;
1242}
1243
1244
1245/*
1246 * Disable ingress interrupts for the given device on the current cpu.
1247 */
1248static void tile_net_disable_intr(void *dev_ptr)
1249{
1250 struct net_device *dev = (struct net_device *)dev_ptr;
1251 struct tile_net_priv *priv = netdev_priv(dev);
1252 int my_cpu = smp_processor_id();
1253 struct tile_net_cpu *info = priv->cpu[my_cpu];
1254
1255 /* Disable hypervisor interrupt. */
1256 disable_percpu_irq(priv->intr_id);
1257
1258 /* Disable NAPI if needed. */
1259 if (info != NULL && info->napi_enabled) {
1260 napi_disable(&info->napi);
1261 info->napi_enabled = false;
1262 }
1263}
1264
1265
1266/*
1267 * Enable ingress interrupts for the given device on the current cpu.
1268 */
1269static void tile_net_enable_intr(void *dev_ptr)
1270{
1271 struct net_device *dev = (struct net_device *)dev_ptr;
1272 struct tile_net_priv *priv = netdev_priv(dev);
1273 int my_cpu = smp_processor_id();
1274 struct tile_net_cpu *info = priv->cpu[my_cpu];
1275
1276 /* Enable hypervisor interrupt. */
1277 enable_percpu_irq(priv->intr_id);
1278
1279 /* Enable NAPI. */
1280 napi_enable(&info->napi);
1281 info->napi_enabled = true;
1282}
1283
1284
1285/*
1286 * tile_net_open_inner does most of the work of bringing up the interface.
1287 * It's called from tile_net_open(), and also from tile_net_retry_open().
1288 * The return value is 0 if the interface was brought up, < 0 if
1289 * tile_net_open() should return the return value as an error, and > 0 if
1290 * tile_net_open() should return success and schedule a work item to
1291 * periodically retry the bringup.
1292 */
1293static int tile_net_open_inner(struct net_device *dev)
1294{
1295 struct tile_net_priv *priv = netdev_priv(dev);
1296 int my_cpu = smp_processor_id();
1297 struct tile_net_cpu *info;
1298 struct tile_netio_queue *queue;
1299 unsigned int irq;
1300 int i;
1301
1302 /*
1303 * First try to register just on the local CPU, and handle any
1304 * semi-expected "link down" failure specially. Note that we
1305 * do NOT call "tile_net_stop_aux()", unlike below.
1306 */
1307 tile_net_register(dev);
1308 info = priv->cpu[my_cpu];
1309 if (!info->registered) {
1310 if (info->link_down)
1311 return 1;
1312 return -EAGAIN;
1313 }
1314
1315 /*
1316 * Now register everywhere else. If any registration fails,
1317 * even for "link down" (which might not be possible), we
1318 * clean up using "tile_net_stop_aux()".
1319 */
1320 smp_call_function(tile_net_register, (void *)dev, 1);
1321 for_each_online_cpu(i) {
1322 if (!priv->cpu[i]->registered) {
1323 tile_net_stop_aux(dev);
1324 return -EAGAIN;
1325 }
1326 }
1327
1328 queue = &info->queue;
1329
1330 /*
1331 * Set the device intr bit mask.
1332 * The tile_net_register above sets per tile __intr_id.
1333 */
1334 priv->intr_id = queue->__system_part->__intr_id;
1335 BUG_ON(!priv->intr_id);
1336
1337 /*
1338 * Register the device interrupt handler.
1339 * The __ffs() function returns the index into the interrupt handler
1340 * table from the interrupt bit mask which should have one bit
1341 * and one bit only set.
1342 */
1343 irq = __ffs(priv->intr_id);
1344 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1345 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1346 0, dev->name, (void *)dev) != 0);
1347
1348 /* ISSUE: How could "priv->fully_opened" ever be "true" here? */
1349
1350 if (!priv->fully_opened) {
1351
1352 int dummy = 0;
1353
1354 /* Allocate initial buffers. */
1355
1356 int max_buffers =
1357 priv->network_cpus_count * priv->network_cpus_credits;
1358
1359 info->num_needed_small_buffers =
1360 min(LIPP_SMALL_BUFFERS, max_buffers);
1361
1362 info->num_needed_large_buffers =
1363 min(LIPP_LARGE_BUFFERS, max_buffers);
1364
1365 tile_net_provide_needed_buffers(info);
1366
1367 if (info->num_needed_small_buffers != 0 ||
1368 info->num_needed_large_buffers != 0)
1369 panic("Insufficient memory for buffer stack!");
1370
1371 /* Start LIPP/LEPP and activate "ingress" at the shim. */
1372 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1373 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1374 panic("Failed to activate the LIPP Shim!\n");
1375
1376 priv->fully_opened = 1;
1377 }
1378
1379 /* On each tile, enable the hypervisor to trigger interrupts. */
1380 /* ISSUE: Do this before starting LIPP/LEPP? */
1381 on_each_cpu(tile_net_enable_intr, (void *)dev, 1);
1382
1383 /* Start our transmit queue. */
1384 netif_start_queue(dev);
1385
1386 return 0;
1387}
1388
1389
1390/*
1391 * Called periodically to retry bringing up the NetIO interface,
1392 * if it doesn't come up cleanly during tile_net_open().
1393 */
1394static void tile_net_open_retry(struct work_struct *w)
1395{
1396 struct delayed_work *dw =
1397 container_of(w, struct delayed_work, work);
1398
1399 struct tile_net_priv *priv =
1400 container_of(dw, struct tile_net_priv, retry_work);
1401
1402 /*
1403 * Try to bring the NetIO interface up. If it fails, reschedule
1404 * ourselves to try again later; otherwise, tell Linux we now have
1405 * a working link. ISSUE: What if the return value is negative?
1406 */
1407 if (tile_net_open_inner(priv->dev))
1408 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
1409 TILE_NET_RETRY_INTERVAL);
1410 else
1411 netif_carrier_on(priv->dev);
1412}
1413
1414
1415/*
1416 * Called when a network interface is made active.
1417 *
1418 * Returns 0 on success, negative value on failure.
1419 *
1420 * The open entry point is called when a network interface is made
1421 * active by the system (IFF_UP). At this point all resources needed
1422 * for transmit and receive operations are allocated, the interrupt
1423 * handler is registered with the OS, the watchdog timer is started,
1424 * and the stack is notified that the interface is ready.
1425 *
1426 * If the actual link is not available yet, then we tell Linux that
1427 * we have no carrier, and we keep checking until the link comes up.
1428 */
1429static int tile_net_open(struct net_device *dev)
1430{
1431 int ret = 0;
1432 struct tile_net_priv *priv = netdev_priv(dev);
1433
1434 /*
1435 * We rely on priv->partly_opened to tell us if this is the
1436 * first time this interface is being brought up. If it is
1437 * set, the IPP was already initialized and should not be
1438 * initialized again.
1439 */
1440 if (!priv->partly_opened) {
1441
1442 int count;
1443 int credits;
1444
1445 /* Initialize LIPP/LEPP, and start the Shim. */
1446 ret = tile_net_open_aux(dev);
1447 if (ret < 0) {
1448 pr_err("tile_net_open_aux failed: %d\n", ret);
1449 return ret;
1450 }
1451
1452 /* Analyze the network cpus. */
1453
1454 if (network_cpus_used)
1455 cpumask_copy(&priv->network_cpus_map,
1456 &network_cpus_map);
1457 else
1458 cpumask_copy(&priv->network_cpus_map, cpu_online_mask);
1459
1460
1461 count = cpumask_weight(&priv->network_cpus_map);
1462
1463 /* Limit credits to available buffers, and apply min. */
1464 credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1);
1465
1466 /* Apply "GBE" max limit. */
1467 /* ISSUE: Use higher limit for XGBE? */
1468 credits = min(NETIO_MAX_RECEIVE_PKTS, credits);
1469
1470 priv->network_cpus_count = count;
1471 priv->network_cpus_credits = credits;
1472
1473#ifdef TILE_NET_DEBUG
1474 pr_info("Using %d network cpus, with %d credits each\n",
1475 priv->network_cpus_count, priv->network_cpus_credits);
1476#endif
1477
1478 priv->partly_opened = 1;
1479 }
1480
1481 /*
1482 * Attempt to bring up the link.
1483 */
1484 ret = tile_net_open_inner(dev);
1485 if (ret <= 0) {
1486 if (ret == 0)
1487 netif_carrier_on(dev);
1488 return ret;
1489 }
1490
1491 /*
1492 * We were unable to bring up the NetIO interface, but we want to
1493 * try again in a little bit. Tell Linux that we have no carrier
1494 * so it doesn't try to use the interface before the link comes up
1495 * and then remember to try again later.
1496 */
1497 netif_carrier_off(dev);
1498 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
1499 TILE_NET_RETRY_INTERVAL);
1500
1501 return 0;
1502}
1503
1504
1505/*
1506 * Disables a network interface.
1507 *
1508 * Returns 0, this is not allowed to fail.
1509 *
1510 * The close entry point is called when an interface is de-activated
1511 * by the OS. The hardware is still under the drivers control, but
1512 * needs to be disabled. A global MAC reset is issued to stop the
1513 * hardware, and all transmit and receive resources are freed.
1514 *
1515 * ISSUE: Can this can be called while "tile_net_poll()" is running?
1516 */
1517static int tile_net_stop(struct net_device *dev)
1518{
1519 struct tile_net_priv *priv = netdev_priv(dev);
1520
1521 bool pending = true;
1522
1523 PDEBUG("tile_net_stop()\n");
1524
1525 /* ISSUE: Only needed if not yet fully open. */
1526 cancel_delayed_work_sync(&priv->retry_work);
1527
1528 /* Can't transmit any more. */
1529 netif_stop_queue(dev);
1530
1531 /*
1532 * Disable hypervisor interrupts on each tile.
1533 */
1534 on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
1535
1536 /*
1537 * Unregister the interrupt handler.
1538 * The __ffs() function returns the index into the interrupt handler
1539 * table from the interrupt bit mask which should have one bit
1540 * and one bit only set.
1541 */
1542 if (priv->intr_id)
1543 free_irq(__ffs(priv->intr_id), dev);
1544
1545 /*
1546 * Drain all the LIPP buffers.
1547 */
1548
1549 while (true) {
1550 int buffer;
1551
1552 /* NOTE: This should never fail. */
1553 if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer,
1554 sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0)
1555 break;
1556
1557 /* Stop when done. */
1558 if (buffer == 0)
1559 break;
1560
1561 {
1562 /* Convert "linux_buffer_t" to "va". */
1563 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
1564
1565 /* Acquire the associated "skb". */
1566 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
1567 struct sk_buff *skb = *skb_ptr;
1568
1569 kfree_skb(skb);
1570 }
1571 }
1572
1573 /* Stop LIPP/LEPP. */
1574 tile_net_stop_aux(dev);
1575
1576
1577 priv->fully_opened = 0;
1578
1579
1580 /*
1581 * XXX: ISSUE: It appears that, in practice anyway, by the
1582 * time we get here, there are no pending completions.
1583 */
1584 while (pending) {
1585
1586 struct sk_buff *olds[32];
1587 unsigned int wanted = 32;
1588 unsigned int i, nolds = 0;
1589
1590 nolds = tile_net_lepp_grab_comps(dev, olds,
1591 wanted, &pending);
1592
1593 /* ISSUE: We have never actually seen this debug spew. */
1594 if (nolds != 0)
1595 pr_info("During tile_net_stop(), grabbed %d comps.\n",
1596 nolds);
1597
1598 for (i = 0; i < nolds; i++)
1599 kfree_skb(olds[i]);
1600 }
1601
1602
1603 /* Wipe the EPP queue. */
1604 memset(priv->epp_queue, 0, sizeof(lepp_queue_t));
1605
1606 /* Evict the EPP queue. */
1607 finv_buffer(priv->epp_queue, PAGE_SIZE);
1608
1609 return 0;
1610}
1611
1612
1613/*
1614 * Prepare the "frags" info for the resulting LEPP command.
1615 *
1616 * If needed, flush the memory used by the frags.
1617 */
1618static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
1619 struct sk_buff *skb,
1620 void *b_data, unsigned int b_len)
1621{
1622 unsigned int i, n = 0;
1623
1624 struct skb_shared_info *sh = skb_shinfo(skb);
1625
1626 phys_addr_t cpa;
1627
1628 if (b_len != 0) {
1629
1630 if (!hash_default)
1631 finv_buffer_remote(b_data, b_len);
1632
1633 cpa = __pa(b_data);
1634 frags[n].cpa_lo = cpa;
1635 frags[n].cpa_hi = cpa >> 32;
1636 frags[n].length = b_len;
1637 frags[n].hash_for_home = hash_default;
1638 n++;
1639 }
1640
1641 for (i = 0; i < sh->nr_frags; i++) {
1642
1643 skb_frag_t *f = &sh->frags[i];
1644 unsigned long pfn = page_to_pfn(f->page);
1645
1646 /* FIXME: Compute "hash_for_home" properly. */
1647 /* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */
1648 int hash_for_home = hash_default;
1649
1650 /* FIXME: Hmmm. */
1651 if (!hash_default) {
1652 void *va = pfn_to_kaddr(pfn) + f->page_offset;
1653 BUG_ON(PageHighMem(f->page));
1654 finv_buffer_remote(va, f->size);
1655 }
1656
1657 cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset;
1658 frags[n].cpa_lo = cpa;
1659 frags[n].cpa_hi = cpa >> 32;
1660 frags[n].length = f->size;
1661 frags[n].hash_for_home = hash_for_home;
1662 n++;
1663 }
1664
1665 return n;
1666}
1667
1668
1669/*
1670 * This function takes "skb", consisting of a header template and a
1671 * payload, and hands it to LEPP, to emit as one or more segments,
1672 * each consisting of a possibly modified header, plus a piece of the
1673 * payload, via a process known as "tcp segmentation offload".
1674 *
1675 * Usually, "data" will contain the header template, of size "sh_len",
1676 * and "sh->frags" will contain "skb->data_len" bytes of payload, and
1677 * there will be "sh->gso_segs" segments.
1678 *
1679 * Sometimes, if "sendfile()" requires copying, we will be called with
1680 * "data" containing the header and payload, with "frags" being empty.
1681 *
1682 * In theory, "sh->nr_frags" could be 3, but in practice, it seems
1683 * that this will never actually happen.
1684 *
1685 * See "emulate_large_send_offload()" for some reference code, which
1686 * does not handle checksumming.
1687 *
1688 * ISSUE: How do we make sure that high memory DMA does not migrate?
1689 */
1690static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1691{
1692 struct tile_net_priv *priv = netdev_priv(dev);
1693 int my_cpu = smp_processor_id();
1694 struct tile_net_cpu *info = priv->cpu[my_cpu];
1695 struct tile_net_stats_t *stats = &info->stats;
1696
1697 struct skb_shared_info *sh = skb_shinfo(skb);
1698
1699 unsigned char *data = skb->data;
1700
1701 /* The ip header follows the ethernet header. */
1702 struct iphdr *ih = ip_hdr(skb);
1703 unsigned int ih_len = ih->ihl * 4;
1704
1705 /* Note that "nh == ih", by definition. */
1706 unsigned char *nh = skb_network_header(skb);
1707 unsigned int eh_len = nh - data;
1708
1709 /* The tcp header follows the ip header. */
1710 struct tcphdr *th = (struct tcphdr *)(nh + ih_len);
1711 unsigned int th_len = th->doff * 4;
1712
1713 /* The total number of header bytes. */
1714 /* NOTE: This may be less than skb_headlen(skb). */
1715 unsigned int sh_len = eh_len + ih_len + th_len;
1716
1717 /* The number of payload bytes at "skb->data + sh_len". */
1718 /* This is non-zero for sendfile() without HIGHDMA. */
1719 unsigned int b_len = skb_headlen(skb) - sh_len;
1720
1721 /* The total number of payload bytes. */
1722 unsigned int d_len = b_len + skb->data_len;
1723
1724 /* The maximum payload size. */
1725 unsigned int p_len = sh->gso_size;
1726
1727 /* The total number of segments. */
1728 unsigned int num_segs = sh->gso_segs;
1729
1730 /* The temporary copy of the command. */
1731 u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4];
1732 lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body;
1733
1734 /* Analyze the "frags". */
1735 unsigned int num_frags =
1736 tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len);
1737
1738 /* The size of the command, including frags and header. */
1739 size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len);
1740
1741 /* The command header. */
1742 lepp_tso_cmd_t cmd_init = {
1743 .tso = true,
1744 .header_size = sh_len,
1745 .ip_offset = eh_len,
1746 .tcp_offset = eh_len + ih_len,
1747 .payload_size = p_len,
1748 .num_frags = num_frags,
1749 };
1750
1751 unsigned long irqflags;
1752
1753 lepp_queue_t *eq = priv->epp_queue;
1754
1755 struct sk_buff *olds[4];
1756 unsigned int wanted = 4;
1757 unsigned int i, nolds = 0;
1758
1759 unsigned int cmd_head, cmd_tail, cmd_next;
1760 unsigned int comp_tail;
1761
1762 unsigned int free_slots;
1763
1764
1765 /* Paranoia. */
1766 BUG_ON(skb->protocol != htons(ETH_P_IP));
1767 BUG_ON(ih->protocol != IPPROTO_TCP);
1768 BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL);
1769 BUG_ON(num_frags > LEPP_MAX_FRAGS);
1770 /*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */
1771 BUG_ON(num_segs <= 1);
1772
1773
1774 /* Finish preparing the command. */
1775
1776 /* Copy the command header. */
1777 *cmd = cmd_init;
1778
1779 /* Copy the "header". */
1780 memcpy(&cmd->frags[num_frags], data, sh_len);
1781
1782
1783 /* Prefetch and wait, to minimize time spent holding the spinlock. */
1784 prefetch_L1(&eq->comp_tail);
1785 prefetch_L1(&eq->cmd_tail);
1786 mb();
1787
1788
1789 /* Enqueue the command. */
1790
1791 spin_lock_irqsave(&priv->cmd_lock, irqflags);
1792
1793 /*
1794 * Handle completions if needed to make room.
1795 * HACK: Spin until there is sufficient room.
1796 */
1797 free_slots = lepp_num_free_comp_slots(eq);
1798 if (free_slots < 1) {
1799spin:
1800 nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
1801 wanted - nolds, NULL);
1802 if (lepp_num_free_comp_slots(eq) < 1)
1803 goto spin;
1804 }
1805
1806 cmd_head = eq->cmd_head;
1807 cmd_tail = eq->cmd_tail;
1808
1809 /* NOTE: The "gotos" below are untested. */
1810
1811 /* Prepare to advance, detecting full queue. */
1812 cmd_next = cmd_tail + cmd_size;
1813 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1814 goto spin;
1815 if (cmd_next > LEPP_CMD_LIMIT) {
1816 cmd_next = 0;
1817 if (cmd_next == cmd_head)
1818 goto spin;
1819 }
1820
1821 /* Copy the command. */
1822 memcpy(&eq->cmds[cmd_tail], cmd, cmd_size);
1823
1824 /* Advance. */
1825 cmd_tail = cmd_next;
1826
1827 /* Record "skb" for eventual freeing. */
1828 comp_tail = eq->comp_tail;
1829 eq->comps[comp_tail] = skb;
1830 LEPP_QINC(comp_tail);
1831 eq->comp_tail = comp_tail;
1832
1833 /* Flush before allowing LEPP to handle the command. */
1834 __insn_mf();
1835
1836 eq->cmd_tail = cmd_tail;
1837
1838 spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
1839
1840 if (nolds == 0)
1841 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
1842
1843 /* Handle completions. */
1844 for (i = 0; i < nolds; i++)
1845 kfree_skb(olds[i]);
1846
1847 /* Update stats. */
1848 stats->tx_packets += num_segs;
1849 stats->tx_bytes += (num_segs * sh_len) + d_len;
1850
1851 /* Make sure the egress timer is scheduled. */
1852 tile_net_schedule_egress_timer(info);
1853
1854 return NETDEV_TX_OK;
1855}
1856
1857
1858/*
1859 * Transmit a packet (called by the kernel via "hard_start_xmit" hook).
1860 */
1861static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1862{
1863 struct tile_net_priv *priv = netdev_priv(dev);
1864 int my_cpu = smp_processor_id();
1865 struct tile_net_cpu *info = priv->cpu[my_cpu];
1866 struct tile_net_stats_t *stats = &info->stats;
1867
1868 unsigned long irqflags;
1869
1870 struct skb_shared_info *sh = skb_shinfo(skb);
1871
1872 unsigned int len = skb->len;
1873 unsigned char *data = skb->data;
1874
1875 unsigned int csum_start = skb->csum_start - skb_headroom(skb);
1876
1877 lepp_frag_t frags[LEPP_MAX_FRAGS];
1878
1879 unsigned int num_frags;
1880
1881 lepp_queue_t *eq = priv->epp_queue;
1882
1883 struct sk_buff *olds[4];
1884 unsigned int wanted = 4;
1885 unsigned int i, nolds = 0;
1886
1887 unsigned int cmd_size = sizeof(lepp_cmd_t);
1888
1889 unsigned int cmd_head, cmd_tail, cmd_next;
1890 unsigned int comp_tail;
1891
1892 lepp_cmd_t cmds[LEPP_MAX_FRAGS];
1893
1894 unsigned int free_slots;
1895
1896
1897 /*
1898 * This is paranoia, since we think that if the link doesn't come
1899 * up, telling Linux we have no carrier will keep it from trying
1900 * to transmit. If it does, though, we can't execute this routine,
1901 * since data structures we depend on aren't set up yet.
1902 */
1903 if (!info->registered)
1904 return NETDEV_TX_BUSY;
1905
1906
1907 /* Save the timestamp. */
1908 dev->trans_start = jiffies;
1909
1910
1911#ifdef TILE_NET_PARANOIA
1912#if CHIP_HAS_CBOX_HOME_MAP()
1913 if (hash_default) {
1914 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
1915 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
1916 panic("Non-coherent egress buffer!");
1917 }
1918#endif
1919#endif
1920
1921
1922#ifdef TILE_NET_DUMP_PACKETS
1923 /* ISSUE: Does not dump the "frags". */
1924 dump_packet(data, skb_headlen(skb), "tx");
1925#endif /* TILE_NET_DUMP_PACKETS */
1926
1927
1928 if (sh->gso_size != 0)
1929 return tile_net_tx_tso(skb, dev);
1930
1931
1932 /* Prepare the commands. */
1933
1934 num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
1935
1936 for (i = 0; i < num_frags; i++) {
1937
1938 bool final = (i == num_frags - 1);
1939
1940 lepp_cmd_t cmd = {
1941 .cpa_lo = frags[i].cpa_lo,
1942 .cpa_hi = frags[i].cpa_hi,
1943 .length = frags[i].length,
1944 .hash_for_home = frags[i].hash_for_home,
1945 .send_completion = final,
1946 .end_of_packet = final
1947 };
1948
1949 if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) {
1950 cmd.compute_checksum = 1;
1951 cmd.checksum_data.bits.start_byte = csum_start;
1952 cmd.checksum_data.bits.count = len - csum_start;
1953 cmd.checksum_data.bits.destination_byte =
1954 csum_start + skb->csum_offset;
1955 }
1956
1957 cmds[i] = cmd;
1958 }
1959
1960
1961 /* Prefetch and wait, to minimize time spent holding the spinlock. */
1962 prefetch_L1(&eq->comp_tail);
1963 prefetch_L1(&eq->cmd_tail);
1964 mb();
1965
1966
1967 /* Enqueue the commands. */
1968
1969 spin_lock_irqsave(&priv->cmd_lock, irqflags);
1970
1971 /*
1972 * Handle completions if needed to make room.
1973 * HACK: Spin until there is sufficient room.
1974 */
1975 free_slots = lepp_num_free_comp_slots(eq);
1976 if (free_slots < 1) {
1977spin:
1978 nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
1979 wanted - nolds, NULL);
1980 if (lepp_num_free_comp_slots(eq) < 1)
1981 goto spin;
1982 }
1983
1984 cmd_head = eq->cmd_head;
1985 cmd_tail = eq->cmd_tail;
1986
1987 /* NOTE: The "gotos" below are untested. */
1988
1989 /* Copy the commands, or fail. */
1990 for (i = 0; i < num_frags; i++) {
1991
1992 /* Prepare to advance, detecting full queue. */
1993 cmd_next = cmd_tail + cmd_size;
1994 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1995 goto spin;
1996 if (cmd_next > LEPP_CMD_LIMIT) {
1997 cmd_next = 0;
1998 if (cmd_next == cmd_head)
1999 goto spin;
2000 }
2001
2002 /* Copy the command. */
2003 *(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i];
2004
2005 /* Advance. */
2006 cmd_tail = cmd_next;
2007 }
2008
2009 /* Record "skb" for eventual freeing. */
2010 comp_tail = eq->comp_tail;
2011 eq->comps[comp_tail] = skb;
2012 LEPP_QINC(comp_tail);
2013 eq->comp_tail = comp_tail;
2014
2015 /* Flush before allowing LEPP to handle the command. */
2016 __insn_mf();
2017
2018 eq->cmd_tail = cmd_tail;
2019
2020 spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
2021
2022 if (nolds == 0)
2023 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
2024
2025 /* Handle completions. */
2026 for (i = 0; i < nolds; i++)
2027 kfree_skb(olds[i]);
2028
2029 /* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */
2030 stats->tx_packets++;
2031 stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN);
2032
2033 /* Make sure the egress timer is scheduled. */
2034 tile_net_schedule_egress_timer(info);
2035
2036 return NETDEV_TX_OK;
2037}
2038
2039
2040/*
2041 * Deal with a transmit timeout.
2042 */
2043static void tile_net_tx_timeout(struct net_device *dev)
2044{
2045 PDEBUG("tile_net_tx_timeout()\n");
2046 PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies,
2047 jiffies - dev->trans_start);
2048
2049 /* XXX: ISSUE: This doesn't seem useful for us. */
2050 netif_wake_queue(dev);
2051}
2052
2053
2054/*
2055 * Ioctl commands.
2056 */
2057static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
2058{
2059 return -EOPNOTSUPP;
2060}
2061
2062
2063/*
2064 * Get System Network Statistics.
2065 *
2066 * Returns the address of the device statistics structure.
2067 */
2068static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
2069{
2070 struct tile_net_priv *priv = netdev_priv(dev);
2071 u32 rx_packets = 0;
2072 u32 tx_packets = 0;
2073 u32 rx_bytes = 0;
2074 u32 tx_bytes = 0;
2075 int i;
2076
2077 for_each_online_cpu(i) {
2078 if (priv->cpu[i]) {
2079 rx_packets += priv->cpu[i]->stats.rx_packets;
2080 rx_bytes += priv->cpu[i]->stats.rx_bytes;
2081 tx_packets += priv->cpu[i]->stats.tx_packets;
2082 tx_bytes += priv->cpu[i]->stats.tx_bytes;
2083 }
2084 }
2085
2086 priv->stats.rx_packets = rx_packets;
2087 priv->stats.rx_bytes = rx_bytes;
2088 priv->stats.tx_packets = tx_packets;
2089 priv->stats.tx_bytes = tx_bytes;
2090
2091 return &priv->stats;
2092}
2093
2094
2095/*
2096 * Change the "mtu".
2097 *
2098 * The "change_mtu" method is usually not needed.
2099 * If you need it, it must be like this.
2100 */
2101static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
2102{
2103 PDEBUG("tile_net_change_mtu()\n");
2104
2105 /* Check ranges. */
2106 if ((new_mtu < 68) || (new_mtu > 1500))
2107 return -EINVAL;
2108
2109 /* Accept the value. */
2110 dev->mtu = new_mtu;
2111
2112 return 0;
2113}
2114
2115
2116/*
2117 * Change the Ethernet Address of the NIC.
2118 *
2119 * The hypervisor driver does not support changing MAC address. However,
2120 * the IPP does not do anything with the MAC address, so the address which
2121 * gets used on outgoing packets, and which is accepted on incoming packets,
2122 * is completely up to the NetIO program or kernel driver which is actually
2123 * handling them.
2124 *
2125 * Returns 0 on success, negative on failure.
2126 */
2127static int tile_net_set_mac_address(struct net_device *dev, void *p)
2128{
2129 struct sockaddr *addr = p;
2130
2131 if (!is_valid_ether_addr(addr->sa_data))
2132 return -EINVAL;
2133
2134 /* ISSUE: Note that "dev_addr" is now a pointer. */
2135 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2136
2137 return 0;
2138}
2139
2140
2141/*
2142 * Obtain the MAC address from the hypervisor.
2143 * This must be done before opening the device.
2144 */
2145static int tile_net_get_mac(struct net_device *dev)
2146{
2147 struct tile_net_priv *priv = netdev_priv(dev);
2148
2149 char hv_dev_name[32];
2150 int len;
2151
2152 __netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF };
2153
2154 int ret;
2155
2156 /* For example, "xgbe0". */
2157 strcpy(hv_dev_name, dev->name);
2158 len = strlen(hv_dev_name);
2159
2160 /* For example, "xgbe/0". */
2161 hv_dev_name[len] = hv_dev_name[len - 1];
2162 hv_dev_name[len - 1] = '/';
2163 len++;
2164
2165 /* For example, "xgbe/0/native_hash". */
2166 strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native");
2167
2168 /* Get the hypervisor handle for this device. */
2169 priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0);
2170 PDEBUG("hv_dev_open(%s) returned %d %p\n",
2171 hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl);
2172 if (priv->hv_devhdl < 0) {
2173 if (priv->hv_devhdl == HV_ENODEV)
2174 printk(KERN_DEBUG "Ignoring unconfigured device %s\n",
2175 hv_dev_name);
2176 else
2177 printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n",
2178 hv_dev_name, priv->hv_devhdl);
2179 return -1;
2180 }
2181
2182 /*
2183 * Read the hardware address from the hypervisor.
2184 * ISSUE: Note that "dev_addr" is now a pointer.
2185 */
2186 offset.bits.class = NETIO_PARAM;
2187 offset.bits.addr = NETIO_PARAM_MAC;
2188 ret = hv_dev_pread(priv->hv_devhdl, 0,
2189 (HV_VirtAddr)dev->dev_addr, dev->addr_len,
2190 offset.word);
2191 PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret);
2192 if (ret <= 0) {
2193 printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n",
2194 dev->name);
2195 /*
2196 * Since the device is configured by the hypervisor but we
2197 * can't get its MAC address, we are most likely running
2198 * the simulator, so let's generate a random MAC address.
2199 */
2200 random_ether_addr(dev->dev_addr);
2201 }
2202
2203 return 0;
2204}
2205
2206
2207static struct net_device_ops tile_net_ops = {
2208 .ndo_open = tile_net_open,
2209 .ndo_stop = tile_net_stop,
2210 .ndo_start_xmit = tile_net_tx,
2211 .ndo_do_ioctl = tile_net_ioctl,
2212 .ndo_get_stats = tile_net_get_stats,
2213 .ndo_change_mtu = tile_net_change_mtu,
2214 .ndo_tx_timeout = tile_net_tx_timeout,
2215 .ndo_set_mac_address = tile_net_set_mac_address
2216};
2217
2218
2219/*
2220 * The setup function.
2221 *
2222 * This uses ether_setup() to assign various fields in dev, including
2223 * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
2224 */
2225static void tile_net_setup(struct net_device *dev)
2226{
2227 PDEBUG("tile_net_setup()\n");
2228
2229 ether_setup(dev);
2230
2231 dev->netdev_ops = &tile_net_ops;
2232
2233 dev->watchdog_timeo = TILE_NET_TIMEOUT;
2234
2235 /* We want lockless xmit. */
2236 dev->features |= NETIF_F_LLTX;
2237
2238 /* We support hardware tx checksums. */
2239 dev->features |= NETIF_F_HW_CSUM;
2240
2241 /* We support scatter/gather. */
2242 dev->features |= NETIF_F_SG;
2243
2244 /* We support TSO. */
2245 dev->features |= NETIF_F_TSO;
2246
2247#ifdef TILE_NET_GSO
2248 /* We support GSO. */
2249 dev->features |= NETIF_F_GSO;
2250#endif
2251
2252 if (hash_default)
2253 dev->features |= NETIF_F_HIGHDMA;
2254
2255 /* ISSUE: We should support NETIF_F_UFO. */
2256
2257 dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN;
2258
2259 dev->mtu = TILE_NET_MTU;
2260}
2261
2262
2263/*
2264 * Allocate the device structure, register the device, and obtain the
2265 * MAC address from the hypervisor.
2266 */
2267static struct net_device *tile_net_dev_init(const char *name)
2268{
2269 int ret;
2270 struct net_device *dev;
2271 struct tile_net_priv *priv;
2272 struct page *page;
2273
2274 /*
2275 * Allocate the device structure. This allocates "priv", calls
2276 * tile_net_setup(), and saves "name". Normally, "name" is a
2277 * template, instantiated by register_netdev(), but not for us.
2278 */
2279 dev = alloc_netdev(sizeof(*priv), name, tile_net_setup);
2280 if (!dev) {
2281 pr_err("alloc_netdev(%s) failed\n", name);
2282 return NULL;
2283 }
2284
2285 priv = netdev_priv(dev);
2286
2287 /* Initialize "priv". */
2288
2289 memset(priv, 0, sizeof(*priv));
2290
2291 /* Save "dev" for "tile_net_open_retry()". */
2292 priv->dev = dev;
2293
2294 INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
2295
2296 spin_lock_init(&priv->cmd_lock);
2297 spin_lock_init(&priv->comp_lock);
2298
2299 /* Allocate "epp_queue". */
2300 BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
2301 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
2302 if (!page) {
2303 free_netdev(dev);
2304 return NULL;
2305 }
2306 priv->epp_queue = page_address(page);
2307
2308 /* Register the network device. */
2309 ret = register_netdev(dev);
2310 if (ret) {
2311 pr_err("register_netdev %s failed %d\n", dev->name, ret);
2312 free_page((unsigned long)priv->epp_queue);
2313 free_netdev(dev);
2314 return NULL;
2315 }
2316
2317 /* Get the MAC address. */
2318 ret = tile_net_get_mac(dev);
2319 if (ret < 0) {
2320 unregister_netdev(dev);
2321 free_page((unsigned long)priv->epp_queue);
2322 free_netdev(dev);
2323 return NULL;
2324 }
2325
2326 return dev;
2327}
2328
2329
2330/*
2331 * Module cleanup.
2332 */
2333static void tile_net_cleanup(void)
2334{
2335 int i;
2336
2337 for (i = 0; i < TILE_NET_DEVS; i++) {
2338 if (tile_net_devs[i]) {
2339 struct net_device *dev = tile_net_devs[i];
2340 struct tile_net_priv *priv = netdev_priv(dev);
2341 unregister_netdev(dev);
2342 finv_buffer(priv->epp_queue, PAGE_SIZE);
2343 free_page((unsigned long)priv->epp_queue);
2344 free_netdev(dev);
2345 }
2346 }
2347}
2348
2349
2350/*
2351 * Module initialization.
2352 */
2353static int tile_net_init_module(void)
2354{
2355 pr_info("Tilera IPP Net Driver\n");
2356
2357 tile_net_devs[0] = tile_net_dev_init("xgbe0");
2358 tile_net_devs[1] = tile_net_dev_init("xgbe1");
2359 tile_net_devs[2] = tile_net_dev_init("gbe0");
2360 tile_net_devs[3] = tile_net_dev_init("gbe1");
2361
2362 return 0;
2363}
2364
2365
2366#ifndef MODULE
2367/*
2368 * The "network_cpus" boot argument specifies the cpus that are dedicated
2369 * to handle ingress packets.
2370 *
2371 * The parameter should be in the form "network_cpus=m-n[,x-y]", where
2372 * m, n, x, y are integer numbers that represent the cpus that can be
2373 * neither a dedicated cpu nor a dataplane cpu.
2374 */
2375static int __init network_cpus_setup(char *str)
2376{
2377 int rc = cpulist_parse_crop(str, &network_cpus_map);
2378 if (rc != 0) {
2379 pr_warning("network_cpus=%s: malformed cpu list\n",
2380 str);
2381 } else {
2382
2383 /* Remove dedicated cpus. */
2384 cpumask_and(&network_cpus_map, &network_cpus_map,
2385 cpu_possible_mask);
2386
2387
2388 if (cpumask_empty(&network_cpus_map)) {
2389 pr_warning("Ignoring network_cpus='%s'.\n",
2390 str);
2391 } else {
2392 char buf[1024];
2393 cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
2394 pr_info("Linux network CPUs: %s\n", buf);
2395 network_cpus_used = true;
2396 }
2397 }
2398
2399 return 0;
2400}
2401__setup("network_cpus=", network_cpus_setup);
2402#endif
2403
2404
2405module_init(tile_net_init_module);
2406module_exit(tile_net_cleanup);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index f01e344cf4bd..98e6fdf34d30 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
49obj-$(CONFIG_X86_VISWS) += setup-irq.o 49obj-$(CONFIG_X86_VISWS) += setup-irq.o
50obj-$(CONFIG_MN10300) += setup-bus.o 50obj-$(CONFIG_MN10300) += setup-bus.o
51obj-$(CONFIG_MICROBLAZE) += setup-bus.o 51obj-$(CONFIG_MICROBLAZE) += setup-bus.o
52obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
52 53
53# 54#
54# ACPI Related PCI FW Functions 55# ACPI Related PCI FW Functions
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f5c63fe9db5c..6f9350cabbd5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB,
2136DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, 2136DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
2137 quirk_unhide_mch_dev6); 2137 quirk_unhide_mch_dev6);
2138 2138
2139#ifdef CONFIG_TILE
2140/*
2141 * The Tilera TILEmpower platform needs to set the link speed
2142 * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed
2143 * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe
2144 * capability register of the PEX8624 PCIe switch. The switch
2145 * supports link speed auto negotiation, but falsely sets
2146 * the link speed to 5GT/s.
2147 */
2148static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
2149{
2150 if (tile_plx_gen1) {
2151 pci_write_config_dword(dev, 0x98, 0x1);
2152 mdelay(50);
2153 }
2154}
2155DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
2156#endif /* CONFIG_TILE */
2139 2157
2140#ifdef CONFIG_PCI_MSI 2158#ifdef CONFIG_PCI_MSI
2141/* Some chipsets do not support MSI. We cannot easily rely on setting 2159/* Some chipsets do not support MSI. We cannot easily rely on setting