diff options
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/00-INDEX | 116 | ||||
-rw-r--r-- | Documentation/networking/bonding.txt | 31 | ||||
-rw-r--r-- | Documentation/networking/dmfe.txt | 3 | ||||
-rw-r--r-- | Documentation/networking/ifenslave.c | 18 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 35 | ||||
-rw-r--r-- | Documentation/networking/netdev-features.txt | 154 | ||||
-rw-r--r-- | Documentation/networking/nfc.txt | 128 | ||||
-rw-r--r-- | Documentation/networking/scaling.txt | 378 | ||||
-rw-r--r-- | Documentation/networking/stmmac.txt | 200 |
9 files changed, 969 insertions, 94 deletions
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 4edd78dfb36..bbce1215434 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX | |||
@@ -1,13 +1,21 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file | 2 | - this file |
3 | 3c359.txt | ||
4 | - information on the 3Com TokenLink Velocity XL (3c5359) driver. | ||
3 | 3c505.txt | 5 | 3c505.txt |
4 | - information on the 3Com EtherLink Plus (3c505) driver. | 6 | - information on the 3Com EtherLink Plus (3c505) driver. |
7 | 3c509.txt | ||
8 | - information on the 3Com Etherlink III Series Ethernet cards. | ||
5 | 6pack.txt | 9 | 6pack.txt |
6 | - info on the 6pack protocol, an alternative to KISS for AX.25 | 10 | - info on the 6pack protocol, an alternative to KISS for AX.25 |
7 | DLINK.txt | 11 | DLINK.txt |
8 | - info on the D-Link DE-600/DE-620 parallel port pocket adapters | 12 | - info on the D-Link DE-600/DE-620 parallel port pocket adapters |
9 | PLIP.txt | 13 | PLIP.txt |
10 | - PLIP: The Parallel Line Internet Protocol device driver | 14 | - PLIP: The Parallel Line Internet Protocol device driver |
15 | README.ipw2100 | ||
16 | - README for the Intel PRO/Wireless 2100 driver. | ||
17 | README.ipw2200 | ||
18 | - README for the Intel PRO/Wireless 2915ABG and 2200BG driver. | ||
11 | README.sb1000 | 19 | README.sb1000 |
12 | - info on General Instrument/NextLevel SURFboard1000 cable modem. | 20 | - info on General Instrument/NextLevel SURFboard1000 cable modem. |
13 | alias.txt | 21 | alias.txt |
@@ -20,8 +28,12 @@ atm.txt | |||
20 | - info on where to get ATM programs and support for Linux. | 28 | - info on where to get ATM programs and support for Linux. |
21 | ax25.txt | 29 | ax25.txt |
22 | - info on using AX.25 and NET/ROM code for Linux | 30 | - info on using AX.25 and NET/ROM code for Linux |
31 | batman-adv.txt | ||
32 | - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames. | ||
23 | baycom.txt | 33 | baycom.txt |
24 | - info on the driver for Baycom style amateur radio modems | 34 | - info on the driver for Baycom style amateur radio modems |
35 | bonding.txt | ||
36 | - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux. | ||
25 | bridge.txt | 37 | bridge.txt |
26 | - where to get user space programs for ethernet bridging with Linux. | 38 | - where to get user space programs for ethernet bridging with Linux. |
27 | can.txt | 39 | can.txt |
@@ -34,32 +46,60 @@ cxacru.txt | |||
34 | - Conexant AccessRunner USB ADSL Modem | 46 | - Conexant AccessRunner USB ADSL Modem |
35 | cxacru-cf.py | 47 | cxacru-cf.py |
36 | - Conexant AccessRunner USB ADSL Modem configuration file parser | 48 | - Conexant AccessRunner USB ADSL Modem configuration file parser |
49 | cxgb.txt | ||
50 | - Release Notes for the Chelsio N210 Linux device driver. | ||
51 | dccp.txt | ||
52 | - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42). | ||
37 | de4x5.txt | 53 | de4x5.txt |
38 | - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver | 54 | - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver |
39 | decnet.txt | 55 | decnet.txt |
40 | - info on using the DECnet networking layer in Linux. | 56 | - info on using the DECnet networking layer in Linux. |
41 | depca.txt | 57 | depca.txt |
42 | - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver | 58 | - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver |
59 | dl2k.txt | ||
60 | - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko). | ||
61 | dm9000.txt | ||
62 | - README for the Simtec DM9000 Network driver. | ||
43 | dmfe.txt | 63 | dmfe.txt |
44 | - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver. | 64 | - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver. |
65 | dns_resolver.txt | ||
66 | - The DNS resolver module allows kernel servies to make DNS queries. | ||
67 | driver.txt | ||
68 | - Softnet driver issues. | ||
45 | e100.txt | 69 | e100.txt |
46 | - info on Intel's EtherExpress PRO/100 line of 10/100 boards | 70 | - info on Intel's EtherExpress PRO/100 line of 10/100 boards |
47 | e1000.txt | 71 | e1000.txt |
48 | - info on Intel's E1000 line of gigabit ethernet boards | 72 | - info on Intel's E1000 line of gigabit ethernet boards |
73 | e1000e.txt | ||
74 | - README for the Intel Gigabit Ethernet Driver (e1000e). | ||
49 | eql.txt | 75 | eql.txt |
50 | - serial IP load balancing | 76 | - serial IP load balancing |
51 | ewrk3.txt | 77 | ewrk3.txt |
52 | - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver | 78 | - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver |
79 | fib_trie.txt | ||
80 | - Level Compressed Trie (LC-trie) notes: a structure for routing. | ||
53 | filter.txt | 81 | filter.txt |
54 | - Linux Socket Filtering | 82 | - Linux Socket Filtering |
55 | fore200e.txt | 83 | fore200e.txt |
56 | - FORE Systems PCA-200E/SBA-200E ATM NIC driver info. | 84 | - FORE Systems PCA-200E/SBA-200E ATM NIC driver info. |
57 | framerelay.txt | 85 | framerelay.txt |
58 | - info on using Frame Relay/Data Link Connection Identifier (DLCI). | 86 | - info on using Frame Relay/Data Link Connection Identifier (DLCI). |
87 | gen_stats.txt | ||
88 | - Generic networking statistics for netlink users. | ||
89 | generic_hdlc.txt | ||
90 | - The generic High Level Data Link Control (HDLC) layer. | ||
59 | generic_netlink.txt | 91 | generic_netlink.txt |
60 | - info on Generic Netlink | 92 | - info on Generic Netlink |
93 | gianfar.txt | ||
94 | - Gianfar Ethernet Driver. | ||
61 | ieee802154.txt | 95 | ieee802154.txt |
62 | - Linux IEEE 802.15.4 implementation, API and drivers | 96 | - Linux IEEE 802.15.4 implementation, API and drivers |
97 | ifenslave.c | ||
98 | - Configure network interfaces for parallel routing (bonding). | ||
99 | igb.txt | ||
100 | - README for the Intel Gigabit Ethernet Driver (igb). | ||
101 | igbvf.txt | ||
102 | - README for the Intel Gigabit Ethernet Driver (igbvf). | ||
63 | ip-sysctl.txt | 103 | ip-sysctl.txt |
64 | - /proc/sys/net/ipv4/* variables | 104 | - /proc/sys/net/ipv4/* variables |
65 | ip_dynaddr.txt | 105 | ip_dynaddr.txt |
@@ -68,41 +108,117 @@ ipddp.txt | |||
68 | - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation | 108 | - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation |
69 | iphase.txt | 109 | iphase.txt |
70 | - Interphase PCI ATM (i)Chip IA Linux driver info. | 110 | - Interphase PCI ATM (i)Chip IA Linux driver info. |
111 | ipv6.txt | ||
112 | - Options to the ipv6 kernel module. | ||
113 | ipvs-sysctl.txt | ||
114 | - Per-inode explanation of the /proc/sys/net/ipv4/vs interface. | ||
71 | irda.txt | 115 | irda.txt |
72 | - where to get IrDA (infrared) utilities and info for Linux. | 116 | - where to get IrDA (infrared) utilities and info for Linux. |
117 | ixgb.txt | ||
118 | - README for the Intel 10 Gigabit Ethernet Driver (ixgb). | ||
119 | ixgbe.txt | ||
120 | - README for the Intel 10 Gigabit Ethernet Driver (ixgbe). | ||
121 | ixgbevf.txt | ||
122 | - README for the Intel Virtual Function (VF) Driver (ixgbevf). | ||
123 | l2tp.txt | ||
124 | - User guide to the L2TP tunnel protocol. | ||
73 | lapb-module.txt | 125 | lapb-module.txt |
74 | - programming information of the LAPB module. | 126 | - programming information of the LAPB module. |
75 | ltpc.txt | 127 | ltpc.txt |
76 | - the Apple or Farallon LocalTalk PC card driver | 128 | - the Apple or Farallon LocalTalk PC card driver |
129 | mac80211-injection.txt | ||
130 | - HOWTO use packet injection with mac80211 | ||
77 | multicast.txt | 131 | multicast.txt |
78 | - Behaviour of cards under Multicast | 132 | - Behaviour of cards under Multicast |
133 | multiqueue.txt | ||
134 | - HOWTO for multiqueue network device support. | ||
135 | netconsole.txt | ||
136 | - The network console module netconsole.ko: configuration and notes. | ||
137 | netdev-features.txt | ||
138 | - Network interface features API description. | ||
79 | netdevices.txt | 139 | netdevices.txt |
80 | - info on network device driver functions exported to the kernel. | 140 | - info on network device driver functions exported to the kernel. |
141 | netif-msg.txt | ||
142 | - Design of the network interface message level setting (NETIF_MSG_*). | ||
143 | nfc.txt | ||
144 | - The Linux Near Field Communication (NFS) subsystem. | ||
81 | olympic.txt | 145 | olympic.txt |
82 | - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. | 146 | - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. |
147 | operstates.txt | ||
148 | - Overview of network interface operational states. | ||
149 | packet_mmap.txt | ||
150 | - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING). | ||
151 | phonet.txt | ||
152 | - The Phonet packet protocol used in Nokia cellular modems. | ||
153 | phy.txt | ||
154 | - The PHY abstraction layer. | ||
155 | pktgen.txt | ||
156 | - User guide to the kernel packet generator (pktgen.ko). | ||
83 | policy-routing.txt | 157 | policy-routing.txt |
84 | - IP policy-based routing | 158 | - IP policy-based routing |
159 | ppp_generic.txt | ||
160 | - Information about the generic PPP driver. | ||
161 | proc_net_tcp.txt | ||
162 | - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces. | ||
163 | radiotap-headers.txt | ||
164 | - Background on radiotap headers. | ||
85 | ray_cs.txt | 165 | ray_cs.txt |
86 | - Raylink Wireless LAN card driver info. | 166 | - Raylink Wireless LAN card driver info. |
167 | rds.txt | ||
168 | - Background on the reliable, ordered datagram delivery method RDS. | ||
169 | regulatory.txt | ||
170 | - Overview of the Linux wireless regulatory infrastructure. | ||
171 | rxrpc.txt | ||
172 | - Guide to the RxRPC protocol. | ||
173 | s2io.txt | ||
174 | - Release notes for Neterion Xframe I/II 10GbE driver. | ||
175 | scaling.txt | ||
176 | - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS. | ||
177 | sctp.txt | ||
178 | - Notes on the Linux kernel implementation of the SCTP protocol. | ||
179 | secid.txt | ||
180 | - Explanation of the secid member in flow structures. | ||
87 | skfp.txt | 181 | skfp.txt |
88 | - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info. | 182 | - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info. |
89 | smc9.txt | 183 | smc9.txt |
90 | - the driver for SMC's 9000 series of Ethernet cards | 184 | - the driver for SMC's 9000 series of Ethernet cards |
91 | smctr.txt | 185 | smctr.txt |
92 | - SMC TokenCard TokenRing Linux driver info. | 186 | - SMC TokenCard TokenRing Linux driver info. |
187 | spider-net.txt | ||
188 | - README for the Spidernet Driver (as found in PS3 / Cell BE). | ||
189 | stmmac.txt | ||
190 | - README for the STMicro Synopsys Ethernet driver. | ||
191 | tc-actions-env-rules.txt | ||
192 | - rules for traffic control (tc) actions. | ||
193 | timestamping.txt | ||
194 | - overview of network packet timestamping variants. | ||
93 | tcp.txt | 195 | tcp.txt |
94 | - short blurb on how TCP output takes place. | 196 | - short blurb on how TCP output takes place. |
197 | tcp-thin.txt | ||
198 | - kernel tuning options for low rate 'thin' TCP streams. | ||
95 | tlan.txt | 199 | tlan.txt |
96 | - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info. | 200 | - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info. |
97 | tms380tr.txt | 201 | tms380tr.txt |
98 | - SysKonnect Token Ring ISA/PCI adapter driver info. | 202 | - SysKonnect Token Ring ISA/PCI adapter driver info. |
203 | tproxy.txt | ||
204 | - Transparent proxy support user guide. | ||
99 | tuntap.txt | 205 | tuntap.txt |
100 | - TUN/TAP device driver, allowing user space Rx/Tx of packets. | 206 | - TUN/TAP device driver, allowing user space Rx/Tx of packets. |
207 | udplite.txt | ||
208 | - UDP-Lite protocol (RFC 3828) introduction. | ||
101 | vortex.txt | 209 | vortex.txt |
102 | - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. | 210 | - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. |
211 | vxge.txt | ||
212 | - README for the Neterion X3100 PCIe Server Adapter. | ||
103 | x25.txt | 213 | x25.txt |
104 | - general info on X.25 development. | 214 | - general info on X.25 development. |
105 | x25-iface.txt | 215 | x25-iface.txt |
106 | - description of the X.25 Packet Layer to LAPB device interface. | 216 | - description of the X.25 Packet Layer to LAPB device interface. |
217 | xfrm_proc.txt | ||
218 | - description of the statistics package for XFRM. | ||
219 | xfrm_sync.txt | ||
220 | - sync patches for XFRM enable migration of an SA between hosts. | ||
221 | xfrm_sysctl.txt | ||
222 | - description of the XFRM configuration options. | ||
107 | z8530drv.txt | 223 | z8530drv.txt |
108 | - info about Linux driver for Z8530 based HDLC cards for AX.25 | 224 | - info about Linux driver for Z8530 based HDLC cards for AX.25 |
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 675612ff41a..91df678fb7f 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt | |||
@@ -238,6 +238,18 @@ ad_select | |||
238 | 238 | ||
239 | This option was added in bonding version 3.4.0. | 239 | This option was added in bonding version 3.4.0. |
240 | 240 | ||
241 | all_slaves_active | ||
242 | |||
243 | Specifies that duplicate frames (received on inactive ports) should be | ||
244 | dropped (0) or delivered (1). | ||
245 | |||
246 | Normally, bonding will drop duplicate frames (received on inactive | ||
247 | ports), which is desirable for most users. But there are some times | ||
248 | it is nice to allow duplicate frames to be delivered. | ||
249 | |||
250 | The default value is 0 (drop duplicate frames received on inactive | ||
251 | ports). | ||
252 | |||
241 | arp_interval | 253 | arp_interval |
242 | 254 | ||
243 | Specifies the ARP link monitoring frequency in milliseconds. | 255 | Specifies the ARP link monitoring frequency in milliseconds. |
@@ -433,6 +445,23 @@ miimon | |||
433 | determined. See the High Availability section for additional | 445 | determined. See the High Availability section for additional |
434 | information. The default value is 0. | 446 | information. The default value is 0. |
435 | 447 | ||
448 | min_links | ||
449 | |||
450 | Specifies the minimum number of links that must be active before | ||
451 | asserting carrier. It is similar to the Cisco EtherChannel min-links | ||
452 | feature. This allows setting the minimum number of member ports that | ||
453 | must be up (link-up state) before marking the bond device as up | ||
454 | (carrier on). This is useful for situations where higher level services | ||
455 | such as clustering want to ensure a minimum number of low bandwidth | ||
456 | links are active before switchover. This option only affect 802.3ad | ||
457 | mode. | ||
458 | |||
459 | The default value is 0. This will cause carrier to be asserted (for | ||
460 | 802.3ad mode) whenever there is an active aggregator, regardless of the | ||
461 | number of available links in that aggregator. Note that, because an | ||
462 | aggregator cannot be active without at least one available link, | ||
463 | setting this option to 0 or to 1 has the exact same effect. | ||
464 | |||
436 | mode | 465 | mode |
437 | 466 | ||
438 | Specifies one of the bonding policies. The default is | 467 | Specifies one of the bonding policies. The default is |
@@ -599,7 +628,7 @@ num_unsol_na | |||
599 | affect only the active-backup mode. These options were added for | 628 | affect only the active-backup mode. These options were added for |
600 | bonding versions 3.3.0 and 3.4.0 respectively. | 629 | bonding versions 3.3.0 and 3.4.0 respectively. |
601 | 630 | ||
602 | From Linux 2.6.40 and bonding version 3.7.1, these notifications | 631 | From Linux 3.0 and bonding version 3.7.1, these notifications |
603 | are generated by the ipv4 and ipv6 code and the numbers of | 632 | are generated by the ipv4 and ipv6 code and the numbers of |
604 | repetitions cannot be set independently. | 633 | repetitions cannot be set independently. |
605 | 634 | ||
diff --git a/Documentation/networking/dmfe.txt b/Documentation/networking/dmfe.txt index 8006c227fda..25320bf19c8 100644 --- a/Documentation/networking/dmfe.txt +++ b/Documentation/networking/dmfe.txt | |||
@@ -1,3 +1,5 @@ | |||
1 | Note: This driver doesn't have a maintainer. | ||
2 | |||
1 | Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux. | 3 | Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux. |
2 | 4 | ||
3 | This program is free software; you can redistribute it and/or | 5 | This program is free software; you can redistribute it and/or |
@@ -55,7 +57,6 @@ Test and make sure PCI latency is now correct for all cases. | |||
55 | Authors: | 57 | Authors: |
56 | 58 | ||
57 | Sten Wang <sten_wang@davicom.com.tw > : Original Author | 59 | Sten Wang <sten_wang@davicom.com.tw > : Original Author |
58 | Tobias Ringstrom <tori@unhappy.mine.nu> : Current Maintainer | ||
59 | 60 | ||
60 | Contributors: | 61 | Contributors: |
61 | 62 | ||
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c index 2bac9618c34..65968fbf1e4 100644 --- a/Documentation/networking/ifenslave.c +++ b/Documentation/networking/ifenslave.c | |||
@@ -260,7 +260,7 @@ int main(int argc, char *argv[]) | |||
260 | case 'V': opt_V++; exclusive++; break; | 260 | case 'V': opt_V++; exclusive++; break; |
261 | 261 | ||
262 | case '?': | 262 | case '?': |
263 | fprintf(stderr, usage_msg); | 263 | fprintf(stderr, "%s", usage_msg); |
264 | res = 2; | 264 | res = 2; |
265 | goto out; | 265 | goto out; |
266 | } | 266 | } |
@@ -268,13 +268,13 @@ int main(int argc, char *argv[]) | |||
268 | 268 | ||
269 | /* options check */ | 269 | /* options check */ |
270 | if (exclusive > 1) { | 270 | if (exclusive > 1) { |
271 | fprintf(stderr, usage_msg); | 271 | fprintf(stderr, "%s", usage_msg); |
272 | res = 2; | 272 | res = 2; |
273 | goto out; | 273 | goto out; |
274 | } | 274 | } |
275 | 275 | ||
276 | if (opt_v || opt_V) { | 276 | if (opt_v || opt_V) { |
277 | printf(version); | 277 | printf("%s", version); |
278 | if (opt_V) { | 278 | if (opt_V) { |
279 | res = 0; | 279 | res = 0; |
280 | goto out; | 280 | goto out; |
@@ -282,14 +282,14 @@ int main(int argc, char *argv[]) | |||
282 | } | 282 | } |
283 | 283 | ||
284 | if (opt_u) { | 284 | if (opt_u) { |
285 | printf(usage_msg); | 285 | printf("%s", usage_msg); |
286 | res = 0; | 286 | res = 0; |
287 | goto out; | 287 | goto out; |
288 | } | 288 | } |
289 | 289 | ||
290 | if (opt_h) { | 290 | if (opt_h) { |
291 | printf(usage_msg); | 291 | printf("%s", usage_msg); |
292 | printf(help_msg); | 292 | printf("%s", help_msg); |
293 | res = 0; | 293 | res = 0; |
294 | goto out; | 294 | goto out; |
295 | } | 295 | } |
@@ -309,7 +309,7 @@ int main(int argc, char *argv[]) | |||
309 | goto out; | 309 | goto out; |
310 | } else { | 310 | } else { |
311 | /* Just show usage */ | 311 | /* Just show usage */ |
312 | fprintf(stderr, usage_msg); | 312 | fprintf(stderr, "%s", usage_msg); |
313 | res = 2; | 313 | res = 2; |
314 | goto out; | 314 | goto out; |
315 | } | 315 | } |
@@ -320,7 +320,7 @@ int main(int argc, char *argv[]) | |||
320 | master_ifname = *spp++; | 320 | master_ifname = *spp++; |
321 | 321 | ||
322 | if (master_ifname == NULL) { | 322 | if (master_ifname == NULL) { |
323 | fprintf(stderr, usage_msg); | 323 | fprintf(stderr, "%s", usage_msg); |
324 | res = 2; | 324 | res = 2; |
325 | goto out; | 325 | goto out; |
326 | } | 326 | } |
@@ -339,7 +339,7 @@ int main(int argc, char *argv[]) | |||
339 | 339 | ||
340 | if (slave_ifname == NULL) { | 340 | if (slave_ifname == NULL) { |
341 | if (opt_d || opt_c) { | 341 | if (opt_d || opt_c) { |
342 | fprintf(stderr, usage_msg); | 342 | fprintf(stderr, "%s", usage_msg); |
343 | res = 2; | 343 | res = 2; |
344 | goto out; | 344 | goto out; |
345 | } | 345 | } |
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index bfe924217f2..ca5cdcd0f0e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER | |||
106 | when the number of entries in the pool is very small). | 106 | when the number of entries in the pool is very small). |
107 | Measured in seconds. | 107 | Measured in seconds. |
108 | 108 | ||
109 | inet_peer_gc_mintime - INTEGER | ||
110 | Minimum interval between garbage collection passes. This interval is | ||
111 | in effect under high memory pressure on the pool. | ||
112 | Measured in seconds. | ||
113 | |||
114 | inet_peer_gc_maxtime - INTEGER | ||
115 | Minimum interval between garbage collection passes. This interval is | ||
116 | in effect under low (or absent) memory pressure on the pool. | ||
117 | Measured in seconds. | ||
118 | |||
119 | TCP variables: | 109 | TCP variables: |
120 | 110 | ||
121 | somaxconn - INTEGER | 111 | somaxconn - INTEGER |
@@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
394 | min: Minimal size of receive buffer used by TCP sockets. | 384 | min: Minimal size of receive buffer used by TCP sockets. |
395 | It is guaranteed to each TCP socket, even under moderate memory | 385 | It is guaranteed to each TCP socket, even under moderate memory |
396 | pressure. | 386 | pressure. |
397 | Default: 8K | 387 | Default: 1 page |
398 | 388 | ||
399 | default: initial size of receive buffer used by TCP sockets. | 389 | default: initial size of receive buffer used by TCP sockets. |
400 | This value overrides net.core.rmem_default used by other protocols. | 390 | This value overrides net.core.rmem_default used by other protocols. |
@@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN | |||
483 | tcp_wmem - vector of 3 INTEGERs: min, default, max | 473 | tcp_wmem - vector of 3 INTEGERs: min, default, max |
484 | min: Amount of memory reserved for send buffers for TCP sockets. | 474 | min: Amount of memory reserved for send buffers for TCP sockets. |
485 | Each TCP socket has rights to use it due to fact of its birth. | 475 | Each TCP socket has rights to use it due to fact of its birth. |
486 | Default: 4K | 476 | Default: 1 page |
487 | 477 | ||
488 | default: initial size of send buffer used by TCP sockets. This | 478 | default: initial size of send buffer used by TCP sockets. This |
489 | value overrides net.core.wmem_default used by other protocols. | 479 | value overrides net.core.wmem_default used by other protocols. |
@@ -553,13 +543,13 @@ udp_rmem_min - INTEGER | |||
553 | Minimal size of receive buffer used by UDP sockets in moderation. | 543 | Minimal size of receive buffer used by UDP sockets in moderation. |
554 | Each UDP socket is able to use the size for receiving data, even if | 544 | Each UDP socket is able to use the size for receiving data, even if |
555 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. | 545 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. |
556 | Default: 4096 | 546 | Default: 1 page |
557 | 547 | ||
558 | udp_wmem_min - INTEGER | 548 | udp_wmem_min - INTEGER |
559 | Minimal size of send buffer used by UDP sockets in moderation. | 549 | Minimal size of send buffer used by UDP sockets in moderation. |
560 | Each UDP socket is able to use the size for sending data, even if | 550 | Each UDP socket is able to use the size for sending data, even if |
561 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. | 551 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. |
562 | Default: 4096 | 552 | Default: 1 page |
563 | 553 | ||
564 | CIPSOv4 Variables: | 554 | CIPSOv4 Variables: |
565 | 555 | ||
@@ -1002,7 +992,7 @@ bindv6only - BOOLEAN | |||
1002 | TRUE: disable IPv4-mapped address feature | 992 | TRUE: disable IPv4-mapped address feature |
1003 | FALSE: enable IPv4-mapped address feature | 993 | FALSE: enable IPv4-mapped address feature |
1004 | 994 | ||
1005 | Default: FALSE (as specified in RFC2553bis) | 995 | Default: FALSE (as specified in RFC3493) |
1006 | 996 | ||
1007 | IPv6 Fragmentation: | 997 | IPv6 Fragmentation: |
1008 | 998 | ||
@@ -1052,7 +1042,7 @@ conf/interface/*: | |||
1052 | The functional behaviour for certain settings is different | 1042 | The functional behaviour for certain settings is different |
1053 | depending on whether local forwarding is enabled or not. | 1043 | depending on whether local forwarding is enabled or not. |
1054 | 1044 | ||
1055 | accept_ra - BOOLEAN | 1045 | accept_ra - INTEGER |
1056 | Accept Router Advertisements; autoconfigure using them. | 1046 | Accept Router Advertisements; autoconfigure using them. |
1057 | 1047 | ||
1058 | Possible values are: | 1048 | Possible values are: |
@@ -1116,7 +1106,7 @@ dad_transmits - INTEGER | |||
1116 | The amount of Duplicate Address Detection probes to send. | 1106 | The amount of Duplicate Address Detection probes to send. |
1117 | Default: 1 | 1107 | Default: 1 |
1118 | 1108 | ||
1119 | forwarding - BOOLEAN | 1109 | forwarding - INTEGER |
1120 | Configure interface-specific Host/Router behaviour. | 1110 | Configure interface-specific Host/Router behaviour. |
1121 | 1111 | ||
1122 | Note: It is recommended to have the same setting on all | 1112 | Note: It is recommended to have the same setting on all |
@@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max | |||
1465 | Default is calculated at boot time from amount of available memory. | 1455 | Default is calculated at boot time from amount of available memory. |
1466 | 1456 | ||
1467 | sctp_rmem - vector of 3 INTEGERs: min, default, max | 1457 | sctp_rmem - vector of 3 INTEGERs: min, default, max |
1468 | See tcp_rmem for a description. | 1458 | Only the first value ("min") is used, "default" and "max" are |
1459 | ignored. | ||
1460 | |||
1461 | min: Minimal size of receive buffer used by SCTP socket. | ||
1462 | It is guaranteed to each SCTP socket (but not association) even | ||
1463 | under moderate memory pressure. | ||
1464 | |||
1465 | Default: 1 page | ||
1469 | 1466 | ||
1470 | sctp_wmem - vector of 3 INTEGERs: min, default, max | 1467 | sctp_wmem - vector of 3 INTEGERs: min, default, max |
1471 | See tcp_wmem for a description. | 1468 | Currently this tunable has no effect. |
1472 | 1469 | ||
1473 | addr_scope_policy - INTEGER | 1470 | addr_scope_policy - INTEGER |
1474 | Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 | 1471 | Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 |
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt new file mode 100644 index 00000000000..4b1c0dcef84 --- /dev/null +++ b/Documentation/networking/netdev-features.txt | |||
@@ -0,0 +1,154 @@ | |||
1 | Netdev features mess and how to get out from it alive | ||
2 | ===================================================== | ||
3 | |||
4 | Author: | ||
5 | Michał Mirosław <mirq-linux@rere.qmqm.pl> | ||
6 | |||
7 | |||
8 | |||
9 | Part I: Feature sets | ||
10 | ====================== | ||
11 | |||
12 | Long gone are the days when a network card would just take and give packets | ||
13 | verbatim. Today's devices add multiple features and bugs (read: offloads) | ||
14 | that relieve an OS of various tasks like generating and checking checksums, | ||
15 | splitting packets, classifying them. Those capabilities and their state | ||
16 | are commonly referred to as netdev features in Linux kernel world. | ||
17 | |||
18 | There are currently three sets of features relevant to the driver, and | ||
19 | one used internally by network core: | ||
20 | |||
21 | 1. netdev->hw_features set contains features whose state may possibly | ||
22 | be changed (enabled or disabled) for a particular device by user's | ||
23 | request. This set should be initialized in ndo_init callback and not | ||
24 | changed later. | ||
25 | |||
26 | 2. netdev->features set contains features which are currently enabled | ||
27 | for a device. This should be changed only by network core or in | ||
28 | error paths of ndo_set_features callback. | ||
29 | |||
30 | 3. netdev->vlan_features set contains features whose state is inherited | ||
31 | by child VLAN devices (limits netdev->features set). This is currently | ||
32 | used for all VLAN devices whether tags are stripped or inserted in | ||
33 | hardware or software. | ||
34 | |||
35 | 4. netdev->wanted_features set contains feature set requested by user. | ||
36 | This set is filtered by ndo_fix_features callback whenever it or | ||
37 | some device-specific conditions change. This set is internal to | ||
38 | networking core and should not be referenced in drivers. | ||
39 | |||
40 | |||
41 | |||
42 | Part II: Controlling enabled features | ||
43 | ======================================= | ||
44 | |||
45 | When current feature set (netdev->features) is to be changed, new set | ||
46 | is calculated and filtered by calling ndo_fix_features callback | ||
47 | and netdev_fix_features(). If the resulting set differs from current | ||
48 | set, it is passed to ndo_set_features callback and (if the callback | ||
49 | returns success) replaces value stored in netdev->features. | ||
50 | NETDEV_FEAT_CHANGE notification is issued after that whenever current | ||
51 | set might have changed. | ||
52 | |||
53 | The following events trigger recalculation: | ||
54 | 1. device's registration, after ndo_init returned success | ||
55 | 2. user requested changes in features state | ||
56 | 3. netdev_update_features() is called | ||
57 | |||
58 | ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks | ||
59 | are treated as always returning success. | ||
60 | |||
61 | A driver that wants to trigger recalculation must do so by calling | ||
62 | netdev_update_features() while holding rtnl_lock. This should not be done | ||
63 | from ndo_*_features callbacks. netdev->features should not be modified by | ||
64 | driver except by means of ndo_fix_features callback. | ||
65 | |||
66 | |||
67 | |||
68 | Part III: Implementation hints | ||
69 | ================================ | ||
70 | |||
71 | * ndo_fix_features: | ||
72 | |||
73 | All dependencies between features should be resolved here. The resulting | ||
74 | set can be reduced further by networking core imposed limitations (as coded | ||
75 | in netdev_fix_features()). For this reason it is safer to disable a feature | ||
76 | when its dependencies are not met instead of forcing the dependency on. | ||
77 | |||
78 | This callback should not modify hardware nor driver state (should be | ||
79 | stateless). It can be called multiple times between successive | ||
80 | ndo_set_features calls. | ||
81 | |||
82 | Callback must not alter features contained in NETIF_F_SOFT_FEATURES or | ||
83 | NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but | ||
84 | care must be taken as the change won't affect already configured VLANs. | ||
85 | |||
86 | * ndo_set_features: | ||
87 | |||
88 | Hardware should be reconfigured to match passed feature set. The set | ||
89 | should not be altered unless some error condition happens that can't | ||
90 | be reliably detected in ndo_fix_features. In this case, the callback | ||
91 | should update netdev->features to match resulting hardware state. | ||
92 | Errors returned are not (and cannot be) propagated anywhere except dmesg. | ||
93 | (Note: successful return is zero, >0 means silent error.) | ||
94 | |||
95 | |||
96 | |||
97 | Part IV: Features | ||
98 | =================== | ||
99 | |||
100 | For current list of features, see include/linux/netdev_features.h. | ||
101 | This section describes semantics of some of them. | ||
102 | |||
103 | * Transmit checksumming | ||
104 | |||
105 | For complete description, see comments near the top of include/linux/skbuff.h. | ||
106 | |||
107 | Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM. | ||
108 | It means that device can fill TCP/UDP-like checksum anywhere in the packets | ||
109 | whatever headers there might be. | ||
110 | |||
111 | * Transmit TCP segmentation offload | ||
112 | |||
113 | NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit | ||
114 | set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6). | ||
115 | |||
116 | * Transmit DMA from high memory | ||
117 | |||
118 | On platforms where this is relevant, NETIF_F_HIGHDMA signals that | ||
119 | ndo_start_xmit can handle skbs with frags in high memory. | ||
120 | |||
121 | * Transmit scatter-gather | ||
122 | |||
123 | Those features say that ndo_start_xmit can handle fragmented skbs: | ||
124 | NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST --- | ||
125 | chained skbs (skb->next/prev list). | ||
126 | |||
127 | * Software features | ||
128 | |||
129 | Features contained in NETIF_F_SOFT_FEATURES are features of networking | ||
130 | stack. Driver should not change behaviour based on them. | ||
131 | |||
132 | * LLTX driver (deprecated for hardware drivers) | ||
133 | |||
134 | NETIF_F_LLTX should be set in drivers that implement their own locking in | ||
135 | transmit path or don't need locking at all (e.g. software tunnels). | ||
136 | In ndo_start_xmit, it is recommended to use a try_lock and return | ||
137 | NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly | ||
138 | protect against other callbacks (the rules you need to find out). | ||
139 | |||
140 | Don't use it for new drivers. | ||
141 | |||
142 | * netns-local device | ||
143 | |||
144 | NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between | ||
145 | network namespaces (e.g. loopback). | ||
146 | |||
147 | Don't use it in drivers. | ||
148 | |||
149 | * VLAN challenged | ||
150 | |||
151 | NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN | ||
152 | headers. Some drivers set this because the cards can't handle the bigger MTU. | ||
153 | [FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU | ||
154 | VLANs. This may be not useful, though.] | ||
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt new file mode 100644 index 00000000000..b24c29bdae2 --- /dev/null +++ b/Documentation/networking/nfc.txt | |||
@@ -0,0 +1,128 @@ | |||
1 | Linux NFC subsystem | ||
2 | =================== | ||
3 | |||
4 | The Near Field Communication (NFC) subsystem is required to standardize the | ||
5 | NFC device drivers development and to create an unified userspace interface. | ||
6 | |||
7 | This document covers the architecture overview, the device driver interface | ||
8 | description and the userspace interface description. | ||
9 | |||
10 | Architecture overview | ||
11 | --------------------- | ||
12 | |||
13 | The NFC subsystem is responsible for: | ||
14 | - NFC adapters management; | ||
15 | - Polling for targets; | ||
16 | - Low-level data exchange; | ||
17 | |||
18 | The subsystem is divided in some parts. The 'core' is responsible for | ||
19 | providing the device driver interface. On the other side, it is also | ||
20 | responsible for providing an interface to control operations and low-level | ||
21 | data exchange. | ||
22 | |||
23 | The control operations are available to userspace via generic netlink. | ||
24 | |||
25 | The low-level data exchange interface is provided by the new socket family | ||
26 | PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets. | ||
27 | |||
28 | |||
29 | +--------------------------------------+ | ||
30 | | USER SPACE | | ||
31 | +--------------------------------------+ | ||
32 | ^ ^ | ||
33 | | low-level | control | ||
34 | | data exchange | operations | ||
35 | | | | ||
36 | | v | ||
37 | | +-----------+ | ||
38 | | AF_NFC | netlink | | ||
39 | | socket +-----------+ | ||
40 | | raw ^ | ||
41 | | | | ||
42 | v v | ||
43 | +---------+ +-----------+ | ||
44 | | rawsock | <--------> | core | | ||
45 | +---------+ +-----------+ | ||
46 | ^ | ||
47 | | | ||
48 | v | ||
49 | +-----------+ | ||
50 | | driver | | ||
51 | +-----------+ | ||
52 | |||
53 | Device Driver Interface | ||
54 | ----------------------- | ||
55 | |||
56 | When registering on the NFC subsystem, the device driver must inform the core | ||
57 | of the set of supported NFC protocols and the set of ops callbacks. The ops | ||
58 | callbacks that must be implemented are the following: | ||
59 | |||
60 | * start_poll - setup the device to poll for targets | ||
61 | * stop_poll - stop on progress polling operation | ||
62 | * activate_target - select and initialize one of the targets found | ||
63 | * deactivate_target - deselect and deinitialize the selected target | ||
64 | * data_exchange - send data and receive the response (transceive operation) | ||
65 | |||
66 | Userspace interface | ||
67 | -------------------- | ||
68 | |||
69 | The userspace interface is divided in control operations and low-level data | ||
70 | exchange operation. | ||
71 | |||
72 | CONTROL OPERATIONS: | ||
73 | |||
74 | Generic netlink is used to implement the interface to the control operations. | ||
75 | The operations are composed by commands and events, all listed below: | ||
76 | |||
77 | * NFC_CMD_GET_DEVICE - get specific device info or dump the device list | ||
78 | * NFC_CMD_START_POLL - setup a specific device to polling for targets | ||
79 | * NFC_CMD_STOP_POLL - stop the polling operation in a specific device | ||
80 | * NFC_CMD_GET_TARGET - dump the list of targets found by a specific device | ||
81 | |||
82 | * NFC_EVENT_DEVICE_ADDED - reports an NFC device addition | ||
83 | * NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal | ||
84 | * NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets | ||
85 | are found | ||
86 | |||
87 | The user must call START_POLL to poll for NFC targets, passing the desired NFC | ||
88 | protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling | ||
89 | state until it finds any target. However, the user can stop the polling | ||
90 | operation by calling STOP_POLL command. In this case, it will be checked if | ||
91 | the requester of STOP_POLL is the same of START_POLL. | ||
92 | |||
93 | If the polling operation finds one or more targets, the event TARGETS_FOUND is | ||
94 | sent (including the device id). The user must call GET_TARGET to get the list of | ||
95 | all targets found by such device. Each reply message has target attributes with | ||
96 | relevant information such as the supported NFC protocols. | ||
97 | |||
98 | All polling operations requested through one netlink socket are stopped when | ||
99 | it's closed. | ||
100 | |||
101 | LOW-LEVEL DATA EXCHANGE: | ||
102 | |||
103 | The userspace must use PF_NFC sockets to perform any data communication with | ||
104 | targets. All NFC sockets use AF_NFC: | ||
105 | |||
106 | struct sockaddr_nfc { | ||
107 | sa_family_t sa_family; | ||
108 | __u32 dev_idx; | ||
109 | __u32 target_idx; | ||
110 | __u32 nfc_protocol; | ||
111 | }; | ||
112 | |||
113 | To establish a connection with one target, the user must create an | ||
114 | NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc | ||
115 | struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND | ||
116 | netlink event. As a target can support more than one NFC protocol, the user | ||
117 | must inform which protocol it wants to use. | ||
118 | |||
119 | Internally, 'connect' will result in an activate_target call to the driver. | ||
120 | When the socket is closed, the target is deactivated. | ||
121 | |||
122 | The data format exchanged through the sockets is NFC protocol dependent. For | ||
123 | instance, when communicating with MIFARE tags, the data exchanged are MIFARE | ||
124 | commands and their responses. | ||
125 | |||
126 | The first received package is the response to the first sent package and so | ||
127 | on. In order to allow valid "empty" responses, every data received has a NULL | ||
128 | header of 1 byte. | ||
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt new file mode 100644 index 00000000000..fe67b5c79f0 --- /dev/null +++ b/Documentation/networking/scaling.txt | |||
@@ -0,0 +1,378 @@ | |||
1 | Scaling in the Linux Networking Stack | ||
2 | |||
3 | |||
4 | Introduction | ||
5 | ============ | ||
6 | |||
7 | This document describes a set of complementary techniques in the Linux | ||
8 | networking stack to increase parallelism and improve performance for | ||
9 | multi-processor systems. | ||
10 | |||
11 | The following technologies are described: | ||
12 | |||
13 | RSS: Receive Side Scaling | ||
14 | RPS: Receive Packet Steering | ||
15 | RFS: Receive Flow Steering | ||
16 | Accelerated Receive Flow Steering | ||
17 | XPS: Transmit Packet Steering | ||
18 | |||
19 | |||
20 | RSS: Receive Side Scaling | ||
21 | ========================= | ||
22 | |||
23 | Contemporary NICs support multiple receive and transmit descriptor queues | ||
24 | (multi-queue). On reception, a NIC can send different packets to different | ||
25 | queues to distribute processing among CPUs. The NIC distributes packets by | ||
26 | applying a filter to each packet that assigns it to one of a small number | ||
27 | of logical flows. Packets for each flow are steered to a separate receive | ||
28 | queue, which in turn can be processed by separate CPUs. This mechanism is | ||
29 | generally known as “Receive-side Scaling” (RSS). The goal of RSS and | ||
30 | the other scaling techniques is to increase performance uniformly. | ||
31 | Multi-queue distribution can also be used for traffic prioritization, but | ||
32 | that is not the focus of these techniques. | ||
33 | |||
34 | The filter used in RSS is typically a hash function over the network | ||
35 | and/or transport layer headers-- for example, a 4-tuple hash over | ||
36 | IP addresses and TCP ports of a packet. The most common hardware | ||
37 | implementation of RSS uses a 128-entry indirection table where each entry | ||
38 | stores a queue number. The receive queue for a packet is determined | ||
39 | by masking out the low order seven bits of the computed hash for the | ||
40 | packet (usually a Toeplitz hash), taking this number as a key into the | ||
41 | indirection table and reading the corresponding value. | ||
42 | |||
43 | Some advanced NICs allow steering packets to queues based on | ||
44 | programmable filters. For example, webserver bound TCP port 80 packets | ||
45 | can be directed to their own receive queue. Such “n-tuple” filters can | ||
46 | be configured from ethtool (--config-ntuple). | ||
47 | |||
48 | ==== RSS Configuration | ||
49 | |||
50 | The driver for a multi-queue capable NIC typically provides a kernel | ||
51 | module parameter for specifying the number of hardware queues to | ||
52 | configure. In the bnx2x driver, for instance, this parameter is called | ||
53 | num_queues. A typical RSS configuration would be to have one receive queue | ||
54 | for each CPU if the device supports enough queues, or otherwise at least | ||
55 | one for each memory domain, where a memory domain is a set of CPUs that | ||
56 | share a particular memory level (L1, L2, NUMA node, etc.). | ||
57 | |||
58 | The indirection table of an RSS device, which resolves a queue by masked | ||
59 | hash, is usually programmed by the driver at initialization. The | ||
60 | default mapping is to distribute the queues evenly in the table, but the | ||
61 | indirection table can be retrieved and modified at runtime using ethtool | ||
62 | commands (--show-rxfh-indir and --set-rxfh-indir). Modifying the | ||
63 | indirection table could be done to give different queues different | ||
64 | relative weights. | ||
65 | |||
66 | == RSS IRQ Configuration | ||
67 | |||
68 | Each receive queue has a separate IRQ associated with it. The NIC triggers | ||
69 | this to notify a CPU when new packets arrive on the given queue. The | ||
70 | signaling path for PCIe devices uses message signaled interrupts (MSI-X), | ||
71 | that can route each interrupt to a particular CPU. The active mapping | ||
72 | of queues to IRQs can be determined from /proc/interrupts. By default, | ||
73 | an IRQ may be handled on any CPU. Because a non-negligible part of packet | ||
74 | processing takes place in receive interrupt handling, it is advantageous | ||
75 | to spread receive interrupts between CPUs. To manually adjust the IRQ | ||
76 | affinity of each interrupt see Documentation/IRQ-affinity. Some systems | ||
77 | will be running irqbalance, a daemon that dynamically optimizes IRQ | ||
78 | assignments and as a result may override any manual settings. | ||
79 | |||
80 | == Suggested Configuration | ||
81 | |||
82 | RSS should be enabled when latency is a concern or whenever receive | ||
83 | interrupt processing forms a bottleneck. Spreading load between CPUs | ||
84 | decreases queue length. For low latency networking, the optimal setting | ||
85 | is to allocate as many queues as there are CPUs in the system (or the | ||
86 | NIC maximum, if lower). The most efficient high-rate configuration | ||
87 | is likely the one with the smallest number of receive queues where no | ||
88 | receive queue overflows due to a saturated CPU, because in default | ||
89 | mode with interrupt coalescing enabled, the aggregate number of | ||
90 | interrupts (and thus work) grows with each additional queue. | ||
91 | |||
92 | Per-cpu load can be observed using the mpstat utility, but note that on | ||
93 | processors with hyperthreading (HT), each hyperthread is represented as | ||
94 | a separate CPU. For interrupt handling, HT has shown no benefit in | ||
95 | initial tests, so limit the number of queues to the number of CPU cores | ||
96 | in the system. | ||
97 | |||
98 | |||
99 | RPS: Receive Packet Steering | ||
100 | ============================ | ||
101 | |||
102 | Receive Packet Steering (RPS) is logically a software implementation of | ||
103 | RSS. Being in software, it is necessarily called later in the datapath. | ||
104 | Whereas RSS selects the queue and hence CPU that will run the hardware | ||
105 | interrupt handler, RPS selects the CPU to perform protocol processing | ||
106 | above the interrupt handler. This is accomplished by placing the packet | ||
107 | on the desired CPU’s backlog queue and waking up the CPU for processing. | ||
108 | RPS has some advantages over RSS: 1) it can be used with any NIC, | ||
109 | 2) software filters can easily be added to hash over new protocols, | ||
110 | 3) it does not increase hardware device interrupt rate (although it does | ||
111 | introduce inter-processor interrupts (IPIs)). | ||
112 | |||
113 | RPS is called during bottom half of the receive interrupt handler, when | ||
114 | a driver sends a packet up the network stack with netif_rx() or | ||
115 | netif_receive_skb(). These call the get_rps_cpu() function, which | ||
116 | selects the queue that should process a packet. | ||
117 | |||
118 | The first step in determining the target CPU for RPS is to calculate a | ||
119 | flow hash over the packet’s addresses or ports (2-tuple or 4-tuple hash | ||
120 | depending on the protocol). This serves as a consistent hash of the | ||
121 | associated flow of the packet. The hash is either provided by hardware | ||
122 | or will be computed in the stack. Capable hardware can pass the hash in | ||
123 | the receive descriptor for the packet; this would usually be the same | ||
124 | hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in | ||
125 | skb->rx_hash and can be used elsewhere in the stack as a hash of the | ||
126 | packet’s flow. | ||
127 | |||
128 | Each receive hardware queue has an associated list of CPUs to which | ||
129 | RPS may enqueue packets for processing. For each received packet, | ||
130 | an index into the list is computed from the flow hash modulo the size | ||
131 | of the list. The indexed CPU is the target for processing the packet, | ||
132 | and the packet is queued to the tail of that CPU’s backlog queue. At | ||
133 | the end of the bottom half routine, IPIs are sent to any CPUs for which | ||
134 | packets have been queued to their backlog queue. The IPI wakes backlog | ||
135 | processing on the remote CPU, and any queued packets are then processed | ||
136 | up the networking stack. | ||
137 | |||
138 | ==== RPS Configuration | ||
139 | |||
140 | RPS requires a kernel compiled with the CONFIG_RPS kconfig symbol (on | ||
141 | by default for SMP). Even when compiled in, RPS remains disabled until | ||
142 | explicitly configured. The list of CPUs to which RPS may forward traffic | ||
143 | can be configured for each receive queue using a sysfs file entry: | ||
144 | |||
145 | /sys/class/net/<dev>/queues/rx-<n>/rps_cpus | ||
146 | |||
147 | This file implements a bitmap of CPUs. RPS is disabled when it is zero | ||
148 | (the default), in which case packets are processed on the interrupting | ||
149 | CPU. Documentation/IRQ-affinity.txt explains how CPUs are assigned to | ||
150 | the bitmap. | ||
151 | |||
152 | == Suggested Configuration | ||
153 | |||
154 | For a single queue device, a typical RPS configuration would be to set | ||
155 | the rps_cpus to the CPUs in the same memory domain of the interrupting | ||
156 | CPU. If NUMA locality is not an issue, this could also be all CPUs in | ||
157 | the system. At high interrupt rate, it might be wise to exclude the | ||
158 | interrupting CPU from the map since that already performs much work. | ||
159 | |||
160 | For a multi-queue system, if RSS is configured so that a hardware | ||
161 | receive queue is mapped to each CPU, then RPS is probably redundant | ||
162 | and unnecessary. If there are fewer hardware queues than CPUs, then | ||
163 | RPS might be beneficial if the rps_cpus for each queue are the ones that | ||
164 | share the same memory domain as the interrupting CPU for that queue. | ||
165 | |||
166 | |||
167 | RFS: Receive Flow Steering | ||
168 | ========================== | ||
169 | |||
170 | While RPS steers packets solely based on hash, and thus generally | ||
171 | provides good load distribution, it does not take into account | ||
172 | application locality. This is accomplished by Receive Flow Steering | ||
173 | (RFS). The goal of RFS is to increase datacache hitrate by steering | ||
174 | kernel processing of packets to the CPU where the application thread | ||
175 | consuming the packet is running. RFS relies on the same RPS mechanisms | ||
176 | to enqueue packets onto the backlog of another CPU and to wake up that | ||
177 | CPU. | ||
178 | |||
179 | In RFS, packets are not forwarded directly by the value of their hash, | ||
180 | but the hash is used as index into a flow lookup table. This table maps | ||
181 | flows to the CPUs where those flows are being processed. The flow hash | ||
182 | (see RPS section above) is used to calculate the index into this table. | ||
183 | The CPU recorded in each entry is the one which last processed the flow. | ||
184 | If an entry does not hold a valid CPU, then packets mapped to that entry | ||
185 | are steered using plain RPS. Multiple table entries may point to the | ||
186 | same CPU. Indeed, with many flows and few CPUs, it is very likely that | ||
187 | a single application thread handles flows with many different flow hashes. | ||
188 | |||
189 | rps_sock_flow_table is a global flow table that contains the *desired* CPU | ||
190 | for flows: the CPU that is currently processing the flow in userspace. | ||
191 | Each table value is a CPU index that is updated during calls to recvmsg | ||
192 | and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage() | ||
193 | and tcp_splice_read()). | ||
194 | |||
195 | When the scheduler moves a thread to a new CPU while it has outstanding | ||
196 | receive packets on the old CPU, packets may arrive out of order. To | ||
197 | avoid this, RFS uses a second flow table to track outstanding packets | ||
198 | for each flow: rps_dev_flow_table is a table specific to each hardware | ||
199 | receive queue of each device. Each table value stores a CPU index and a | ||
200 | counter. The CPU index represents the *current* CPU onto which packets | ||
201 | for this flow are enqueued for further kernel processing. Ideally, kernel | ||
202 | and userspace processing occur on the same CPU, and hence the CPU index | ||
203 | in both tables is identical. This is likely false if the scheduler has | ||
204 | recently migrated a userspace thread while the kernel still has packets | ||
205 | enqueued for kernel processing on the old CPU. | ||
206 | |||
207 | The counter in rps_dev_flow_table values records the length of the current | ||
208 | CPU's backlog when a packet in this flow was last enqueued. Each backlog | ||
209 | queue has a head counter that is incremented on dequeue. A tail counter | ||
210 | is computed as head counter + queue length. In other words, the counter | ||
211 | in rps_dev_flow_table[i] records the last element in flow i that has | ||
212 | been enqueued onto the currently designated CPU for flow i (of course, | ||
213 | entry i is actually selected by hash and multiple flows may hash to the | ||
214 | same entry i). | ||
215 | |||
216 | And now the trick for avoiding out of order packets: when selecting the | ||
217 | CPU for packet processing (from get_rps_cpu()) the rps_sock_flow table | ||
218 | and the rps_dev_flow table of the queue that the packet was received on | ||
219 | are compared. If the desired CPU for the flow (found in the | ||
220 | rps_sock_flow table) matches the current CPU (found in the rps_dev_flow | ||
221 | table), the packet is enqueued onto that CPU’s backlog. If they differ, | ||
222 | the current CPU is updated to match the desired CPU if one of the | ||
223 | following is true: | ||
224 | |||
225 | - The current CPU's queue head counter >= the recorded tail counter | ||
226 | value in rps_dev_flow[i] | ||
227 | - The current CPU is unset (equal to NR_CPUS) | ||
228 | - The current CPU is offline | ||
229 | |||
230 | After this check, the packet is sent to the (possibly updated) current | ||
231 | CPU. These rules aim to ensure that a flow only moves to a new CPU when | ||
232 | there are no packets outstanding on the old CPU, as the outstanding | ||
233 | packets could arrive later than those about to be processed on the new | ||
234 | CPU. | ||
235 | |||
236 | ==== RFS Configuration | ||
237 | |||
238 | RFS is only available if the kconfig symbol CONFIG_RFS is enabled (on | ||
239 | by default for SMP). The functionality remains disabled until explicitly | ||
240 | configured. The number of entries in the global flow table is set through: | ||
241 | |||
242 | /proc/sys/net/core/rps_sock_flow_entries | ||
243 | |||
244 | The number of entries in the per-queue flow table are set through: | ||
245 | |||
246 | /sys/class/net/<dev>/queues/rx-<n>/rps_flow_cnt | ||
247 | |||
248 | == Suggested Configuration | ||
249 | |||
250 | Both of these need to be set before RFS is enabled for a receive queue. | ||
251 | Values for both are rounded up to the nearest power of two. The | ||
252 | suggested flow count depends on the expected number of active connections | ||
253 | at any given time, which may be significantly less than the number of open | ||
254 | connections. We have found that a value of 32768 for rps_sock_flow_entries | ||
255 | works fairly well on a moderately loaded server. | ||
256 | |||
257 | For a single queue device, the rps_flow_cnt value for the single queue | ||
258 | would normally be configured to the same value as rps_sock_flow_entries. | ||
259 | For a multi-queue device, the rps_flow_cnt for each queue might be | ||
260 | configured as rps_sock_flow_entries / N, where N is the number of | ||
261 | queues. So for instance, if rps_flow_entries is set to 32768 and there | ||
262 | are 16 configured receive queues, rps_flow_cnt for each queue might be | ||
263 | configured as 2048. | ||
264 | |||
265 | |||
266 | Accelerated RFS | ||
267 | =============== | ||
268 | |||
269 | Accelerated RFS is to RFS what RSS is to RPS: a hardware-accelerated load | ||
270 | balancing mechanism that uses soft state to steer flows based on where | ||
271 | the application thread consuming the packets of each flow is running. | ||
272 | Accelerated RFS should perform better than RFS since packets are sent | ||
273 | directly to a CPU local to the thread consuming the data. The target CPU | ||
274 | will either be the same CPU where the application runs, or at least a CPU | ||
275 | which is local to the application thread’s CPU in the cache hierarchy. | ||
276 | |||
277 | To enable accelerated RFS, the networking stack calls the | ||
278 | ndo_rx_flow_steer driver function to communicate the desired hardware | ||
279 | queue for packets matching a particular flow. The network stack | ||
280 | automatically calls this function every time a flow entry in | ||
281 | rps_dev_flow_table is updated. The driver in turn uses a device specific | ||
282 | method to program the NIC to steer the packets. | ||
283 | |||
284 | The hardware queue for a flow is derived from the CPU recorded in | ||
285 | rps_dev_flow_table. The stack consults a CPU to hardware queue map which | ||
286 | is maintained by the NIC driver. This is an auto-generated reverse map of | ||
287 | the IRQ affinity table shown by /proc/interrupts. Drivers can use | ||
288 | functions in the cpu_rmap (“CPU affinity reverse map”) kernel library | ||
289 | to populate the map. For each CPU, the corresponding queue in the map is | ||
290 | set to be one whose processing CPU is closest in cache locality. | ||
291 | |||
292 | ==== Accelerated RFS Configuration | ||
293 | |||
294 | Accelerated RFS is only available if the kernel is compiled with | ||
295 | CONFIG_RFS_ACCEL and support is provided by the NIC device and driver. | ||
296 | It also requires that ntuple filtering is enabled via ethtool. The map | ||
297 | of CPU to queues is automatically deduced from the IRQ affinities | ||
298 | configured for each receive queue by the driver, so no additional | ||
299 | configuration should be necessary. | ||
300 | |||
301 | == Suggested Configuration | ||
302 | |||
303 | This technique should be enabled whenever one wants to use RFS and the | ||
304 | NIC supports hardware acceleration. | ||
305 | |||
306 | XPS: Transmit Packet Steering | ||
307 | ============================= | ||
308 | |||
309 | Transmit Packet Steering is a mechanism for intelligently selecting | ||
310 | which transmit queue to use when transmitting a packet on a multi-queue | ||
311 | device. To accomplish this, a mapping from CPU to hardware queue(s) is | ||
312 | recorded. The goal of this mapping is usually to assign queues | ||
313 | exclusively to a subset of CPUs, where the transmit completions for | ||
314 | these queues are processed on a CPU within this set. This choice | ||
315 | provides two benefits. First, contention on the device queue lock is | ||
316 | significantly reduced since fewer CPUs contend for the same queue | ||
317 | (contention can be eliminated completely if each CPU has its own | ||
318 | transmit queue). Secondly, cache miss rate on transmit completion is | ||
319 | reduced, in particular for data cache lines that hold the sk_buff | ||
320 | structures. | ||
321 | |||
322 | XPS is configured per transmit queue by setting a bitmap of CPUs that | ||
323 | may use that queue to transmit. The reverse mapping, from CPUs to | ||
324 | transmit queues, is computed and maintained for each network device. | ||
325 | When transmitting the first packet in a flow, the function | ||
326 | get_xps_queue() is called to select a queue. This function uses the ID | ||
327 | of the running CPU as a key into the CPU-to-queue lookup table. If the | ||
328 | ID matches a single queue, that is used for transmission. If multiple | ||
329 | queues match, one is selected by using the flow hash to compute an index | ||
330 | into the set. | ||
331 | |||
332 | The queue chosen for transmitting a particular flow is saved in the | ||
333 | corresponding socket structure for the flow (e.g. a TCP connection). | ||
334 | This transmit queue is used for subsequent packets sent on the flow to | ||
335 | prevent out of order (ooo) packets. The choice also amortizes the cost | ||
336 | of calling get_xps_queues() over all packets in the flow. To avoid | ||
337 | ooo packets, the queue for a flow can subsequently only be changed if | ||
338 | skb->ooo_okay is set for a packet in the flow. This flag indicates that | ||
339 | there are no outstanding packets in the flow, so the transmit queue can | ||
340 | change without the risk of generating out of order packets. The | ||
341 | transport layer is responsible for setting ooo_okay appropriately. TCP, | ||
342 | for instance, sets the flag when all data for a connection has been | ||
343 | acknowledged. | ||
344 | |||
345 | ==== XPS Configuration | ||
346 | |||
347 | XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by | ||
348 | default for SMP). The functionality remains disabled until explicitly | ||
349 | configured. To enable XPS, the bitmap of CPUs that may use a transmit | ||
350 | queue is configured using the sysfs file entry: | ||
351 | |||
352 | /sys/class/net/<dev>/queues/tx-<n>/xps_cpus | ||
353 | |||
354 | == Suggested Configuration | ||
355 | |||
356 | For a network device with a single transmission queue, XPS configuration | ||
357 | has no effect, since there is no choice in this case. In a multi-queue | ||
358 | system, XPS is preferably configured so that each CPU maps onto one queue. | ||
359 | If there are as many queues as there are CPUs in the system, then each | ||
360 | queue can also map onto one CPU, resulting in exclusive pairings that | ||
361 | experience no contention. If there are fewer queues than CPUs, then the | ||
362 | best CPUs to share a given queue are probably those that share the cache | ||
363 | with the CPU that processes transmit completions for that queue | ||
364 | (transmit interrupts). | ||
365 | |||
366 | |||
367 | Further Information | ||
368 | =================== | ||
369 | RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into | ||
370 | 2.6.38. Original patches were submitted by Tom Herbert | ||
371 | (therbert@google.com) | ||
372 | |||
373 | Accelerated RFS was introduced in 2.6.35. Original patches were | ||
374 | submitted by Ben Hutchings (bhutchings@solarflare.com) | ||
375 | |||
376 | Authors: | ||
377 | Tom Herbert (therbert@google.com) | ||
378 | Willem de Bruijn (willemb@google.com) | ||
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 80a7a345490..57a24108b84 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt | |||
@@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers | |||
7 | (Synopsys IP blocks); it has been fully tested on STLinux platforms. | 7 | (Synopsys IP blocks); it has been fully tested on STLinux platforms. |
8 | 8 | ||
9 | Currently this network device driver is for all STM embedded MAC/GMAC | 9 | Currently this network device driver is for all STM embedded MAC/GMAC |
10 | (7xxx SoCs). Other platforms start using it i.e. ARM SPEAr. | 10 | (i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr. |
11 | 11 | ||
12 | DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 | 12 | DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 |
13 | Universal version 4.0 have been used for developing the first code | 13 | Universal version 4.0 have been used for developing the first code |
@@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp | |||
71 | the CPU while having the maximum throughput. | 71 | the CPU while having the maximum throughput. |
72 | 72 | ||
73 | 4.4) WOL | 73 | 4.4) WOL |
74 | Wake up on Lan feature through Magic Frame is only supported for the GMAC | 74 | Wake up on Lan feature through Magic and Unicast frames are supported for the GMAC |
75 | core. | 75 | core. |
76 | 76 | ||
77 | 4.5) DMA descriptors | 77 | 4.5) DMA descriptors |
@@ -91,11 +91,15 @@ LRO is not supported. | |||
91 | The driver is compatible with PAL to work with PHY and GPHY devices. | 91 | The driver is compatible with PAL to work with PHY and GPHY devices. |
92 | 92 | ||
93 | 4.9) Platform information | 93 | 4.9) Platform information |
94 | Several information came from the platform; please refer to the | 94 | Several driver's information can be passed through the platform |
95 | driver's Header file in include/linux directory. | 95 | These are included in the include/linux/stmmac.h header file |
96 | and detailed below as well: | ||
96 | 97 | ||
97 | struct plat_stmmacenet_data { | 98 | struct plat_stmmacenet_data { |
98 | int bus_id; | 99 | int bus_id; |
100 | int phy_addr; | ||
101 | int interface; | ||
102 | struct stmmac_mdio_bus_data *mdio_bus_data; | ||
99 | int pbl; | 103 | int pbl; |
100 | int clk_csr; | 104 | int clk_csr; |
101 | int has_gmac; | 105 | int has_gmac; |
@@ -103,67 +107,135 @@ struct plat_stmmacenet_data { | |||
103 | int tx_coe; | 107 | int tx_coe; |
104 | int bugged_jumbo; | 108 | int bugged_jumbo; |
105 | int pmt; | 109 | int pmt; |
106 | void (*fix_mac_speed)(void *priv, unsigned int speed); | 110 | int force_sf_dma_mode; |
107 | void (*bus_setup)(unsigned long ioaddr); | 111 | void (*fix_mac_speed)(void *priv, unsigned int speed); |
108 | #ifdef CONFIG_STM_DRIVERS | 112 | void (*bus_setup)(void __iomem *ioaddr); |
109 | struct stm_pad_config *pad_config; | 113 | int (*init)(struct platform_device *pdev); |
110 | #endif | 114 | void (*exit)(struct platform_device *pdev); |
111 | void *bsp_priv; | 115 | void *bsp_priv; |
112 | }; | 116 | }; |
113 | 117 | ||
114 | Where: | 118 | Where: |
115 | - pbl (Programmable Burst Length) is maximum number of | 119 | o bus_id: bus identifier. |
116 | beats to be transferred in one DMA transaction. | 120 | o phy_addr: the physical address can be passed from the platform. |
117 | GMAC also enables the 4xPBL by default. | 121 | If it is set to -1 the driver will automatically |
118 | - fix_mac_speed and bus_setup are used to configure internal target | 122 | detect it at run-time by probing all the 32 addresses. |
119 | registers (on STM platforms); | 123 | o interface: PHY device's interface. |
120 | - has_gmac: GMAC core is on board (get it at run-time in the next step); | 124 | o mdio_bus_data: specific platform fields for the MDIO bus. |
121 | - bus_id: bus identifier. | 125 | o pbl: the Programmable Burst Length is maximum number of beats to |
122 | - tx_coe: core is able to perform the tx csum in HW. | 126 | be transferred in one DMA transaction. |
123 | - enh_desc: if sets the MAC will use the enhanced descriptor structure. | 127 | GMAC also enables the 4xPBL by default. |
124 | - clk_csr: CSR Clock range selection. | 128 | o clk_csr: CSR Clock range selection. |
125 | - bugged_jumbo: some HWs are not able to perform the csum in HW for | 129 | o has_gmac: uses the GMAC core. |
126 | over-sized frames due to limited buffer sizes. Setting this | 130 | o enh_desc: if sets the MAC will use the enhanced descriptor structure. |
127 | flag the csum will be done in SW on JUMBO frames. | 131 | o tx_coe: core is able to perform the tx csum in HW. |
128 | 132 | o bugged_jumbo: some HWs are not able to perform the csum in HW for | |
129 | struct plat_stmmacphy_data { | 133 | over-sized frames due to limited buffer sizes. |
130 | int bus_id; | 134 | Setting this flag the csum will be done in SW on |
131 | int phy_addr; | 135 | JUMBO frames. |
132 | unsigned int phy_mask; | 136 | o pmt: core has the embedded power module (optional). |
133 | int interface; | 137 | o force_sf_dma_mode: force DMA to use the Store and Forward mode |
134 | int (*phy_reset)(void *priv); | 138 | instead of the Threshold. |
135 | void *priv; | 139 | o fix_mac_speed: this callback is used for modifying some syscfg registers |
136 | }; | 140 | (on ST SoCs) according to the link speed negotiated by the |
141 | physical layer . | ||
142 | o bus_setup: perform HW setup of the bus. For example, on some ST platforms | ||
143 | this field is used to configure the AMBA bridge to generate more | ||
144 | efficient STBus traffic. | ||
145 | o init/exit: callbacks used for calling a custom initialisation; | ||
146 | this is sometime necessary on some platforms (e.g. ST boxes) | ||
147 | where the HW needs to have set some PIO lines or system cfg | ||
148 | registers. | ||
149 | o custom_cfg: this is a custom configuration that can be passed while | ||
150 | initialising the resources. | ||
151 | |||
152 | The we have: | ||
153 | |||
154 | struct stmmac_mdio_bus_data { | ||
155 | int bus_id; | ||
156 | int (*phy_reset)(void *priv); | ||
157 | unsigned int phy_mask; | ||
158 | int *irqs; | ||
159 | int probed_phy_irq; | ||
160 | }; | ||
137 | 161 | ||
138 | Where: | 162 | Where: |
139 | - bus_id: bus identifier; | 163 | o bus_id: bus identifier; |
140 | - phy_addr: physical address used for the attached phy device; | 164 | o phy_reset: hook to reset the phy device attached to the bus. |
141 | set it to -1 to get it at run-time; | 165 | o phy_mask: phy mask passed when register the MDIO bus within the driver. |
142 | - interface: physical MII interface mode; | 166 | o irqs: list of IRQs, one per PHY. |
143 | - phy_reset: hook to reset HW function. | 167 | o probed_phy_irq: if irqs is NULL, use this for probed PHY. |
144 | 168 | ||
145 | SOURCES: | 169 | Below an example how the structures above are using on ST platforms. |
146 | - Kconfig | 170 | |
147 | - Makefile | 171 | static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = { |
148 | - stmmac_main.c: main network device driver; | 172 | .pbl = 32, |
149 | - stmmac_mdio.c: mdio functions; | 173 | .has_gmac = 0, |
150 | - stmmac_ethtool.c: ethtool support; | 174 | .enh_desc = 0, |
151 | - stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts | 175 | .fix_mac_speed = stxYYY_ethernet_fix_mac_speed, |
152 | Only tested on ST40 platforms based. | 176 | | |
153 | - stmmac.h: private driver structure; | 177 | |-> to write an internal syscfg |
154 | - common.h: common definitions and VFTs; | 178 | | on this platform when the |
155 | - descs.h: descriptor structure definitions; | 179 | | link speed changes from 10 to |
156 | - dwmac1000_core.c: GMAC core functions; | 180 | | 100 and viceversa |
157 | - dwmac1000_dma.c: dma functions for the GMAC chip; | 181 | .init = &stmmac_claim_resource, |
158 | - dwmac1000.h: specific header file for the GMAC; | 182 | | |
159 | - dwmac100_core: MAC 100 core and dma code; | 183 | |-> On ST SoC this calls own "PAD" |
160 | - dwmac100_dma.c: dma funtions for the MAC chip; | 184 | | manager framework to claim |
161 | - dwmac1000.h: specific header file for the MAC; | 185 | | all the resources necessary |
162 | - dwmac_lib.c: generic DMA functions shared among chips | 186 | | (GPIO ...). The .custom_cfg field |
163 | - enh_desc.c: functions for handling enhanced descriptors | 187 | | is used to pass a custom config. |
164 | - norm_desc.c: functions for handling normal descriptors | 188 | }; |
165 | 189 | ||
166 | TODO: | 190 | Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact, |
167 | - XGMAC controller is not supported. | 191 | there are two MAC cores: one MAC is for MDIO Bus/PHY emulation |
168 | - Review the timer optimisation code to use an embedded device that seems to be | 192 | with fixed_link support. |
193 | |||
194 | static struct stmmac_mdio_bus_data stmmac1_mdio_bus = { | ||
195 | .bus_id = 1, | ||
196 | | | ||
197 | |-> phy device on the bus_id 1 | ||
198 | .phy_reset = phy_reset; | ||
199 | | | ||
200 | |-> function to provide the phy_reset on this board | ||
201 | .phy_mask = 0, | ||
202 | }; | ||
203 | |||
204 | static struct fixed_phy_status stmmac0_fixed_phy_status = { | ||
205 | .link = 1, | ||
206 | .speed = 100, | ||
207 | .duplex = 1, | ||
208 | }; | ||
209 | |||
210 | During the board's device_init we can configure the first | ||
211 | MAC for fixed_link by calling: | ||
212 | fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status));) | ||
213 | and the second one, with a real PHY device attached to the bus, | ||
214 | by using the stmmac_mdio_bus_data structure (to provide the id, the | ||
215 | reset procedure etc). | ||
216 | |||
217 | 4.10) List of source files: | ||
218 | o Kconfig | ||
219 | o Makefile | ||
220 | o stmmac_main.c: main network device driver; | ||
221 | o stmmac_mdio.c: mdio functions; | ||
222 | o stmmac_ethtool.c: ethtool support; | ||
223 | o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts | ||
224 | Only tested on ST40 platforms based. | ||
225 | o stmmac.h: private driver structure; | ||
226 | o common.h: common definitions and VFTs; | ||
227 | o descs.h: descriptor structure definitions; | ||
228 | o dwmac1000_core.c: GMAC core functions; | ||
229 | o dwmac1000_dma.c: dma functions for the GMAC chip; | ||
230 | o dwmac1000.h: specific header file for the GMAC; | ||
231 | o dwmac100_core: MAC 100 core and dma code; | ||
232 | o dwmac100_dma.c: dma funtions for the MAC chip; | ||
233 | o dwmac1000.h: specific header file for the MAC; | ||
234 | o dwmac_lib.c: generic DMA functions shared among chips | ||
235 | o enh_desc.c: functions for handling enhanced descriptors | ||
236 | o norm_desc.c: functions for handling normal descriptors | ||
237 | |||
238 | 5) TODO: | ||
239 | o XGMAC is not supported. | ||
240 | o Review the timer optimisation code to use an embedded device that will be | ||
169 | available in new chip generations. | 241 | available in new chip generations. |