175 files changed, 6839 insertions, 5527 deletions
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index 4ef9f7cd5dc3..944aa55e79f8 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -1,16 +1,17 @@ | |||
1 | Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters | 1 | Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters |
2 | ============================================================== | 2 | ============================================================== |
3 | 3 | ||
4 | November 17, 2004 | 4 | November 15, 2005 |
5 | |||
6 | 5 | ||
7 | Contents | 6 | Contents |
8 | ======== | 7 | ======== |
9 | 8 | ||
10 | - In This Release | 9 | - In This Release |
11 | - Identifying Your Adapter | 10 | - Identifying Your Adapter |
11 | - Building and Installation | ||
12 | - Driver Configuration Parameters | 12 | - Driver Configuration Parameters |
13 | - Additional Configurations | 13 | - Additional Configurations |
14 | - Known Issues | ||
14 | - Support | 15 | - Support |
15 | 16 | ||
16 | 17 | ||
@@ -18,18 +19,30 @@ In This Release | |||
18 | =============== | 19 | =============== |
19 | 20 | ||
20 | This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of | 21 | This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of |
21 | Adapters, version 3.3.x. This driver supports 2.4.x and 2.6.x kernels. | 22 | Adapters. This driver includes support for Itanium(R)2-based systems. |
23 | |||
24 | For questions related to hardware requirements, refer to the documentation | ||
25 | supplied with your Intel PRO/100 adapter. | ||
26 | |||
27 | The following features are now available in supported kernels: | ||
28 | - Native VLANs | ||
29 | - Channel Bonding (teaming) | ||
30 | - SNMP | ||
31 | |||
32 | Channel Bonding documentation can be found in the Linux kernel source: | ||
33 | /Documentation/networking/bonding.txt | ||
34 | |||
22 | 35 | ||
23 | Identifying Your Adapter | 36 | Identifying Your Adapter |
24 | ======================== | 37 | ======================== |
25 | 38 | ||
26 | For more information on how to identify your adapter, go to the Adapter & | 39 | For more information on how to identify your adapter, go to the Adapter & |
27 | Driver ID Guide at: | 40 | Driver ID Guide at: |
28 | 41 | ||
29 | http://support.intel.com/support/network/adapter/pro100/21397.htm | 42 | http://support.intel.com/support/network/adapter/pro100/21397.htm |
30 | 43 | ||
31 | For the latest Intel network drivers for Linux, refer to the following | 44 | For the latest Intel network drivers for Linux, refer to the following |
32 | website. In the search field, enter your adapter name or type, or use the | 45 | website. In the search field, enter your adapter name or type, or use the |
33 | networking link on the left to search for your adapter: | 46 | networking link on the left to search for your adapter: |
34 | 47 | ||
35 | http://downloadfinder.intel.com/scripts-df/support_intel.asp | 48 | http://downloadfinder.intel.com/scripts-df/support_intel.asp |
@@ -40,73 +53,75 @@ Driver Configuration Parameters | |||
40 | The default value for each parameter is generally the recommended setting, | 53 | The default value for each parameter is generally the recommended setting, |
41 | unless otherwise noted. | 54 | unless otherwise noted. |
42 | 55 | ||
43 | Rx Descriptors: Number of receive descriptors. A receive descriptor is a data | 56 | Rx Descriptors: Number of receive descriptors. A receive descriptor is a data |
44 | structure that describes a receive buffer and its attributes to the network | 57 | structure that describes a receive buffer and its attributes to the network |
45 | controller. The data in the descriptor is used by the controller to write | 58 | controller. The data in the descriptor is used by the controller to write |
46 | data from the controller to host memory. In the 3.0.x driver the valid | 59 | data from the controller to host memory. In the 3.x.x driver the valid range |
47 | range for this parameter is 64-256. The default value is 64. This parameter | 60 | for this parameter is 64-256. The default value is 64. This parameter can be |
48 | can be changed using the command | 61 | changed using the command: |
49 | 62 | ||
50 | ethtool -G eth? rx n, where n is the number of desired rx descriptors. | 63 | ethtool -G eth? rx n, where n is the number of desired rx descriptors. |
51 | 64 | ||
52 | Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a | 65 | Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data |
53 | data structure that describes a transmit buffer and its attributes to the | 66 | structure that describes a transmit buffer and its attributes to the network |
54 | network controller. The data in the descriptor is used by the controller to | 67 | controller. The data in the descriptor is used by the controller to read |
55 | read data from the host memory to the controller. In the 3.0.x driver the | 68 | data from the host memory to the controller. In the 3.x.x driver the valid |
56 | valid range for this parameter is 64-256. The default value is 64. This | 69 | range for this parameter is 64-256. The default value is 64. This parameter |
57 | parameter can be changed using the command | 70 | can be changed using the command: |
58 | 71 | ||
59 | ethtool -G eth? tx n, where n is the number of desired tx descriptors. | 72 | ethtool -G eth? tx n, where n is the number of desired tx descriptors. |
60 | 73 | ||
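For illustration, a concrete invocation of the two commands above (the interface
name eth0 and the ring sizes are assumptions; any value in the documented 64-256
range can be used):

    ethtool -G eth0 rx 256 tx 128   # request 256 rx and 128 tx descriptors
    ethtool -g eth0                 # display the resulting ring parameters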
61 | Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by | 74 | Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by |
62 | default. Ethtool can be used as follows to force speed/duplex. | 75 | default. Ethtool can be used as follows to force speed/duplex. |
63 | 76 | ||
64 | ethtool -s eth? autoneg off speed {10|100} duplex {full|half} | 77 | ethtool -s eth? autoneg off speed {10|100} duplex {full|half} |
65 | 78 | ||
66 | NOTE: setting the speed/duplex to incorrect values will cause the link to | 79 | NOTE: setting the speed/duplex to incorrect values will cause the link to |
67 | fail. | 80 | fail. |
68 | 81 | ||
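As a concrete sketch (eth0 assumed), forcing 100 Mbps full duplex and later
returning to auto-negotiation:

    ethtool -s eth0 autoneg off speed 100 duplex full
    ethtool -s eth0 autoneg on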
69 | Event Log Message Level: The driver uses the message level flag to log events | 82 | Event Log Message Level: The driver uses the message level flag to log events |
70 | to syslog. The message level can be set at driver load time. It can also be | 83 | to syslog. The message level can be set at driver load time. It can also be |
71 | set using the command | 84 | set using the command: |
72 | 85 | ||
73 | ethtool -s eth? msglvl n | 86 | ethtool -s eth? msglvl n |
74 | 87 | ||
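The message level is a bit mask of the kernel's NETIF_MSG_* categories
(driver=0x1, probe=0x2, link=0x4, and so on). A minimal sketch, assuming eth0
and that the load-time parameter name is still "debug" (verify with modinfo
e100):

    ethtool -s eth0 msglvl 7        # driver, probe and link messages
    modprobe e100 debug=3           # roughly equivalent at load time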
88 | |||
75 | Additional Configurations | 89 | Additional Configurations |
76 | ========================= | 90 | ========================= |
77 | 91 | ||
78 | Configuring the Driver on Different Distributions | 92 | Configuring the Driver on Different Distributions |
79 | ------------------------------------------------- | 93 | ------------------------------------------------- |
80 | 94 | ||
81 | Configuring a network driver to load properly when the system is started is | 95 | Configuring a network driver to load properly when the system is started is |
82 | distribution dependent. Typically, the configuration process involves adding | 96 | distribution dependent. Typically, the configuration process involves adding |
83 | an alias line to /etc/modules.conf as well as editing other system startup | 97 | an alias line to /etc/modules.conf or /etc/modprobe.conf as well as editing |
84 | scripts and/or configuration files. Many popular Linux distributions ship | 98 | other system startup scripts and/or configuration files. Many popular Linux |
85 | with tools to make these changes for you. To learn the proper way to | 99 | distributions ship with tools to make these changes for you. To learn the |
86 | configure a network device for your system, refer to your distribution | 100 | proper way to configure a network device for your system, refer to your |
87 | documentation. If during this process you are asked for the driver or module | 101 | distribution documentation. If during this process you are asked for the |
88 | name, the name for the Linux Base Driver for the Intel PRO/100 Family of | 102 | driver or module name, the name for the Linux Base Driver for the Intel |
89 | Adapters is e100. | 103 | PRO/100 Family of Adapters is e100. |
90 | 104 | ||
91 | As an example, if you install the e100 driver for two PRO/100 adapters | 105 | As an example, if you install the e100 driver for two PRO/100 adapters |
92 | (eth0 and eth1), add the following to modules.conf: | 106 | (eth0 and eth1), add the following to modules.conf or modprobe.conf: |
93 | 107 | ||
94 | alias eth0 e100 | 108 | alias eth0 e100 |
95 | alias eth1 e100 | 109 | alias eth1 e100 |
96 | 110 | ||
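Before editing these files it can be useful to confirm that the module is
present and to list the parameters it accepts, for example:

    modinfo e100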
97 | Viewing Link Messages | 111 | Viewing Link Messages |
98 | --------------------- | 112 | --------------------- |
99 | In order to see link messages and other Intel driver information on your | 113 | In order to see link messages and other Intel driver information on your |
100 | console, you must set the dmesg level up to six. This can be done by | 114 | console, you must set the dmesg level up to six. This can be done by |
101 | entering the following on the command line before loading the e100 driver: | 115 | entering the following on the command line before loading the e100 driver: |
102 | 116 | ||
103 | dmesg -n 8 | 117 | dmesg -n 8 |
104 | 118 | ||
105 | If you wish to see all messages issued by the driver, including debug | 119 | If you wish to see all messages issued by the driver, including debug |
106 | messages, set the dmesg level to eight. | 120 | messages, set the dmesg level to eight. |
107 | 121 | ||
108 | NOTE: This setting is not saved across reboots. | 122 | NOTE: This setting is not saved across reboots. |
109 | 123 | ||
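Applying the setting at every boot is distribution specific; one common
approach of that era is sketched below (the rc.local path is only an
assumption):

    dmesg -n 8
    echo "dmesg -n 8" >> /etc/rc.d/rc.local   # one way to apply it at boot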
124 | |||
110 | Ethtool | 125 | Ethtool |
111 | ------- | 126 | ------- |
112 | 127 | ||
@@ -114,29 +129,27 @@ Additional Configurations | |||
114 | diagnostics, as well as displaying statistical information. Ethtool | 129 | diagnostics, as well as displaying statistical information. Ethtool |
115 | version 1.6 or later is required for this functionality. | 130 | version 1.6 or later is required for this functionality. |
116 | 131 | ||
117 | The latest release of ethtool can be found at: | 132 | The latest release of ethtool can be found from |
118 | http://sf.net/projects/gkernel. | 133 | http://sourceforge.net/projects/gkernel. |
119 | 134 | ||
120 | NOTE: This driver uses mii support from the kernel. As a result, when | 135 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support |
121 | there is no link, ethtool will report speed/duplex to be 10/half. | 136 | for a more complete ethtool feature set can be enabled by upgrading |
137 | ethtool to ethtool-1.8.1. | ||
122 | 138 | ||
123 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support | ||
124 | for a more complete ethtool feature set can be enabled by upgrading | ||
125 | ethtool to ethtool-1.8.1. | ||
126 | 139 | ||
127 | Enabling Wake on LAN* (WoL) | 140 | Enabling Wake on LAN* (WoL) |
128 | --------------------------- | 141 | --------------------------- |
129 | WoL is provided through the Ethtool* utility. Ethtool is included with Red | 142 | WoL is provided through the Ethtool* utility. Ethtool is included with Red |
130 | Hat* 8.0. For other Linux distributions, download and install Ethtool from | 143 | Hat* 8.0. For other Linux distributions, download and install Ethtool from |
131 | the following website: http://sourceforge.net/projects/gkernel. | 144 | the following website: http://sourceforge.net/projects/gkernel. |
132 | 145 | ||
133 | For instructions on enabling WoL with Ethtool, refer to the Ethtool man | 146 | For instructions on enabling WoL with Ethtool, refer to the Ethtool man page. |
134 | page. | ||
135 | 147 | ||
136 | WoL will be enabled on the system during the next shut down or reboot. For | 148 | WoL will be enabled on the system during the next shut down or reboot. For |
137 | this driver version, in order to enable WoL, the e100 driver must be | 149 | this driver version, in order to enable WoL, the e100 driver must be |
138 | loaded when shutting down or rebooting the system. | 150 | loaded when shutting down or rebooting the system. |
139 | 151 | ||
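A short example of enabling and checking magic-packet wake with ethtool (eth0
assumed; the adapter and system BIOS must also support WoL):

    ethtool -s eth0 wol g
    ethtool eth0          # the "Wake-on" line shows the current setting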
152 | |||
140 | NAPI | 153 | NAPI |
141 | ---- | 154 | ---- |
142 | 155 | ||
@@ -144,6 +157,25 @@ Additional Configurations | |||
144 | 157 | ||
145 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | 158 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. |
146 | 159 | ||
160 | Multiple Interfaces on Same Ethernet Broadcast Network | ||
161 | ------------------------------------------------------ | ||
162 | |||
163 | Due to the default ARP behavior on Linux, it is not possible to have | ||
164 | one system on two IP networks in the same Ethernet broadcast domain | ||
165 | (non-partitioned switch) behave as expected. All Ethernet interfaces | ||
166 | will respond to IP traffic for any IP address assigned to the system. | ||
167 | This results in unbalanced receive traffic. | ||
168 | |||
169 | If you have multiple interfaces in a server, either turn on ARP | ||
170 | filtering by | ||
171 | |||
172 | (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter | ||
173 | (this only works if your kernel's version is higher than 2.4.5), or | ||
174 | |||
175 | (2) installing the interfaces in separate broadcast domains (either | ||
176 | in different switches or in a switch partitioned to VLANs). | ||
177 | |||
178 | |||
147 | Support | 179 | Support |
148 | ======= | 180 | ======= |
149 | 181 | ||
@@ -151,20 +183,24 @@ For general information, go to the Intel support website at: | |||
151 | 183 | ||
152 | http://support.intel.com | 184 | http://support.intel.com |
153 | 185 | ||
186 | or the Intel Wired Networking project hosted by Sourceforge at: | ||
187 | |||
188 | http://sourceforge.net/projects/e1000 | ||
189 | |||
154 | If an issue is identified with the released source code on the supported | 190 | If an issue is identified with the released source code on the supported |
155 | kernel with a supported adapter, email the specific information related to | 191 | kernel with a supported adapter, email the specific information related to the |
156 | the issue to linux.nics@intel.com. | 192 | issue to e1000-devel@lists.sourceforge.net. |
157 | 193 | ||
158 | 194 | ||
159 | License | 195 | License |
160 | ======= | 196 | ======= |
161 | 197 | ||
162 | This software program is released under the terms of a license agreement | 198 | This software program is released under the terms of a license agreement |
163 | between you ('Licensee') and Intel. Do not use or load this software or any | 199 | between you ('Licensee') and Intel. Do not use or load this software or any |
164 | associated materials (collectively, the 'Software') until you have carefully | 200 | associated materials (collectively, the 'Software') until you have carefully |
165 | read the full terms and conditions of the LICENSE located in this software | 201 | read the full terms and conditions of the file COPYING located in this software |
166 | package. By loading or using the Software, you agree to the terms of this | 202 | package. By loading or using the Software, you agree to the terms of this |
167 | Agreement. If you do not agree with the terms of this Agreement, do not | 203 | Agreement. If you do not agree with the terms of this Agreement, do not install |
168 | install or use the Software. | 204 | or use the Software. |
169 | 205 | ||
170 | * Other names and brands may be claimed as the property of others. | 206 | * Other names and brands may be claimed as the property of others. |
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
index 2ebd4058d46d..71fe15af356c 100644
--- a/Documentation/networking/e1000.txt
+++ b/Documentation/networking/e1000.txt
@@ -1,7 +1,7 @@ | |||
1 | Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters | 1 | Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters |
2 | =============================================================== | 2 | =============================================================== |
3 | 3 | ||
4 | November 17, 2004 | 4 | November 15, 2005 |
5 | 5 | ||
6 | 6 | ||
7 | Contents | 7 | Contents |
@@ -20,254 +20,316 @@ In This Release | |||
20 | =============== | 20 | =============== |
21 | 21 | ||
22 | This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family | 22 | This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family |
23 | of Adapters, version 5.x.x. | 23 | of Adapters. This driver includes support for Itanium(R)2-based systems. |
24 | 24 | ||
25 | For questions related to hardware requirements, refer to the documentation | 25 | For questions related to hardware requirements, refer to the documentation |
26 | supplied with your Intel PRO/1000 adapter. All hardware requirements listed | 26 | supplied with your Intel PRO/1000 adapter. All hardware requirements listed |
27 | apply to use with Linux. | 27 | apply to use with Linux. |
28 | 28 | ||
29 | Native VLANs are now available with supported kernels. | 29 | The following features are now available in supported kernels: |
30 | - Native VLANs | ||
31 | - Channel Bonding (teaming) | ||
32 | - SNMP | ||
33 | |||
34 | Channel Bonding documentation can be found in the Linux kernel source: | ||
35 | /Documentation/networking/bonding.txt | ||
36 | |||
37 | The driver information previously displayed in the /proc filesystem is not | ||
38 | supported in this release. Alternatively, you can use ethtool (version 1.6 | ||
39 | or later), lspci, and ifconfig to obtain the same information. | ||
40 | |||
41 | Instructions on updating ethtool can be found in the section "Additional | ||
42 | Configurations" later in this document. | ||
43 | |||
30 | 44 | ||
31 | Identifying Your Adapter | 45 | Identifying Your Adapter |
32 | ======================== | 46 | ======================== |
33 | 47 | ||
34 | For more information on how to identify your adapter, go to the Adapter & | 48 | For more information on how to identify your adapter, go to the Adapter & |
35 | Driver ID Guide at: | 49 | Driver ID Guide at: |
36 | 50 | ||
37 | http://support.intel.com/support/network/adapter/pro100/21397.htm | 51 | http://support.intel.com/support/network/adapter/pro100/21397.htm |
38 | 52 | ||
39 | For the latest Intel network drivers for Linux, refer to the following | 53 | For the latest Intel network drivers for Linux, refer to the following |
40 | website. In the search field, enter your adapter name or type, or use the | 54 | website. In the search field, enter your adapter name or type, or use the |
41 | networking link on the left to search for your adapter: | 55 | networking link on the left to search for your adapter: |
42 | 56 | ||
43 | http://downloadfinder.intel.com/scripts-df/support_intel.asp | 57 | http://downloadfinder.intel.com/scripts-df/support_intel.asp |
44 | 58 | ||
45 | Command Line Parameters | ||
46 | ======================= | ||
47 | 59 | ||
48 | If the driver is built as a module, the following optional parameters are | 60 | Command Line Parameters ======================= |
49 | used by entering them on the command line with the modprobe or insmod command | 61 | |
50 | using this syntax: | 62 | If the driver is built as a module, the following optional parameters |
63 | are used by entering them on the command line with the modprobe or insmod | ||
64 | command using this syntax: | ||
51 | 65 | ||
52 | modprobe e1000 [<option>=<VAL1>,<VAL2>,...] | 66 | modprobe e1000 [<option>=<VAL1>,<VAL2>,...] |
53 | 67 | ||
54 | insmod e1000 [<option>=<VAL1>,<VAL2>,...] | 68 | insmod e1000 [<option>=<VAL1>,<VAL2>,...] |
55 | 69 | ||
56 | For example, with two PRO/1000 PCI adapters, entering: | 70 | For example, with two PRO/1000 PCI adapters, entering: |
57 | 71 | ||
58 | insmod e1000 TxDescriptors=80,128 | 72 | insmod e1000 TxDescriptors=80,128 |
59 | 73 | ||
60 | loads the e1000 driver with 80 TX descriptors for the first adapter and 128 TX | 74 | loads the e1000 driver with 80 TX descriptors for the first adapter and 128 |
61 | descriptors for the second adapter. | 75 | TX descriptors for the second adapter. |
62 | 76 | ||
63 | The default value for each parameter is generally the recommended setting, | 77 | The default value for each parameter is generally the recommended setting, |
64 | unless otherwise noted. Also, if the driver is statically built into the | 78 | unless otherwise noted. |
65 | kernel, the driver is loaded with the default values for all the parameters. | 79 | |
66 | Ethtool can be used to change some of the parameters at runtime. | 80 | NOTES: For more information about the AutoNeg, Duplex, and Speed |
81 | parameters, see the "Speed and Duplex Configuration" section in | ||
82 | this document. | ||
67 | 83 | ||
68 | NOTES: For more information about the AutoNeg, Duplex, and Speed | 84 | For more information about the InterruptThrottleRate, |
69 | parameters, see the "Speed and Duplex Configuration" section in | 85 | RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay |
70 | this document. | 86 | parameters, see the application note at: |
87 | http://www.intel.com/design/network/applnots/ap450.htm | ||
71 | 88 | ||
72 | For more information about the InterruptThrottleRate, RxIntDelay, | 89 | A descriptor describes a data buffer and attributes related to |
73 | TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay parameters, see the | 90 | the data buffer. This information is accessed by the hardware. |
74 | application note at: | ||
75 | http://www.intel.com/design/network/applnots/ap450.htm | ||
76 | 91 | ||
77 | A descriptor describes a data buffer and attributes related to the | ||
78 | data buffer. This information is accessed by the hardware. | ||
79 | 92 | ||
80 | AutoNeg (adapters using copper connections only) | 93 | AutoNeg |
81 | Valid Range: 0x01-0x0F, 0x20-0x2F | 94 | ------- |
95 | (Supported only on adapters with copper connections) | ||
96 | Valid Range: 0x01-0x0F, 0x20-0x2F | ||
82 | Default Value: 0x2F | 97 | Default Value: 0x2F |
83 | This parameter is a bit mask that specifies which speed and duplex | 98 | |
84 | settings the board advertises. When this parameter is used, the Speed and | 99 | This parameter is a bit mask that specifies which speed and duplex |
85 | Duplex parameters must not be specified. | 100 | settings the board advertises. When this parameter is used, the Speed |
86 | NOTE: Refer to the Speed and Duplex section of this readme for more | 101 | and Duplex parameters must not be specified. |
87 | information on the AutoNeg parameter. | 102 | |
88 | 103 | NOTE: Refer to the Speed and Duplex section of this readme for more | |
89 | Duplex (adapters using copper connections only) | 104 | information on the AutoNeg parameter. |
90 | Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) | 105 | |
106 | |||
107 | Duplex | ||
108 | ------ | ||
109 | (Supported only on adapters with copper connections) | ||
110 | Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) | ||
91 | Default Value: 0 | 111 | Default Value: 0 |
92 | Defines the direction in which data is allowed to flow. Can be either one | 112 | |
93 | or two-directional. If both Duplex and the link partner are set to auto- | 113 | Defines the direction in which data is allowed to flow. Can be either |
94 | negotiate, the board auto-detects the correct duplex. If the link partner | 114 | one or two-directional. If both Duplex and the link partner are set to |
95 | is forced (either full or half), Duplex defaults to half-duplex. | 115 | auto-negotiate, the board auto-detects the correct duplex. If the link |
116 | partner is forced (either full or half), Duplex defaults to half-duplex. | ||
117 | |||
96 | 118 | ||
97 | FlowControl | 119 | FlowControl |
98 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) | 120 | ---------- |
99 | Default: Read flow control settings from the EEPROM | 121 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) |
100 | This parameter controls the automatic generation(Tx) and response(Rx) to | 122 | Default Value: Reads flow control settings from the EEPROM |
101 | Ethernet PAUSE frames. | 123 | |
124 | This parameter controls the automatic generation(Tx) and response(Rx) | ||
125 | to Ethernet PAUSE frames. | ||
126 | |||
102 | 127 | ||
103 | InterruptThrottleRate | 128 | InterruptThrottleRate |
104 | Valid Range: 100-100000 (0=off, 1=dynamic) | 129 | --------------------- |
130 | (not supported on Intel 82542, 82543 or 82544-based adapters) | ||
131 | Valid Range: 100-100000 (0=off, 1=dynamic) | ||
105 | Default Value: 8000 | 132 | Default Value: 8000 |
106 | This value represents the maximum number of interrupts per second the | 133 | |
107 | controller generates. InterruptThrottleRate is another setting used in | 134 | This value represents the maximum number of interrupts per second the |
108 | interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust | 135 | controller generates. InterruptThrottleRate is another setting used in |
109 | InterruptThrottleRate based on the current traffic load. | 136 | interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust |
110 | Un-supported Adapters: InterruptThrottleRate is NOT supported by 82542, 82543 | 137 | InterruptThrottleRate based on the current traffic load. |
111 | or 82544-based adapters. | 138 | |
112 | 139 | NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and | |
113 | NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and | 140 | RxAbsIntDelay parameters. In other words, minimizing the receive |
114 | RxAbsIntDelay parameters. In other words, minimizing the receive | 141 | and/or transmit absolute delays does not force the controller to |
115 | and/or transmit absolute delays does not force the controller to | 142 | generate more interrupts than what the Interrupt Throttle Rate |
116 | generate more interrupts than what the Interrupt Throttle Rate | 143 | allows. |
117 | allows. | 144 | |
118 | CAUTION: If you are using the Intel PRO/1000 CT Network Connection | 145 | CAUTION: If you are using the Intel PRO/1000 CT Network Connection |
119 | (controller 82547), setting InterruptThrottleRate to a value | 146 | (controller 82547), setting InterruptThrottleRate to a value |
120 | greater than 75,000, may hang (stop transmitting) adapters under | 147 | greater than 75,000, may hang (stop transmitting) adapters |
121 | certain network conditions. If this occurs a NETDEV WATCHDOG | 148 | under certain network conditions. If this occurs a NETDEV |
122 | message is logged in the system event log. In addition, the | 149 | WATCHDOG message is logged in the system event log. In |
123 | controller is automatically reset, restoring the network | 150 | addition, the controller is automatically reset, restoring |
124 | connection. To eliminate the potential for the hang, ensure | 151 | the network connection. To eliminate the potential for the |
125 | that InterruptThrottleRate is set no greater than 75,000 and is | 152 | hang, ensure that InterruptThrottleRate is set no greater |
126 | not set to 0. | 153 | than 75,000 and is not set to 0. |
127 | NOTE: When e1000 is loaded with default settings and multiple adapters are | 154 | |
128 | in use simultaneously, the CPU utilization may increase non-linearly. | 155 | NOTE: When e1000 is loaded with default settings and multiple adapters |
129 | In order to limit the CPU utilization without impacting the overall | 156 | are in use simultaneously, the CPU utilization may increase non- |
130 | throughput, we recommend that you load the driver as follows: | 157 | linearly. In order to limit the CPU utilization without impacting |
131 | 158 | the overall throughput, we recommend that you load the driver as | |
132 | insmod e1000.o InterruptThrottleRate=3000,3000,3000 | 159 | follows: |
133 | 160 | ||
134 | This sets the InterruptThrottleRate to 3000 interrupts/sec for the | 161 | insmod e1000.o InterruptThrottleRate=3000,3000,3000 |
135 | first, second, and third instances of the driver. The range of 2000 to | 162 | |
136 | 3000 interrupts per second works on a majority of systems and is a | 163 | This sets the InterruptThrottleRate to 3000 interrupts/sec for |
137 | good starting point, but the optimal value will be platform-specific. | 164 | the first, second, and third instances of the driver. The range |
138 | If CPU utilization is not a concern, use RX_POLLING (NAPI) and default | 165 | of 2000 to 3000 interrupts per second works on a majority of |
139 | driver settings. | 166 | systems and is a good starting point, but the optimal value will |
167 | be platform-specific. If CPU utilization is not a concern, use | ||
168 | RX_POLLING (NAPI) and default driver settings. | ||
169 | |||
140 | 170 | ||
141 | RxDescriptors | 171 | RxDescriptors |
142 | Valid Range: 80-256 for 82542 and 82543-based adapters | 172 | ------------- |
143 | 80-4096 for all other supported adapters | 173 | Valid Range: 80-256 for 82542 and 82543-based adapters |
174 | 80-4096 for all other supported adapters | ||
144 | Default Value: 256 | 175 | Default Value: 256 |
145 | This value is the number of receive descriptors allocated by the driver. | ||
146 | Increasing this value allows the driver to buffer more incoming packets. | ||
147 | Each descriptor is 16 bytes. A receive buffer is allocated for each | ||
148 | descriptor and can either be 2048 or 4096 bytes long, depending on the MTU | ||
149 | 176 | ||
150 | setting. An incoming packet can span one or more receive descriptors. | 177 | This value specifies the number of receive descriptors allocated by the |
151 | The maximum MTU size is 16110. | 178 | driver. Increasing this value allows the driver to buffer more incoming |
179 | packets. Each descriptor is 16 bytes. A receive buffer is also | ||
180 | allocated for each descriptor and is 2048 bytes. | ||
152 | 181 | ||
153 | NOTE: MTU designates the frame size. It only needs to be set for Jumbo | ||
154 | Frames. | ||
155 | NOTE: Depending on the available system resources, the request for a | ||
156 | higher number of receive descriptors may be denied. In this case, | ||
157 | use a lower number. | ||
158 | 182 | ||
159 | RxIntDelay | 183 | RxIntDelay |
160 | Valid Range: 0-65535 (0=off) | 184 | ---------- |
185 | Valid Range: 0-65535 (0=off) | ||
161 | Default Value: 0 | 186 | Default Value: 0 |
162 | This value delays the generation of receive interrupts in units of 1.024 | 187 | |
163 | microseconds. Receive interrupt reduction can improve CPU efficiency if | 188 | This value delays the generation of receive interrupts in units of 1.024 |
164 | properly tuned for specific network traffic. Increasing this value adds | 189 | microseconds. Receive interrupt reduction can improve CPU efficiency if |
165 | extra latency to frame reception and can end up decreasing the throughput | 190 | properly tuned for specific network traffic. Increasing this value adds |
166 | of TCP traffic. If the system is reporting dropped receives, this value | 191 | extra latency to frame reception and can end up decreasing the throughput |
167 | may be set too high, causing the driver to run out of available receive | 192 | of TCP traffic. If the system is reporting dropped receives, this value |
168 | descriptors. | 193 | may be set too high, causing the driver to run out of available receive |
169 | 194 | descriptors. | |
170 | CAUTION: When setting RxIntDelay to a value other than 0, adapters may | 195 | |
171 | hang (stop transmitting) under certain network conditions. If | 196 | CAUTION: When setting RxIntDelay to a value other than 0, adapters may |
172 | this occurs a NETDEV WATCHDOG message is logged in the system | 197 | hang (stop transmitting) under certain network conditions. If |
173 | event log. In addition, the controller is automatically reset, | 198 | this occurs a NETDEV WATCHDOG message is logged in the system |
174 | restoring the network connection. To eliminate the potential for | 199 | event log. In addition, the controller is automatically reset, |
175 | the hang ensure that RxIntDelay is set to 0. | 200 | restoring the network connection. To eliminate the potential |
176 | 201 | for the hang ensure that RxIntDelay is set to 0. | |
177 | RxAbsIntDelay (82540, 82545 and later adapters only) | 202 | |
178 | Valid Range: 0-65535 (0=off) | 203 | |
204 | RxAbsIntDelay | ||
205 | ------------- | ||
206 | (This parameter is supported only on 82540, 82545 and later adapters.) | ||
207 | Valid Range: 0-65535 (0=off) | ||
179 | Default Value: 128 | 208 | Default Value: 128 |
180 | This value, in units of 1.024 microseconds, limits the delay in which a | 209 | |
181 | receive interrupt is generated. Useful only if RxIntDelay is non-zero, | 210 | This value, in units of 1.024 microseconds, limits the delay in which a |
182 | this value ensures that an interrupt is generated after the initial | 211 | receive interrupt is generated. Useful only if RxIntDelay is non-zero, |
183 | packet is received within the set amount of time. Proper tuning, | 212 | this value ensures that an interrupt is generated after the initial |
184 | along with RxIntDelay, may improve traffic throughput in specific network | 213 | packet is received within the set amount of time. Proper tuning, |
185 | conditions. | 214 | along with RxIntDelay, may improve traffic throughput in specific network |
186 | 215 | conditions. | |
187 | Speed (adapters using copper connections only) | 216 | |
217 | |||
218 | Speed | ||
219 | ----- | ||
220 | (This parameter is supported only on adapters with copper connections.) | ||
188 | Valid Settings: 0, 10, 100, 1000 | 221 | Valid Settings: 0, 10, 100, 1000 |
189 | Default Value: 0 (auto-negotiate at all supported speeds) | 222 | Default Value: 0 (auto-negotiate at all supported speeds) |
190 | Speed forces the line speed to the specified value in megabits per second | 223 | |
191 | (Mbps). If this parameter is not specified or is set to 0 and the link | 224 | Speed forces the line speed to the specified value in megabits per second |
192 | partner is set to auto-negotiate, the board will auto-detect the correct | 225 | (Mbps). If this parameter is not specified or is set to 0 and the link |
193 | speed. Duplex should also be set when Speed is set to either 10 or 100. | 226 | partner is set to auto-negotiate, the board will auto-detect the correct |
227 | speed. Duplex should also be set when Speed is set to either 10 or 100. | ||
228 | |||
194 | 229 | ||
195 | TxDescriptors | 230 | TxDescriptors |
196 | Valid Range: 80-256 for 82542 and 82543-based adapters | 231 | ------------- |
197 | 80-4096 for all other supported adapters | 232 | Valid Range: 80-256 for 82542 and 82543-based adapters |
233 | 80-4096 for all other supported adapters | ||
198 | Default Value: 256 | 234 | Default Value: 256 |
199 | This value is the number of transmit descriptors allocated by the driver. | ||
200 | Increasing this value allows the driver to queue more transmits. Each | ||
201 | descriptor is 16 bytes. | ||
202 | 235 | ||
203 | NOTE: Depending on the available system resources, the request for a | 236 | This value is the number of transmit descriptors allocated by the driver. |
204 | higher number of transmit descriptors may be denied. In this case, | 237 | Increasing this value allows the driver to queue more transmits. Each |
205 | use a lower number. | 238 | descriptor is 16 bytes. |
239 | |||
240 | NOTE: Depending on the available system resources, the request for a | ||
241 | higher number of transmit descriptors may be denied. In this case, | ||
242 | use a lower number. | ||
243 | |||
206 | 244 | ||
207 | TxIntDelay | 245 | TxIntDelay |
208 | Valid Range: 0-65535 (0=off) | 246 | ---------- |
247 | Valid Range: 0-65535 (0=off) | ||
209 | Default Value: 64 | 248 | Default Value: 64 |
210 | This value delays the generation of transmit interrupts in units of | 249 | |
211 | 1.024 microseconds. Transmit interrupt reduction can improve CPU | 250 | This value delays the generation of transmit interrupts in units of |
212 | efficiency if properly tuned for specific network traffic. If the | 251 | 1.024 microseconds. Transmit interrupt reduction can improve CPU |
213 | system is reporting dropped transmits, this value may be set too high | 252 | efficiency if properly tuned for specific network traffic. If the |
214 | causing the driver to run out of available transmit descriptors. | 253 | system is reporting dropped transmits, this value may be set too high |
215 | 254 | causing the driver to run out of available transmit descriptors. | |
216 | TxAbsIntDelay (82540, 82545 and later adapters only) | 255 | |
217 | Valid Range: 0-65535 (0=off) | 256 | |
257 | TxAbsIntDelay | ||
258 | ------------- | ||
259 | (This parameter is supported only on 82540, 82545 and later adapters.) | ||
260 | Valid Range: 0-65535 (0=off) | ||
218 | Default Value: 64 | 261 | Default Value: 64 |
219 | This value, in units of 1.024 microseconds, limits the delay in which a | 262 | |
220 | transmit interrupt is generated. Useful only if TxIntDelay is non-zero, | 263 | This value, in units of 1.024 microseconds, limits the delay in which a |
221 | this value ensures that an interrupt is generated after the initial | 264 | transmit interrupt is generated. Useful only if TxIntDelay is non-zero, |
222 | packet is sent on the wire within the set amount of time. Proper tuning, | 265 | this value ensures that an interrupt is generated after the initial |
223 | along with TxIntDelay, may improve traffic throughput in specific | 266 | packet is sent on the wire within the set amount of time. Proper tuning, |
224 | network conditions. | 267 | along with TxIntDelay, may improve traffic throughput in specific |
225 | 268 | network conditions. | |
226 | XsumRX (not available on the 82542-based adapter) | 269 | |
227 | Valid Range: 0-1 | 270 | XsumRX |
271 | ------ | ||
272 | (This parameter is NOT supported on the 82542-based adapter.) | ||
273 | Valid Range: 0-1 | ||
228 | Default Value: 1 | 274 | Default Value: 1 |
229 | A value of '1' indicates that the driver should enable IP checksum | 275 | |
230 | offload for received packets (both UDP and TCP) to the adapter hardware. | 276 | A value of '1' indicates that the driver should enable IP checksum |
277 | offload for received packets (both UDP and TCP) to the adapter hardware. | ||
278 | |||
231 | 279 | ||
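Where a recent enough ethtool is installed, receive checksum offload can also
typically be inspected and toggled at runtime; an illustrative sketch (eth0
assumed):

    ethtool -k eth0           # show current offload settings
    ethtool -K eth0 rx off    # disable receive checksum offload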
232 | Speed and Duplex Configuration | 280 | Speed and Duplex Configuration |
233 | ============================== | 281 | ============================== |
234 | 282 | ||
235 | Three keywords are used to control the speed and duplex configuration. These | 283 | Three keywords are used to control the speed and duplex configuration. |
236 | keywords are Speed, Duplex, and AutoNeg. | 284 | These keywords are Speed, Duplex, and AutoNeg. |
237 | 285 | ||
238 | If the board uses a fiber interface, these keywords are ignored, and the | 286 | If the board uses a fiber interface, these keywords are ignored, and the |
239 | fiber interface board only links at 1000 Mbps full-duplex. | 287 | fiber interface board only links at 1000 Mbps full-duplex. |
240 | 288 | ||
241 | For copper-based boards, the keywords interact as follows: | 289 | For copper-based boards, the keywords interact as follows: |
242 | 290 | ||
243 | The default operation is auto-negotiate. The board advertises all supported | 291 | The default operation is auto-negotiate. The board advertises all |
244 | speed and duplex combinations, and it links at the highest common speed and | 292 | supported speed and duplex combinations, and it links at the highest |
245 | duplex mode IF the link partner is set to auto-negotiate. | 293 | common speed and duplex mode IF the link partner is set to auto-negotiate. |
246 | 294 | ||
247 | If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps is | 295 | If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps |
248 | advertised (The 1000BaseT spec requires auto-negotiation.) | 296 | is advertised (The 1000BaseT spec requires auto-negotiation.) |
249 | 297 | ||
250 | If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- | 298 | If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- |
251 | negotiation is disabled, and the AutoNeg parameter is ignored. Partner SHOULD | 299 | negotiation is disabled, and the AutoNeg parameter is ignored. Partner |
252 | also be forced. | 300 | SHOULD also be forced. |
301 | |||
302 | The AutoNeg parameter is used when more control is required over the | ||
303 | auto-negotiation process. It should be used when you wish to control which | ||
304 | speed and duplex combinations are advertised during the auto-negotiation | ||
305 | process. | ||
306 | |||
307 | The parameter may be specified as either a decimal or hexadecimal value as | ||
308 | determined by the bitmap below. | ||
253 | 309 | ||
254 | The AutoNeg parameter is used when more control is required over the auto- | 310 | Bit position 7 6 5 4 3 2 1 0 |
255 | negotiation process. When this parameter is used, Speed and Duplex parameters | 311 | Decimal Value 128 64 32 16 8 4 2 1 |
256 | must not be specified. The following table describes supported values for the | 312 | Hex value 80 40 20 10 8 4 2 1 |
257 | AutoNeg parameter: | 313 | Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 |
314 | Duplex Full Full Half Full Half | ||
258 | 315 | ||
259 | Speed (Mbps) 1000 100 100 10 10 | 316 | Some examples of using AutoNeg: |
260 | Duplex Full Full Half Full Half | ||
261 | Value (in base 16) 0x20 0x08 0x04 0x02 0x01 | ||
262 | 317 | ||
263 | Example: insmod e1000 AutoNeg=0x03, loads e1000 and specifies (10 full duplex, | 318 | modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half) |
264 | 10 half duplex) for negotiation with the peer. | 319 | modprobe e1000 AutoNeg=1 (Same as above) |
320 | modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full) | ||
321 | modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full) | ||
322 | modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half) | ||
323 | modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100 | ||
324 | Half) | ||
325 | modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full) | ||
326 | modprobe e1000 AutoNeg=32 (Same as above) | ||
265 | 327 | ||
266 | Note that setting AutoNeg does not guarantee that the board will link at the | 328 | Note that when this parameter is used, Speed and Duplex must not be specified. |
267 | highest specified speed or duplex mode, but the board will link at the | 329 | |
268 | highest possible speed/duplex of the link partner IF the link partner is also | 330 | If the link partner is forced to a specific speed and duplex, then this |
269 | set to auto-negotiate. If the link partner is forced speed/duplex, the | 331 | parameter should not be used. Instead, use the Speed and Duplex parameters |
270 | adapter MUST be forced to the same speed/duplex. | 332 | previously mentioned to force the adapter to the same speed and duplex. |
271 | 333 | ||
272 | 334 | ||
273 | Additional Configurations | 335 | Additional Configurations |
@@ -276,19 +338,19 @@ Additional Configurations | |||
276 | Configuring the Driver on Different Distributions | 338 | Configuring the Driver on Different Distributions |
277 | ------------------------------------------------- | 339 | ------------------------------------------------- |
278 | 340 | ||
279 | Configuring a network driver to load properly when the system is started is | 341 | Configuring a network driver to load properly when the system is started |
280 | distribution dependent. Typically, the configuration process involves adding | 342 | is distribution dependent. Typically, the configuration process involves |
281 | an alias line to /etc/modules.conf as well as editing other system startup | 343 | adding an alias line to /etc/modules.conf or /etc/modprobe.conf as well |
282 | scripts and/or configuration files. Many popular Linux distributions ship | 344 | as editing other system startup scripts and/or configuration files. Many |
283 | with tools to make these changes for you. To learn the proper way to | 345 | popular Linux distributions ship with tools to make these changes for you. |
284 | configure a network device for your system, refer to your distribution | 346 | To learn the proper way to configure a network device for your system, |
285 | documentation. If during this process you are asked for the driver or module | 347 | refer to your distribution documentation. If during this process you are |
286 | name, the name for the Linux Base Driver for the Intel PRO/1000 Family of | 348 | asked for the driver or module name, the name for the Linux Base Driver |
287 | Adapters is e1000. | 349 | for the Intel PRO/1000 Family of Adapters is e1000. |
288 | 350 | ||
289 | As an example, if you install the e1000 driver for two PRO/1000 adapters | 351 | As an example, if you install the e1000 driver for two PRO/1000 adapters |
290 | (eth0 and eth1) and set the speed and duplex to 10full and 100half, add the | 352 | (eth0 and eth1) and set the speed and duplex to 10full and 100half, add |
291 | following to modules.conf: | 353 | the following to modules.conf or modprobe.conf: |
292 | 354 | ||
293 | alias eth0 e1000 | 355 | alias eth0 e1000 |
294 | alias eth1 e1000 | 356 | alias eth1 e1000 |
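The speed and duplex values from this example would then be supplied with an
options line in the same file; a sketch based on the Speed and Duplex parameter
descriptions above (Duplex: 2=full, 1=half):

    options e1000 Speed=10,100 Duplex=2,1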
@@ -297,9 +359,9 @@ Additional Configurations | |||
297 | Viewing Link Messages | 359 | Viewing Link Messages |
298 | --------------------- | 360 | --------------------- |
299 | 361 | ||
300 | Link messages will not be displayed to the console if the distribution is | 362 | Link messages will not be displayed to the console if the distribution is |
301 | restricting system messages. In order to see network driver link messages on | 363 | restricting system messages. In order to see network driver link messages |
302 | your console, set dmesg to eight by entering the following: | 364 | on your console, set dmesg to eight by entering the following: |
303 | 365 | ||
304 | dmesg -n 8 | 366 | dmesg -n 8 |
305 | 367 | ||
@@ -308,22 +370,42 @@ Additional Configurations | |||
308 | Jumbo Frames | 370 | Jumbo Frames |
309 | ------------ | 371 | ------------ |
310 | 372 | ||
311 | The driver supports Jumbo Frames for all adapters except 82542-based | 373 | The driver supports Jumbo Frames for all adapters except 82542 and |
312 | adapters. Jumbo Frames support is enabled by changing the MTU to a value | 374 | 82573-based adapters. Jumbo Frames support is enabled by changing the |
313 | larger than the default of 1500. Use the ifconfig command to increase the | 375 | MTU to a value larger than the default of 1500. Use the ifconfig command |
314 | MTU size. For example: | 376 | to increase the MTU size. For example: |
377 | |||
378 | ifconfig eth<x> mtu 9000 up | ||
379 | |||
380 | This setting is not saved across reboots. It can be made permanent if | ||
381 | you add: | ||
382 | |||
383 | MTU=9000 | ||
315 | 384 | ||
316 | ifconfig ethx mtu 9000 up | 385 | to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>. This example |
386 | applies to the Red Hat distributions; other distributions may store this | ||
387 | setting in a different location. | ||
317 | 388 | ||
318 | The maximum MTU setting for Jumbo Frames is 16110. This value coincides | 389 | Notes: |
319 | with the maximum Jumbo Frames size of 16128. | ||
320 | 390 | ||
321 | NOTE: Jumbo Frames are supported at 1000 Mbps only. Using Jumbo Frames at | 391 | - To enable Jumbo Frames, increase the MTU size on the interface beyond |
322 | 10 or 100 Mbps may result in poor performance or loss of link. | 392 | 1500. |
393 | - The maximum MTU setting for Jumbo Frames is 16110. This value coincides | ||
394 | with the maximum Jumbo Frames size of 16128. | ||
395 | - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or | ||
396 | loss of link. | ||
397 | - Some Intel gigabit adapters that support Jumbo Frames have a frame size | ||
398 | limit of 9238 bytes, with a corresponding MTU size limit of 9216 bytes. | ||
399 | The adapters with this limitation are based on the Intel 82571EB and | ||
400 | 82572EI controllers, which correspond to these product names: | ||
401 | Intel® PRO/1000 PT Dual Port Server Adapter | ||
402 | Intel® PRO/1000 PF Dual Port Server Adapter | ||
403 | Intel® PRO/1000 PT Server Adapter | ||
404 | Intel® PRO/1000 PT Desktop Adapter | ||
405 | Intel® PRO/1000 PF Server Adapter | ||
323 | 406 | ||
407 | - The Intel PRO/1000 PM Network Connection does not support jumbo frames. | ||
324 | 408 | ||
325 | NOTE: MTU designates the frame size. To enable Jumbo Frames, increase the | ||
326 | MTU size on the interface beyond 1500. | ||
327 | 409 | ||
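Where the iproute2 tools are available, the same change can be made with the
ip command (eth0 is assumed here):

    ip link set dev eth0 mtu 9000
    ip link show dev eth0        # confirm the new MTU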
328 | Ethtool | 410 | Ethtool |
329 | ------- | 411 | ------- |
@@ -333,32 +415,41 @@ Additional Configurations | |||
333 | version 1.6 or later is required for this functionality. | 415 | version 1.6 or later is required for this functionality. |
334 | 416 | ||
335 | The latest release of ethtool can be found from | 417 | The latest release of ethtool can be found from |
336 | http://sf.net/projects/gkernel. | 418 | http://sourceforge.net/projects/gkernel. |
337 | 419 | ||
338 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support | 420 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support |
339 | for a more complete ethtool feature set can be enabled by upgrading | 421 | for a more complete ethtool feature set can be enabled by upgrading |
340 | ethtool to ethtool-1.8.1. | 422 | ethtool to ethtool-1.8.1. |
341 | 423 | ||
342 | Enabling Wake on LAN* (WoL) | 424 | Enabling Wake on LAN* (WoL) |
343 | --------------------------- | 425 | --------------------------- |
344 | 426 | ||
345 | WoL is configured through the Ethtool* utility. Ethtool is included with | 427 | WoL is configured through the Ethtool* utility. Ethtool is included with |
346 | all versions of Red Hat after Red Hat 7.2. For other Linux distributions, | 428 | all versions of Red Hat after Red Hat 7.2. For other Linux distributions, |
347 | download and install Ethtool from the following website: | 429 | download and install Ethtool from the following website: |
348 | http://sourceforge.net/projects/gkernel. | 430 | http://sourceforge.net/projects/gkernel. |
349 | 431 | ||
350 | For instructions on enabling WoL with Ethtool, refer to the website listed | 432 | For instructions on enabling WoL with Ethtool, refer to the website listed |
351 | above. | 433 | above. |
352 | 434 | ||
353 | WoL will be enabled on the system during the next shut down or reboot. | 435 | WoL will be enabled on the system during the next shut down or reboot. |
354 | For this driver version, in order to enable WoL, the e1000 driver must be | 436 | For this driver version, in order to enable WoL, the e1000 driver must be |
355 | loaded when shutting down or rebooting the system. | 437 | loaded when shutting down or rebooting the system. |
356 | 438 | ||
357 | NAPI | 439 | NAPI |
358 | ---- | 440 | ---- |
359 | 441 | ||
360 | NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled | 442 | NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled |
361 | or disabled based on the configuration of the kernel. | 443 | or disabled based on the configuration of the kernel. To override |
444 | the default, use the following compile-time flags. | ||
445 | |||
446 | To enable NAPI, compile the driver module, passing in a configuration option: | ||
447 | |||
448 | make CFLAGS_EXTRA=-DE1000_NAPI install | ||
449 | |||
450 | To disable NAPI, compile the driver module, passing in a configuration option: | ||
451 | |||
452 | make CFLAGS_EXTRA=-DE1000_NO_NAPI install | ||
362 | 453 | ||
363 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | 454 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. |
364 | 455 | ||
@@ -369,10 +460,85 @@ Known Issues | |||
369 | Jumbo Frames System Requirement | 460 | Jumbo Frames System Requirement |
370 | ------------------------------- | 461 | ------------------------------- |
371 | 462 | ||
372 | Memory allocation failures have been observed on Linux systems with 64 MB | 463 | Memory allocation failures have been observed on Linux systems with 64 MB |
373 | of RAM or less that are running Jumbo Frames. If you are using Jumbo Frames, | 464 | of RAM or less that are running Jumbo Frames. If you are using Jumbo |
374 | your system may require more than the advertised minimum requirement of 64 MB | 465 | Frames, your system may require more than the advertised minimum |
375 | of system memory. | 466 | requirement of 64 MB of system memory. |
467 | |||
468 | Performance Degradation with Jumbo Frames | ||
469 | ----------------------------------------- | ||
470 | |||
471 | Degradation in throughput performance may be observed in some Jumbo frames | ||
472 | environments. If this is observed, increasing the application's socket | ||
473 | buffer size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values | ||
474 | may help. See the specific application manual and | ||
475 | /usr/src/linux*/Documentation/ | ||
476 | networking/ip-sysctl.txt for more details. | ||
477 | |||
478 | Jumbo frames on Foundry BigIron 8000 switch | ||
479 | ------------------------------------------- | ||
480 | There is a known issue using Jumbo frames when connected to a Foundry | ||
481 | BigIron 8000 switch. This is a 3rd party limitation. If you experience | ||
482 | loss of packets, lower the MTU size. | ||
483 | |||
484 | Multiple Interfaces on Same Ethernet Broadcast Network | ||
485 | ------------------------------------------------------ | ||
486 | |||
487 | Due to the default ARP behavior on Linux, it is not possible to have | ||
488 | one system on two IP networks in the same Ethernet broadcast domain | ||
489 | (non-partitioned switch) behave as expected. All Ethernet interfaces | ||
490 | will respond to IP traffic for any IP address assigned to the system. | ||
491 | This results in unbalanced receive traffic. | ||
492 | |||
493 | If you have multiple interfaces in a server, either turn on ARP | ||
494 | filtering by entering: | ||
495 | |||
496 | echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter | ||
497 | (this only works if your kernel's version is higher than 2.4.5), | ||
498 | |||
499 | NOTE: This setting is not saved across reboots. The configuration | ||
500 | change can be made permanent by adding the line: | ||
501 | net.ipv4.conf.all.arp_filter = 1 | ||
502 | to the file /etc/sysctl.conf | ||
503 | |||
504 | or, | ||
505 | |||
506 | install the interfaces in separate broadcast domains (either in | ||
507 | different switches or in a switch partitioned to VLANs). | ||
508 | |||
509 | 82541/82547 can't link or are slow to link with some link partners | ||
510 | ----------------------------------------------------------------- | ||
511 | |||
512 | There is a known compatibility issue with 82541/82547 and some | ||
513 | low-end switches where the link will not be established, or will | ||
514 | be slow to establish. In particular, these switches are known to | ||
515 | be incompatible with 82541/82547: | ||
516 | |||
517 | Planex FXG-08TE | ||
518 | I-O Data ETG-SH8 | ||
519 | |||
520 | To workaround this issue, the driver can be compiled with an override | ||
521 | of the PHY's master/slave setting. Forcing master or forcing slave | ||
522 | mode will improve time-to-link. | ||
523 | |||
524 | # make EXTRA_CFLAGS=-DE1000_MASTER_SLAVE=<n> | ||
525 | |||
526 | Where <n> is: | ||
527 | |||
528 | 0 = Hardware default | ||
529 | 1 = Master mode | ||
530 | 2 = Slave mode | ||
531 | 3 = Auto master/slave | ||
532 | |||
533 | Disable rx flow control with ethtool | ||
534 | ------------------------------------ | ||
535 | |||
536 | In order to disable receive flow control using ethtool, you must turn | ||
537 | off auto-negotiation on the same command line. | ||
538 | |||
539 | For example: | ||
540 | |||
541 | ethtool -A eth? autoneg off rx off | ||
376 | 542 | ||
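An illustrative sequence (eth0 assumed) that checks the current pause
parameters, disables receive flow control, and later restores negotiated flow
control:

    ethtool -a eth0
    ethtool -A eth0 autoneg off rx off
    ethtool -A eth0 autoneg on rx on tx on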
377 | 543 | ||
378 | Support | 544 | Support |
@@ -382,20 +548,24 @@ For general information, go to the Intel support website at: | |||
382 | 548 | ||
383 | http://support.intel.com | 549 | http://support.intel.com |
384 | 550 | ||
551 | or the Intel Wired Networking project hosted by Sourceforge at: | ||
552 | |||
553 | http://sourceforge.net/projects/e1000 | ||
554 | |||
385 | If an issue is identified with the released source code on the supported | 555 | If an issue is identified with the released source code on the supported |
386 | kernel with a supported adapter, email the specific information related to | 556 | kernel with a supported adapter, email the specific information related |
387 | the issue to linux.nics@intel.com. | 557 | to the issue to e1000-devel@lists.sourceforge.net |
388 | 558 | ||
389 | 559 | ||
390 | License | 560 | License |
391 | ======= | 561 | ======= |
392 | 562 | ||
393 | This software program is released under the terms of a license agreement | 563 | This software program is released under the terms of a license agreement |
394 | between you ('Licensee') and Intel. Do not use or load this software or any | 564 | between you ('Licensee') and Intel. Do not use or load this software or any |
395 | associated materials (collectively, the 'Software') until you have carefully | 565 | associated materials (collectively, the 'Software') until you have carefully |
396 | read the full terms and conditions of the LICENSE located in this software | 566 | read the full terms and conditions of the file COPYING located in this software |
397 | package. By loading or using the Software, you agree to the terms of this | 567 | package. By loading or using the Software, you agree to the terms of this |
398 | Agreement. If you do not agree with the terms of this Agreement, do not | 568 | Agreement. If you do not agree with the terms of this Agreement, do not |
399 | install or use the Software. | 569 | install or use the Software. |
400 | 570 | ||
401 | * Other names and brands may be claimed as the property of others. | 571 | * Other names and brands may be claimed as the property of others. |
diff --git a/MAINTAINERS b/MAINTAINERS index b0dc75a5e74e..dd1351dc32b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1349,10 +1349,10 @@ S: Maintained | |||
1349 | INTEL PRO/100 ETHERNET SUPPORT | 1349 | INTEL PRO/100 ETHERNET SUPPORT |
1350 | P: John Ronciak | 1350 | P: John Ronciak |
1351 | M: john.ronciak@intel.com | 1351 | M: john.ronciak@intel.com |
1352 | P: Ganesh Venkatesan | ||
1353 | M: ganesh.venkatesan@intel.com | ||
1354 | P: Jesse Brandeburg | 1352 | P: Jesse Brandeburg |
1355 | M: jesse.brandeburg@intel.com | 1353 | M: jesse.brandeburg@intel.com |
1354 | P: Jeff Kirsher | ||
1355 | M: jeffrey.t.kirsher@intel.com | ||
1356 | W: http://sourceforge.net/projects/e1000/ | 1356 | W: http://sourceforge.net/projects/e1000/ |
1357 | S: Supported | 1357 | S: Supported |
1358 | 1358 | ||
@@ -1361,18 +1361,22 @@ P: Jeb Cramer | |||
1361 | M: cramerj@intel.com | 1361 | M: cramerj@intel.com |
1362 | P: John Ronciak | 1362 | P: John Ronciak |
1363 | M: john.ronciak@intel.com | 1363 | M: john.ronciak@intel.com |
1364 | P: Ganesh Venkatesan | 1364 | P: Jesse Brandeburg |
1365 | M: ganesh.venkatesan@intel.com | 1365 | M: jesse.brandeburg@intel.com |
1366 | P: Jeff Kirsher | ||
1367 | M: jeffrey.t.kirsher@intel.com | ||
1366 | W: http://sourceforge.net/projects/e1000/ | 1368 | W: http://sourceforge.net/projects/e1000/ |
1367 | S: Supported | 1369 | S: Supported |
1368 | 1370 | ||
1369 | INTEL PRO/10GbE SUPPORT | 1371 | INTEL PRO/10GbE SUPPORT |
1372 | P: Jeff Kirsher | ||
1373 | M: jeffrey.t.kirsher@intel.com | ||
1370 | P: Ayyappan Veeraiyan | 1374 | P: Ayyappan Veeraiyan |
1371 | M: ayyappan.veeraiyan@intel.com | 1375 | M: ayyappan.veeraiyan@intel.com |
1372 | P: Ganesh Venkatesan | ||
1373 | M: ganesh.venkatesan@intel.com | ||
1374 | P: John Ronciak | 1376 | P: John Ronciak |
1375 | M: john.ronciak@intel.com | 1377 | M: john.ronciak@intel.com |
1378 | P: Jesse Brandeburg | ||
1379 | M: jesse.brandeburg@intel.com | ||
1376 | W: http://sourceforge.net/projects/e1000/ | 1380 | W: http://sourceforge.net/projects/e1000/ |
1377 | S: Supported | 1381 | S: Supported |
1378 | 1382 | ||
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 486d7945583d..544ac5dc09eb 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c | |||
@@ -357,7 +357,7 @@ free_reserved_mem(void *start, void *end) | |||
357 | void *__start = start; | 357 | void *__start = start; |
358 | for (; __start < end; __start += PAGE_SIZE) { | 358 | for (; __start < end; __start += PAGE_SIZE) { |
359 | ClearPageReserved(virt_to_page(__start)); | 359 | ClearPageReserved(virt_to_page(__start)); |
360 | set_page_count(virt_to_page(__start), 1); | 360 | init_page_count(virt_to_page(__start)); |
361 | free_page((long)__start); | 361 | free_page((long)__start); |
362 | totalram_pages++; | 362 | totalram_pages++; |
363 | } | 363 | } |
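The ClearPageReserved / init_page_count / free_page loop above repeats, with small variations, across the architecture hunks that follow. A self-contained sketch of the shared pattern; the function name and range arguments are illustrative and not taken from any single architecture:

    #include <linux/mm.h>

    /* Return a range of boot-time-reserved pages to the page allocator.
     * init_page_count() replaces the older set_page_count(page, 1): it
     * re-initialises the page's reference count so that free_page() can
     * drop it to zero and hand the page back to the buddy allocator. */
    static void free_reserved_range(unsigned long start, unsigned long end)
    {
            unsigned long addr;

            for (addr = start; addr < end; addr += PAGE_SIZE) {
                    ClearPageReserved(virt_to_page(addr));
                    init_page_count(virt_to_page(addr));
                    free_page(addr);
                    totalram_pages++;
            }
    }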
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index c2ee18d2075e..8a1bfcd50087 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c | |||
@@ -223,6 +223,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
223 | pte = consistent_pte[idx] + off; | 223 | pte = consistent_pte[idx] + off; |
224 | c->vm_pages = page; | 224 | c->vm_pages = page; |
225 | 225 | ||
226 | split_page(page, order); | ||
227 | |||
226 | /* | 228 | /* |
227 | * Set the "dma handle" | 229 | * Set the "dma handle" |
228 | */ | 230 | */ |
@@ -231,7 +233,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
231 | do { | 233 | do { |
232 | BUG_ON(!pte_none(*pte)); | 234 | BUG_ON(!pte_none(*pte)); |
233 | 235 | ||
234 | set_page_count(page, 1); | ||
235 | /* | 236 | /* |
236 | * x86 does not mark the pages reserved... | 237 | * x86 does not mark the pages reserved... |
237 | */ | 238 | */ |
@@ -250,7 +251,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
250 | * Free the otherwise unused pages. | 251 | * Free the otherwise unused pages. |
251 | */ | 252 | */ |
252 | while (page < end) { | 253 | while (page < end) { |
253 | set_page_count(page, 1); | ||
254 | __free_page(page); | 254 | __free_page(page); |
255 | page++; | 255 | page++; |
256 | } | 256 | } |
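The consistent.c hunk above swaps the per-page set_page_count() loop for a single split_page() call. A minimal sketch of that idiom in isolation; the policy of keeping only the first page is illustrative:

    #include <linux/gfp.h>
    #include <linux/mm.h>

    /* Allocate 1 << order pages, keep the first and free the rest.
     * split_page() converts the high-order allocation into independent
     * order-0 pages, each with its own reference count, so the unused
     * tail pages can be returned to the allocator one at a time. */
    static struct page *alloc_first_page_only(gfp_t gfp, unsigned int order)
    {
            struct page *page = alloc_pages(gfp, order);
            unsigned int i;

            if (!page)
                    return NULL;

            split_page(page, order);
            for (i = 1; i < (1U << order); i++)
                    __free_page(page + i);

            return page;
    }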
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8b276ee38acf..b0321e943b76 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c | |||
@@ -531,7 +531,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s) | |||
531 | for (; addr < end; addr += PAGE_SIZE) { | 531 | for (; addr < end; addr += PAGE_SIZE) { |
532 | struct page *page = virt_to_page(addr); | 532 | struct page *page = virt_to_page(addr); |
533 | ClearPageReserved(page); | 533 | ClearPageReserved(page); |
534 | set_page_count(page, 1); | 534 | init_page_count(page); |
535 | free_page(addr); | 535 | free_page(addr); |
536 | totalram_pages++; | 536 | totalram_pages++; |
537 | } | 537 | } |
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index 1f09a9d0fb83..e3ecaa453747 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c | |||
@@ -324,7 +324,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s) | |||
324 | for (; addr < end; addr += PAGE_SIZE) { | 324 | for (; addr < end; addr += PAGE_SIZE) { |
325 | struct page *page = virt_to_page(addr); | 325 | struct page *page = virt_to_page(addr); |
326 | ClearPageReserved(page); | 326 | ClearPageReserved(page); |
327 | set_page_count(page, 1); | 327 | init_page_count(page); |
328 | free_page(addr); | 328 | free_page(addr); |
329 | totalram_pages++; | 329 | totalram_pages++; |
330 | } | 330 | } |
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 31a0018b525a..b7842ff213a6 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c | |||
@@ -216,7 +216,7 @@ free_initmem(void) | |||
216 | addr = (unsigned long)(&__init_begin); | 216 | addr = (unsigned long)(&__init_begin); |
217 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 217 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
218 | ClearPageReserved(virt_to_page(addr)); | 218 | ClearPageReserved(virt_to_page(addr)); |
219 | set_page_count(virt_to_page(addr), 1); | 219 | init_page_count(virt_to_page(addr)); |
220 | free_page(addr); | 220 | free_page(addr); |
221 | totalram_pages++; | 221 | totalram_pages++; |
222 | } | 222 | } |
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c index 0f1c6cbc4f50..aa6b7d0a2109 100644 --- a/arch/frv/kernel/frv_ksyms.c +++ b/arch/frv/kernel/frv_ksyms.c | |||
@@ -27,6 +27,7 @@ EXPORT_SYMBOL(__ioremap); | |||
27 | EXPORT_SYMBOL(iounmap); | 27 | EXPORT_SYMBOL(iounmap); |
28 | 28 | ||
29 | EXPORT_SYMBOL(strnlen); | 29 | EXPORT_SYMBOL(strnlen); |
30 | EXPORT_SYMBOL(strpbrk); | ||
30 | EXPORT_SYMBOL(strrchr); | 31 | EXPORT_SYMBOL(strrchr); |
31 | EXPORT_SYMBOL(strstr); | 32 | EXPORT_SYMBOL(strstr); |
32 | EXPORT_SYMBOL(strchr); | 33 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c index 342823aad758..636b2f8b5d98 100644 --- a/arch/frv/mm/dma-alloc.c +++ b/arch/frv/mm/dma-alloc.c | |||
@@ -115,9 +115,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) | |||
115 | */ | 115 | */ |
116 | if (order > 0) { | 116 | if (order > 0) { |
117 | struct page *rpage = virt_to_page(page); | 117 | struct page *rpage = virt_to_page(page); |
118 | 118 | split_page(rpage, order); | |
119 | for (i = 1; i < (1 << order); i++) | ||
120 | set_page_count(rpage + i, 1); | ||
121 | } | 119 | } |
122 | 120 | ||
123 | err = 0; | 121 | err = 0; |
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 765088ea8a50..8899aa1a4f06 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c | |||
@@ -169,7 +169,7 @@ void __init mem_init(void) | |||
169 | struct page *page = &mem_map[pfn]; | 169 | struct page *page = &mem_map[pfn]; |
170 | 170 | ||
171 | ClearPageReserved(page); | 171 | ClearPageReserved(page); |
172 | set_page_count(page, 1); | 172 | init_page_count(page); |
173 | __free_page(page); | 173 | __free_page(page); |
174 | totalram_pages++; | 174 | totalram_pages++; |
175 | } | 175 | } |
@@ -210,7 +210,7 @@ void __init free_initmem(void) | |||
210 | /* next to check that the page we free is not a partial page */ | 210 | /* next to check that the page we free is not a partial page */ |
211 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 211 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
212 | ClearPageReserved(virt_to_page(addr)); | 212 | ClearPageReserved(virt_to_page(addr)); |
213 | set_page_count(virt_to_page(addr), 1); | 213 | init_page_count(virt_to_page(addr)); |
214 | free_page(addr); | 214 | free_page(addr); |
215 | totalram_pages++; | 215 | totalram_pages++; |
216 | } | 216 | } |
@@ -230,7 +230,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) | |||
230 | int pages = 0; | 230 | int pages = 0; |
231 | for (; start < end; start += PAGE_SIZE) { | 231 | for (; start < end; start += PAGE_SIZE) { |
232 | ClearPageReserved(virt_to_page(start)); | 232 | ClearPageReserved(virt_to_page(start)); |
233 | set_page_count(virt_to_page(start), 1); | 233 | init_page_count(virt_to_page(start)); |
234 | free_page(start); | 234 | free_page(start); |
235 | totalram_pages++; | 235 | totalram_pages++; |
236 | pages++; | 236 | pages++; |
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c index 5cc76efaf7aa..69d6ad32d56c 100644 --- a/arch/h8300/kernel/h8300_ksyms.c +++ b/arch/h8300/kernel/h8300_ksyms.c | |||
@@ -25,6 +25,7 @@ extern char h8300_debug_device[]; | |||
25 | /* platform dependent support */ | 25 | /* platform dependent support */ |
26 | 26 | ||
27 | EXPORT_SYMBOL(strnlen); | 27 | EXPORT_SYMBOL(strnlen); |
28 | EXPORT_SYMBOL(strpbrk); | ||
28 | EXPORT_SYMBOL(strrchr); | 29 | EXPORT_SYMBOL(strrchr); |
29 | EXPORT_SYMBOL(strstr); | 30 | EXPORT_SYMBOL(strstr); |
30 | EXPORT_SYMBOL(strchr); | 31 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 1e0929ddc8c4..09efc4b1f038 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c | |||
@@ -196,7 +196,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
196 | int pages = 0; | 196 | int pages = 0; |
197 | for (; start < end; start += PAGE_SIZE) { | 197 | for (; start < end; start += PAGE_SIZE) { |
198 | ClearPageReserved(virt_to_page(start)); | 198 | ClearPageReserved(virt_to_page(start)); |
199 | set_page_count(virt_to_page(start), 1); | 199 | init_page_count(virt_to_page(start)); |
200 | free_page(start); | 200 | free_page(start); |
201 | totalram_pages++; | 201 | totalram_pages++; |
202 | pages++; | 202 | pages++; |
@@ -219,7 +219,7 @@ free_initmem() | |||
219 | /* next to check that the page we free is not a partial page */ | 219 | /* next to check that the page we free is not a partial page */ |
220 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { | 220 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { |
221 | ClearPageReserved(virt_to_page(addr)); | 221 | ClearPageReserved(virt_to_page(addr)); |
222 | set_page_count(virt_to_page(addr), 1); | 222 | init_page_count(virt_to_page(addr)); |
223 | free_page(addr); | 223 | free_page(addr); |
224 | totalram_pages++; | 224 | totalram_pages++; |
225 | } | 225 | } |
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c9cad7ba0d2d..aeabb4196861 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c | |||
@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) | |||
115 | unsigned long cr4; | 115 | unsigned long cr4; |
116 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | 116 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); |
117 | 117 | ||
118 | cpu_gdt_descr->address = __va(cpu_gdt_descr->address); | 118 | cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); |
119 | load_gdt(cpu_gdt_descr); | 119 | load_gdt(cpu_gdt_descr); |
120 | 120 | ||
121 | cr4 = read_cr4(); | 121 | cr4 = read_cr4(); |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 218d725a5a1e..d134e9643a58 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void) | |||
504 | spin_unlock_irq(&call_lock); | 504 | spin_unlock_irq(&call_lock); |
505 | } | 505 | } |
506 | 506 | ||
507 | static struct call_data_struct * call_data; | 507 | static struct call_data_struct *call_data; |
508 | 508 | ||
509 | /* | 509 | /** |
510 | * this function sends a 'generic call function' IPI to all other CPUs | 510 | * smp_call_function(): Run a function on all other CPUs. |
511 | * in the system. | 511 | * @func: The function to run. This must be fast and non-blocking. |
512 | */ | 512 | * @info: An arbitrary pointer to pass to the function. |
513 | 513 | * @nonatomic: currently unused. | |
514 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 514 | * @wait: If true, wait (atomically) until function has completed on other CPUs. |
515 | int wait) | 515 | * |
516 | /* | 516 | * Returns 0 on success, else a negative status code. Does not return until |
517 | * [SUMMARY] Run a function on all other CPUs. | ||
518 | * <func> The function to run. This must be fast and non-blocking. | ||
519 | * <info> An arbitrary pointer to pass to the function. | ||
520 | * <nonatomic> currently unused. | ||
521 | * <wait> If true, wait (atomically) until function has completed on other CPUs. | ||
522 | * [RETURNS] 0 on success, else a negative status code. Does not return until | ||
523 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. | 517 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. |
524 | * | 518 | * |
525 | * You must not call this function with disabled interrupts or from a | 519 | * You must not call this function with disabled interrupts or from a |
526 | * hardware interrupt handler or from a bottom half handler. | 520 | * hardware interrupt handler or from a bottom half handler. |
527 | */ | 521 | */ |
522 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | ||
523 | int wait) | ||
528 | { | 524 | { |
529 | struct call_data_struct data; | 525 | struct call_data_struct data; |
530 | int cpus; | 526 | int cpus; |
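The kernel-doc above spells out the smp_call_function() contract. The sketch below shows one hedged way a caller might use it; drain_local_stats() and the atomic counter are hypothetical and not part of this patch:

    #include <linux/smp.h>
    #include <asm/atomic.h>

    static atomic_t drained = ATOMIC_INIT(0);

    /* Runs on each remote CPU: must be fast and must not sleep. */
    static void drain_local_stats(void *info)
    {
            /* ... flush this CPU's private counters somewhere safe ... */
            atomic_inc(&drained);
    }

    static int drain_all_stats(void)
    {
            int ret;

            /* Ask every other online CPU to run drain_local_stats(); with
             * wait == 1 this returns only after they have all completed.
             * Must not be called with interrupts disabled or from IRQ
             * context, per the comment above. */
            ret = smp_call_function(drain_local_stats, NULL, 0, 1);
            if (ret)
                    return ret;

            drain_local_stats(NULL);        /* and once on this CPU */
            return 0;
    }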
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index a4a61976ecb9..8fdb1fb17a5f 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c | |||
@@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) | |||
40 | return error; | 40 | return error; |
41 | } | 41 | } |
42 | 42 | ||
43 | /* common code for old and new mmaps */ | 43 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, |
44 | static inline long do_mmap2( | 44 | unsigned long prot, unsigned long flags, |
45 | unsigned long addr, unsigned long len, | 45 | unsigned long fd, unsigned long pgoff) |
46 | unsigned long prot, unsigned long flags, | ||
47 | unsigned long fd, unsigned long pgoff) | ||
48 | { | 46 | { |
49 | int error = -EBADF; | 47 | int error = -EBADF; |
50 | struct file * file = NULL; | 48 | struct file *file = NULL; |
49 | struct mm_struct *mm = current->mm; | ||
51 | 50 | ||
52 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | 51 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); |
53 | if (!(flags & MAP_ANONYMOUS)) { | 52 | if (!(flags & MAP_ANONYMOUS)) { |
@@ -56,9 +55,9 @@ static inline long do_mmap2( | |||
56 | goto out; | 55 | goto out; |
57 | } | 56 | } |
58 | 57 | ||
59 | down_write(¤t->mm->mmap_sem); | 58 | down_write(&mm->mmap_sem); |
60 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | 59 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); |
61 | up_write(¤t->mm->mmap_sem); | 60 | up_write(&mm->mmap_sem); |
62 | 61 | ||
63 | if (file) | 62 | if (file) |
64 | fput(file); | 63 | fput(file); |
@@ -66,13 +65,6 @@ out: | |||
66 | return error; | 65 | return error; |
67 | } | 66 | } |
68 | 67 | ||
69 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | ||
70 | unsigned long prot, unsigned long flags, | ||
71 | unsigned long fd, unsigned long pgoff) | ||
72 | { | ||
73 | return do_mmap2(addr, len, prot, flags, fd, pgoff); | ||
74 | } | ||
75 | |||
76 | /* | 68 | /* |
77 | * Perform the select(nd, in, out, ex, tv) and mmap() system | 69 | * Perform the select(nd, in, out, ex, tv) and mmap() system |
78 | * calls. Linux/i386 didn't use to be able to handle more than | 70 | * calls. Linux/i386 didn't use to be able to handle more than |
@@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | |||
101 | if (a.offset & ~PAGE_MASK) | 93 | if (a.offset & ~PAGE_MASK) |
102 | goto out; | 94 | goto out; |
103 | 95 | ||
104 | err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); | 96 | err = sys_mmap2(a.addr, a.len, a.prot, a.flags, |
97 | a.fd, a.offset >> PAGE_SHIFT); | ||
105 | out: | 98 | out: |
106 | return err; | 99 | return err; |
107 | } | 100 | } |
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index be242723c339..17a6fe7166e7 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c | |||
@@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | |||
46 | * | 46 | * |
47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
48 | */ | 48 | */ |
49 | static unsigned long cyc2ns_scale; | 49 | static unsigned long cyc2ns_scale __read_mostly; |
50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
51 | 51 | ||
52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a7f5a2aceba2..5e41ee29c8cf 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c | |||
@@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation); | |||
74 | * | 74 | * |
75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
76 | */ | 76 | */ |
77 | static unsigned long cyc2ns_scale; | 77 | static unsigned long cyc2ns_scale __read_mostly; |
78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
79 | 79 | ||
80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index d524127c9afc..a7d891585411 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c | |||
@@ -48,18 +48,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
48 | return (pte_t *) pmd; | 48 | return (pte_t *) pmd; |
49 | } | 49 | } |
50 | 50 | ||
51 | /* | ||
52 | * This function checks for proper alignment of input addr and len parameters. | ||
53 | */ | ||
54 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
55 | { | ||
56 | if (len & ~HPAGE_MASK) | ||
57 | return -EINVAL; | ||
58 | if (addr & ~HPAGE_MASK) | ||
59 | return -EINVAL; | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | #if 0 /* This is just for testing */ | 51 | #if 0 /* This is just for testing */ |
64 | struct page * | 52 | struct page * |
65 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 53 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 2700f01994ba..7ba55a6e2dbc 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
@@ -270,7 +270,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
270 | 270 | ||
271 | static void __meminit free_new_highpage(struct page *page) | 271 | static void __meminit free_new_highpage(struct page *page) |
272 | { | 272 | { |
273 | set_page_count(page, 1); | 273 | init_page_count(page); |
274 | __free_page(page); | 274 | __free_page(page); |
275 | totalhigh_pages++; | 275 | totalhigh_pages++; |
276 | } | 276 | } |
@@ -727,7 +727,7 @@ void free_initmem(void) | |||
727 | addr = (unsigned long)(&__init_begin); | 727 | addr = (unsigned long)(&__init_begin); |
728 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 728 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
729 | ClearPageReserved(virt_to_page(addr)); | 729 | ClearPageReserved(virt_to_page(addr)); |
730 | set_page_count(virt_to_page(addr), 1); | 730 | init_page_count(virt_to_page(addr)); |
731 | memset((void *)addr, 0xcc, PAGE_SIZE); | 731 | memset((void *)addr, 0xcc, PAGE_SIZE); |
732 | free_page(addr); | 732 | free_page(addr); |
733 | totalram_pages++; | 733 | totalram_pages++; |
@@ -766,7 +766,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
766 | printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 766 | printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
767 | for (; start < end; start += PAGE_SIZE) { | 767 | for (; start < end; start += PAGE_SIZE) { |
768 | ClearPageReserved(virt_to_page(start)); | 768 | ClearPageReserved(virt_to_page(start)); |
769 | set_page_count(virt_to_page(start), 1); | 769 | init_page_count(virt_to_page(start)); |
770 | free_page(start); | 770 | free_page(start); |
771 | totalram_pages++; | 771 | totalram_pages++; |
772 | } | 772 | } |
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index d0cadb33b54c..92c3d9f0e731 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c | |||
@@ -51,6 +51,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
51 | if (!base) | 51 | if (!base) |
52 | return NULL; | 52 | return NULL; |
53 | 53 | ||
54 | /* | ||
55 | * page_private is used to track the number of entries in | ||
56 | * the page table page that have non standard attributes. | ||
57 | */ | ||
58 | SetPagePrivate(base); | ||
59 | page_private(base) = 0; | ||
60 | |||
54 | address = __pa(address); | 61 | address = __pa(address); |
55 | addr = address & LARGE_PAGE_MASK; | 62 | addr = address & LARGE_PAGE_MASK; |
56 | pbase = (pte_t *)page_address(base); | 63 | pbase = (pte_t *)page_address(base); |
@@ -143,11 +150,12 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
143 | return -ENOMEM; | 150 | return -ENOMEM; |
144 | set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); | 151 | set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); |
145 | kpte_page = split; | 152 | kpte_page = split; |
146 | } | 153 | } |
147 | get_page(kpte_page); | 154 | page_private(kpte_page)++; |
148 | } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { | 155 | } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { |
149 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); | 156 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); |
150 | __put_page(kpte_page); | 157 | BUG_ON(page_private(kpte_page) == 0); |
158 | page_private(kpte_page)--; | ||
151 | } else | 159 | } else |
152 | BUG(); | 160 | BUG(); |
153 | 161 | ||
@@ -157,10 +165,8 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
157 | * replace it with a largepage. | 165 | * replace it with a largepage. |
158 | */ | 166 | */ |
159 | if (!PageReserved(kpte_page)) { | 167 | if (!PageReserved(kpte_page)) { |
160 | /* memleak and potential failed 2M page regeneration */ | 168 | if (cpu_has_pse && (page_private(kpte_page) == 0)) { |
161 | BUG_ON(!page_count(kpte_page)); | 169 | ClearPagePrivate(kpte_page); |
162 | |||
163 | if (cpu_has_pse && (page_count(kpte_page) == 1)) { | ||
164 | list_add(&kpte_page->lru, &df_list); | 170 | list_add(&kpte_page->lru, &df_list); |
165 | revert_page(kpte_page, address); | 171 | revert_page(kpte_page, address); |
166 | } | 172 | } |
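The pageattr.c hunk above replaces get_page()/page_count() reference counting with an explicit page_private counter on the split page-table page. A hedged sketch of just that bookkeeping; the helper names are illustrative:

    #include <linux/mm.h>

    /* Count how many PTEs in a split kernel page-table page still carry
     * non-standard attributes.  The counter lives in page_private(); when
     * it drops back to zero the caller may revert the 4K mappings to a
     * single large page, as __change_page_attr() does above. */
    static void pte_attr_set_nonstandard(struct page *kpte_page)
    {
            page_private(kpte_page)++;
    }

    static int pte_attr_set_standard(struct page *kpte_page)
    {
            BUG_ON(page_private(kpte_page) == 0);
            page_private(kpte_page)--;
            return page_private(kpte_page) == 0;    /* safe to collapse? */
    }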
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a85ea9d37f05..ff7ae6b664e8 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -271,6 +271,25 @@ config SCHED_SMT | |||
271 | Intel IA64 chips with MultiThreading at a cost of slightly increased | 271 | Intel IA64 chips with MultiThreading at a cost of slightly increased |
272 | overhead in some places. If unsure say N here. | 272 | overhead in some places. If unsure say N here. |
273 | 273 | ||
274 | config PERMIT_BSP_REMOVE | ||
275 | bool "Support removal of Bootstrap Processor" | ||
276 | depends on HOTPLUG_CPU | ||
277 | default n | ||
278 | ---help--- | ||
279 | Say Y here if your platform SAL supports removal of the BSP when | ||
280 | HOTPLUG_CPU support is enabled. | ||
281 | |||
282 | config FORCE_CPEI_RETARGET | ||
283 | bool "Force assumption that CPEI can be re-targetted" | ||
284 | depends on PERMIT_BSP_REMOVE | ||
285 | default n | ||
286 | ---help--- | ||
287 | Say Y if you need to force the assumption that CPEI can be re-targetted to | ||
288 | any cpu in the system. This hint is available via ACPI 3.0 specifications. | ||
289 | Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. | ||
290 | This option is useful for enabling this feature on older BIOSes as well. | ||
291 | You can also enable this by using boot command line option force_cpei=1. | ||
292 | |||
274 | config PREEMPT | 293 | config PREEMPT |
275 | bool "Preemptible Kernel" | 294 | bool "Preemptible Kernel" |
276 | help | 295 | help |
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 125568118b84..766bf4955432 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig | |||
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17 | |||
116 | CONFIG_SMP=y | 116 | CONFIG_SMP=y |
117 | CONFIG_NR_CPUS=4 | 117 | CONFIG_NR_CPUS=4 |
118 | CONFIG_HOTPLUG_CPU=y | 118 | CONFIG_HOTPLUG_CPU=y |
119 | CONFIG_PERMIT_BSP_REMOVE=y | ||
120 | CONFIG_FORCE_CPEI_RETARGET=y | ||
119 | # CONFIG_SCHED_SMT is not set | 121 | # CONFIG_SCHED_SMT is not set |
120 | # CONFIG_PREEMPT is not set | 122 | # CONFIG_PREEMPT is not set |
121 | CONFIG_SELECT_MEMORY_MODEL=y | 123 | CONFIG_SELECT_MEMORY_MODEL=y |
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bdc8394..4722ec51c70c 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c | |||
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header, | |||
284 | return 0; | 284 | return 0; |
285 | } | 285 | } |
286 | 286 | ||
287 | #ifdef CONFIG_HOTPLUG_CPU | ||
287 | unsigned int can_cpei_retarget(void) | 288 | unsigned int can_cpei_retarget(void) |
288 | { | 289 | { |
289 | extern int cpe_vector; | 290 | extern int cpe_vector; |
291 | extern unsigned int force_cpei_retarget; | ||
290 | 292 | ||
291 | /* | 293 | /* |
292 | * Only if CPEI is supported and the override flag | 294 | * Only if CPEI is supported and the override flag |
293 | * is present, otherwise return that its re-targettable | 295 | * is present, otherwise return that its re-targettable |
294 | * if we are in polling mode. | 296 | * if we are in polling mode. |
295 | */ | 297 | */ |
296 | if (cpe_vector > 0 && !acpi_cpei_override) | 298 | if (cpe_vector > 0) { |
297 | return 0; | 299 | if (acpi_cpei_override || force_cpei_retarget) |
298 | else | 300 | return 1; |
299 | return 1; | 301 | else |
302 | return 0; | ||
303 | } | ||
304 | return 1; | ||
300 | } | 305 | } |
301 | 306 | ||
302 | unsigned int is_cpu_cpei_target(unsigned int cpu) | 307 | unsigned int is_cpu_cpei_target(unsigned int cpu) |
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu) | |||
315 | { | 320 | { |
316 | acpi_cpei_phys_cpuid = cpu_physical_id(cpu); | 321 | acpi_cpei_phys_cpuid = cpu_physical_id(cpu); |
317 | } | 322 | } |
323 | #endif | ||
318 | 324 | ||
319 | unsigned int get_cpei_target_cpu(void) | 325 | unsigned int get_cpei_target_cpu(void) |
320 | { | 326 | { |
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfca6ddb..0e3eda99e549 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
@@ -1102,9 +1102,6 @@ skip_rbs_switch: | |||
1102 | st8 [r2]=r8 | 1102 | st8 [r2]=r8 |
1103 | st8 [r3]=r10 | 1103 | st8 [r3]=r10 |
1104 | .work_pending: | 1104 | .work_pending: |
1105 | tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? | ||
1106 | (p6) br.cond.sptk.few .sigdelayed | ||
1107 | ;; | ||
1108 | tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? | 1105 | tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? |
1109 | (p6) br.cond.sptk.few .notify | 1106 | (p6) br.cond.sptk.few .notify |
1110 | #ifdef CONFIG_PREEMPT | 1107 | #ifdef CONFIG_PREEMPT |
@@ -1131,17 +1128,6 @@ skip_rbs_switch: | |||
1131 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | 1128 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end |
1132 | br.cond.sptk.many .work_processed_kernel // don't re-check | 1129 | br.cond.sptk.many .work_processed_kernel // don't re-check |
1133 | 1130 | ||
1134 | // There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where | ||
1135 | // it could not be delivered. Deliver it now. The signal might be for us and | ||
1136 | // may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed | ||
1137 | // signal. | ||
1138 | |||
1139 | .sigdelayed: | ||
1140 | br.call.sptk.many rp=do_sigdelayed | ||
1141 | cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check | ||
1142 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | ||
1143 | br.cond.sptk.many .work_processed_kernel // re-check | ||
1144 | |||
1145 | .work_pending_syscall_end: | 1131 | .work_pending_syscall_end: |
1146 | adds r2=PT(R8)+16,r12 | 1132 | adds r2=PT(R8)+16,r12 |
1147 | adds r3=PT(R10)+16,r12 | 1133 | adds r3=PT(R10)+16,r12 |
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f343fa..8832c553230a 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c | |||
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector) | |||
631 | { | 631 | { |
632 | #ifdef CONFIG_SMP | 632 | #ifdef CONFIG_SMP |
633 | static int cpu = -1; | 633 | static int cpu = -1; |
634 | extern int cpe_vector; | ||
634 | 635 | ||
635 | /* | 636 | /* |
636 | * In case of vector shared by multiple RTEs, all RTEs that | 637 | * In case of vector shared by multiple RTEs, all RTEs that |
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector) | |||
653 | if (!cpu_online(smp_processor_id())) | 654 | if (!cpu_online(smp_processor_id())) |
654 | return cpu_physical_id(smp_processor_id()); | 655 | return cpu_physical_id(smp_processor_id()); |
655 | 656 | ||
657 | #ifdef CONFIG_ACPI | ||
658 | if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) | ||
659 | return get_cpei_target_cpu(); | ||
660 | #endif | ||
661 | |||
656 | #ifdef CONFIG_NUMA | 662 | #ifdef CONFIG_NUMA |
657 | { | 663 | { |
658 | int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; | 664 | int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; |
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c32759..5ce908ef9c95 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c | |||
@@ -163,8 +163,19 @@ void fixup_irqs(void) | |||
163 | { | 163 | { |
164 | unsigned int irq; | 164 | unsigned int irq; |
165 | extern void ia64_process_pending_intr(void); | 165 | extern void ia64_process_pending_intr(void); |
166 | extern void ia64_disable_timer(void); | ||
167 | extern volatile int time_keeper_id; | ||
168 | |||
169 | ia64_disable_timer(); | ||
170 | |||
171 | /* | ||
172 | * Find a new timesync master | ||
173 | */ | ||
174 | if (smp_processor_id() == time_keeper_id) { | ||
175 | time_keeper_id = first_cpu(cpu_online_map); | ||
176 | printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); | ||
177 | } | ||
166 | 178 | ||
167 | ia64_set_itv(1<<16); | ||
168 | /* | 179 | /* |
169 | * Phase 1: Locate irq's bound to this cpu and | 180 | * Phase 1: Locate irq's bound to this cpu and |
170 | * relocate them for cpu removal. | 181 | * relocate them for cpu removal. |
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9ee576..b57e723f194c 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type) | |||
281 | ia64_sal_clear_state_info(sal_info_type); | 281 | ia64_sal_clear_state_info(sal_info_type); |
282 | } | 282 | } |
283 | 283 | ||
284 | /* | ||
285 | * platform dependent error handling | ||
286 | */ | ||
287 | #ifndef PLATFORM_MCA_HANDLERS | ||
288 | |||
289 | #ifdef CONFIG_ACPI | 284 | #ifdef CONFIG_ACPI |
290 | 285 | ||
291 | int cpe_vector = -1; | 286 | int cpe_vector = -1; |
287 | int ia64_cpe_irq = -1; | ||
292 | 288 | ||
293 | static irqreturn_t | 289 | static irqreturn_t |
294 | ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) | 290 | ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) |
@@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev) | |||
377 | } | 373 | } |
378 | #endif /* CONFIG_ACPI */ | 374 | #endif /* CONFIG_ACPI */ |
379 | 375 | ||
380 | #endif /* PLATFORM_MCA_HANDLERS */ | ||
381 | |||
382 | /* | 376 | /* |
383 | * ia64_mca_cmc_vector_setup | 377 | * ia64_mca_cmc_vector_setup |
384 | * | 378 | * |
@@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) | |||
630 | *tnat |= (nat << tslot); | 624 | *tnat |= (nat << tslot); |
631 | } | 625 | } |
632 | 626 | ||
627 | /* Change the comm field on the MCA/INT task to include the pid that | ||
628 | * was interrupted, it makes for easier debugging. If that pid was 0 | ||
629 | * (swapper or nested MCA/INIT) then use the start of the previous comm | ||
630 | * field suffixed with its cpu. | ||
631 | */ | ||
632 | |||
633 | static void | ||
634 | ia64_mca_modify_comm(const task_t *previous_current) | ||
635 | { | ||
636 | char *p, comm[sizeof(current->comm)]; | ||
637 | if (previous_current->pid) | ||
638 | snprintf(comm, sizeof(comm), "%s %d", | ||
639 | current->comm, previous_current->pid); | ||
640 | else { | ||
641 | int l; | ||
642 | if ((p = strchr(previous_current->comm, ' '))) | ||
643 | l = p - previous_current->comm; | ||
644 | else | ||
645 | l = strlen(previous_current->comm); | ||
646 | snprintf(comm, sizeof(comm), "%s %*s %d", | ||
647 | current->comm, l, previous_current->comm, | ||
648 | task_thread_info(previous_current)->cpu); | ||
649 | } | ||
650 | memcpy(current->comm, comm, sizeof(current->comm)); | ||
651 | } | ||
652 | |||
633 | /* On entry to this routine, we are running on the per cpu stack, see | 653 | /* On entry to this routine, we are running on the per cpu stack, see |
634 | * mca_asm.h. The original stack has not been touched by this event. Some of | 654 | * mca_asm.h. The original stack has not been touched by this event. Some of |
635 | * the original stack's registers will be in the RBS on this stack. This stack | 655 | * the original stack's registers will be in the RBS on this stack. This stack |
@@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
648 | struct ia64_sal_os_state *sos, | 668 | struct ia64_sal_os_state *sos, |
649 | const char *type) | 669 | const char *type) |
650 | { | 670 | { |
651 | char *p, comm[sizeof(current->comm)]; | 671 | char *p; |
652 | ia64_va va; | 672 | ia64_va va; |
653 | extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ | 673 | extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ |
654 | const pal_min_state_area_t *ms = sos->pal_min_state; | 674 | const pal_min_state_area_t *ms = sos->pal_min_state; |
@@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
721 | /* Verify the previous stack state before we change it */ | 741 | /* Verify the previous stack state before we change it */ |
722 | if (user_mode(regs)) { | 742 | if (user_mode(regs)) { |
723 | msg = "occurred in user space"; | 743 | msg = "occurred in user space"; |
744 | /* previous_current is guaranteed to be valid when the task was | ||
745 | * in user space, so ... | ||
746 | */ | ||
747 | ia64_mca_modify_comm(previous_current); | ||
724 | goto no_mod; | 748 | goto no_mod; |
725 | } | 749 | } |
726 | if (r13 != sos->prev_IA64_KR_CURRENT) { | 750 | if (r13 != sos->prev_IA64_KR_CURRENT) { |
@@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
750 | goto no_mod; | 774 | goto no_mod; |
751 | } | 775 | } |
752 | 776 | ||
753 | /* Change the comm field on the MCA/INT task to include the pid that | 777 | ia64_mca_modify_comm(previous_current); |
754 | * was interrupted, it makes for easier debugging. If that pid was 0 | ||
755 | * (swapper or nested MCA/INIT) then use the start of the previous comm | ||
756 | * field suffixed with its cpu. | ||
757 | */ | ||
758 | if (previous_current->pid) | ||
759 | snprintf(comm, sizeof(comm), "%s %d", | ||
760 | current->comm, previous_current->pid); | ||
761 | else { | ||
762 | int l; | ||
763 | if ((p = strchr(previous_current->comm, ' '))) | ||
764 | l = p - previous_current->comm; | ||
765 | else | ||
766 | l = strlen(previous_current->comm); | ||
767 | snprintf(comm, sizeof(comm), "%s %*s %d", | ||
768 | current->comm, l, previous_current->comm, | ||
769 | task_thread_info(previous_current)->cpu); | ||
770 | } | ||
771 | memcpy(current->comm, comm, sizeof(current->comm)); | ||
772 | 778 | ||
773 | /* Make the original task look blocked. First stack a struct pt_regs, | 779 | /* Make the original task look blocked. First stack a struct pt_regs, |
774 | * describing the state at the time of interrupt. mca_asm.S built a | 780 | * describing the state at the time of interrupt. mca_asm.S built a |
@@ -908,7 +914,7 @@ no_mod: | |||
908 | static void | 914 | static void |
909 | ia64_wait_for_slaves(int monarch) | 915 | ia64_wait_for_slaves(int monarch) |
910 | { | 916 | { |
911 | int c, wait = 0; | 917 | int c, wait = 0, missing = 0; |
912 | for_each_online_cpu(c) { | 918 | for_each_online_cpu(c) { |
913 | if (c == monarch) | 919 | if (c == monarch) |
914 | continue; | 920 | continue; |
@@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch) | |||
919 | } | 925 | } |
920 | } | 926 | } |
921 | if (!wait) | 927 | if (!wait) |
922 | return; | 928 | goto all_in; |
923 | for_each_online_cpu(c) { | 929 | for_each_online_cpu(c) { |
924 | if (c == monarch) | 930 | if (c == monarch) |
925 | continue; | 931 | continue; |
926 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { | 932 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { |
927 | udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ | 933 | udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ |
934 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | ||
935 | missing = 1; | ||
928 | break; | 936 | break; |
929 | } | 937 | } |
930 | } | 938 | } |
939 | if (!missing) | ||
940 | goto all_in; | ||
941 | printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); | ||
942 | for_each_online_cpu(c) { | ||
943 | if (c == monarch) | ||
944 | continue; | ||
945 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | ||
946 | printk(" %d", c); | ||
947 | } | ||
948 | printk("\n"); | ||
949 | return; | ||
950 | |||
951 | all_in: | ||
952 | printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); | ||
953 | return; | ||
931 | } | 954 | } |
932 | 955 | ||
933 | /* | 956 | /* |
@@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
953 | task_t *previous_current; | 976 | task_t *previous_current; |
954 | 977 | ||
955 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 978 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ |
979 | console_loglevel = 15; /* make sure printks make it to console */ | ||
980 | printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", | ||
981 | sos->proc_state_param, cpu, sos->monarch); | ||
982 | |||
956 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 983 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
957 | monarch_cpu = cpu; | 984 | monarch_cpu = cpu; |
958 | if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) | 985 | if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) |
@@ -1444,11 +1471,13 @@ void __devinit | |||
1444 | ia64_mca_cpu_init(void *cpu_data) | 1471 | ia64_mca_cpu_init(void *cpu_data) |
1445 | { | 1472 | { |
1446 | void *pal_vaddr; | 1473 | void *pal_vaddr; |
1474 | static int first_time = 1; | ||
1447 | 1475 | ||
1448 | if (smp_processor_id() == 0) { | 1476 | if (first_time) { |
1449 | void *mca_data; | 1477 | void *mca_data; |
1450 | int cpu; | 1478 | int cpu; |
1451 | 1479 | ||
1480 | first_time = 0; | ||
1452 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) | 1481 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) |
1453 | * NR_CPUS + KERNEL_STACK_SIZE); | 1482 | * NR_CPUS + KERNEL_STACK_SIZE); |
1454 | mca_data = (void *)(((unsigned long)mca_data + | 1483 | mca_data = (void *)(((unsigned long)mca_data + |
@@ -1704,6 +1733,7 @@ ia64_mca_late_init(void) | |||
1704 | desc = irq_descp(irq); | 1733 | desc = irq_descp(irq); |
1705 | desc->status |= IRQ_PER_CPU; | 1734 | desc->status |= IRQ_PER_CPU; |
1706 | setup_irq(irq, &mca_cpe_irqaction); | 1735 | setup_irq(irq, &mca_cpe_irqaction); |
1736 | ia64_cpe_irq = irq; | ||
1707 | } | 1737 | } |
1708 | ia64_mca_register_cpev(cpe_vector); | 1738 | ia64_mca_register_cpev(cpe_vector); |
1709 | IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); | 1739 | IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); |
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b385da..077f21216b65 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c | |||
@@ -6722,6 +6722,7 @@ __initcall(pfm_init); | |||
6722 | void | 6722 | void |
6723 | pfm_init_percpu (void) | 6723 | pfm_init_percpu (void) |
6724 | { | 6724 | { |
6725 | static int first_time=1; | ||
6725 | /* | 6726 | /* |
6726 | * make sure no measurement is active | 6727 | * make sure no measurement is active |
6727 | * (may inherit programmed PMCs from EFI). | 6728 | * (may inherit programmed PMCs from EFI). |
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void) | |||
6734 | */ | 6735 | */ |
6735 | pfm_unfreeze_pmu(); | 6736 | pfm_unfreeze_pmu(); |
6736 | 6737 | ||
6737 | if (smp_processor_id() == 0) | 6738 | if (first_time) { |
6738 | register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); | 6739 | register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); |
6740 | first_time=0; | ||
6741 | } | ||
6739 | 6742 | ||
6740 | ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); | 6743 | ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); |
6741 | ia64_srlz_d(); | 6744 | ia64_srlz_d(); |
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb44d07..1d7903ee2126 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c | |||
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) | |||
588 | } | 588 | } |
589 | return 0; | 589 | return 0; |
590 | } | 590 | } |
591 | |||
592 | /* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it | ||
593 | * could not be delivered. It is important that the target process is not | ||
594 | * allowed to do any more work in user space. Possible cases for the target | ||
595 | * process: | ||
596 | * | ||
597 | * - It is sleeping and will wake up soon. Store the data in the current task, | ||
598 | * the signal will be sent when the current task returns from the next | ||
599 | * interrupt. | ||
600 | * | ||
601 | * - It is running in user context. Store the data in the current task, the | ||
602 | * signal will be sent when the current task returns from the next interrupt. | ||
603 | * | ||
604 | * - It is running in kernel context on this or another cpu and will return to | ||
605 | * user context. Store the data in the target task, the signal will be sent | ||
606 | * to itself when the target task returns to user space. | ||
607 | * | ||
608 | * - It is running in kernel context on this cpu and will sleep before | ||
609 | * returning to user context. Because this is also the current task, the | ||
610 | * signal will not get delivered and the task could sleep indefinitely. | ||
611 | * Store the data in the idle task for this cpu, the signal will be sent | ||
612 | * after the idle task processes its next interrupt. | ||
613 | * | ||
614 | * To cover all cases, store the data in the target task, the current task and | ||
615 | * the idle task on this cpu. Whatever happens, the signal will be delivered | ||
616 | * to the target task before it can do any useful user space work. Multiple | ||
617 | * deliveries have no unwanted side effects. | ||
618 | * | ||
619 | * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts | ||
620 | * disabled. It must not take any locks nor use kernel structures or services | ||
621 | * that require locks. | ||
622 | */ | ||
623 | |||
624 | /* To ensure that we get the right pid, check its start time. To avoid extra | ||
625 | * include files in thread_info.h, convert the task start_time to unsigned long, | ||
626 | * giving us a cycle time of > 580 years. | ||
627 | */ | ||
628 | static inline unsigned long | ||
629 | start_time_ul(const struct task_struct *t) | ||
630 | { | ||
631 | return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; | ||
632 | } | ||
633 | |||
634 | void | ||
635 | set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) | ||
636 | { | ||
637 | struct task_struct *t; | ||
638 | unsigned long start_time = 0; | ||
639 | int i; | ||
640 | |||
641 | for (i = 1; i <= 3; ++i) { | ||
642 | switch (i) { | ||
643 | case 1: | ||
644 | t = find_task_by_pid(pid); | ||
645 | if (t) | ||
646 | start_time = start_time_ul(t); | ||
647 | break; | ||
648 | case 2: | ||
649 | t = current; | ||
650 | break; | ||
651 | default: | ||
652 | t = idle_task(smp_processor_id()); | ||
653 | break; | ||
654 | } | ||
655 | |||
656 | if (!t) | ||
657 | return; | ||
658 | task_thread_info(t)->sigdelayed.signo = signo; | ||
659 | task_thread_info(t)->sigdelayed.code = code; | ||
660 | task_thread_info(t)->sigdelayed.addr = addr; | ||
661 | task_thread_info(t)->sigdelayed.start_time = start_time; | ||
662 | task_thread_info(t)->sigdelayed.pid = pid; | ||
663 | wmb(); | ||
664 | set_tsk_thread_flag(t, TIF_SIGDELAYED); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | /* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that | ||
669 | * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. | ||
670 | */ | ||
671 | |||
672 | void | ||
673 | do_sigdelayed(void) | ||
674 | { | ||
675 | struct siginfo siginfo; | ||
676 | pid_t pid; | ||
677 | struct task_struct *t; | ||
678 | |||
679 | clear_thread_flag(TIF_SIGDELAYED); | ||
680 | memset(&siginfo, 0, sizeof(siginfo)); | ||
681 | siginfo.si_signo = current_thread_info()->sigdelayed.signo; | ||
682 | siginfo.si_code = current_thread_info()->sigdelayed.code; | ||
683 | siginfo.si_addr = current_thread_info()->sigdelayed.addr; | ||
684 | pid = current_thread_info()->sigdelayed.pid; | ||
685 | t = find_task_by_pid(pid); | ||
686 | if (!t) | ||
687 | return; | ||
688 | if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) | ||
689 | return; | ||
690 | force_sig_info(siginfo.si_signo, &siginfo, t); | ||
691 | } | ||
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef34a86e..c4b633b36dab 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c | |||
@@ -70,6 +70,12 @@ | |||
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | #ifdef CONFIG_HOTPLUG_CPU | 72 | #ifdef CONFIG_HOTPLUG_CPU |
73 | #ifdef CONFIG_PERMIT_BSP_REMOVE | ||
74 | #define bsp_remove_ok 1 | ||
75 | #else | ||
76 | #define bsp_remove_ok 0 | ||
77 | #endif | ||
78 | |||
73 | /* | 79 | /* |
74 | * Store all idle threads, this can be reused instead of creating | 80 | * Store all idle threads, this can be reused instead of creating |
75 | * a new thread. Also avoids complicated thread destroy functionality | 81 | * a new thread. Also avoids complicated thread destroy functionality |
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; | |||
104 | /* | 110 | /* |
105 | * ITC synchronization related stuff: | 111 | * ITC synchronization related stuff: |
106 | */ | 112 | */ |
107 | #define MASTER 0 | 113 | #define MASTER (0) |
108 | #define SLAVE (SMP_CACHE_BYTES/8) | 114 | #define SLAVE (SMP_CACHE_BYTES/8) |
109 | 115 | ||
110 | #define NUM_ROUNDS 64 /* magic value */ | 116 | #define NUM_ROUNDS 64 /* magic value */ |
@@ -151,6 +157,27 @@ char __initdata no_int_routing; | |||
151 | 157 | ||
152 | unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ | 158 | unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ |
153 | 159 | ||
160 | #ifdef CONFIG_FORCE_CPEI_RETARGET | ||
161 | #define CPEI_OVERRIDE_DEFAULT (1) | ||
162 | #else | ||
163 | #define CPEI_OVERRIDE_DEFAULT (0) | ||
164 | #endif | ||
165 | |||
166 | unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; | ||
167 | |||
168 | static int __init | ||
169 | cmdl_force_cpei(char *str) | ||
170 | { | ||
171 | int value=0; | ||
172 | |||
173 | get_option (&str, &value); | ||
174 | force_cpei_retarget = value; | ||
175 | |||
176 | return 1; | ||
177 | } | ||
178 | |||
179 | __setup("force_cpei=", cmdl_force_cpei); | ||
180 | |||
154 | static int __init | 181 | static int __init |
155 | nointroute (char *str) | 182 | nointroute (char *str) |
156 | { | 183 | { |
@@ -161,6 +188,27 @@ nointroute (char *str) | |||
161 | 188 | ||
162 | __setup("nointroute", nointroute); | 189 | __setup("nointroute", nointroute); |
163 | 190 | ||
191 | static void fix_b0_for_bsp(void) | ||
192 | { | ||
193 | #ifdef CONFIG_HOTPLUG_CPU | ||
194 | int cpuid; | ||
195 | static int fix_bsp_b0 = 1; | ||
196 | |||
197 | cpuid = smp_processor_id(); | ||
198 | |||
199 | /* | ||
200 | * Cache the b0 value on the first AP that comes up | ||
201 | */ | ||
202 | if (!(fix_bsp_b0 && cpuid)) | ||
203 | return; | ||
204 | |||
205 | sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; | ||
206 | printk ("Fixed BSP b0 value from CPU %d\n", cpuid); | ||
207 | |||
208 | fix_bsp_b0 = 0; | ||
209 | #endif | ||
210 | } | ||
211 | |||
164 | void | 212 | void |
165 | sync_master (void *arg) | 213 | sync_master (void *arg) |
166 | { | 214 | { |
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) | |||
327 | static void __devinit | 375 | static void __devinit |
328 | smp_callin (void) | 376 | smp_callin (void) |
329 | { | 377 | { |
330 | int cpuid, phys_id; | 378 | int cpuid, phys_id, itc_master; |
331 | extern void ia64_init_itm(void); | 379 | extern void ia64_init_itm(void); |
380 | extern volatile int time_keeper_id; | ||
332 | 381 | ||
333 | #ifdef CONFIG_PERFMON | 382 | #ifdef CONFIG_PERFMON |
334 | extern void pfm_init_percpu(void); | 383 | extern void pfm_init_percpu(void); |
@@ -336,6 +385,7 @@ smp_callin (void) | |||
336 | 385 | ||
337 | cpuid = smp_processor_id(); | 386 | cpuid = smp_processor_id(); |
338 | phys_id = hard_smp_processor_id(); | 387 | phys_id = hard_smp_processor_id(); |
388 | itc_master = time_keeper_id; | ||
339 | 389 | ||
340 | if (cpu_online(cpuid)) { | 390 | if (cpu_online(cpuid)) { |
341 | printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", | 391 | printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", |
@@ -343,6 +393,8 @@ smp_callin (void) | |||
343 | BUG(); | 393 | BUG(); |
344 | } | 394 | } |
345 | 395 | ||
396 | fix_b0_for_bsp(); | ||
397 | |||
346 | lock_ipi_calllock(); | 398 | lock_ipi_calllock(); |
347 | cpu_set(cpuid, cpu_online_map); | 399 | cpu_set(cpuid, cpu_online_map); |
348 | unlock_ipi_calllock(); | 400 | unlock_ipi_calllock(); |
@@ -365,8 +417,8 @@ smp_callin (void) | |||
365 | * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls | 417 | * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls |
366 | * local_bh_enable(), which bugs out if irqs are not enabled... | 418 | * local_bh_enable(), which bugs out if irqs are not enabled... |
367 | */ | 419 | */ |
368 | Dprintk("Going to syncup ITC with BP.\n"); | 420 | Dprintk("Going to syncup ITC with ITC Master.\n"); |
369 | ia64_sync_itc(0); | 421 | ia64_sync_itc(itc_master); |
370 | } | 422 | } |
371 | 423 | ||
372 | /* | 424 | /* |
@@ -635,6 +687,47 @@ remove_siblinginfo(int cpu) | |||
635 | } | 687 | } |
636 | 688 | ||
637 | extern void fixup_irqs(void); | 689 | extern void fixup_irqs(void); |
690 | |||
691 | int migrate_platform_irqs(unsigned int cpu) | ||
692 | { | ||
693 | int new_cpei_cpu; | ||
694 | irq_desc_t *desc = NULL; | ||
695 | cpumask_t mask; | ||
696 | int retval = 0; | ||
697 | |||
698 | /* | ||
699 | * dont permit CPEI target to removed. | ||
700 | */ | ||
701 | if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { | ||
702 | printk ("CPU (%d) is CPEI Target\n", cpu); | ||
703 | if (can_cpei_retarget()) { | ||
704 | /* | ||
705 | * Now re-target the CPEI to a different processor | ||
706 | */ | ||
707 | new_cpei_cpu = any_online_cpu(cpu_online_map); | ||
708 | mask = cpumask_of_cpu(new_cpei_cpu); | ||
709 | set_cpei_target_cpu(new_cpei_cpu); | ||
710 | desc = irq_descp(ia64_cpe_irq); | ||
711 | /* | ||
712 | * Switch immediately for now; we need to fake the interrupt like | ||
713 | * other interrupts, but CPEI behaviour with polling needs further | ||
714 | * study before making changes. | ||
715 | */ | ||
716 | if (desc) { | ||
717 | desc->handler->disable(ia64_cpe_irq); | ||
718 | desc->handler->set_affinity(ia64_cpe_irq, mask); | ||
719 | desc->handler->enable(ia64_cpe_irq); | ||
720 | printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); | ||
721 | } | ||
722 | } | ||
723 | if (!desc) { | ||
724 | printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); | ||
725 | retval = -EBUSY; | ||
726 | } | ||
727 | } | ||
728 | return retval; | ||
729 | } | ||
730 | |||
638 | /* must be called with cpucontrol mutex held */ | 731 | /* must be called with cpucontrol mutex held */ |
639 | int __cpu_disable(void) | 732 | int __cpu_disable(void) |
640 | { | 733 | { |
@@ -643,8 +736,17 @@ int __cpu_disable(void) | |||
643 | /* | 736 | /* |
644 | * dont permit boot processor for now | 737 | * dont permit boot processor for now |
645 | */ | 738 | */ |
646 | if (cpu == 0) | 739 | if (cpu == 0 && !bsp_remove_ok) { |
647 | return -EBUSY; | 740 | printk ("Your platform does not support removal of BSP\n"); |
741 | return (-EBUSY); | ||
742 | } | ||
743 | |||
744 | cpu_clear(cpu, cpu_online_map); | ||
745 | |||
746 | if (migrate_platform_irqs(cpu)) { | ||
747 | cpu_set(cpu, cpu_online_map); | ||
748 | return (-EBUSY); | ||
749 | } | ||
648 | 750 | ||
649 | remove_siblinginfo(cpu); | 751 | remove_siblinginfo(cpu); |
650 | cpu_clear(cpu, cpu_online_map); | 752 | cpu_clear(cpu, cpu_online_map); |
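The __cpu_disable() change above tentatively removes the CPU from cpu_online_map, tries to migrate platform interrupts (the CPEI target) off it, and puts the CPU back online if that migration fails. A minimal user-space sketch of the same take-offline-then-roll-back pattern, with a plain bitmask standing in for cpu_online_map (all names below are illustrative, not kernel APIs):

    #include <stdio.h>

    static unsigned long online_mask = 0xf;        /* CPUs 0-3 online */

    /* stand-in for migrate_platform_irqs(); fails for CPU 0 here */
    static int migrate_irqs(int cpu)
    {
        return (cpu == 0) ? -1 : 0;
    }

    static int cpu_disable(int cpu)
    {
        online_mask &= ~(1UL << cpu);              /* tentatively take offline */
        if (migrate_irqs(cpu)) {
            online_mask |= 1UL << cpu;             /* roll back on failure */
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        int rc = cpu_disable(1);
        printf("disable cpu1: rc=%d mask=%#lx\n", rc, online_mask);
        rc = cpu_disable(0);
        printf("disable cpu0: rc=%d mask=%#lx\n", rc, online_mask);
        return 0;
    }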
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e15b2e..ac167436e936 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c | |||
@@ -32,7 +32,7 @@ | |||
32 | 32 | ||
33 | extern unsigned long wall_jiffies; | 33 | extern unsigned long wall_jiffies; |
34 | 34 | ||
35 | #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ | 35 | volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ |
36 | 36 | ||
37 | #ifdef CONFIG_IA64_DEBUG_IRQ | 37 | #ifdef CONFIG_IA64_DEBUG_IRQ |
38 | 38 | ||
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) | |||
71 | 71 | ||
72 | new_itm += local_cpu_data->itm_delta; | 72 | new_itm += local_cpu_data->itm_delta; |
73 | 73 | ||
74 | if (smp_processor_id() == TIME_KEEPER_ID) { | 74 | if (smp_processor_id() == time_keeper_id) { |
75 | /* | 75 | /* |
76 | * Here we are in the timer irq handler. We have irqs locally | 76 | * Here we are in the timer irq handler. We have irqs locally |
77 | * disabled, but we don't know if the timer_bh is running on | 77 | * disabled, but we don't know if the timer_bh is running on |
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = { | |||
236 | .name = "timer" | 236 | .name = "timer" |
237 | }; | 237 | }; |
238 | 238 | ||
239 | void __devinit ia64_disable_timer(void) | ||
240 | { | ||
241 | ia64_set_itv(1 << 16); | ||
242 | } | ||
243 | |||
239 | void __init | 244 | void __init |
240 | time_init (void) | 245 | time_init (void) |
241 | { | 246 | { |
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea19fa67..3b6fd798c4d6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c | |||
@@ -36,7 +36,7 @@ int arch_register_cpu(int num) | |||
36 | parent = &sysfs_nodes[cpu_to_node(num)]; | 36 | parent = &sysfs_nodes[cpu_to_node(num)]; |
37 | #endif /* CONFIG_NUMA */ | 37 | #endif /* CONFIG_NUMA */ |
38 | 38 | ||
39 | #ifdef CONFIG_ACPI | 39 | #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) |
40 | /* | 40 | /* |
41 | * If CPEI cannot be re-targetted, and this is | 41 | * If CPEI cannot be re-targetted, and this is |
42 | * CPEI target, then dont create the control file | 42 | * CPEI target, then dont create the control file |
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index acaaec4e4681..9855ba318094 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c | |||
@@ -181,13 +181,15 @@ per_cpu_init (void) | |||
181 | { | 181 | { |
182 | void *cpu_data; | 182 | void *cpu_data; |
183 | int cpu; | 183 | int cpu; |
184 | static int first_time=1; | ||
184 | 185 | ||
185 | /* | 186 | /* |
186 | * get_free_pages() cannot be used before cpu_init() done. BSP | 187 | * get_free_pages() cannot be used before cpu_init() done. BSP |
187 | * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls | 188 | * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls |
188 | * get_zeroed_page(). | 189 | * get_zeroed_page(). |
189 | */ | 190 | */ |
190 | if (smp_processor_id() == 0) { | 191 | if (first_time) { |
192 | first_time=0; | ||
191 | cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, | 193 | cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, |
192 | PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | 194 | PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); |
193 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 195 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c87d6d1d5813..573d5cc63e2b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c | |||
@@ -528,12 +528,17 @@ void __init find_memory(void) | |||
528 | void *per_cpu_init(void) | 528 | void *per_cpu_init(void) |
529 | { | 529 | { |
530 | int cpu; | 530 | int cpu; |
531 | static int first_time = 1; | ||
532 | |||
531 | 533 | ||
532 | if (smp_processor_id() != 0) | 534 | if (smp_processor_id() != 0) |
533 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; | 535 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; |
534 | 536 | ||
535 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 537 | if (first_time) { |
536 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; | 538 | first_time = 0; |
539 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
540 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; | ||
541 | } | ||
537 | 542 | ||
538 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; | 543 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; |
539 | } | 544 | } |
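Both per_cpu_init() changes above (contig.c and discontig.c) replace the "smp_processor_id() == 0" test with a static first_time flag, so the one-time per-CPU setup still runs exactly once even if the boot CPU is later taken offline and another CPU re-enters the path. The pattern, roughly (illustrative user-space C, not kernel code):

    #include <stdio.h>

    /* one-time setup guarded by a static flag instead of "am I CPU 0?" */
    static void per_cpu_setup(void)
    {
        static int first_time = 1;

        if (first_time) {
            first_time = 0;
            puts("performing one-time initialization");
        }
        puts("performing per-call work");
    }

    int main(void)
    {
        per_cpu_setup();    /* initializes once */
        per_cpu_setup();    /* skips the one-time part */
        return 0;
    }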
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 2d13889d0a99..9dbc7dadd165 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c | |||
@@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) | |||
68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } | 68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * This function checks for proper alignment of input addr and len parameters. | 71 | * Don't actually need to do any preparation, but need to make sure |
72 | * the address is in the right region. | ||
72 | */ | 73 | */ |
73 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | 74 | int prepare_hugepage_range(unsigned long addr, unsigned long len) |
74 | { | 75 | { |
75 | if (len & ~HPAGE_MASK) | 76 | if (len & ~HPAGE_MASK) |
76 | return -EINVAL; | 77 | return -EINVAL; |
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b38b6d213c15..08d94e6bfa18 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c | |||
@@ -197,7 +197,7 @@ free_initmem (void) | |||
197 | eaddr = (unsigned long) ia64_imva(__init_end); | 197 | eaddr = (unsigned long) ia64_imva(__init_end); |
198 | while (addr < eaddr) { | 198 | while (addr < eaddr) { |
199 | ClearPageReserved(virt_to_page(addr)); | 199 | ClearPageReserved(virt_to_page(addr)); |
200 | set_page_count(virt_to_page(addr), 1); | 200 | init_page_count(virt_to_page(addr)); |
201 | free_page(addr); | 201 | free_page(addr); |
202 | ++totalram_pages; | 202 | ++totalram_pages; |
203 | addr += PAGE_SIZE; | 203 | addr += PAGE_SIZE; |
@@ -252,7 +252,7 @@ free_initrd_mem (unsigned long start, unsigned long end) | |||
252 | continue; | 252 | continue; |
253 | page = virt_to_page(start); | 253 | page = virt_to_page(start); |
254 | ClearPageReserved(page); | 254 | ClearPageReserved(page); |
255 | set_page_count(page, 1); | 255 | init_page_count(page); |
256 | free_page(start); | 256 | free_page(start); |
257 | ++totalram_pages; | 257 | ++totalram_pages; |
258 | } | 258 | } |
@@ -640,7 +640,7 @@ mem_init (void) | |||
640 | void online_page(struct page *page) | 640 | void online_page(struct page *page) |
641 | { | 641 | { |
642 | ClearPageReserved(page); | 642 | ClearPageReserved(page); |
643 | set_page_count(page, 1); | 643 | init_page_count(page); |
644 | __free_page(page); | 644 | __free_page(page); |
645 | totalram_pages++; | 645 | totalram_pages++; |
646 | num_physpages++; | 646 | num_physpages++; |
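The set_page_count(page, 1) call sites above become init_page_count(page) when pages are handed back to the allocator. Presumably init_page_count() is just a wrapper that resets the reference count to 1; a stand-alone approximation of that behaviour (simplified struct, not the real struct page):

    #include <stdio.h>

    struct page { int _count; };                   /* simplified stand-in */

    /* assumed behaviour: reset a freshly freed page's refcount to 1 */
    static void init_page_count(struct page *page)
    {
        page->_count = 1;
    }

    int main(void)
    {
        struct page p = { ._count = 0 };
        init_page_count(&p);
        printf("page count = %d\n", p._count);
        return 0;
    }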
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 3e9b4eea7418..ab9c48c88012 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile | |||
@@ -10,7 +10,8 @@ | |||
10 | CPPFLAGS += -I$(srctree)/arch/ia64/sn/include | 10 | CPPFLAGS += -I$(srctree)/arch/ia64/sn/include |
11 | 11 | ||
12 | obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ | 12 | obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ |
13 | huberror.o io_init.o iomv.o klconflib.o sn2/ | 13 | huberror.o io_init.o iomv.o klconflib.o pio_phys.o \ |
14 | sn2/ | ||
14 | obj-$(CONFIG_IA64_GENERIC) += machvec.o | 15 | obj-$(CONFIG_IA64_GENERIC) += machvec.o |
15 | obj-$(CONFIG_SGI_TIOCX) += tiocx.o | 16 | obj-$(CONFIG_SGI_TIOCX) += tiocx.o |
16 | obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o | 17 | obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o |
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 000000000000..3c7d48d6ecb8 --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. | ||
7 | * | ||
8 | * This file contains macros used to access MMR registers via | ||
9 | * uncached physical addresses. | ||
10 | * pio_phys_read_mmr - read an MMR | ||
11 | * pio_phys_write_mmr - write an MMR | ||
12 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 | ||
13 | * Second MMR will be skipped if address is NULL | ||
14 | * | ||
15 | * Addresses passed to these routines should be uncached physical addresses | ||
16 | * ie., 0x80000.... | ||
17 | */ | ||
18 | |||
19 | |||
20 | |||
21 | #include <asm/asmmacro.h> | ||
22 | #include <asm/page.h> | ||
23 | |||
24 | GLOBAL_ENTRY(pio_phys_read_mmr) | ||
25 | .prologue | ||
26 | .regstk 1,0,0,0 | ||
27 | .body | ||
28 | mov r2=psr | ||
29 | rsm psr.i | psr.dt | ||
30 | ;; | ||
31 | srlz.d | ||
32 | ld8.acq r8=[r32] | ||
33 | ;; | ||
34 | mov psr.l=r2;; | ||
35 | srlz.d | ||
36 | br.ret.sptk.many rp | ||
37 | END(pio_phys_read_mmr) | ||
38 | |||
39 | GLOBAL_ENTRY(pio_phys_write_mmr) | ||
40 | .prologue | ||
41 | .regstk 2,0,0,0 | ||
42 | .body | ||
43 | mov r2=psr | ||
44 | rsm psr.i | psr.dt | ||
45 | ;; | ||
46 | srlz.d | ||
47 | st8.rel [r32]=r33 | ||
48 | ;; | ||
49 | mov psr.l=r2;; | ||
50 | srlz.d | ||
51 | br.ret.sptk.many rp | ||
52 | END(pio_phys_write_mmr) | ||
53 | |||
54 | GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) | ||
55 | .prologue | ||
56 | .regstk 4,0,0,0 | ||
57 | .body | ||
58 | mov r2=psr | ||
59 | cmp.ne p9,p0=r34,r0; | ||
60 | rsm psr.i | psr.dt | psr.ic | ||
61 | ;; | ||
62 | srlz.d | ||
63 | st8.rel [r32]=r33 | ||
64 | (p9) st8.rel [r34]=r35 | ||
65 | ;; | ||
66 | mov psr.l=r2;; | ||
67 | srlz.d | ||
68 | br.ret.sptk.many rp | ||
69 | END(pio_atomic_phys_write_mmrs) | ||
70 | |||
71 | |||
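These assembly routines access MMRs through uncached physical addresses while data translation (and, for the atomic variant, interrupt collection) is disabled around the access. A self-contained C stand-in showing how callers would use them; the bodies below are simplified models only, and the real implementations are the assembly above:

    #include <stdint.h>
    #include <stdio.h>

    /* simplified C stand-ins; the real routines also flip psr.dt/psr.ic */
    static uint64_t pio_phys_read_mmr(volatile uint64_t *mmr)
    {
        return *mmr;
    }

    static void pio_phys_write_mmr(volatile uint64_t *mmr, uint64_t val)
    {
        *mmr = val;
    }

    int main(void)
    {
        uint64_t fake_mmr = 0;   /* a local stands in for an uncached MMR */

        pio_phys_write_mmr(&fake_mmr, 0x5a5aULL);
        printf("mmr = %#llx\n",
               (unsigned long long)pio_phys_read_mmr(&fake_mmr));
        return 0;
    }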
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 5b84836c2171..8b6d5c844708 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
4 | * for more details. | 4 | * for more details. |
5 | * | 5 | * |
6 | * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. | 6 | * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p) | |||
498 | * for sn. | 498 | * for sn. |
499 | */ | 499 | */ |
500 | pm_power_off = ia64_sn_power_down; | 500 | pm_power_off = ia64_sn_power_down; |
501 | current->thread.flags |= IA64_THREAD_MIGRATION; | ||
501 | } | 502 | } |
502 | 503 | ||
503 | /** | 504 | /** |
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void) | |||
660 | SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; | 661 | SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; |
661 | u64 *pio; | 662 | u64 *pio; |
662 | pio = is_shub1() ? pio1 : pio2; | 663 | pio = is_shub1() ? pio1 : pio2; |
663 | pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); | 664 | pda->pio_write_status_addr = |
665 | (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); | ||
664 | pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; | 666 | pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; |
665 | } | 667 | } |
666 | 668 | ||
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index b2e1e746b47f..d9d306c79f2d 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void) | |||
93 | return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; | 93 | return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | /** | ||
97 | * sn_migrate - SN-specific task migration actions | ||
98 | * @task: Task being migrated to new CPU | ||
99 | * | ||
100 | * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. | ||
101 | * Context switching user threads which have memory-mapped MMIO may cause | ||
102 | * PIOs to issue from separate CPUs, thus the PIO writes must be drained | ||
103 | * from the previous CPU's Shub before execution resumes on the new CPU. | ||
104 | */ | ||
105 | void sn_migrate(struct task_struct *task) | ||
106 | { | ||
107 | pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); | ||
108 | volatile unsigned long *adr = last_pda->pio_write_status_addr; | ||
109 | unsigned long val = last_pda->pio_write_status_val; | ||
110 | |||
111 | /* Drain PIO writes from old CPU's Shub */ | ||
112 | while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) | ||
113 | != val)) | ||
114 | cpu_relax(); | ||
115 | } | ||
116 | |||
96 | void sn_tlb_migrate_finish(struct mm_struct *mm) | 117 | void sn_tlb_migrate_finish(struct mm_struct *mm) |
97 | { | 118 | { |
98 | /* flush_tlb_mm is inefficient if more than 1 users of mm */ | 119 | /* flush_tlb_mm is inefficient if more than 1 users of mm */ |
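sn_migrate() above spins until the previous CPU's Shub reports no pending PIO writes, so MMIO writes issued before the task moved cannot be reordered after writes issued on the new CPU. The drain loop reduces to the pattern below (a stand-alone sketch; the status word and mask value here are illustrative, not the real Shub register):

    #include <stdint.h>
    #include <stdio.h>

    #define PENDING_WRITE_COUNT_MASK 0x3fULL   /* illustrative mask value */

    /* spin until the masked status field reaches its idle value */
    static void drain_pio_writes(volatile uint64_t *status, uint64_t idle_val)
    {
        while ((*status & PENDING_WRITE_COUNT_MASK) != idle_val)
            ;   /* the kernel version calls cpu_relax() here */
    }

    int main(void)
    {
        uint64_t status = 0x3f;   /* already idle for this fake Shub */
        drain_pio_writes(&status, 0x3f);
        puts("PIO writes drained");
        return 0;
    }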
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index cdf6856ce089..d0abddd9ffe6 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/cache.h> | 22 | #include <linux/cache.h> |
23 | #include <linux/interrupt.h> | 23 | #include <linux/interrupt.h> |
24 | #include <linux/slab.h> | ||
25 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
26 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
27 | #include <asm/sn/bte.h> | 26 | #include <asm/sn/bte.h> |
@@ -30,6 +29,31 @@ | |||
30 | 29 | ||
31 | 30 | ||
32 | /* | 31 | /* |
32 | * Guarantee that the kzalloc'd memory is cacheline aligned. | ||
33 | */ | ||
34 | static void * | ||
35 | xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
36 | { | ||
37 | /* see if kzalloc will give us cacheline aligned memory by default */ | ||
38 | *base = kzalloc(size, flags); | ||
39 | if (*base == NULL) { | ||
40 | return NULL; | ||
41 | } | ||
42 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
43 | return *base; | ||
44 | } | ||
45 | kfree(*base); | ||
46 | |||
47 | /* nope, we'll have to do it ourselves */ | ||
48 | *base = kzalloc(size + L1_CACHE_BYTES, flags); | ||
49 | if (*base == NULL) { | ||
50 | return NULL; | ||
51 | } | ||
52 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
53 | } | ||
54 | |||
55 | |||
56 | /* | ||
33 | * Set up the initial values for the XPartition Communication channels. | 57 | * Set up the initial values for the XPartition Communication channels. |
34 | */ | 58 | */ |
35 | static void | 59 | static void |
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
93 | * Allocate all of the channel structures as a contiguous chunk of | 117 | * Allocate all of the channel structures as a contiguous chunk of |
94 | * memory. | 118 | * memory. |
95 | */ | 119 | */ |
96 | part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, | 120 | part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, |
97 | GFP_KERNEL); | 121 | GFP_KERNEL); |
98 | if (part->channels == NULL) { | 122 | if (part->channels == NULL) { |
99 | dev_err(xpc_chan, "can't get memory for channels\n"); | 123 | dev_err(xpc_chan, "can't get memory for channels\n"); |
100 | return xpcNoMemory; | 124 | return xpcNoMemory; |
101 | } | 125 | } |
102 | memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS); | ||
103 | 126 | ||
104 | part->nchannels = XPC_NCHANNELS; | 127 | part->nchannels = XPC_NCHANNELS; |
105 | 128 | ||
106 | 129 | ||
107 | /* allocate all the required GET/PUT values */ | 130 | /* allocate all the required GET/PUT values */ |
108 | 131 | ||
109 | part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, | 132 | part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, |
110 | GFP_KERNEL, &part->local_GPs_base); | 133 | GFP_KERNEL, &part->local_GPs_base); |
111 | if (part->local_GPs == NULL) { | 134 | if (part->local_GPs == NULL) { |
112 | kfree(part->channels); | 135 | kfree(part->channels); |
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
115 | "values\n"); | 138 | "values\n"); |
116 | return xpcNoMemory; | 139 | return xpcNoMemory; |
117 | } | 140 | } |
118 | memset(part->local_GPs, 0, XPC_GP_SIZE); | ||
119 | 141 | ||
120 | part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, | 142 | part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, |
121 | GFP_KERNEL, &part->remote_GPs_base); | 143 | GFP_KERNEL, &part->remote_GPs_base); |
122 | if (part->remote_GPs == NULL) { | 144 | if (part->remote_GPs == NULL) { |
123 | kfree(part->channels); | ||
124 | part->channels = NULL; | ||
125 | kfree(part->local_GPs_base); | ||
126 | part->local_GPs = NULL; | ||
127 | dev_err(xpc_chan, "can't get memory for remote get/put " | 145 | dev_err(xpc_chan, "can't get memory for remote get/put " |
128 | "values\n"); | 146 | "values\n"); |
147 | kfree(part->local_GPs_base); | ||
148 | part->local_GPs = NULL; | ||
149 | kfree(part->channels); | ||
150 | part->channels = NULL; | ||
129 | return xpcNoMemory; | 151 | return xpcNoMemory; |
130 | } | 152 | } |
131 | memset(part->remote_GPs, 0, XPC_GP_SIZE); | ||
132 | 153 | ||
133 | 154 | ||
134 | /* allocate all the required open and close args */ | 155 | /* allocate all the required open and close args */ |
135 | 156 | ||
136 | part->local_openclose_args = xpc_kmalloc_cacheline_aligned( | 157 | part->local_openclose_args = xpc_kzalloc_cacheline_aligned( |
137 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, | 158 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, |
138 | &part->local_openclose_args_base); | 159 | &part->local_openclose_args_base); |
139 | if (part->local_openclose_args == NULL) { | 160 | if (part->local_openclose_args == NULL) { |
140 | kfree(part->channels); | 161 | dev_err(xpc_chan, "can't get memory for local connect args\n"); |
141 | part->channels = NULL; | ||
142 | kfree(part->local_GPs_base); | ||
143 | part->local_GPs = NULL; | ||
144 | kfree(part->remote_GPs_base); | 162 | kfree(part->remote_GPs_base); |
145 | part->remote_GPs = NULL; | 163 | part->remote_GPs = NULL; |
146 | dev_err(xpc_chan, "can't get memory for local connect args\n"); | 164 | kfree(part->local_GPs_base); |
165 | part->local_GPs = NULL; | ||
166 | kfree(part->channels); | ||
167 | part->channels = NULL; | ||
147 | return xpcNoMemory; | 168 | return xpcNoMemory; |
148 | } | 169 | } |
149 | memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); | ||
150 | 170 | ||
151 | part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( | 171 | part->remote_openclose_args = xpc_kzalloc_cacheline_aligned( |
152 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, | 172 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, |
153 | &part->remote_openclose_args_base); | 173 | &part->remote_openclose_args_base); |
154 | if (part->remote_openclose_args == NULL) { | 174 | if (part->remote_openclose_args == NULL) { |
155 | kfree(part->channels); | 175 | dev_err(xpc_chan, "can't get memory for remote connect args\n"); |
156 | part->channels = NULL; | ||
157 | kfree(part->local_GPs_base); | ||
158 | part->local_GPs = NULL; | ||
159 | kfree(part->remote_GPs_base); | ||
160 | part->remote_GPs = NULL; | ||
161 | kfree(part->local_openclose_args_base); | 176 | kfree(part->local_openclose_args_base); |
162 | part->local_openclose_args = NULL; | 177 | part->local_openclose_args = NULL; |
163 | dev_err(xpc_chan, "can't get memory for remote connect args\n"); | 178 | kfree(part->remote_GPs_base); |
179 | part->remote_GPs = NULL; | ||
180 | kfree(part->local_GPs_base); | ||
181 | part->local_GPs = NULL; | ||
182 | kfree(part->channels); | ||
183 | part->channels = NULL; | ||
164 | return xpcNoMemory; | 184 | return xpcNoMemory; |
165 | } | 185 | } |
166 | memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); | ||
167 | 186 | ||
168 | 187 | ||
169 | xpc_initialize_channels(part, partid); | 188 | xpc_initialize_channels(part, partid); |
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
186 | ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, | 205 | ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, |
187 | part->IPI_owner, (void *) (u64) partid); | 206 | part->IPI_owner, (void *) (u64) partid); |
188 | if (ret != 0) { | 207 | if (ret != 0) { |
189 | kfree(part->channels); | ||
190 | part->channels = NULL; | ||
191 | kfree(part->local_GPs_base); | ||
192 | part->local_GPs = NULL; | ||
193 | kfree(part->remote_GPs_base); | ||
194 | part->remote_GPs = NULL; | ||
195 | kfree(part->local_openclose_args_base); | ||
196 | part->local_openclose_args = NULL; | ||
197 | kfree(part->remote_openclose_args_base); | ||
198 | part->remote_openclose_args = NULL; | ||
199 | dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " | 208 | dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " |
200 | "errno=%d\n", -ret); | 209 | "errno=%d\n", -ret); |
210 | kfree(part->remote_openclose_args_base); | ||
211 | part->remote_openclose_args = NULL; | ||
212 | kfree(part->local_openclose_args_base); | ||
213 | part->local_openclose_args = NULL; | ||
214 | kfree(part->remote_GPs_base); | ||
215 | part->remote_GPs = NULL; | ||
216 | kfree(part->local_GPs_base); | ||
217 | part->local_GPs = NULL; | ||
218 | kfree(part->channels); | ||
219 | part->channels = NULL; | ||
201 | return xpcLackOfResources; | 220 | return xpcLackOfResources; |
202 | } | 221 | } |
203 | 222 | ||
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch) | |||
446 | for (nentries = ch->local_nentries; nentries > 0; nentries--) { | 465 | for (nentries = ch->local_nentries; nentries > 0; nentries--) { |
447 | 466 | ||
448 | nbytes = nentries * ch->msg_size; | 467 | nbytes = nentries * ch->msg_size; |
449 | ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, | 468 | ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, |
450 | GFP_KERNEL, | 469 | GFP_KERNEL, |
451 | &ch->local_msgqueue_base); | 470 | &ch->local_msgqueue_base); |
452 | if (ch->local_msgqueue == NULL) { | 471 | if (ch->local_msgqueue == NULL) { |
453 | continue; | 472 | continue; |
454 | } | 473 | } |
455 | memset(ch->local_msgqueue, 0, nbytes); | ||
456 | 474 | ||
457 | nbytes = nentries * sizeof(struct xpc_notify); | 475 | nbytes = nentries * sizeof(struct xpc_notify); |
458 | ch->notify_queue = kmalloc(nbytes, GFP_KERNEL); | 476 | ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); |
459 | if (ch->notify_queue == NULL) { | 477 | if (ch->notify_queue == NULL) { |
460 | kfree(ch->local_msgqueue_base); | 478 | kfree(ch->local_msgqueue_base); |
461 | ch->local_msgqueue = NULL; | 479 | ch->local_msgqueue = NULL; |
462 | continue; | 480 | continue; |
463 | } | 481 | } |
464 | memset(ch->notify_queue, 0, nbytes); | ||
465 | 482 | ||
466 | spin_lock_irqsave(&ch->lock, irq_flags); | 483 | spin_lock_irqsave(&ch->lock, irq_flags); |
467 | if (nentries < ch->local_nentries) { | 484 | if (nentries < ch->local_nentries) { |
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch) | |||
501 | for (nentries = ch->remote_nentries; nentries > 0; nentries--) { | 518 | for (nentries = ch->remote_nentries; nentries > 0; nentries--) { |
502 | 519 | ||
503 | nbytes = nentries * ch->msg_size; | 520 | nbytes = nentries * ch->msg_size; |
504 | ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, | 521 | ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, |
505 | GFP_KERNEL, | 522 | GFP_KERNEL, |
506 | &ch->remote_msgqueue_base); | 523 | &ch->remote_msgqueue_base); |
507 | if (ch->remote_msgqueue == NULL) { | 524 | if (ch->remote_msgqueue == NULL) { |
508 | continue; | 525 | continue; |
509 | } | 526 | } |
510 | memset(ch->remote_msgqueue, 0, nbytes); | ||
511 | 527 | ||
512 | spin_lock_irqsave(&ch->lock, irq_flags); | 528 | spin_lock_irqsave(&ch->lock, irq_flags); |
513 | if (nentries < ch->remote_nentries) { | 529 | if (nentries < ch->remote_nentries) { |
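Two things happen throughout xpc_channel.c above: kmalloc()+memset() pairs collapse into kzalloc(), and each error path now frees resources in the reverse order of allocation, with the dev_err() message moved ahead of the cleanup. A compact sketch of that allocate-forward/unwind-backward shape, expressed with goto labels for brevity (user-space C, calloc standing in for kzalloc):

    #include <stdlib.h>

    struct part { void *channels; void *local_gps; void *remote_gps; };

    static int setup(struct part *p)
    {
        p->channels = calloc(1, 64);          /* kzalloc-style: already zeroed */
        if (!p->channels)
            goto out;
        p->local_gps = calloc(1, 64);
        if (!p->local_gps)
            goto out_channels;
        p->remote_gps = calloc(1, 64);
        if (!p->remote_gps)
            goto out_local;
        return 0;

    out_local:                                /* unwind in reverse order */
        free(p->local_gps);
        p->local_gps = NULL;
    out_channels:
        free(p->channels);
        p->channels = NULL;
    out:
        return -1;
    }

    int main(void)
    {
        struct part p = { 0 };
        return setup(&p) ? 1 : 0;
    }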
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 8cbf16432570..99b123a6421a 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/syscalls.h> | 52 | #include <linux/syscalls.h> |
53 | #include <linux/cache.h> | 53 | #include <linux/cache.h> |
54 | #include <linux/interrupt.h> | 54 | #include <linux/interrupt.h> |
55 | #include <linux/slab.h> | ||
56 | #include <linux/delay.h> | 55 | #include <linux/delay.h> |
57 | #include <linux/reboot.h> | 56 | #include <linux/reboot.h> |
58 | #include <linux/completion.h> | 57 | #include <linux/completion.h> |
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 88a730e6cfdb..94211429fd0c 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c | |||
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE + | |||
81 | 81 | ||
82 | 82 | ||
83 | /* | 83 | /* |
84 | * Guarantee that the kmalloc'd memory is cacheline aligned. | ||
85 | */ | ||
86 | static void * | ||
87 | xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
88 | { | ||
89 | /* see if kmalloc will give us cacheline aligned memory by default */ | ||
90 | *base = kmalloc(size, flags); | ||
91 | if (*base == NULL) { | ||
92 | return NULL; | ||
93 | } | ||
94 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
95 | return *base; | ||
96 | } | ||
97 | kfree(*base); | ||
98 | |||
99 | /* nope, we'll have to do it ourselves */ | ||
100 | *base = kmalloc(size + L1_CACHE_BYTES, flags); | ||
101 | if (*base == NULL) { | ||
102 | return NULL; | ||
103 | } | ||
104 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
105 | } | ||
106 | |||
107 | |||
108 | /* | ||
84 | * Given a nasid, get the physical address of the partition's reserved page | 109 | * Given a nasid, get the physical address of the partition's reserved page |
85 | * for that nasid. This function returns 0 on any error. | 110 | * for that nasid. This function returns 0 on any error. |
86 | */ | 111 | */ |
@@ -1038,13 +1063,12 @@ xpc_discovery(void) | |||
1038 | remote_vars = (struct xpc_vars *) remote_rp; | 1063 | remote_vars = (struct xpc_vars *) remote_rp; |
1039 | 1064 | ||
1040 | 1065 | ||
1041 | discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words, | 1066 | discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, |
1042 | GFP_KERNEL); | 1067 | GFP_KERNEL); |
1043 | if (discovered_nasids == NULL) { | 1068 | if (discovered_nasids == NULL) { |
1044 | kfree(remote_rp_base); | 1069 | kfree(remote_rp_base); |
1045 | return; | 1070 | return; |
1046 | } | 1071 | } |
1047 | memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words); | ||
1048 | 1072 | ||
1049 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; | 1073 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; |
1050 | 1074 | ||
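xpc_kzalloc_cacheline_aligned() above (and its kmalloc twin) first tries an ordinary allocation and keeps it if it happens to be cacheline aligned; otherwise it frees it, over-allocates by one cache line, and returns the aligned address inside that block, with *base preserved so the caller can free the real allocation later. The same trick in user-space C, with L1_CACHE_BYTES hard-coded to 128 purely for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define L1_CACHE_BYTES 128
    #define L1_CACHE_ALIGN(x) \
        (((x) + L1_CACHE_BYTES - 1) & ~((uintptr_t)L1_CACHE_BYTES - 1))

    /* returns an aligned, zeroed buffer; *base is what must be passed to free() */
    static void *zalloc_cacheline_aligned(size_t size, void **base)
    {
        *base = calloc(1, size);
        if (*base == NULL)
            return NULL;
        if ((uintptr_t)*base == L1_CACHE_ALIGN((uintptr_t)*base))
            return *base;                          /* lucky: already aligned */
        free(*base);

        *base = calloc(1, size + L1_CACHE_BYTES);  /* over-allocate and align */
        if (*base == NULL)
            return NULL;
        return (void *)L1_CACHE_ALIGN((uintptr_t)*base);
    }

    int main(void)
    {
        void *base;
        void *buf = zalloc_cacheline_aligned(200, &base);

        printf("buf=%p base=%p aligned=%d\n", buf, base,
               ((uintptr_t)buf % L1_CACHE_BYTES) == 0);
        free(base);
        return 0;
    }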
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index e52831ed93eb..fa073cc4b565 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c | |||
@@ -15,6 +15,124 @@ | |||
15 | #include <asm/sn/pcidev.h> | 15 | #include <asm/sn/pcidev.h> |
16 | #include <asm/sn/pcibus_provider_defs.h> | 16 | #include <asm/sn/pcibus_provider_defs.h> |
17 | #include <asm/sn/tioce_provider.h> | 17 | #include <asm/sn/tioce_provider.h> |
18 | #include <asm/sn/sn2/sn_hwperf.h> | ||
19 | |||
20 | /* | ||
21 | * 1/26/2006 | ||
22 | * | ||
23 | * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe | ||
24 | * (taken from the above PV) before and after accessing tioce internal MMR's | ||
25 | * to avoid tioce lockups. | ||
26 | * | ||
27 | * The recipe as taken from the PV: | ||
28 | * | ||
29 | * if(mmr address < 0x45000) { | ||
30 | * if(mmr address == 0 or 0x80) | ||
31 | * mmr wrt or read address 0xc0 | ||
32 | * else if(mmr address == 0x148 or 0x200) | ||
33 | * mmr wrt or read address 0x28 | ||
34 | * else | ||
35 | * mmr wrt or read address 0x158 | ||
36 | * | ||
37 | * do desired mmr access (rd or wrt) | ||
38 | * | ||
39 | * if(mmr address == 0x100) | ||
40 | * mmr wrt or read address 0x38 | ||
41 | * mmr wrt or read address 0xb050 | ||
42 | * } else | ||
43 | * do desired mmr access | ||
44 | * | ||
45 | * According to hw, we can use reads instead of writes to the above addresses | ||
46 | * | ||
47 | * Note this WAR can only to be used for accessing internal MMR's in the | ||
48 | * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the | ||
49 | * "Local CE Registers and Memories" and "PCI Compatible Config Space" address | ||
50 | * spaces from table 2-1 of the "CE Programmer's Reference Overview" document. | ||
51 | * | ||
52 | * All registers defined in struct tioce will meet that criteria. | ||
53 | */ | ||
54 | |||
55 | static void inline | ||
56 | tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) | ||
57 | { | ||
58 | u64 mmr_base; | ||
59 | u64 mmr_offset; | ||
60 | |||
61 | if (kern->ce_common->ce_rev != TIOCE_REV_A) | ||
62 | return; | ||
63 | |||
64 | mmr_base = kern->ce_common->ce_pcibus.bs_base; | ||
65 | mmr_offset = (u64)mmr_addr - mmr_base; | ||
66 | |||
67 | if (mmr_offset < 0x45000) { | ||
68 | u64 mmr_war_offset; | ||
69 | |||
70 | if (mmr_offset == 0 || mmr_offset == 0x80) | ||
71 | mmr_war_offset = 0xc0; | ||
72 | else if (mmr_offset == 0x148 || mmr_offset == 0x200) | ||
73 | mmr_war_offset = 0x28; | ||
74 | else | ||
75 | mmr_war_offset = 0x158; | ||
76 | |||
77 | readq_relaxed((void *)(mmr_base + mmr_war_offset)); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static void inline | ||
82 | tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) | ||
83 | { | ||
84 | u64 mmr_base; | ||
85 | u64 mmr_offset; | ||
86 | |||
87 | if (kern->ce_common->ce_rev != TIOCE_REV_A) | ||
88 | return; | ||
89 | |||
90 | mmr_base = kern->ce_common->ce_pcibus.bs_base; | ||
91 | mmr_offset = (u64)mmr_addr - mmr_base; | ||
92 | |||
93 | if (mmr_offset < 0x45000) { | ||
94 | if (mmr_offset == 0x100) | ||
95 | readq_relaxed((void *)(mmr_base + 0x38)); | ||
96 | readq_relaxed((void *)(mmr_base + 0xb050)); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* load mmr contents into a variable */ | ||
101 | #define tioce_mmr_load(kern, mmrp, varp) do {\ | ||
102 | tioce_mmr_war_pre(kern, mmrp); \ | ||
103 | *(varp) = readq_relaxed(mmrp); \ | ||
104 | tioce_mmr_war_post(kern, mmrp); \ | ||
105 | } while (0) | ||
106 | |||
107 | /* store variable contents into mmr */ | ||
108 | #define tioce_mmr_store(kern, mmrp, varp) do {\ | ||
109 | tioce_mmr_war_pre(kern, mmrp); \ | ||
110 | writeq(*varp, mmrp); \ | ||
111 | tioce_mmr_war_post(kern, mmrp); \ | ||
112 | } while (0) | ||
113 | |||
114 | /* store immediate value into mmr */ | ||
115 | #define tioce_mmr_storei(kern, mmrp, val) do {\ | ||
116 | tioce_mmr_war_pre(kern, mmrp); \ | ||
117 | writeq(val, mmrp); \ | ||
118 | tioce_mmr_war_post(kern, mmrp); \ | ||
119 | } while (0) | ||
120 | |||
121 | /* set bits (immediate value) into mmr */ | ||
122 | #define tioce_mmr_seti(kern, mmrp, bits) do {\ | ||
123 | u64 tmp; \ | ||
124 | tioce_mmr_load(kern, mmrp, &tmp); \ | ||
125 | tmp |= (bits); \ | ||
126 | tioce_mmr_store(kern, mmrp, &tmp); \ | ||
127 | } while (0) | ||
128 | |||
129 | /* clear bits (immediate value) into mmr */ | ||
130 | #define tioce_mmr_clri(kern, mmrp, bits) do { \ | ||
131 | u64 tmp; \ | ||
132 | tioce_mmr_load(kern, mmrp, &tmp); \ | ||
133 | tmp &= ~(bits); \ | ||
134 | tioce_mmr_store(kern, mmrp, &tmp); \ | ||
135 | } while (0) | ||
18 | 136 | ||
19 | /** | 137 | /** |
20 | * Bus address ranges for the 5 flavors of TIOCE DMA | 138 | * Bus address ranges for the 5 flavors of TIOCE DMA |
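The workaround above funnels every internal-MMR access through a pre/post dummy read on rev-A parts, and the tioce_mmr_seti/clri macros then build set-bit and clear-bit operations out of a guarded load/modify/store sequence; the do { ... } while (0) wrapping is what lets a multi-statement macro sit safely inside an if/else. A stripped-down model of that structure, using plain memory instead of MMRs (all names here are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static void war_pre(volatile uint64_t *mmr)  { (void)mmr; /* dummy read on rev A */ }
    static void war_post(volatile uint64_t *mmr) { (void)mmr; /* dummy read on rev A */ }

    #define mmr_load(mmrp, varp)  do { war_pre(mmrp); *(varp) = *(mmrp); war_post(mmrp); } while (0)
    #define mmr_store(mmrp, varp) do { war_pre(mmrp); *(mmrp) = *(varp); war_post(mmrp); } while (0)

    /* read-modify-write built on top of the guarded load/store */
    #define mmr_seti(mmrp, bits) do { uint64_t t; mmr_load(mmrp, &t); t |= (bits);  mmr_store(mmrp, &t); } while (0)
    #define mmr_clri(mmrp, bits) do { uint64_t t; mmr_load(mmrp, &t); t &= ~(bits); mmr_store(mmrp, &t); } while (0)

    int main(void)
    {
        volatile uint64_t reg = 0;

        mmr_seti(&reg, 0xf0ULL);
        mmr_clri(&reg, 0x30ULL);
        printf("reg = %#llx\n", (unsigned long long)reg);   /* prints 0xc0 */
        return 0;
    }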
@@ -62,9 +180,9 @@ | |||
62 | #define TIOCE_ATE_M40 2 | 180 | #define TIOCE_ATE_M40 2 |
63 | #define TIOCE_ATE_M40S 3 | 181 | #define TIOCE_ATE_M40S 3 |
64 | 182 | ||
65 | #define KB(x) ((x) << 10) | 183 | #define KB(x) ((u64)(x) << 10) |
66 | #define MB(x) ((x) << 20) | 184 | #define MB(x) ((u64)(x) << 20) |
67 | #define GB(x) ((x) << 30) | 185 | #define GB(x) ((u64)(x) << 30) |
68 | 186 | ||
69 | /** | 187 | /** |
70 | * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode | 188 | * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode |
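Casting to u64 before shifting matters because the bare constants otherwise promote to 32-bit arithmetic: KB(256) is fine, but GB(4) loses its high bits, and the page-size arithmetic fed by these macros is widened to a u64 pagesize in tioce_alloc_map just below. A small demonstration of the difference (using an unsigned variant of the old macro so the truncation is well defined):

    #include <stdint.h>
    #include <stdio.h>

    #define GB_TRUNC(x) ((unsigned int)(x) << 30)   /* 32-bit arithmetic: wraps */
    #define GB(x)       ((uint64_t)(x) << 30)       /* the fixed form */

    int main(void)
    {
        printf("truncated: %u\n", GB_TRUNC(4));                      /* 0 */
        printf("widened:   %llu\n", (unsigned long long)GB(4));      /* 4294967296 */
        return 0;
    }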
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, | |||
151 | int last; | 269 | int last; |
152 | int entries; | 270 | int entries; |
153 | int nates; | 271 | int nates; |
154 | int pagesize; | 272 | u64 pagesize; |
155 | u64 *ate_shadow; | 273 | u64 *ate_shadow; |
156 | u64 *ate_reg; | 274 | u64 *ate_reg; |
157 | u64 addr; | 275 | u64 addr; |
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, | |||
228 | 346 | ||
229 | ate = ATE_MAKE(addr, pagesize); | 347 | ate = ATE_MAKE(addr, pagesize); |
230 | ate_shadow[i + j] = ate; | 348 | ate_shadow[i + j] = ate; |
231 | writeq(ate, &ate_reg[i + j]); | 349 | tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); |
232 | addr += pagesize; | 350 | addr += pagesize; |
233 | } | 351 | } |
234 | 352 | ||
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) | |||
272 | u64 tmp; | 390 | u64 tmp; |
273 | 391 | ||
274 | ce_kern->ce_port[port].dirmap_shadow = ct_upper; | 392 | ce_kern->ce_port[port].dirmap_shadow = ct_upper; |
275 | writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]); | 393 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], |
394 | ct_upper); | ||
276 | tmp = ce_mmr->ce_ure_dir_map[port]; | 395 | tmp = ce_mmr->ce_ure_dir_map[port]; |
277 | dma_ok = 1; | 396 | dma_ok = 1; |
278 | } else | 397 | } else |
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) | |||
344 | if (TIOCE_D32_ADDR(bus_addr)) { | 463 | if (TIOCE_D32_ADDR(bus_addr)) { |
345 | if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { | 464 | if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { |
346 | ce_kern->ce_port[port].dirmap_shadow = 0; | 465 | ce_kern->ce_port[port].dirmap_shadow = 0; |
347 | writeq(0, &ce_mmr->ce_ure_dir_map[port]); | 466 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], |
467 | 0); | ||
348 | } | 468 | } |
349 | } else { | 469 | } else { |
350 | struct tioce_dmamap *map; | 470 | struct tioce_dmamap *map; |
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) | |||
365 | } else if (--map->refcnt == 0) { | 485 | } else if (--map->refcnt == 0) { |
366 | for (i = 0; i < map->ate_count; i++) { | 486 | for (i = 0; i < map->ate_count; i++) { |
367 | map->ate_shadow[i] = 0; | 487 | map->ate_shadow[i] = 0; |
368 | map->ate_hw[i] = 0; | 488 | tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); |
369 | } | 489 | } |
370 | 490 | ||
371 | list_del(&map->ce_dmamap_list); | 491 | list_del(&map->ce_dmamap_list); |
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, | |||
486 | spin_unlock_irqrestore(&ce_kern->ce_lock, flags); | 606 | spin_unlock_irqrestore(&ce_kern->ce_lock, flags); |
487 | 607 | ||
488 | dma_map_done: | 608 | dma_map_done: |
489 | if (mapaddr & barrier) | 609 | if (mapaddr && barrier) |
490 | mapaddr = tioce_dma_barrier(mapaddr, 1); | 610 | mapaddr = tioce_dma_barrier(mapaddr, 1); |
491 | 611 | ||
492 | return mapaddr; | 612 | return mapaddr; |
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt) | |||
541 | soft->ce_pcibus.bs_persist_segment, | 661 | soft->ce_pcibus.bs_persist_segment, |
542 | soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); | 662 | soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); |
543 | 663 | ||
664 | if (ret_stuff.v0) | ||
665 | panic("tioce_error_intr_handler: Fatal TIOCE error"); | ||
666 | |||
544 | return IRQ_HANDLED; | 667 | return IRQ_HANDLED; |
545 | } | 668 | } |
546 | 669 | ||
547 | /** | 670 | /** |
671 | * tioce_reserve_m32 - reserve M32 ate's for the indicated address range | ||
672 | * @tioce_kernel: TIOCE context to reserve ate's for | ||
673 | * @base: starting bus address to reserve | ||
674 | * @limit: last bus address to reserve | ||
675 | * | ||
676 | * If base/limit falls within the range of bus space mapped through the | ||
677 | * M32 space, reserve the resources corresponding to the range. | ||
678 | */ | ||
679 | static void | ||
680 | tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) | ||
681 | { | ||
682 | int ate_index, last_ate, ps; | ||
683 | struct tioce *ce_mmr; | ||
684 | |||
685 | if (!TIOCE_M32_ADDR(base)) | ||
686 | return; | ||
687 | |||
688 | ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; | ||
689 | ps = ce_kern->ce_ate3240_pagesize; | ||
690 | ate_index = ATE_PAGE(base, ps); | ||
691 | last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; | ||
692 | |||
693 | if (ate_index < 64) | ||
694 | ate_index = 64; | ||
695 | |||
696 | while (ate_index <= last_ate) { | ||
697 | u64 ate; | ||
698 | |||
699 | ate = ATE_MAKE(0xdeadbeef, ps); | ||
700 | ce_kern->ce_ate3240_shadow[ate_index] = ate; | ||
701 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], | ||
702 | ate); | ||
703 | ate_index++; | ||
704 | } | ||
705 | } | ||
706 | |||
707 | /** | ||
548 | * tioce_kern_init - init kernel structures related to a given TIOCE | 708 | * tioce_kern_init - init kernel structures related to a given TIOCE |
549 | * @tioce_common: ptr to a cached tioce_common struct that originated in prom | 709 | * @tioce_common: ptr to a cached tioce_common struct that originated in prom |
550 | */ static struct tioce_kernel * | 710 | */ |
711 | static struct tioce_kernel * | ||
551 | tioce_kern_init(struct tioce_common *tioce_common) | 712 | tioce_kern_init(struct tioce_common *tioce_common) |
552 | { | 713 | { |
553 | int i; | 714 | int i; |
715 | int ps; | ||
716 | int dev; | ||
554 | u32 tmp; | 717 | u32 tmp; |
718 | unsigned int seg, bus; | ||
555 | struct tioce *tioce_mmr; | 719 | struct tioce *tioce_mmr; |
556 | struct tioce_kernel *tioce_kern; | 720 | struct tioce_kernel *tioce_kern; |
557 | 721 | ||
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common) | |||
572 | * here to use pci_read_config_xxx() so use the raw_pci_ops vector. | 736 | * here to use pci_read_config_xxx() so use the raw_pci_ops vector. |
573 | */ | 737 | */ |
574 | 738 | ||
575 | raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, | 739 | seg = tioce_common->ce_pcibus.bs_persist_segment; |
576 | tioce_common->ce_pcibus.bs_persist_busnum, | 740 | bus = tioce_common->ce_pcibus.bs_persist_busnum; |
577 | PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); | 741 | |
742 | raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); | ||
578 | tioce_kern->ce_port1_secondary = (u8) tmp; | 743 | tioce_kern->ce_port1_secondary = (u8) tmp; |
579 | 744 | ||
580 | /* | 745 | /* |
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common) | |||
583 | */ | 748 | */ |
584 | 749 | ||
585 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; | 750 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; |
586 | __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); | 751 | tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, |
587 | __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE); | 752 | CE_URE_PAGESIZE_MASK); |
588 | tioce_kern->ce_ate3240_pagesize = KB(256); | 753 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, |
754 | CE_URE_256K_PAGESIZE); | ||
755 | ps = tioce_kern->ce_ate3240_pagesize = KB(256); | ||
589 | 756 | ||
590 | for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { | 757 | for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { |
591 | tioce_kern->ce_ate40_shadow[i] = 0; | 758 | tioce_kern->ce_ate40_shadow[i] = 0; |
592 | writeq(0, &tioce_mmr->ce_ure_ate40[i]); | 759 | tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); |
593 | } | 760 | } |
594 | 761 | ||
595 | for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { | 762 | for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { |
596 | tioce_kern->ce_ate3240_shadow[i] = 0; | 763 | tioce_kern->ce_ate3240_shadow[i] = 0; |
597 | writeq(0, &tioce_mmr->ce_ure_ate3240[i]); | 764 | tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); |
765 | } | ||
766 | |||
767 | /* | ||
768 | * Reserve ATE's corresponding to reserved address ranges. These | ||
769 | * include: | ||
770 | * | ||
771 | * Memory space covered by each PPB mem base/limit register | ||
772 | * Memory space covered by each PPB prefetch base/limit register | ||
773 | * | ||
774 | * These bus ranges are for pio (downstream) traffic only, and so | ||
775 | * cannot be used for DMA. | ||
776 | */ | ||
777 | |||
778 | for (dev = 1; dev <= 2; dev++) { | ||
779 | u64 base, limit; | ||
780 | |||
781 | /* mem base/limit */ | ||
782 | |||
783 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
784 | PCI_MEMORY_BASE, 2, &tmp); | ||
785 | base = (u64)tmp << 16; | ||
786 | |||
787 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
788 | PCI_MEMORY_LIMIT, 2, &tmp); | ||
789 | limit = (u64)tmp << 16; | ||
790 | limit |= 0xfffffUL; | ||
791 | |||
792 | if (base < limit) | ||
793 | tioce_reserve_m32(tioce_kern, base, limit); | ||
794 | |||
795 | /* | ||
796 | * prefetch mem base/limit. The tioce ppb's have 64-bit | ||
797 | * decoders, so read the upper portions w/o checking the | ||
798 | * attributes. | ||
799 | */ | ||
800 | |||
801 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
802 | PCI_PREF_MEMORY_BASE, 2, &tmp); | ||
803 | base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; | ||
804 | |||
805 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
806 | PCI_PREF_BASE_UPPER32, 4, &tmp); | ||
807 | base |= (u64)tmp << 32; | ||
808 | |||
809 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
810 | PCI_PREF_MEMORY_LIMIT, 2, &tmp); | ||
811 | |||
812 | limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; | ||
813 | limit |= 0xfffffUL; | ||
814 | |||
815 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
816 | PCI_PREF_LIMIT_UPPER32, 4, &tmp); | ||
817 | limit |= (u64)tmp << 32; | ||
818 | |||
819 | if ((base < limit) && TIOCE_M32_ADDR(base)) | ||
820 | tioce_reserve_m32(tioce_kern, base, limit); | ||
598 | } | 821 | } |
599 | 822 | ||
600 | return tioce_kern; | 823 | return tioce_kern; |
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
614 | { | 837 | { |
615 | struct pcidev_info *pcidev_info; | 838 | struct pcidev_info *pcidev_info; |
616 | struct tioce_common *ce_common; | 839 | struct tioce_common *ce_common; |
840 | struct tioce_kernel *ce_kern; | ||
617 | struct tioce *ce_mmr; | 841 | struct tioce *ce_mmr; |
618 | u64 force_int_val; | 842 | u64 force_int_val; |
619 | 843 | ||
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
629 | 853 | ||
630 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; | 854 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; |
631 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; | 855 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; |
856 | ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; | ||
857 | |||
858 | /* | ||
859 | * TIOCE Rev A workaround (PV 945826), force an interrupt by writing | ||
860 | * the TIO_INTx register directly (1/26/2006) | ||
861 | */ | ||
862 | if (ce_common->ce_rev == TIOCE_REV_A) { | ||
863 | u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); | ||
864 | u64 status; | ||
865 | |||
866 | tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); | ||
867 | if (status & int_bit_mask) { | ||
868 | u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; | ||
869 | u64 ctalk = sn_irq_info->irq_xtalkaddr; | ||
870 | u64 nasid, offset; | ||
871 | |||
872 | nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; | ||
873 | offset = (ctalk & CTALK_NODE_OFFSET); | ||
874 | HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); | ||
875 | } | ||
876 | |||
877 | return; | ||
878 | } | ||
632 | 879 | ||
633 | /* | 880 | /* |
634 | * irq_int_bit is originally set up by prom, and holds the interrupt | 881 | * irq_int_bit is originally set up by prom, and holds the interrupt |
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
666 | default: | 913 | default: |
667 | return; | 914 | return; |
668 | } | 915 | } |
669 | writeq(force_int_val, &ce_mmr->ce_adm_force_int); | 916 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); |
670 | } | 917 | } |
671 | 918 | ||
672 | /** | 919 | /** |
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
685 | { | 932 | { |
686 | struct pcidev_info *pcidev_info; | 933 | struct pcidev_info *pcidev_info; |
687 | struct tioce_common *ce_common; | 934 | struct tioce_common *ce_common; |
935 | struct tioce_kernel *ce_kern; | ||
688 | struct tioce *ce_mmr; | 936 | struct tioce *ce_mmr; |
689 | int bit; | 937 | int bit; |
690 | u64 vector; | 938 | u64 vector; |
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
695 | 943 | ||
696 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; | 944 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; |
697 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; | 945 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; |
946 | ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; | ||
698 | 947 | ||
699 | bit = sn_irq_info->irq_int_bit; | 948 | bit = sn_irq_info->irq_int_bit; |
700 | 949 | ||
701 | __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); | 950 | tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); |
702 | vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; | 951 | vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; |
703 | vector |= sn_irq_info->irq_xtalkaddr; | 952 | vector |= sn_irq_info->irq_xtalkaddr; |
704 | writeq(vector, &ce_mmr->ce_adm_int_dest[bit]); | 953 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); |
705 | __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); | 954 | tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); |
706 | 955 | ||
707 | tioce_force_interrupt(sn_irq_info); | 956 | tioce_force_interrupt(sn_irq_info); |
708 | } | 957 | } |
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
721 | static void * | 970 | static void * |
722 | tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) | 971 | tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) |
723 | { | 972 | { |
973 | int my_nasid; | ||
974 | cnodeid_t my_cnode, mem_cnode; | ||
724 | struct tioce_common *tioce_common; | 975 | struct tioce_common *tioce_common; |
976 | struct tioce_kernel *tioce_kern; | ||
977 | struct tioce *tioce_mmr; | ||
725 | 978 | ||
726 | /* | 979 | /* |
727 | * Allocate kernel bus soft and copy from prom. | 980 | * Allocate kernel bus soft and copy from prom. |
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont | |||
734 | memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); | 987 | memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); |
735 | tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; | 988 | tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; |
736 | 989 | ||
737 | if (tioce_kern_init(tioce_common) == NULL) { | 990 | tioce_kern = tioce_kern_init(tioce_common); |
991 | if (tioce_kern == NULL) { | ||
738 | kfree(tioce_common); | 992 | kfree(tioce_common); |
739 | return NULL; | 993 | return NULL; |
740 | } | 994 | } |
741 | 995 | ||
996 | /* | ||
997 | * Clear out any transient errors before registering the error | ||
998 | * interrupt handler. | ||
999 | */ | ||
1000 | |||
1001 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; | ||
1002 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); | ||
1003 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, | ||
1004 | ~0ULL); | ||
1005 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); | ||
1006 | |||
742 | if (request_irq(SGI_PCIASIC_ERROR, | 1007 | if (request_irq(SGI_PCIASIC_ERROR, |
743 | tioce_error_intr_handler, | 1008 | tioce_error_intr_handler, |
744 | SA_SHIRQ, "TIOCE error", (void *)tioce_common)) | 1009 | SA_SHIRQ, "TIOCE error", (void *)tioce_common)) |
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont | |||
750 | tioce_common->ce_pcibus.bs_persist_segment, | 1015 | tioce_common->ce_pcibus.bs_persist_segment, |
751 | tioce_common->ce_pcibus.bs_persist_busnum); | 1016 | tioce_common->ce_pcibus.bs_persist_busnum); |
752 | 1017 | ||
1018 | /* | ||
1019 | * identify closest nasid for memory allocations | ||
1020 | */ | ||
1021 | |||
1022 | my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base); | ||
1023 | my_cnode = nasid_to_cnodeid(my_nasid); | ||
1024 | |||
1025 | if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) { | ||
1026 | printk(KERN_WARNING "tioce_bus_fixup: failed to find " | ||
1027 | "closest node with MEM to TIO node %d\n", my_cnode); | ||
1028 | mem_cnode = (cnodeid_t)-1; /* use any node */ | ||
1029 | } | ||
1030 | |||
1031 | controller->node = mem_cnode; | ||
1032 | |||
753 | return tioce_common; | 1033 | return tioce_common; |
754 | } | 1034 | } |
755 | 1035 | ||
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 6facf15b04f3..c9e7dad860b7 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c | |||
@@ -226,7 +226,7 @@ void free_initmem(void) | |||
226 | addr = (unsigned long)(&__init_begin); | 226 | addr = (unsigned long)(&__init_begin); |
227 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 227 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
228 | ClearPageReserved(virt_to_page(addr)); | 228 | ClearPageReserved(virt_to_page(addr)); |
229 | set_page_count(virt_to_page(addr), 1); | 229 | init_page_count(virt_to_page(addr)); |
230 | free_page(addr); | 230 | free_page(addr); |
231 | totalram_pages++; | 231 | totalram_pages++; |
232 | } | 232 | } |
@@ -244,7 +244,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
244 | unsigned long p; | 244 | unsigned long p; |
245 | for (p = start; p < end; p += PAGE_SIZE) { | 245 | for (p = start; p < end; p += PAGE_SIZE) { |
246 | ClearPageReserved(virt_to_page(p)); | 246 | ClearPageReserved(virt_to_page(p)); |
247 | set_page_count(virt_to_page(p), 1); | 247 | init_page_count(virt_to_page(p)); |
248 | free_page(p); | 248 | free_page(p); |
249 | totalram_pages++; | 249 | totalram_pages++; |
250 | } | 250 | } |
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index c45beb955943..a190e39c907a 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c | |||
@@ -137,7 +137,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
137 | int pages = 0; | 137 | int pages = 0; |
138 | for (; start < end; start += PAGE_SIZE) { | 138 | for (; start < end; start += PAGE_SIZE) { |
139 | ClearPageReserved(virt_to_page(start)); | 139 | ClearPageReserved(virt_to_page(start)); |
140 | set_page_count(virt_to_page(start), 1); | 140 | init_page_count(virt_to_page(start)); |
141 | free_page(start); | 141 | free_page(start); |
142 | totalram_pages++; | 142 | totalram_pages++; |
143 | pages++; | 143 | pages++; |
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c index 559942ce0e1e..d6d582a5abb0 100644 --- a/arch/m68k/mm/memory.c +++ b/arch/m68k/mm/memory.c | |||
@@ -54,7 +54,7 @@ void __init init_pointer_table(unsigned long ptable) | |||
54 | 54 | ||
55 | /* unreserve the page so it's possible to free that page */ | 55 | /* unreserve the page so it's possible to free that page */ |
56 | PD_PAGE(dp)->flags &= ~(1 << PG_reserved); | 56 | PD_PAGE(dp)->flags &= ~(1 << PG_reserved); |
57 | set_page_count(PD_PAGE(dp), 1); | 57 | init_page_count(PD_PAGE(dp)); |
58 | 58 | ||
59 | return; | 59 | return; |
60 | } | 60 | } |
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index d855fec26317..afb57eeafdcb 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c | |||
@@ -276,7 +276,7 @@ void free_initmem(void) | |||
276 | addr = (unsigned long)&__init_begin; | 276 | addr = (unsigned long)&__init_begin; |
277 | for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) { | 277 | for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) { |
278 | virt_to_page(addr)->flags &= ~(1 << PG_reserved); | 278 | virt_to_page(addr)->flags &= ~(1 << PG_reserved); |
279 | set_page_count(virt_to_page(addr), 1); | 279 | init_page_count(virt_to_page(addr)); |
280 | free_page(addr); | 280 | free_page(addr); |
281 | totalram_pages++; | 281 | totalram_pages++; |
282 | } | 282 | } |
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c index eddb8d3e130a..d844c755945a 100644 --- a/arch/m68knommu/kernel/m68k_ksyms.c +++ b/arch/m68knommu/kernel/m68k_ksyms.c | |||
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(__ioremap); | |||
26 | EXPORT_SYMBOL(iounmap); | 26 | EXPORT_SYMBOL(iounmap); |
27 | EXPORT_SYMBOL(dump_fpu); | 27 | EXPORT_SYMBOL(dump_fpu); |
28 | EXPORT_SYMBOL(strnlen); | 28 | EXPORT_SYMBOL(strnlen); |
29 | EXPORT_SYMBOL(strpbrk); | ||
29 | EXPORT_SYMBOL(strrchr); | 30 | EXPORT_SYMBOL(strrchr); |
30 | EXPORT_SYMBOL(strstr); | 31 | EXPORT_SYMBOL(strstr); |
31 | EXPORT_SYMBOL(strchr); | 32 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index 89f0b554ffb7..d79503fe6e42 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c | |||
@@ -195,7 +195,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
195 | int pages = 0; | 195 | int pages = 0; |
196 | for (; start < end; start += PAGE_SIZE) { | 196 | for (; start < end; start += PAGE_SIZE) { |
197 | ClearPageReserved(virt_to_page(start)); | 197 | ClearPageReserved(virt_to_page(start)); |
198 | set_page_count(virt_to_page(start), 1); | 198 | init_page_count(virt_to_page(start)); |
199 | free_page(start); | 199 | free_page(start); |
200 | totalram_pages++; | 200 | totalram_pages++; |
201 | pages++; | 201 | pages++; |
@@ -218,7 +218,7 @@ free_initmem() | |||
218 | /* next to check that the page we free is not a partial page */ | 218 | /* next to check that the page we free is not a partial page */ |
219 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { | 219 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { |
220 | ClearPageReserved(virt_to_page(addr)); | 220 | ClearPageReserved(virt_to_page(addr)); |
221 | set_page_count(virt_to_page(addr), 1); | 221 | init_page_count(virt_to_page(addr)); |
222 | free_page(addr); | 222 | free_page(addr); |
223 | totalram_pages++; | 223 | totalram_pages++; |
224 | } | 224 | } |
diff --git a/arch/mips/arc/memory.c b/arch/mips/arc/memory.c index 958d2eb78862..8a9ef58cc399 100644 --- a/arch/mips/arc/memory.c +++ b/arch/mips/arc/memory.c | |||
@@ -158,7 +158,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
158 | while (addr < boot_mem_map.map[i].addr | 158 | while (addr < boot_mem_map.map[i].addr |
159 | + boot_mem_map.map[i].size) { | 159 | + boot_mem_map.map[i].size) { |
160 | ClearPageReserved(virt_to_page(__va(addr))); | 160 | ClearPageReserved(virt_to_page(__va(addr))); |
161 | set_page_count(virt_to_page(__va(addr)), 1); | 161 | init_page_count(virt_to_page(__va(addr))); |
162 | free_page((unsigned long)__va(addr)); | 162 | free_page((unsigned long)__va(addr)); |
163 | addr += PAGE_SIZE; | 163 | addr += PAGE_SIZE; |
164 | freed += PAGE_SIZE; | 164 | freed += PAGE_SIZE; |
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c index 81cb5a76cfb7..1edaf3074ee9 100644 --- a/arch/mips/dec/prom/memory.c +++ b/arch/mips/dec/prom/memory.c | |||
@@ -118,7 +118,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
118 | addr = PAGE_SIZE; | 118 | addr = PAGE_SIZE; |
119 | while (addr < end) { | 119 | while (addr < end) { |
120 | ClearPageReserved(virt_to_page(__va(addr))); | 120 | ClearPageReserved(virt_to_page(__va(addr))); |
121 | set_page_count(virt_to_page(__va(addr)), 1); | 121 | init_page_count(virt_to_page(__va(addr))); |
122 | free_page((unsigned long)__va(addr)); | 122 | free_page((unsigned long)__va(addr)); |
123 | addr += PAGE_SIZE; | 123 | addr += PAGE_SIZE; |
124 | } | 124 | } |
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c index 2c8afd77a20b..ee5e70c95cf3 100644 --- a/arch/mips/mips-boards/generic/memory.c +++ b/arch/mips/mips-boards/generic/memory.c | |||
@@ -174,7 +174,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
174 | while (addr < boot_mem_map.map[i].addr | 174 | while (addr < boot_mem_map.map[i].addr |
175 | + boot_mem_map.map[i].size) { | 175 | + boot_mem_map.map[i].size) { |
176 | ClearPageReserved(virt_to_page(__va(addr))); | 176 | ClearPageReserved(virt_to_page(__va(addr))); |
177 | set_page_count(virt_to_page(__va(addr)), 1); | 177 | init_page_count(virt_to_page(__va(addr))); |
178 | free_page((unsigned long)__va(addr)); | 178 | free_page((unsigned long)__va(addr)); |
179 | addr += PAGE_SIZE; | 179 | addr += PAGE_SIZE; |
180 | freed += PAGE_SIZE; | 180 | freed += PAGE_SIZE; |
diff --git a/arch/mips/mips-boards/sim/sim_mem.c b/arch/mips/mips-boards/sim/sim_mem.c index 0dbd7435bb2a..1ec4e75656bd 100644 --- a/arch/mips/mips-boards/sim/sim_mem.c +++ b/arch/mips/mips-boards/sim/sim_mem.c | |||
@@ -117,7 +117,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
117 | while (addr < boot_mem_map.map[i].addr | 117 | while (addr < boot_mem_map.map[i].addr |
118 | + boot_mem_map.map[i].size) { | 118 | + boot_mem_map.map[i].size) { |
119 | ClearPageReserved(virt_to_page(__va(addr))); | 119 | ClearPageReserved(virt_to_page(__va(addr))); |
120 | set_page_count(virt_to_page(__va(addr)), 1); | 120 | init_page_count(virt_to_page(__va(addr))); |
121 | free_page((unsigned long)__va(addr)); | 121 | free_page((unsigned long)__va(addr)); |
122 | addr += PAGE_SIZE; | 122 | addr += PAGE_SIZE; |
123 | freed += PAGE_SIZE; | 123 | freed += PAGE_SIZE; |
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0ff9a348b843..52f7d59fe612 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c | |||
@@ -54,7 +54,8 @@ unsigned long empty_zero_page, zero_page_mask; | |||
54 | */ | 54 | */ |
55 | unsigned long setup_zero_pages(void) | 55 | unsigned long setup_zero_pages(void) |
56 | { | 56 | { |
57 | unsigned long order, size; | 57 | unsigned int order; |
58 | unsigned long size; | ||
58 | struct page *page; | 59 | struct page *page; |
59 | 60 | ||
60 | if (cpu_has_vce) | 61 | if (cpu_has_vce) |
@@ -67,9 +68,9 @@ unsigned long setup_zero_pages(void) | |||
67 | panic("Oh boy, that early out of memory?"); | 68 | panic("Oh boy, that early out of memory?"); |
68 | 69 | ||
69 | page = virt_to_page(empty_zero_page); | 70 | page = virt_to_page(empty_zero_page); |
71 | split_page(page, order); | ||
70 | while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { | 72 | while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { |
71 | SetPageReserved(page); | 73 | SetPageReserved(page); |
72 | set_page_count(page, 1); | ||
73 | page++; | 74 | page++; |
74 | } | 75 | } |
75 | 76 | ||
@@ -244,7 +245,7 @@ void __init mem_init(void) | |||
244 | #ifdef CONFIG_LIMITED_DMA | 245 | #ifdef CONFIG_LIMITED_DMA |
245 | set_page_address(page, lowmem_page_address(page)); | 246 | set_page_address(page, lowmem_page_address(page)); |
246 | #endif | 247 | #endif |
247 | set_page_count(page, 1); | 248 | init_page_count(page); |
248 | __free_page(page); | 249 | __free_page(page); |
249 | totalhigh_pages++; | 250 | totalhigh_pages++; |
250 | } | 251 | } |
@@ -291,7 +292,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
291 | 292 | ||
292 | for (; start < end; start += PAGE_SIZE) { | 293 | for (; start < end; start += PAGE_SIZE) { |
293 | ClearPageReserved(virt_to_page(start)); | 294 | ClearPageReserved(virt_to_page(start)); |
294 | set_page_count(virt_to_page(start), 1); | 295 | init_page_count(virt_to_page(start)); |
295 | free_page(start); | 296 | free_page(start); |
296 | totalram_pages++; | 297 | totalram_pages++; |
297 | } | 298 | } |
@@ -314,7 +315,7 @@ void free_initmem(void) | |||
314 | page = addr; | 315 | page = addr; |
315 | #endif | 316 | #endif |
316 | ClearPageReserved(virt_to_page(page)); | 317 | ClearPageReserved(virt_to_page(page)); |
317 | set_page_count(virt_to_page(page), 1); | 318 | init_page_count(virt_to_page(page)); |
318 | free_page(page); | 319 | free_page(page); |
319 | totalram_pages++; | 320 | totalram_pages++; |
320 | freed += PAGE_SIZE; | 321 | freed += PAGE_SIZE; |
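setup_zero_pages() above now calls split_page() once on the head page instead of forcing set_page_count(page, 1) on every constituent page by hand. A hedged sketch of what splitting a higher-order block means for per-page reference counts (mock types, not the kernel implementation):

    /*
     * Sketch of split_page() semantics: after an order-N allocation only the
     * head page carries a reference; split_page() gives every sub-page its
     * own count of 1 so each can later be freed independently.
     */
    #include <stdio.h>

    struct page { int count; };

    static void split_page(struct page *head, unsigned int order)
    {
        unsigned long i;

        for (i = 1; i < (1UL << order); i++)
            head[i].count = 1;      /* head[0] already holds the allocation ref */
    }

    int main(void)
    {
        struct page block[8] = { { .count = 1 } };   /* order-3 block, head ref only */

        split_page(block, 3);
        for (int i = 0; i < 8; i++)
            printf("page %d count %d\n", i, block[i].count);
        return 0;
    }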
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index ed93a9792959..e0d095daa5ed 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c | |||
@@ -559,7 +559,7 @@ void __init mem_init(void) | |||
559 | /* if (!page_is_ram(pgnr)) continue; */ | 559 | /* if (!page_is_ram(pgnr)) continue; */ |
560 | /* commented out until page_is_ram works */ | 560 | /* commented out until page_is_ram works */ |
561 | ClearPageReserved(p); | 561 | ClearPageReserved(p); |
562 | set_page_count(p, 1); | 562 | init_page_count(p); |
563 | __free_page(p); | 563 | __free_page(p); |
564 | totalram_pages++; | 564 | totalram_pages++; |
565 | } | 565 | } |
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 7847ca13d6c2..852eda3953dc 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
@@ -398,7 +398,7 @@ void free_initmem(void) | |||
398 | addr = (unsigned long)(&__init_begin); | 398 | addr = (unsigned long)(&__init_begin); |
399 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 399 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
400 | ClearPageReserved(virt_to_page(addr)); | 400 | ClearPageReserved(virt_to_page(addr)); |
401 | set_page_count(virt_to_page(addr), 1); | 401 | init_page_count(virt_to_page(addr)); |
402 | free_page(addr); | 402 | free_page(addr); |
403 | num_physpages++; | 403 | num_physpages++; |
404 | totalram_pages++; | 404 | totalram_pages++; |
@@ -1018,7 +1018,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
1018 | printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 1018 | printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
1019 | for (; start < end; start += PAGE_SIZE) { | 1019 | for (; start < end; start += PAGE_SIZE) { |
1020 | ClearPageReserved(virt_to_page(start)); | 1020 | ClearPageReserved(virt_to_page(start)); |
1021 | set_page_count(virt_to_page(start), 1); | 1021 | init_page_count(virt_to_page(start)); |
1022 | free_page(start); | 1022 | free_page(start); |
1023 | num_physpages++; | 1023 | num_physpages++; |
1024 | totalram_pages++; | 1024 | totalram_pages++; |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b51bb28c054b..7370f9f33e29 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -133,21 +133,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
133 | return __pte(old); | 133 | return __pte(old); |
134 | } | 134 | } |
135 | 135 | ||
136 | /* | ||
137 | * This function checks for proper alignment of input addr and len parameters. | ||
138 | */ | ||
139 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
140 | { | ||
141 | if (len & ~HPAGE_MASK) | ||
142 | return -EINVAL; | ||
143 | if (addr & ~HPAGE_MASK) | ||
144 | return -EINVAL; | ||
145 | if (! (within_hugepage_low_range(addr, len) | ||
146 | || within_hugepage_high_range(addr, len)) ) | ||
147 | return -EINVAL; | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | struct slb_flush_info { | 136 | struct slb_flush_info { |
152 | struct mm_struct *mm; | 137 | struct mm_struct *mm; |
153 | u16 newareas; | 138 | u16 newareas; |
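Several architectures (powerpc here, sh, sh64 and sparc64 further down) drop their private copy of is_aligned_hugepage_range(); presumably a single generic version takes over. The removed helper is essentially a mask-alignment test; reproduced below as buildable C for reference, with HPAGE_SHIFT chosen only for illustration (the powerpc copy additionally checked the low/high hugepage address ranges, which the sketch omits).

    #include <errno.h>
    #include <stdio.h>

    #define HPAGE_SHIFT 22                      /* assumed: 4 MiB huge pages */
    #define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
    #define HPAGE_MASK  (~(HPAGE_SIZE - 1))

    static int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
    {
        if (len & ~HPAGE_MASK)
            return -EINVAL;
        if (addr & ~HPAGE_MASK)
            return -EINVAL;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", is_aligned_hugepage_range(0x400000, 0x800000));  /* 0: aligned */
        printf("%d\n", is_aligned_hugepage_range(0x401000, 0x800000));  /* -EINVAL */
        return 0;
    }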
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 7d0d75c11848..b57fb3a2b7bb 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c | |||
@@ -216,7 +216,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name) | |||
216 | 216 | ||
217 | while (start < end) { | 217 | while (start < end) { |
218 | ClearPageReserved(virt_to_page(start)); | 218 | ClearPageReserved(virt_to_page(start)); |
219 | set_page_count(virt_to_page(start), 1); | 219 | init_page_count(virt_to_page(start)); |
220 | free_page(start); | 220 | free_page(start); |
221 | cnt++; | 221 | cnt++; |
222 | start += PAGE_SIZE; | 222 | start += PAGE_SIZE; |
@@ -248,7 +248,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
248 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 248 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
249 | for (; start < end; start += PAGE_SIZE) { | 249 | for (; start < end; start += PAGE_SIZE) { |
250 | ClearPageReserved(virt_to_page(start)); | 250 | ClearPageReserved(virt_to_page(start)); |
251 | set_page_count(virt_to_page(start), 1); | 251 | init_page_count(virt_to_page(start)); |
252 | free_page(start); | 252 | free_page(start); |
253 | totalram_pages++; | 253 | totalram_pages++; |
254 | } | 254 | } |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 81cfb0c2ec58..bacb71c89811 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -140,7 +140,7 @@ void free_initmem(void) | |||
140 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { | 140 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { |
141 | memset((void *)addr, 0xcc, PAGE_SIZE); | 141 | memset((void *)addr, 0xcc, PAGE_SIZE); |
142 | ClearPageReserved(virt_to_page(addr)); | 142 | ClearPageReserved(virt_to_page(addr)); |
143 | set_page_count(virt_to_page(addr), 1); | 143 | init_page_count(virt_to_page(addr)); |
144 | free_page(addr); | 144 | free_page(addr); |
145 | totalram_pages++; | 145 | totalram_pages++; |
146 | } | 146 | } |
@@ -155,7 +155,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
155 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 155 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
156 | for (; start < end; start += PAGE_SIZE) { | 156 | for (; start < end; start += PAGE_SIZE) { |
157 | ClearPageReserved(virt_to_page(start)); | 157 | ClearPageReserved(virt_to_page(start)); |
158 | set_page_count(virt_to_page(start), 1); | 158 | init_page_count(virt_to_page(start)); |
159 | free_page(start); | 159 | free_page(start); |
160 | totalram_pages++; | 160 | totalram_pages++; |
161 | } | 161 | } |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 550517c2dd42..454cac01d8cc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
@@ -108,8 +108,8 @@ EXPORT_SYMBOL(phys_mem_access_prot); | |||
108 | void online_page(struct page *page) | 108 | void online_page(struct page *page) |
109 | { | 109 | { |
110 | ClearPageReserved(page); | 110 | ClearPageReserved(page); |
111 | set_page_count(page, 0); | 111 | init_page_count(page); |
112 | free_cold_page(page); | 112 | __free_page(page); |
113 | totalram_pages++; | 113 | totalram_pages++; |
114 | num_physpages++; | 114 | num_physpages++; |
115 | } | 115 | } |
@@ -376,7 +376,7 @@ void __init mem_init(void) | |||
376 | struct page *page = pfn_to_page(pfn); | 376 | struct page *page = pfn_to_page(pfn); |
377 | 377 | ||
378 | ClearPageReserved(page); | 378 | ClearPageReserved(page); |
379 | set_page_count(page, 1); | 379 | init_page_count(page); |
380 | __free_page(page); | 380 | __free_page(page); |
381 | totalhigh_pages++; | 381 | totalhigh_pages++; |
382 | } | 382 | } |
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index b33a4443f5a9..fec8e65b36ea 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c | |||
@@ -115,7 +115,7 @@ static void __init cell_spuprop_present(struct device_node *spe, | |||
115 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | 115 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
116 | struct page *page = pfn_to_page(pfn); | 116 | struct page *page = pfn_to_page(pfn); |
117 | set_page_links(page, ZONE_DMA, node_id, pfn); | 117 | set_page_links(page, ZONE_DMA, node_id, pfn); |
118 | set_page_count(page, 1); | 118 | init_page_count(page); |
119 | reset_page_mapcount(page); | 119 | reset_page_mapcount(page); |
120 | SetPageReserved(page); | 120 | SetPageReserved(page); |
121 | INIT_LIST_HEAD(&page->lru); | 121 | INIT_LIST_HEAD(&page->lru); |
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index 685fd0defe23..61465ec88bc7 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c | |||
@@ -223,6 +223,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
223 | pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); | 223 | pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); |
224 | struct page *end = page + (1 << order); | 224 | struct page *end = page + (1 << order); |
225 | 225 | ||
226 | split_page(page, order); | ||
227 | |||
226 | /* | 228 | /* |
227 | * Set the "dma handle" | 229 | * Set the "dma handle" |
228 | */ | 230 | */ |
@@ -231,7 +233,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
231 | do { | 233 | do { |
232 | BUG_ON(!pte_none(*pte)); | 234 | BUG_ON(!pte_none(*pte)); |
233 | 235 | ||
234 | set_page_count(page, 1); | ||
235 | SetPageReserved(page); | 236 | SetPageReserved(page); |
236 | set_pte_at(&init_mm, vaddr, | 237 | set_pte_at(&init_mm, vaddr, |
237 | pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); | 238 | pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); |
@@ -244,7 +245,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
244 | * Free the otherwise unused pages. | 245 | * Free the otherwise unused pages. |
245 | */ | 246 | */ |
246 | while (page < end) { | 247 | while (page < end) { |
247 | set_page_count(page, 1); | ||
248 | __free_page(page); | 248 | __free_page(page); |
249 | page++; | 249 | page++; |
250 | } | 250 | } |
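__dma_alloc_coherent() now splits the higher-order allocation up front, so the explicit set_page_count(page, 1) calls on both the mapped pages and the freed tail can go away. A condensed user-space sketch of the resulting "allocate big, split, keep the prefix, free the tail" shape, with stand-in helpers:

    #include <stdio.h>
    #include <stdlib.h>

    struct page { int count; };

    static void split_page(struct page *head, unsigned int order)
    {
        for (unsigned long i = 1; i < (1UL << order); i++)
            head[i].count = 1;              /* head[0] already has the ref */
    }

    int main(void)
    {
        unsigned int order = 3;             /* 8 pages allocated... */
        unsigned long needed = 5;           /* ...but only 5 are kept/mapped */
        struct page *pages = calloc(1UL << order, sizeof(*pages));

        if (!pages)
            return 1;
        pages[0].count = 1;                 /* alloc_pages() reference */
        split_page(pages, order);

        for (unsigned long i = needed; i < (1UL << order); i++) {
            pages[i].count--;               /* __free_page() on each unused tail page */
            printf("freed tail page %lu (count %d)\n", i, pages[i].count);
        }
        free(pages);
        return 0;
    }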
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 134db5c04203..cb1c294fb932 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c | |||
@@ -140,7 +140,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name) | |||
140 | 140 | ||
141 | while (start < end) { | 141 | while (start < end) { |
142 | ClearPageReserved(virt_to_page(start)); | 142 | ClearPageReserved(virt_to_page(start)); |
143 | set_page_count(virt_to_page(start), 1); | 143 | init_page_count(virt_to_page(start)); |
144 | free_page(start); | 144 | free_page(start); |
145 | cnt++; | 145 | cnt++; |
146 | start += PAGE_SIZE; | 146 | start += PAGE_SIZE; |
@@ -172,7 +172,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
172 | 172 | ||
173 | for (; start < end; start += PAGE_SIZE) { | 173 | for (; start < end; start += PAGE_SIZE) { |
174 | ClearPageReserved(virt_to_page(start)); | 174 | ClearPageReserved(virt_to_page(start)); |
175 | set_page_count(virt_to_page(start), 1); | 175 | init_page_count(virt_to_page(start)); |
176 | free_page(start); | 176 | free_page(start); |
177 | totalram_pages++; | 177 | totalram_pages++; |
178 | } | 178 | } |
@@ -441,7 +441,7 @@ void __init mem_init(void) | |||
441 | struct page *page = mem_map + pfn; | 441 | struct page *page = mem_map + pfn; |
442 | 442 | ||
443 | ClearPageReserved(page); | 443 | ClearPageReserved(page); |
444 | set_page_count(page, 1); | 444 | init_page_count(page); |
445 | __free_page(page); | 445 | __free_page(page); |
446 | totalhigh_pages++; | 446 | totalhigh_pages++; |
447 | } | 447 | } |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index df953383724d..a055894f3bd8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -292,7 +292,7 @@ void free_initmem(void) | |||
292 | addr = (unsigned long)(&__init_begin); | 292 | addr = (unsigned long)(&__init_begin); |
293 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 293 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
294 | ClearPageReserved(virt_to_page(addr)); | 294 | ClearPageReserved(virt_to_page(addr)); |
295 | set_page_count(virt_to_page(addr), 1); | 295 | init_page_count(virt_to_page(addr)); |
296 | free_page(addr); | 296 | free_page(addr); |
297 | totalram_pages++; | 297 | totalram_pages++; |
298 | } | 298 | } |
@@ -307,7 +307,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
307 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 307 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
308 | for (; start < end; start += PAGE_SIZE) { | 308 | for (; start < end; start += PAGE_SIZE) { |
309 | ClearPageReserved(virt_to_page(start)); | 309 | ClearPageReserved(virt_to_page(start)); |
310 | set_page_count(virt_to_page(start), 1); | 310 | init_page_count(virt_to_page(start)); |
311 | free_page(start); | 311 | free_page(start); |
312 | totalram_pages++; | 312 | totalram_pages++; |
313 | } | 313 | } |
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index df3a9e452cc5..ee73e30263af 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c | |||
@@ -23,6 +23,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle) | |||
23 | page = alloc_pages(gfp, order); | 23 | page = alloc_pages(gfp, order); |
24 | if (!page) | 24 | if (!page) |
25 | return NULL; | 25 | return NULL; |
26 | split_page(page, order); | ||
26 | 27 | ||
27 | ret = page_address(page); | 28 | ret = page_address(page); |
28 | *handle = virt_to_phys(ret); | 29 | *handle = virt_to_phys(ret); |
@@ -37,8 +38,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle) | |||
37 | end = page + (1 << order); | 38 | end = page + (1 << order); |
38 | 39 | ||
39 | while (++page < end) { | 40 | while (++page < end) { |
40 | set_page_count(page, 1); | ||
41 | |||
42 | /* Free any unused pages */ | 41 | /* Free any unused pages */ |
43 | if (page >= free) { | 42 | if (page >= free) { |
44 | __free_page(page); | 43 | __free_page(page); |
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 6b7a7688c98e..a3568fd51508 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c | |||
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
84 | return entry; | 84 | return entry; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* | ||
88 | * This function checks for proper alignment of input addr and len parameters. | ||
89 | */ | ||
90 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
91 | { | ||
92 | if (len & ~HPAGE_MASK) | ||
93 | return -EINVAL; | ||
94 | if (addr & ~HPAGE_MASK) | ||
95 | return -EINVAL; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | struct page *follow_huge_addr(struct mm_struct *mm, | 87 | struct page *follow_huge_addr(struct mm_struct *mm, |
100 | unsigned long address, int write) | 88 | unsigned long address, int write) |
101 | { | 89 | { |
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index e342565f75fb..77b4a838fe10 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c | |||
@@ -273,7 +273,7 @@ void free_initmem(void) | |||
273 | addr = (unsigned long)(&__init_begin); | 273 | addr = (unsigned long)(&__init_begin); |
274 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 274 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
275 | ClearPageReserved(virt_to_page(addr)); | 275 | ClearPageReserved(virt_to_page(addr)); |
276 | set_page_count(virt_to_page(addr), 1); | 276 | init_page_count(virt_to_page(addr)); |
277 | free_page(addr); | 277 | free_page(addr); |
278 | totalram_pages++; | 278 | totalram_pages++; |
279 | } | 279 | } |
@@ -286,7 +286,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
286 | unsigned long p; | 286 | unsigned long p; |
287 | for (p = start; p < end; p += PAGE_SIZE) { | 287 | for (p = start; p < end; p += PAGE_SIZE) { |
288 | ClearPageReserved(virt_to_page(p)); | 288 | ClearPageReserved(virt_to_page(p)); |
289 | set_page_count(virt_to_page(p), 1); | 289 | init_page_count(virt_to_page(p)); |
290 | free_page(p); | 290 | free_page(p); |
291 | totalram_pages++; | 291 | totalram_pages++; |
292 | } | 292 | } |
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index ed6a505b3ee2..3d89f2a6c785 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c | |||
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
84 | return entry; | 84 | return entry; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* | ||
88 | * This function checks for proper alignment of input addr and len parameters. | ||
89 | */ | ||
90 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
91 | { | ||
92 | if (len & ~HPAGE_MASK) | ||
93 | return -EINVAL; | ||
94 | if (addr & ~HPAGE_MASK) | ||
95 | return -EINVAL; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | struct page *follow_huge_addr(struct mm_struct *mm, | 87 | struct page *follow_huge_addr(struct mm_struct *mm, |
100 | unsigned long address, int write) | 88 | unsigned long address, int write) |
101 | { | 89 | { |
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c index a65e8bb2c3cc..1169757fb38b 100644 --- a/arch/sh64/mm/init.c +++ b/arch/sh64/mm/init.c | |||
@@ -173,7 +173,7 @@ void free_initmem(void) | |||
173 | addr = (unsigned long)(&__init_begin); | 173 | addr = (unsigned long)(&__init_begin); |
174 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 174 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
175 | ClearPageReserved(virt_to_page(addr)); | 175 | ClearPageReserved(virt_to_page(addr)); |
176 | set_page_count(virt_to_page(addr), 1); | 176 | init_page_count(virt_to_page(addr)); |
177 | free_page(addr); | 177 | free_page(addr); |
178 | totalram_pages++; | 178 | totalram_pages++; |
179 | } | 179 | } |
@@ -186,7 +186,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
186 | unsigned long p; | 186 | unsigned long p; |
187 | for (p = start; p < end; p += PAGE_SIZE) { | 187 | for (p = start; p < end; p += PAGE_SIZE) { |
188 | ClearPageReserved(virt_to_page(p)); | 188 | ClearPageReserved(virt_to_page(p)); |
189 | set_page_count(virt_to_page(p), 1); | 189 | init_page_count(virt_to_page(p)); |
190 | free_page(p); | 190 | free_page(p); |
191 | totalram_pages++; | 191 | totalram_pages++; |
192 | } | 192 | } |
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 40d426cce824..4219dd2ce3a2 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c | |||
@@ -266,19 +266,19 @@ void __init smp4d_boot_cpus(void) | |||
266 | 266 | ||
267 | /* Free unneeded trap tables */ | 267 | /* Free unneeded trap tables */ |
268 | ClearPageReserved(virt_to_page(trapbase_cpu1)); | 268 | ClearPageReserved(virt_to_page(trapbase_cpu1)); |
269 | set_page_count(virt_to_page(trapbase_cpu1), 1); | 269 | init_page_count(virt_to_page(trapbase_cpu1)); |
270 | free_page((unsigned long)trapbase_cpu1); | 270 | free_page((unsigned long)trapbase_cpu1); |
271 | totalram_pages++; | 271 | totalram_pages++; |
272 | num_physpages++; | 272 | num_physpages++; |
273 | 273 | ||
274 | ClearPageReserved(virt_to_page(trapbase_cpu2)); | 274 | ClearPageReserved(virt_to_page(trapbase_cpu2)); |
275 | set_page_count(virt_to_page(trapbase_cpu2), 1); | 275 | init_page_count(virt_to_page(trapbase_cpu2)); |
276 | free_page((unsigned long)trapbase_cpu2); | 276 | free_page((unsigned long)trapbase_cpu2); |
277 | totalram_pages++; | 277 | totalram_pages++; |
278 | num_physpages++; | 278 | num_physpages++; |
279 | 279 | ||
280 | ClearPageReserved(virt_to_page(trapbase_cpu3)); | 280 | ClearPageReserved(virt_to_page(trapbase_cpu3)); |
281 | set_page_count(virt_to_page(trapbase_cpu3), 1); | 281 | init_page_count(virt_to_page(trapbase_cpu3)); |
282 | free_page((unsigned long)trapbase_cpu3); | 282 | free_page((unsigned long)trapbase_cpu3); |
283 | totalram_pages++; | 283 | totalram_pages++; |
284 | num_physpages++; | 284 | num_physpages++; |
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index a21f27d10e55..fbbd8a474c4c 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c | |||
@@ -233,21 +233,21 @@ void __init smp4m_boot_cpus(void) | |||
233 | /* Free unneeded trap tables */ | 233 | /* Free unneeded trap tables */ |
234 | if (!cpu_isset(i, cpu_present_map)) { | 234 | if (!cpu_isset(i, cpu_present_map)) { |
235 | ClearPageReserved(virt_to_page(trapbase_cpu1)); | 235 | ClearPageReserved(virt_to_page(trapbase_cpu1)); |
236 | set_page_count(virt_to_page(trapbase_cpu1), 1); | 236 | init_page_count(virt_to_page(trapbase_cpu1)); |
237 | free_page((unsigned long)trapbase_cpu1); | 237 | free_page((unsigned long)trapbase_cpu1); |
238 | totalram_pages++; | 238 | totalram_pages++; |
239 | num_physpages++; | 239 | num_physpages++; |
240 | } | 240 | } |
241 | if (!cpu_isset(2, cpu_present_map)) { | 241 | if (!cpu_isset(2, cpu_present_map)) { |
242 | ClearPageReserved(virt_to_page(trapbase_cpu2)); | 242 | ClearPageReserved(virt_to_page(trapbase_cpu2)); |
243 | set_page_count(virt_to_page(trapbase_cpu2), 1); | 243 | init_page_count(virt_to_page(trapbase_cpu2)); |
244 | free_page((unsigned long)trapbase_cpu2); | 244 | free_page((unsigned long)trapbase_cpu2); |
245 | totalram_pages++; | 245 | totalram_pages++; |
246 | num_physpages++; | 246 | num_physpages++; |
247 | } | 247 | } |
248 | if (!cpu_isset(3, cpu_present_map)) { | 248 | if (!cpu_isset(3, cpu_present_map)) { |
249 | ClearPageReserved(virt_to_page(trapbase_cpu3)); | 249 | ClearPageReserved(virt_to_page(trapbase_cpu3)); |
250 | set_page_count(virt_to_page(trapbase_cpu3), 1); | 250 | init_page_count(virt_to_page(trapbase_cpu3)); |
251 | free_page((unsigned long)trapbase_cpu3); | 251 | free_page((unsigned long)trapbase_cpu3); |
252 | totalram_pages++; | 252 | totalram_pages++; |
253 | num_physpages++; | 253 | num_physpages++; |
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index c03babaa0498..898669732466 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c | |||
@@ -383,7 +383,7 @@ void map_high_region(unsigned long start_pfn, unsigned long end_pfn) | |||
383 | struct page *page = pfn_to_page(tmp); | 383 | struct page *page = pfn_to_page(tmp); |
384 | 384 | ||
385 | ClearPageReserved(page); | 385 | ClearPageReserved(page); |
386 | set_page_count(page, 1); | 386 | init_page_count(page); |
387 | __free_page(page); | 387 | __free_page(page); |
388 | totalhigh_pages++; | 388 | totalhigh_pages++; |
389 | } | 389 | } |
@@ -480,7 +480,7 @@ void free_initmem (void) | |||
480 | p = virt_to_page(addr); | 480 | p = virt_to_page(addr); |
481 | 481 | ||
482 | ClearPageReserved(p); | 482 | ClearPageReserved(p); |
483 | set_page_count(p, 1); | 483 | init_page_count(p); |
484 | __free_page(p); | 484 | __free_page(p); |
485 | totalram_pages++; | 485 | totalram_pages++; |
486 | num_physpages++; | 486 | num_physpages++; |
@@ -497,7 +497,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
497 | struct page *p = virt_to_page(start); | 497 | struct page *p = virt_to_page(start); |
498 | 498 | ||
499 | ClearPageReserved(p); | 499 | ClearPageReserved(p); |
500 | set_page_count(p, 1); | 500 | init_page_count(p); |
501 | __free_page(p); | 501 | __free_page(p); |
502 | num_physpages++; | 502 | num_physpages++; |
503 | } | 503 | } |
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index a7a24869d045..280dc7958a13 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c | |||
@@ -263,18 +263,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
263 | return entry; | 263 | return entry; |
264 | } | 264 | } |
265 | 265 | ||
266 | /* | ||
267 | * This function checks for proper alignment of input addr and len parameters. | ||
268 | */ | ||
269 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
270 | { | ||
271 | if (len & ~HPAGE_MASK) | ||
272 | return -EINVAL; | ||
273 | if (addr & ~HPAGE_MASK) | ||
274 | return -EINVAL; | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | struct page *follow_huge_addr(struct mm_struct *mm, | 266 | struct page *follow_huge_addr(struct mm_struct *mm, |
279 | unsigned long address, int write) | 267 | unsigned long address, int write) |
280 | { | 268 | { |
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index c2b556106fc1..2ae143ba50d8 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c | |||
@@ -1461,7 +1461,7 @@ void free_initmem(void) | |||
1461 | p = virt_to_page(page); | 1461 | p = virt_to_page(page); |
1462 | 1462 | ||
1463 | ClearPageReserved(p); | 1463 | ClearPageReserved(p); |
1464 | set_page_count(p, 1); | 1464 | init_page_count(p); |
1465 | __free_page(p); | 1465 | __free_page(p); |
1466 | num_physpages++; | 1466 | num_physpages++; |
1467 | totalram_pages++; | 1467 | totalram_pages++; |
@@ -1477,7 +1477,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
1477 | struct page *p = virt_to_page(start); | 1477 | struct page *p = virt_to_page(start); |
1478 | 1478 | ||
1479 | ClearPageReserved(p); | 1479 | ClearPageReserved(p); |
1480 | set_page_count(p, 1); | 1480 | init_page_count(p); |
1481 | __free_page(p); | 1481 | __free_page(p); |
1482 | num_physpages++; | 1482 | num_physpages++; |
1483 | totalram_pages++; | 1483 | totalram_pages++; |
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index fa4f915be5c5..92cce96b5e24 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c | |||
@@ -57,7 +57,7 @@ static void setup_highmem(unsigned long highmem_start, | |||
57 | for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ | 57 | for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ |
58 | page = &mem_map[highmem_pfn + i]; | 58 | page = &mem_map[highmem_pfn + i]; |
59 | ClearPageReserved(page); | 59 | ClearPageReserved(page); |
60 | set_page_count(page, 1); | 60 | init_page_count(page); |
61 | __free_page(page); | 61 | __free_page(page); |
62 | } | 62 | } |
63 | } | 63 | } |
@@ -296,7 +296,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
296 | (end - start) >> 10); | 296 | (end - start) >> 10); |
297 | for (; start < end; start += PAGE_SIZE) { | 297 | for (; start < end; start += PAGE_SIZE) { |
298 | ClearPageReserved(virt_to_page(start)); | 298 | ClearPageReserved(virt_to_page(start)); |
299 | set_page_count(virt_to_page(start), 1); | 299 | init_page_count(virt_to_page(start)); |
300 | free_page(start); | 300 | free_page(start); |
301 | totalram_pages++; | 301 | totalram_pages++; |
302 | } | 302 | } |
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 544665e04513..0e65340eee33 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c | |||
@@ -279,7 +279,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) | |||
279 | 279 | ||
280 | for(i = 0; i < total_pages; i++){ | 280 | for(i = 0; i < total_pages; i++){ |
281 | p = &map[i]; | 281 | p = &map[i]; |
282 | set_page_count(p, 0); | 282 | memset(p, 0, sizeof(struct page)); |
283 | SetPageReserved(p); | 283 | SetPageReserved(p); |
284 | INIT_LIST_HEAD(&p->lru); | 284 | INIT_LIST_HEAD(&p->lru); |
285 | } | 285 | } |
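init_maps() used to clear only the count of each freshly created page descriptor; it now memset()s the whole struct page before marking it reserved, so flags, mapping and list pointers all start from a known state. A tiny mock of the difference (stand-in struct, not the kernel one):

    #include <string.h>
    #include <stdio.h>

    struct page { int count; unsigned long flags; void *mapping; };

    #define PG_reserved 1UL

    int main(void)
    {
        struct page p = { .count = 7, .flags = 0xff, .mapping = &p };  /* stale contents */

        memset(&p, 0, sizeof(p));       /* was: set_page_count(&p, 0) only */
        p.flags |= PG_reserved;         /* SetPageReserved() equivalent */

        printf("count=%d flags=%#lx mapping=%p\n", p.count, p.flags, p.mapping);
        return 0;
    }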
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3080f84bf7b7..ee5ce3d3cbc3 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -477,7 +477,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
477 | return IRQ_HANDLED; | 477 | return IRQ_HANDLED; |
478 | } | 478 | } |
479 | 479 | ||
480 | static unsigned int cyc2ns_scale; | 480 | static unsigned int cyc2ns_scale __read_mostly; |
481 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 481 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
482 | 482 | ||
483 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 483 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
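cyc2ns_scale gains the __read_mostly annotation, which groups rarely written variables into their own data section so they do not share cache lines with write-hot data. A compilable sketch of how such an annotation is typically defined; the section name and macro shape are assumptions for illustration, not copied from the kernel headers.

    #include <stdio.h>

    #define __read_mostly __attribute__((__section__(".data.read_mostly")))

    static unsigned int cyc2ns_scale __read_mostly;

    int main(void)
    {
        cyc2ns_scale = 1234;            /* written rarely (at calibration time)... */
        printf("%u\n", cyc2ns_scale);   /* ...read on every timestamp conversion */
        return 0;
    }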
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 3496abc8d372..c9dc7e46731e 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -124,6 +124,7 @@ extern void * __memcpy(void *,const void *,__kernel_size_t); | |||
124 | 124 | ||
125 | EXPORT_SYMBOL(memset); | 125 | EXPORT_SYMBOL(memset); |
126 | EXPORT_SYMBOL(strlen); | 126 | EXPORT_SYMBOL(strlen); |
127 | EXPORT_SYMBOL(strpbrk); | ||
127 | EXPORT_SYMBOL(memmove); | 128 | EXPORT_SYMBOL(memmove); |
128 | EXPORT_SYMBOL(memcpy); | 129 | EXPORT_SYMBOL(memcpy); |
129 | EXPORT_SYMBOL(__memcpy); | 130 | EXPORT_SYMBOL(__memcpy); |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 7af1742aa958..40ed13d263cd 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -486,7 +486,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
486 | void online_page(struct page *page) | 486 | void online_page(struct page *page) |
487 | { | 487 | { |
488 | ClearPageReserved(page); | 488 | ClearPageReserved(page); |
489 | set_page_count(page, 1); | 489 | init_page_count(page); |
490 | __free_page(page); | 490 | __free_page(page); |
491 | totalram_pages++; | 491 | totalram_pages++; |
492 | num_physpages++; | 492 | num_physpages++; |
@@ -592,7 +592,7 @@ void free_initmem(void) | |||
592 | addr = (unsigned long)(&__init_begin); | 592 | addr = (unsigned long)(&__init_begin); |
593 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 593 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
594 | ClearPageReserved(virt_to_page(addr)); | 594 | ClearPageReserved(virt_to_page(addr)); |
595 | set_page_count(virt_to_page(addr), 1); | 595 | init_page_count(virt_to_page(addr)); |
596 | memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); | 596 | memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); |
597 | free_page(addr); | 597 | free_page(addr); |
598 | totalram_pages++; | 598 | totalram_pages++; |
@@ -632,7 +632,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
633 | for (; start < end; start += PAGE_SIZE) { | 633 | for (; start < end; start += PAGE_SIZE) { |
634 | ClearPageReserved(virt_to_page(start)); | 634 | ClearPageReserved(virt_to_page(start)); |
635 | set_page_count(virt_to_page(start), 1); | 635 | init_page_count(virt_to_page(start)); |
636 | free_page(start); | 636 | free_page(start); |
637 | totalram_pages++; | 637 | totalram_pages++; |
638 | } | 638 | } |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 35f1f1aab063..531ad21447b1 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
45 | pte_t *pbase; | 45 | pte_t *pbase; |
46 | if (!base) | 46 | if (!base) |
47 | return NULL; | 47 | return NULL; |
48 | /* | ||
49 | * page_private is used to track the number of entries in | ||
50 | * the page table page have non standard attributes. | ||
51 | */ | ||
52 | SetPagePrivate(base); | ||
53 | page_private(base) = 0; | ||
54 | |||
48 | address = __pa(address); | 55 | address = __pa(address); |
49 | addr = address & LARGE_PAGE_MASK; | 56 | addr = address & LARGE_PAGE_MASK; |
50 | pbase = (pte_t *)page_address(base); | 57 | pbase = (pte_t *)page_address(base); |
@@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address) | |||
77 | on_each_cpu(flush_kernel_map, (void *)address, 1, 1); | 84 | on_each_cpu(flush_kernel_map, (void *)address, 1, 1); |
78 | } | 85 | } |
79 | 86 | ||
80 | struct deferred_page { | 87 | static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ |
81 | struct deferred_page *next; | ||
82 | struct page *fpage; | ||
83 | unsigned long address; | ||
84 | }; | ||
85 | static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ | ||
86 | 88 | ||
87 | static inline void save_page(unsigned long address, struct page *fpage) | 89 | static inline void save_page(struct page *fpage) |
88 | { | 90 | { |
89 | struct deferred_page *df; | 91 | fpage->lru.next = (struct list_head *)deferred_pages; |
90 | df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); | 92 | deferred_pages = fpage; |
91 | if (!df) { | ||
92 | flush_map(address); | ||
93 | __free_page(fpage); | ||
94 | } else { | ||
95 | df->next = df_list; | ||
96 | df->fpage = fpage; | ||
97 | df->address = address; | ||
98 | df_list = df; | ||
99 | } | ||
100 | } | 93 | } |
101 | 94 | ||
102 | /* | 95 | /* |
@@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
138 | set_pte(kpte, pfn_pte(pfn, prot)); | 131 | set_pte(kpte, pfn_pte(pfn, prot)); |
139 | } else { | 132 | } else { |
140 | /* | 133 | /* |
141 | * split_large_page will take the reference for this change_page_attr | 134 | * split_large_page will take the reference for this |
142 | * on the split page. | 135 | * change_page_attr on the split page. |
143 | */ | 136 | */ |
144 | 137 | ||
145 | struct page *split; | 138 | struct page *split; |
@@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
151 | set_pte(kpte,mk_pte(split, ref_prot2)); | 144 | set_pte(kpte,mk_pte(split, ref_prot2)); |
152 | kpte_page = split; | 145 | kpte_page = split; |
153 | } | 146 | } |
154 | get_page(kpte_page); | 147 | page_private(kpte_page)++; |
155 | } else if ((kpte_flags & _PAGE_PSE) == 0) { | 148 | } else if ((kpte_flags & _PAGE_PSE) == 0) { |
156 | set_pte(kpte, pfn_pte(pfn, ref_prot)); | 149 | set_pte(kpte, pfn_pte(pfn, ref_prot)); |
157 | __put_page(kpte_page); | 150 | BUG_ON(page_private(kpte_page) == 0); |
151 | page_private(kpte_page)--; | ||
158 | } else | 152 | } else |
159 | BUG(); | 153 | BUG(); |
160 | 154 | ||
161 | /* on x86-64 the direct mapping set at boot is not using 4k pages */ | 155 | /* on x86-64 the direct mapping set at boot is not using 4k pages */ |
162 | BUG_ON(PageReserved(kpte_page)); | 156 | BUG_ON(PageReserved(kpte_page)); |
163 | 157 | ||
164 | switch (page_count(kpte_page)) { | 158 | if (page_private(kpte_page) == 0) { |
165 | case 1: | 159 | save_page(kpte_page); |
166 | save_page(address, kpte_page); | ||
167 | revert_page(address, ref_prot); | 160 | revert_page(address, ref_prot); |
168 | break; | ||
169 | case 0: | ||
170 | BUG(); /* memleak and failed 2M page regeneration */ | ||
171 | } | 161 | } |
172 | return 0; | 162 | return 0; |
173 | } | 163 | } |
@@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) | |||
220 | 210 | ||
221 | void global_flush_tlb(void) | 211 | void global_flush_tlb(void) |
222 | { | 212 | { |
223 | struct deferred_page *df, *next_df; | 213 | struct page *dpage; |
224 | 214 | ||
225 | down_read(&init_mm.mmap_sem); | 215 | down_read(&init_mm.mmap_sem); |
226 | df = xchg(&df_list, NULL); | 216 | dpage = xchg(&deferred_pages, NULL); |
227 | up_read(&init_mm.mmap_sem); | 217 | up_read(&init_mm.mmap_sem); |
228 | flush_map((df && !df->next) ? df->address : 0); | 218 | |
229 | for (; df; df = next_df) { | 219 | flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); |
230 | next_df = df->next; | 220 | while (dpage) { |
231 | if (df->fpage) | 221 | struct page *tmp = dpage; |
232 | __free_page(df->fpage); | 222 | dpage = (struct page *)dpage->lru.next; |
233 | kfree(df); | 223 | ClearPagePrivate(tmp); |
224 | __free_page(tmp); | ||
234 | } | 225 | } |
235 | } | 226 | } |
236 | 227 | ||
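Two things happen in this pageattr.c rework: the number of PTEs with non-standard protections in a split page-table page is now tracked in page_private() instead of being folded into the page's reference count, and the deferred-free list no longer needs a separately kmalloc'd node because the struct page itself is chained through its otherwise unused lru.next pointer. A hedged mock of that intrusive chaining (types and helpers are stand-ins):

    #include <stdio.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    struct page {
        struct list_head lru;       /* unused while the page is off the LRU */
        unsigned long private;      /* # of PTEs with non-standard prot (page_private()) */
        int id;                     /* illustration only */
    };

    static struct page *deferred_pages;     /* would be protected by init_mm.mmap_sem */

    static void save_page(struct page *fpage)
    {
        fpage->lru.next = (struct list_head *)deferred_pages;
        deferred_pages = fpage;
    }

    static void global_flush_tlb_mock(void)
    {
        struct page *dpage = deferred_pages;

        deferred_pages = NULL;
        while (dpage) {
            struct page *tmp = dpage;

            dpage = (struct page *)dpage->lru.next;
            printf("freeing deferred page %d\n", tmp->id);  /* __free_page() here */
        }
    }

    int main(void)
    {
        struct page a = { .id = 1 }, b = { .id = 2 };

        save_page(&a);
        save_page(&b);
        global_flush_tlb_mock();
        return 0;
    }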
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 5a91d6c9e66d..e1be4235f367 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c | |||
@@ -272,7 +272,7 @@ free_reserved_mem(void *start, void *end) | |||
272 | { | 272 | { |
273 | for (; start < end; start += PAGE_SIZE) { | 273 | for (; start < end; start += PAGE_SIZE) { |
274 | ClearPageReserved(virt_to_page(start)); | 274 | ClearPageReserved(virt_to_page(start)); |
275 | set_page_count(virt_to_page(start), 1); | 275 | init_page_count(virt_to_page(start)); |
276 | free_page((unsigned long)start); | 276 | free_page((unsigned long)start); |
277 | totalram_pages++; | 277 | totalram_pages++; |
278 | } | 278 | } |
diff --git a/arch/xtensa/mm/pgtable.c b/arch/xtensa/mm/pgtable.c index e5e119c820e4..7d28914d11cb 100644 --- a/arch/xtensa/mm/pgtable.c +++ b/arch/xtensa/mm/pgtable.c | |||
@@ -14,25 +14,21 @@ | |||
14 | 14 | ||
15 | pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 15 | pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
16 | { | 16 | { |
17 | pte_t *pte, p; | 17 | pte_t *pte = NULL, *p; |
18 | int color = ADDR_COLOR(address); | 18 | int color = ADDR_COLOR(address); |
19 | int i; | 19 | int i; |
20 | 20 | ||
21 | p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER); | 21 | p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER); |
22 | 22 | ||
23 | if (likely(p)) { | 23 | if (likely(p)) { |
24 | struct page *page; | 24 | split_page(virt_to_page(p), COLOR_ORDER); |
25 | |||
26 | for (i = 0; i < COLOR_SIZE; i++, p++) { | ||
27 | page = virt_to_page(pte); | ||
28 | |||
29 | set_page_count(page, 1); | ||
30 | ClearPageCompound(page); | ||
31 | 25 | ||
26 | for (i = 0; i < COLOR_SIZE; i++) { | ||
32 | if (ADDR_COLOR(p) == color) | 27 | if (ADDR_COLOR(p) == color) |
33 | pte = p; | 28 | pte = p; |
34 | else | 29 | else |
35 | free_page(p); | 30 | free_page(p); |
31 | p += PTRS_PER_PTE; | ||
36 | } | 32 | } |
37 | clear_page(pte); | 33 | clear_page(pte); |
38 | } | 34 | } |
@@ -49,20 +45,20 @@ int flush; | |||
49 | 45 | ||
50 | struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address) | 46 | struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address) |
51 | { | 47 | { |
52 | struct page *page, p; | 48 | struct page *page = NULL, *p; |
53 | int color = ADDR_COLOR(address); | 49 | int color = ADDR_COLOR(address); |
54 | 50 | ||
55 | p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); | 51 | p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); |
56 | 52 | ||
57 | if (likely(p)) { | 53 | if (likely(p)) { |
58 | for (i = 0; i < PAGE_ORDER; i++) { | 54 | split_page(p, COLOR_ORDER); |
59 | set_page_count(p, 1); | ||
60 | ClearPageCompound(p); | ||
61 | 55 | ||
62 | if (PADDR_COLOR(page_address(pg)) == color) | 56 | for (i = 0; i < PAGE_ORDER; i++) { |
57 | if (PADDR_COLOR(page_address(p)) == color) | ||
63 | page = p; | 58 | page = p; |
64 | else | 59 | else |
65 | free_page(p); | 60 | __free_page(p); |
61 | p++; | ||
66 | } | 62 | } |
67 | clear_highpage(page); | 63 | clear_highpage(page); |
68 | } | 64 | } |
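The xtensa PTE allocators now grab 2^COLOR_ORDER candidate pages, split_page() them, keep the one whose address has the requested cache color and free the rest; the rewrite also repairs the old loop, which looked up virt_to_page(pte) before pte had ever been assigned. A user-space sketch of picking a page by color, with COLOR_ORDER and ADDR_COLOR() as illustrative stand-ins:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define PAGE_SHIFT   12
    #define COLOR_ORDER  2                               /* 4 candidate pages */
    #define COLOR_SIZE   (1 << COLOR_ORDER)
    #define ADDR_COLOR(a) (((uintptr_t)(a) >> PAGE_SHIFT) & (COLOR_SIZE - 1))

    int main(void)
    {
        void *block, *pte = NULL;
        int color = 2;                                   /* wanted color */

        /* stand-in for __get_free_pages(..., COLOR_ORDER) */
        block = aligned_alloc(COLOR_SIZE << PAGE_SHIFT, COLOR_SIZE << PAGE_SHIFT);
        if (!block)
            return 1;

        for (int i = 0; i < COLOR_SIZE; i++) {
            char *p = (char *)block + ((size_t)i << PAGE_SHIFT);

            if (ADDR_COLOR(p) == color)
                pte = p;                                 /* keep this page */
            else
                printf("would free page of color %lu\n",
                       (unsigned long)ADDR_COLOR(p));
        }
        printf("kept page %p with color %lu\n", pte, (unsigned long)ADDR_COLOR(pte));
        free(block);
        return 0;
    }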
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h index a9efc13cc858..8a98169b60c1 100644 --- a/drivers/char/snsc.h +++ b/drivers/char/snsc.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * License. See the file "COPYING" in the main directory of this archive | 5 | * License. See the file "COPYING" in the main directory of this archive |
6 | * for more details. | 6 | * for more details. |
7 | * | 7 | * |
8 | * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
@@ -70,6 +70,9 @@ struct sysctl_data_s { | |||
70 | #define EV_CLASS_TEST_WARNING 0x6000ul | 70 | #define EV_CLASS_TEST_WARNING 0x6000ul |
71 | #define EV_CLASS_PWRD_NOTIFY 0x8000ul | 71 | #define EV_CLASS_PWRD_NOTIFY 0x8000ul |
72 | 72 | ||
73 | /* ENV class codes */ | ||
74 | #define ENV_PWRDN_PEND 0x4101ul | ||
75 | |||
73 | #define EV_SEVERITY_POWER_STABLE 0x0000ul | 76 | #define EV_SEVERITY_POWER_STABLE 0x0000ul |
74 | #define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul | 77 | #define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul |
75 | #define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul | 78 | #define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul |
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index baaa365285fa..a4fa507eed9e 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * License. See the file "COPYING" in the main directory of this archive | 5 | * License. See the file "COPYING" in the main directory of this archive |
6 | * for more details. | 6 | * for more details. |
7 | * | 7 | * |
8 | * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
@@ -187,7 +187,8 @@ scdrv_event_severity(int code) | |||
187 | static void | 187 | static void |
188 | scdrv_dispatch_event(char *event, int len) | 188 | scdrv_dispatch_event(char *event, int len) |
189 | { | 189 | { |
190 | int code, esp_code, src; | 190 | static int snsc_shutting_down = 0; |
191 | int code, esp_code, src, class; | ||
191 | char desc[CHUNKSIZE]; | 192 | char desc[CHUNKSIZE]; |
192 | char *severity; | 193 | char *severity; |
193 | 194 | ||
@@ -199,9 +200,25 @@ scdrv_dispatch_event(char *event, int len) | |||
199 | /* how urgent is the message? */ | 200 | /* how urgent is the message? */ |
200 | severity = scdrv_event_severity(code); | 201 | severity = scdrv_event_severity(code); |
201 | 202 | ||
202 | if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) { | 203 | class = (code & EV_CLASS_MASK); |
204 | |||
205 | if (class == EV_CLASS_PWRD_NOTIFY || code == ENV_PWRDN_PEND) { | ||
203 | struct task_struct *p; | 206 | struct task_struct *p; |
204 | 207 | ||
208 | if (snsc_shutting_down) | ||
209 | return; | ||
210 | |||
211 | snsc_shutting_down = 1; | ||
212 | |||
213 | /* give a message for each type of event */ | ||
214 | if (class == EV_CLASS_PWRD_NOTIFY) | ||
215 | printk(KERN_NOTICE "Power off indication received." | ||
216 | " Sending SIGPWR to init...\n"); | ||
217 | else if (code == ENV_PWRDN_PEND) | ||
218 | printk(KERN_CRIT "WARNING: Shutting down the system" | ||
219 | " due to a critical environmental condition." | ||
220 | " Sending SIGPWR to init...\n"); | ||
221 | |||
205 | /* give a SIGPWR signal to init proc */ | 222 | /* give a SIGPWR signal to init proc */ |
206 | 223 | ||
207 | /* first find init's task */ | 224 | /* first find init's task */ |
@@ -210,12 +227,11 @@ scdrv_dispatch_event(char *event, int len) | |||
210 | if (p->pid == 1) | 227 | if (p->pid == 1) |
211 | break; | 228 | break; |
212 | } | 229 | } |
213 | if (p) { /* we found init's task */ | 230 | if (p) { |
214 | printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n"); | ||
215 | force_sig(SIGPWR, p); | 231 | force_sig(SIGPWR, p); |
216 | } else { /* failed to find init's task - just give message(s) */ | 232 | } else { |
217 | printk(KERN_WARNING "Failed to find init proc to handle power off!\n"); | 233 | printk(KERN_ERR "Failed to signal init!\n"); |
218 | printk("%s|$(0x%x)%s\n", severity, esp_code, desc); | 234 | snsc_shutting_down = 0; /* so can try again (?) */ |
219 | } | 235 | } |
220 | read_unlock(&tasklist_lock); | 236 | read_unlock(&tasklist_lock); |
221 | } else { | 237 | } else { |
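The dispatch path above now treats a critical environmental event (ENV_PWRDN_PEND) the same way as an explicit power-down notification, latches a snsc_shutting_down flag so only the first qualifying event acts, and signals init with SIGPWR. A small user-space analogue of that "latch once, then signal pid 1" logic; this is an illustration, not the driver code, and the kill() only succeeds with sufficient privileges.

    #include <signal.h>
    #include <stdio.h>
    #include <errno.h>
    #include <string.h>

    static int snsc_shutting_down;

    static void dispatch_power_event(const char *reason)
    {
        if (snsc_shutting_down)
            return;
        snsc_shutting_down = 1;

        printf("%s: sending SIGPWR to init...\n", reason);
        if (kill(1, SIGPWR) < 0) {
            fprintf(stderr, "failed to signal init: %s\n", strerror(errno));
            snsc_shutting_down = 0;     /* so a later event can retry */
        }
    }

    int main(void)
    {
        dispatch_power_event("power off indication");
        dispatch_power_event("critical environmental condition");  /* ignored */
        return 0;
    }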
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c index ac2a297ce37c..a80c83210872 100644 --- a/drivers/char/tb0219.c +++ b/drivers/char/tb0219.c | |||
@@ -283,7 +283,7 @@ static void tb0219_pci_irq_init(void) | |||
283 | vr41xx_set_irq_level(TB0219_PCI_SLOT3_PIN, IRQ_LEVEL_LOW); | 283 | vr41xx_set_irq_level(TB0219_PCI_SLOT3_PIN, IRQ_LEVEL_LOW); |
284 | } | 284 | } |
285 | 285 | ||
286 | static int tb0219_probe(struct platform_device *dev) | 286 | static int __devinit tb0219_probe(struct platform_device *dev) |
287 | { | 287 | { |
288 | int retval; | 288 | int retval; |
289 | 289 | ||
@@ -319,7 +319,7 @@ static int tb0219_probe(struct platform_device *dev) | |||
319 | return 0; | 319 | return 0; |
320 | } | 320 | } |
321 | 321 | ||
322 | static int tb0219_remove(struct platform_device *dev) | 322 | static int __devexit tb0219_remove(struct platform_device *dev) |
323 | { | 323 | { |
324 | _machine_restart = old_machine_restart; | 324 | _machine_restart = old_machine_restart; |
325 | 325 | ||
@@ -335,19 +335,26 @@ static struct platform_device *tb0219_platform_device; | |||
335 | 335 | ||
336 | static struct platform_driver tb0219_device_driver = { | 336 | static struct platform_driver tb0219_device_driver = { |
337 | .probe = tb0219_probe, | 337 | .probe = tb0219_probe, |
338 | .remove = tb0219_remove, | 338 | .remove = __devexit_p(tb0219_remove), |
339 | .driver = { | 339 | .driver = { |
340 | .name = "TB0219", | 340 | .name = "TB0219", |
341 | .owner = THIS_MODULE, | ||
341 | }, | 342 | }, |
342 | }; | 343 | }; |
343 | 344 | ||
344 | static int __devinit tanbac_tb0219_init(void) | 345 | static int __init tanbac_tb0219_init(void) |
345 | { | 346 | { |
346 | int retval; | 347 | int retval; |
347 | 348 | ||
348 | tb0219_platform_device = platform_device_register_simple("TB0219", -1, NULL, 0); | 349 | tb0219_platform_device = platform_device_alloc("TB0219", -1); |
349 | if (IS_ERR(tb0219_platform_device)) | 350 | if (!tb0219_platform_device) |
350 | return PTR_ERR(tb0219_platform_device); | 351 | return -ENOMEM; |
352 | |||
353 | retval = platform_device_add(tb0219_platform_device); | ||
354 | if (retval < 0) { | ||
355 | platform_device_put(tb0219_platform_device); | ||
356 | return retval; | ||
357 | } | ||
351 | 358 | ||
352 | retval = platform_driver_register(&tb0219_device_driver); | 359 | retval = platform_driver_register(&tb0219_device_driver); |
353 | if (retval < 0) | 360 | if (retval < 0) |
@@ -356,10 +363,9 @@ static int __devinit tanbac_tb0219_init(void) | |||
356 | return retval; | 363 | return retval; |
357 | } | 364 | } |
358 | 365 | ||
359 | static void __devexit tanbac_tb0219_exit(void) | 366 | static void __exit tanbac_tb0219_exit(void) |
360 | { | 367 | { |
361 | platform_driver_unregister(&tb0219_device_driver); | 368 | platform_driver_unregister(&tb0219_device_driver); |
362 | |||
363 | platform_device_unregister(tb0219_platform_device); | 369 | platform_device_unregister(tb0219_platform_device); |
364 | } | 370 | } |
365 | 371 | ||
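The init path moves from platform_device_register_simple() to the two-step platform_device_alloc() plus platform_device_add(), with platform_device_put() dropping the reference when the add fails; the same conversion repeats in vr41xx_giu.c and vr41xx_rtc.c below. A mock of that create/add/put error flow, with stand-in functions rather than the real platform-device API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    struct pdev_mock { const char *name; int registered; };

    static struct pdev_mock *pdev_alloc(const char *name)
    {
        struct pdev_mock *pd = calloc(1, sizeof(*pd));

        if (pd)
            pd->name = name;
        return pd;
    }

    static int pdev_add(struct pdev_mock *pd, int fail)
    {
        if (fail)
            return -ENODEV;
        pd->registered = 1;
        return 0;
    }

    static void pdev_put(struct pdev_mock *pd)
    {
        free(pd);                   /* drop the reference taken by pdev_alloc() */
    }

    static int driver_init(int simulate_add_failure)
    {
        struct pdev_mock *pd = pdev_alloc("TB0219");
        int retval;

        if (!pd)
            return -ENOMEM;

        retval = pdev_add(pd, simulate_add_failure);
        if (retval < 0) {
            pdev_put(pd);           /* undo the alloc on failure */
            return retval;
        }
        /* on success the device stays registered for the module's lifetime */
        printf("%s registered\n", pd->name);
        return 0;
    }

    int main(void)
    {
        printf("add ok:   %d\n", driver_init(0));
        printf("add fail: %d\n", driver_init(1));
        return 0;
    }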
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c index 2267c7b81799..05e6e814d86f 100644 --- a/drivers/char/vr41xx_giu.c +++ b/drivers/char/vr41xx_giu.c | |||
@@ -613,7 +613,7 @@ static struct file_operations gpio_fops = { | |||
613 | .release = gpio_release, | 613 | .release = gpio_release, |
614 | }; | 614 | }; |
615 | 615 | ||
616 | static int giu_probe(struct platform_device *dev) | 616 | static int __devinit giu_probe(struct platform_device *dev) |
617 | { | 617 | { |
618 | unsigned long start, size, flags = 0; | 618 | unsigned long start, size, flags = 0; |
619 | unsigned int nr_pins = 0; | 619 | unsigned int nr_pins = 0; |
@@ -697,7 +697,7 @@ static int giu_probe(struct platform_device *dev) | |||
697 | return cascade_irq(GIUINT_IRQ, giu_get_irq); | 697 | return cascade_irq(GIUINT_IRQ, giu_get_irq); |
698 | } | 698 | } |
699 | 699 | ||
700 | static int giu_remove(struct platform_device *dev) | 700 | static int __devexit giu_remove(struct platform_device *dev) |
701 | { | 701 | { |
702 | iounmap(giu_base); | 702 | iounmap(giu_base); |
703 | 703 | ||
@@ -712,9 +712,10 @@ static struct platform_device *giu_platform_device; | |||
712 | 712 | ||
713 | static struct platform_driver giu_device_driver = { | 713 | static struct platform_driver giu_device_driver = { |
714 | .probe = giu_probe, | 714 | .probe = giu_probe, |
715 | .remove = giu_remove, | 715 | .remove = __devexit_p(giu_remove), |
716 | .driver = { | 716 | .driver = { |
717 | .name = "GIU", | 717 | .name = "GIU", |
718 | .owner = THIS_MODULE, | ||
718 | }, | 719 | }, |
719 | }; | 720 | }; |
720 | 721 | ||
@@ -722,9 +723,15 @@ static int __init vr41xx_giu_init(void) | |||
722 | { | 723 | { |
723 | int retval; | 724 | int retval; |
724 | 725 | ||
725 | giu_platform_device = platform_device_register_simple("GIU", -1, NULL, 0); | 726 | giu_platform_device = platform_device_alloc("GIU", -1); |
726 | if (IS_ERR(giu_platform_device)) | 727 | if (!giu_platform_device) |
727 | return PTR_ERR(giu_platform_device); | 728 | return -ENOMEM; |
729 | |||
730 | retval = platform_device_add(giu_platform_device); | ||
731 | if (retval < 0) { | ||
732 | platform_device_put(giu_platform_device); | ||
733 | return retval; | ||
734 | } | ||
728 | 735 | ||
729 | retval = platform_driver_register(&giu_device_driver); | 736 | retval = platform_driver_register(&giu_device_driver); |
730 | if (retval < 0) | 737 | if (retval < 0) |
diff --git a/drivers/char/vr41xx_rtc.c b/drivers/char/vr41xx_rtc.c index bc1b4a15212c..b109d9a502d6 100644 --- a/drivers/char/vr41xx_rtc.c +++ b/drivers/char/vr41xx_rtc.c | |||
@@ -558,7 +558,7 @@ static struct miscdevice rtc_miscdevice = { | |||
558 | .fops = &rtc_fops, | 558 | .fops = &rtc_fops, |
559 | }; | 559 | }; |
560 | 560 | ||
561 | static int rtc_probe(struct platform_device *pdev) | 561 | static int __devinit rtc_probe(struct platform_device *pdev) |
562 | { | 562 | { |
563 | unsigned int irq; | 563 | unsigned int irq; |
564 | int retval; | 564 | int retval; |
@@ -631,7 +631,7 @@ static int rtc_probe(struct platform_device *pdev) | |||
631 | return 0; | 631 | return 0; |
632 | } | 632 | } |
633 | 633 | ||
634 | static int rtc_remove(struct platform_device *dev) | 634 | static int __devexit rtc_remove(struct platform_device *dev) |
635 | { | 635 | { |
636 | int retval; | 636 | int retval; |
637 | 637 | ||
@@ -653,13 +653,14 @@ static struct platform_device *rtc_platform_device; | |||
653 | 653 | ||
654 | static struct platform_driver rtc_device_driver = { | 654 | static struct platform_driver rtc_device_driver = { |
655 | .probe = rtc_probe, | 655 | .probe = rtc_probe, |
656 | .remove = rtc_remove, | 656 | .remove = __devexit_p(rtc_remove), |
657 | .driver = { | 657 | .driver = { |
658 | .name = rtc_name, | 658 | .name = rtc_name, |
659 | .owner = THIS_MODULE, | ||
659 | }, | 660 | }, |
660 | }; | 661 | }; |
661 | 662 | ||
662 | static int __devinit vr41xx_rtc_init(void) | 663 | static int __init vr41xx_rtc_init(void) |
663 | { | 664 | { |
664 | int retval; | 665 | int retval; |
665 | 666 | ||
@@ -684,10 +685,20 @@ static int __devinit vr41xx_rtc_init(void) | |||
684 | break; | 685 | break; |
685 | } | 686 | } |
686 | 687 | ||
687 | rtc_platform_device = platform_device_register_simple("RTC", -1, | 688 | rtc_platform_device = platform_device_alloc("RTC", -1); |
688 | rtc_resource, ARRAY_SIZE(rtc_resource)); | 689 | if (!rtc_platform_device) |
689 | if (IS_ERR(rtc_platform_device)) | 690 | return -ENOMEM; |
690 | return PTR_ERR(rtc_platform_device); | 691 | |
692 | retval = platform_device_add_resources(rtc_platform_device, | ||
693 | rtc_resource, ARRAY_SIZE(rtc_resource)); | ||
694 | |||
695 | if (retval == 0) | ||
696 | retval = platform_device_add(rtc_platform_device); | ||
697 | |||
698 | if (retval < 0) { | ||
699 | platform_device_put(rtc_platform_device); | ||
700 | return retval; | ||
701 | } | ||
691 | 702 | ||
692 | retval = platform_driver_register(&rtc_device_driver); | 703 | retval = platform_driver_register(&rtc_device_driver); |
693 | if (retval < 0) | 704 | if (retval < 0) |
@@ -696,10 +707,9 @@ static int __devinit vr41xx_rtc_init(void) | |||
696 | return retval; | 707 | return retval; |
697 | } | 708 | } |
698 | 709 | ||
699 | static void __devexit vr41xx_rtc_exit(void) | 710 | static void __exit vr41xx_rtc_exit(void) |
700 | { | 711 | { |
701 | platform_driver_unregister(&rtc_device_driver); | 712 | platform_driver_unregister(&rtc_device_driver); |
702 | |||
703 | platform_device_unregister(rtc_platform_device); | 713 | platform_device_unregister(rtc_platform_device); |
704 | } | 714 | } |
705 | 715 | ||
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c index 00d9ef04a369..f1b9cf89f153 100644 --- a/drivers/char/watchdog/mv64x60_wdt.c +++ b/drivers/char/watchdog/mv64x60_wdt.c | |||
@@ -228,15 +228,25 @@ static int __init mv64x60_wdt_init(void) | |||
228 | 228 | ||
229 | printk(KERN_INFO "MV64x60 watchdog driver\n"); | 229 | printk(KERN_INFO "MV64x60 watchdog driver\n"); |
230 | 230 | ||
231 | mv64x60_wdt_dev = platform_device_register_simple(MV64x60_WDT_NAME, | 231 | mv64x60_wdt_dev = platform_device_alloc(MV64x60_WDT_NAME, -1); |
232 | -1, NULL, 0); | 232 | if (!mv64x60_wdt_dev) { |
233 | if (IS_ERR(mv64x60_wdt_dev)) { | 233 | ret = -ENOMEM; |
234 | ret = PTR_ERR(mv64x60_wdt_dev); | 234 | goto out; |
235 | } | ||
236 | |||
237 | ret = platform_device_add(mv64x60_wdt_dev); | ||
238 | if (ret) { | ||
239 | platform_device_put(mv64x60_wdt_dev); | ||
235 | goto out; | 240 | goto out; |
236 | } | 241 | } |
237 | 242 | ||
238 | ret = platform_driver_register(&mv64x60_wdt_driver); | 243 | ret = platform_driver_register(&mv64x60_wdt_driver); |
239 | out: | 244 | if (ret) { |
245 | platform_device_unregister(mv64x60_wdt_dev); | ||
246 | goto out; | ||
247 | } | ||
248 | |||
249 | out: | ||
240 | return ret; | 250 | return ret; |
241 | } | 251 | } |
242 | 252 | ||
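The two conversions above (vr41xx_rtc and mv64x60_wdt) drop platform_device_register_simple() in favour of the alloc/add sequence: resources can be attached before the device is added, and the error paths become explicit -- platform_device_put() for a device that was allocated but never added, platform_device_unregister() for one that was. Below is a minimal sketch of that pattern against the same 2.6-era platform API; the "example" names and the register window are hypothetical, not taken from either driver.

#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static struct resource example_resources[] = {
	{
		.start	= 0x0b0000c0,		/* hypothetical register window */
		.end	= 0x0b0000df,
		.flags	= IORESOURCE_MEM,
	},
};

static struct platform_device *example_pdev;

static int __devinit example_probe(struct platform_device *pdev)
{
	return 0;	/* map registers, request the IRQ, ... */
}

static int __devexit example_remove(struct platform_device *pdev)
{
	return 0;
}

static struct platform_driver example_driver = {
	.probe	= example_probe,
	.remove	= __devexit_p(example_remove),
	.driver	= {
		.name	= "example",
		.owner	= THIS_MODULE,
	},
};

static int __init example_init(void)
{
	int retval;

	example_pdev = platform_device_alloc("example", -1);
	if (!example_pdev)
		return -ENOMEM;		/* alloc returns NULL, not an ERR_PTR */

	retval = platform_device_add_resources(example_pdev, example_resources,
					       ARRAY_SIZE(example_resources));
	if (retval == 0)
		retval = platform_device_add(example_pdev);

	if (retval < 0) {
		platform_device_put(example_pdev);	/* never added: drop the reference */
		return retval;
	}

	retval = platform_driver_register(&example_driver);
	if (retval < 0)
		platform_device_unregister(example_pdev);	/* added: full unregister */

	return retval;
}

static void __exit example_exit(void)
{
	platform_driver_unregister(&example_driver);
	platform_device_unregister(example_pdev);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");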
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 4652512f7d1a..3a4e5c5b4e1f 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c | |||
@@ -530,30 +530,27 @@ static DCDBAS_DEV_ATTR_RW(host_control_action); | |||
530 | static DCDBAS_DEV_ATTR_RW(host_control_smi_type); | 530 | static DCDBAS_DEV_ATTR_RW(host_control_smi_type); |
531 | static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown); | 531 | static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown); |
532 | 532 | ||
533 | static struct device_attribute *dcdbas_dev_attrs[] = { | 533 | static struct attribute *dcdbas_dev_attrs[] = { |
534 | &dev_attr_smi_data_buf_size, | 534 | &dev_attr_smi_data_buf_size.attr, |
535 | &dev_attr_smi_data_buf_phys_addr, | 535 | &dev_attr_smi_data_buf_phys_addr.attr, |
536 | &dev_attr_smi_request, | 536 | &dev_attr_smi_request.attr, |
537 | &dev_attr_host_control_action, | 537 | &dev_attr_host_control_action.attr, |
538 | &dev_attr_host_control_smi_type, | 538 | &dev_attr_host_control_smi_type.attr, |
539 | &dev_attr_host_control_on_shutdown, | 539 | &dev_attr_host_control_on_shutdown.attr, |
540 | NULL | 540 | NULL |
541 | }; | 541 | }; |
542 | 542 | ||
543 | /** | 543 | static struct attribute_group dcdbas_attr_group = { |
544 | * dcdbas_init: initialize driver | 544 | .attrs = dcdbas_dev_attrs, |
545 | */ | 545 | }; |
546 | static int __init dcdbas_init(void) | 546 | |
547 | static int __devinit dcdbas_probe(struct platform_device *dev) | ||
547 | { | 548 | { |
548 | int i; | 549 | int i, error; |
549 | 550 | ||
550 | host_control_action = HC_ACTION_NONE; | 551 | host_control_action = HC_ACTION_NONE; |
551 | host_control_smi_type = HC_SMITYPE_NONE; | 552 | host_control_smi_type = HC_SMITYPE_NONE; |
552 | 553 | ||
553 | dcdbas_pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); | ||
554 | if (IS_ERR(dcdbas_pdev)) | ||
555 | return PTR_ERR(dcdbas_pdev); | ||
556 | |||
557 | /* | 554 | /* |
558 | * BIOS SMI calls require buffer addresses be in 32-bit address space. | 555 | * BIOS SMI calls require buffer addresses be in 32-bit address space. |
559 | * This is done by setting the DMA mask below. | 556 | * This is done by setting the DMA mask below. |
@@ -561,19 +558,79 @@ static int __init dcdbas_init(void) | |||
561 | dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK; | 558 | dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK; |
562 | dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask; | 559 | dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask; |
563 | 560 | ||
561 | error = sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group); | ||
562 | if (error) | ||
563 | return error; | ||
564 | |||
565 | for (i = 0; dcdbas_bin_attrs[i]; i++) { | ||
566 | error = sysfs_create_bin_file(&dev->dev.kobj, | ||
567 | dcdbas_bin_attrs[i]); | ||
568 | if (error) { | ||
569 | while (--i >= 0) | ||
570 | sysfs_remove_bin_file(&dev->dev.kobj, | ||
571 | dcdbas_bin_attrs[i]); | ||
572 | sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group); | ||
573 | return error; | ||
574 | } | ||
575 | } | ||
576 | |||
564 | register_reboot_notifier(&dcdbas_reboot_nb); | 577 | register_reboot_notifier(&dcdbas_reboot_nb); |
565 | 578 | ||
579 | dev_info(&dev->dev, "%s (version %s)\n", | ||
580 | DRIVER_DESCRIPTION, DRIVER_VERSION); | ||
581 | |||
582 | return 0; | ||
583 | } | ||
584 | |||
585 | static int __devexit dcdbas_remove(struct platform_device *dev) | ||
586 | { | ||
587 | int i; | ||
588 | |||
589 | unregister_reboot_notifier(&dcdbas_reboot_nb); | ||
566 | for (i = 0; dcdbas_bin_attrs[i]; i++) | 590 | for (i = 0; dcdbas_bin_attrs[i]; i++) |
567 | sysfs_create_bin_file(&dcdbas_pdev->dev.kobj, | 591 | sysfs_remove_bin_file(&dev->dev.kobj, dcdbas_bin_attrs[i]); |
568 | dcdbas_bin_attrs[i]); | 592 | sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group); |
569 | 593 | ||
570 | for (i = 0; dcdbas_dev_attrs[i]; i++) | 594 | return 0; |
571 | device_create_file(&dcdbas_pdev->dev, dcdbas_dev_attrs[i]); | 595 | } |
572 | 596 | ||
573 | dev_info(&dcdbas_pdev->dev, "%s (version %s)\n", | 597 | static struct platform_driver dcdbas_driver = { |
574 | DRIVER_DESCRIPTION, DRIVER_VERSION); | 598 | .driver = { |
599 | .name = DRIVER_NAME, | ||
600 | .owner = THIS_MODULE, | ||
601 | }, | ||
602 | .probe = dcdbas_probe, | ||
603 | .remove = __devexit_p(dcdbas_remove), | ||
604 | }; | ||
605 | |||
606 | /** | ||
607 | * dcdbas_init: initialize driver | ||
608 | */ | ||
609 | static int __init dcdbas_init(void) | ||
610 | { | ||
611 | int error; | ||
612 | |||
613 | error = platform_driver_register(&dcdbas_driver); | ||
614 | if (error) | ||
615 | return error; | ||
616 | |||
617 | dcdbas_pdev = platform_device_alloc(DRIVER_NAME, -1); | ||
618 | if (!dcdbas_pdev) { | ||
619 | error = -ENOMEM; | ||
620 | goto err_unregister_driver; | ||
621 | } | ||
622 | |||
623 | error = platform_device_add(dcdbas_pdev); | ||
624 | if (error) | ||
625 | goto err_free_device; | ||
575 | 626 | ||
576 | return 0; | 627 | return 0; |
628 | |||
629 | err_free_device: | ||
630 | platform_device_put(dcdbas_pdev); | ||
631 | err_unregister_driver: | ||
632 | platform_driver_unregister(&dcdbas_driver); | ||
633 | return error; | ||
577 | } | 634 | } |
578 | 635 | ||
579 | /** | 636 | /** |
@@ -588,6 +645,15 @@ static void __exit dcdbas_exit(void) | |||
588 | unregister_reboot_notifier(&dcdbas_reboot_nb); | 645 | unregister_reboot_notifier(&dcdbas_reboot_nb); |
589 | smi_data_buf_free(); | 646 | smi_data_buf_free(); |
590 | platform_device_unregister(dcdbas_pdev); | 647 | platform_device_unregister(dcdbas_pdev); |
648 | platform_driver_unregister(&dcdbas_driver); | ||
649 | |||
650 | /* | ||
651 | * We have to free the buffer here instead of dcdbas_remove | ||
652 | * because only in module exit function we can be sure that | ||
653 | * all sysfs attributes belonging to this module have been | ||
654 | * released. | ||
655 | */ | ||
656 | smi_data_buf_free(); | ||
591 | } | 657 | } |
592 | 658 | ||
593 | module_init(dcdbas_init); | 659 | module_init(dcdbas_init); |
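The dcdbas rework above replaces the open-coded device_create_file() loop with an attribute_group created from the new probe() routine, and keeps the SMI buffer free in module exit because, per the patch comment, only there is it certain that all of the module's sysfs attributes have been released. A minimal sketch of the attribute-group half of that pattern, using the same 2.6-era driver-model calls; the attribute name and contents are purely illustrative, not dcdbas's:

#include <linux/device.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/stat.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "example\n");
}

static DEVICE_ATTR(example, S_IRUGO, example_show, NULL);

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,	/* the group takes struct attribute, hence .attr */
	NULL,			/* the array must be NULL-terminated */
};

static struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static int __devinit example_probe(struct platform_device *dev)
{
	/* one call creates every file in the group (or none, on failure) */
	return sysfs_create_group(&dev->dev.kobj, &example_attr_group);
}

static int __devexit example_remove(struct platform_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &example_attr_group);
	return 0;
}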
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 88d60202b9db..26b08ee425c7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -533,30 +533,35 @@ static void __clone_and_map(struct clone_info *ci) | |||
533 | 533 | ||
534 | } else { | 534 | } else { |
535 | /* | 535 | /* |
536 | * Create two copy bios to deal with io that has | 536 | * Handle a bvec that must be split between two or more targets. |
537 | * been split across a target. | ||
538 | */ | 537 | */ |
539 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; | 538 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; |
539 | sector_t remaining = to_sector(bv->bv_len); | ||
540 | unsigned int offset = 0; | ||
540 | 541 | ||
541 | clone = split_bvec(bio, ci->sector, ci->idx, | 542 | do { |
542 | bv->bv_offset, max); | 543 | if (offset) { |
543 | __map_bio(ti, clone, tio); | 544 | ti = dm_table_find_target(ci->map, ci->sector); |
545 | max = max_io_len(ci->md, ci->sector, ti); | ||
544 | 546 | ||
545 | ci->sector += max; | 547 | tio = alloc_tio(ci->md); |
546 | ci->sector_count -= max; | 548 | tio->io = ci->io; |
547 | ti = dm_table_find_target(ci->map, ci->sector); | 549 | tio->ti = ti; |
548 | 550 | memset(&tio->info, 0, sizeof(tio->info)); | |
549 | len = to_sector(bv->bv_len) - max; | 551 | } |
550 | clone = split_bvec(bio, ci->sector, ci->idx, | 552 | |
551 | bv->bv_offset + to_bytes(max), len); | 553 | len = min(remaining, max); |
552 | tio = alloc_tio(ci->md); | 554 | |
553 | tio->io = ci->io; | 555 | clone = split_bvec(bio, ci->sector, ci->idx, |
554 | tio->ti = ti; | 556 | bv->bv_offset + offset, len); |
555 | memset(&tio->info, 0, sizeof(tio->info)); | 557 | |
556 | __map_bio(ti, clone, tio); | 558 | __map_bio(ti, clone, tio); |
559 | |||
560 | ci->sector += len; | ||
561 | ci->sector_count -= len; | ||
562 | offset += to_bytes(len); | ||
563 | } while (remaining -= len); | ||
557 | 564 | ||
558 | ci->sector += len; | ||
559 | ci->sector_count -= len; | ||
560 | ci->idx++; | 565 | ci->idx++; |
561 | } | 566 | } |
562 | } | 567 | } |
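The rewritten branch of __clone_and_map() above replaces the fixed two-way split with a do/while loop, so a single bvec can be carved into as many clones as there are target boundaries beneath it: each pass re-looks-up the target for the current sector, maps at most 'max' sectors, and keeps going until 'remaining' reaches zero. A simplified user-space model of just that loop structure (the boundary spacing and lengths are invented; only the control flow mirrors the patch):

#include <stdio.h>

/* Hypothetical: a target boundary every 128 sectors. */
static unsigned long long max_len_for(unsigned long long sector)
{
	return 128 - (sector % 128);
}

int main(void)
{
	unsigned long long sector = 100;	/* first sector covered by the bvec */
	unsigned long long remaining = 300;	/* sectors still to map from it */
	unsigned long long offset = 0, len;

	do {
		len = max_len_for(sector);	/* re-evaluate the target each pass */
		if (len > remaining)
			len = remaining;

		printf("clone %llu sectors at sector %llu (offset %llu)\n",
		       len, sector, offset);

		sector += len;
		offset += len;		/* the kernel advances by to_bytes(len) */
	} while (remaining -= len);	/* stop once the bvec is consumed */

	return 0;
}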
diff --git a/drivers/media/dvb/bt8xx/Makefile b/drivers/media/dvb/bt8xx/Makefile index 9d197efb481d..d188e4c670b5 100644 --- a/drivers/media/dvb/bt8xx/Makefile +++ b/drivers/media/dvb/bt8xx/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-$(CONFIG_DVB_BT8XX) += bt878.o dvb-bt8xx.o dst.o dst_ca.o | 1 | obj-$(CONFIG_DVB_BT8XX) += bt878.o dvb-bt8xx.o dst.o dst_ca.o |
2 | 2 | ||
3 | EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video/bt8xx -Idrivers/media/dvb/frontends | 3 | EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video -Idrivers/media/dvb/frontends |
diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h index 7754d1974b9e..4262c1da6d4a 100644 --- a/drivers/net/mv643xx_eth.h +++ b/drivers/net/mv643xx_eth.h | |||
@@ -42,13 +42,23 @@ | |||
42 | #define MAX_DESCS_PER_SKB 1 | 42 | #define MAX_DESCS_PER_SKB 1 |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | /* | ||
46 | * The MV643XX HW requires 8-byte alignment. However, when I/O | ||
47 | * is non-cache-coherent, we need to ensure that the I/O buffers | ||
48 | * we use don't share cache lines with other data. | ||
49 | */ | ||
50 | #if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_NOT_COHERENT_CACHE) | ||
51 | #define ETH_DMA_ALIGN L1_CACHE_BYTES | ||
52 | #else | ||
53 | #define ETH_DMA_ALIGN 8 | ||
54 | #endif | ||
55 | |||
45 | #define ETH_VLAN_HLEN 4 | 56 | #define ETH_VLAN_HLEN 4 |
46 | #define ETH_FCS_LEN 4 | 57 | #define ETH_FCS_LEN 4 |
47 | #define ETH_DMA_ALIGN 8 /* hw requires 8-byte alignment */ | 58 | #define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ |
48 | #define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ | ||
49 | #define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \ | 59 | #define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \ |
50 | ETH_VLAN_HLEN + ETH_FCS_LEN) | 60 | ETH_VLAN_HLEN + ETH_FCS_LEN) |
51 | #define ETH_RX_SKB_SIZE ((dev->mtu + ETH_WRAPPER_LEN + 7) & ~0x7) | 61 | #define ETH_RX_SKB_SIZE (dev->mtu + ETH_WRAPPER_LEN + ETH_DMA_ALIGN) |
52 | 62 | ||
53 | #define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */ | 63 | #define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */ |
54 | #define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */ | 64 | #define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */ |
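The new ETH_DMA_ALIGN / ETH_RX_SKB_SIZE definitions above swap the old round-up-to-8 for a full alignment unit of extra slack, so that on non-cache-coherent systems the receive buffer can be pushed onto its own cache line rather than merely being 8-byte aligned. A sketch of how such slack is typically consumed when an rx skb is allocated -- this helper is an assumption for illustration, not code from this hunk:

#include <linux/skbuff.h>

static struct sk_buff *example_alloc_rx_skb(unsigned int size,
					    unsigned int dma_align)
{
	struct sk_buff *skb = dev_alloc_skb(size);	/* size includes the slack */
	unsigned int unaligned;

	if (!skb)
		return NULL;

	/* burn part of the slack to move skb->data up to the next
	 * dma_align (e.g. L1_CACHE_BYTES) boundary */
	unaligned = (unsigned long)skb->data & (dma_align - 1);
	if (unaligned)
		skb_reserve(skb, dma_align - unaligned);

	return skb;
}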
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 7e900572eaf8..9595f74da93f 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c | |||
@@ -22,12 +22,12 @@ | |||
22 | *************************************************************************/ | 22 | *************************************************************************/ |
23 | 23 | ||
24 | #define DRV_NAME "pcnet32" | 24 | #define DRV_NAME "pcnet32" |
25 | #define DRV_VERSION "1.31c" | 25 | #define DRV_VERSION "1.32" |
26 | #define DRV_RELDATE "01.Nov.2005" | 26 | #define DRV_RELDATE "18.Mar.2006" |
27 | #define PFX DRV_NAME ": " | 27 | #define PFX DRV_NAME ": " |
28 | 28 | ||
29 | static const char * const version = | 29 | static const char *const version = |
30 | DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; | 30 | DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; |
31 | 31 | ||
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
@@ -58,18 +58,23 @@ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; | |||
58 | * PCI device identifiers for "new style" Linux PCI Device Drivers | 58 | * PCI device identifiers for "new style" Linux PCI Device Drivers |
59 | */ | 59 | */ |
60 | static struct pci_device_id pcnet32_pci_tbl[] = { | 60 | static struct pci_device_id pcnet32_pci_tbl[] = { |
61 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, | 61 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, |
62 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, | 62 | PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, |
63 | /* | 63 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, |
64 | * Adapters that were sold with IBM's RS/6000 or pSeries hardware have | 64 | PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, |
65 | * the incorrect vendor id. | 65 | |
66 | */ | 66 | /* |
67 | { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, | 67 | * Adapters that were sold with IBM's RS/6000 or pSeries hardware have |
68 | PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0 }, | 68 | * the incorrect vendor id. |
69 | { 0, } | 69 | */ |
70 | { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, | ||
71 | PCI_ANY_ID, PCI_ANY_ID, | ||
72 | PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0}, | ||
73 | |||
74 | { } /* terminate list */ | ||
70 | }; | 75 | }; |
71 | 76 | ||
72 | MODULE_DEVICE_TABLE (pci, pcnet32_pci_tbl); | 77 | MODULE_DEVICE_TABLE(pci, pcnet32_pci_tbl); |
73 | 78 | ||
74 | static int cards_found; | 79 | static int cards_found; |
75 | 80 | ||
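The reformatted ID table above also carries the reason the Trident entry exists: adapters shipped with IBM RS/6000 and pSeries systems report the wrong vendor id, so the entry narrows the match by PCI class instead -- the PCI core additionally requires (dev->class & id->class_mask) == id->class before a match succeeds, and a class_mask of 0xffff00 keeps the class and subclass bytes ("network / ethernet") while ignoring the programming-interface byte. A stripped-down sketch of such a class-based entry (the table name is illustrative):

#include <linux/module.h>
#include <linux/pci.h>

static struct pci_device_id example_pci_tbl[] = {
	/* match by class/subclass rather than by vendor id */
	{ PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE,
	  PCI_ANY_ID, PCI_ANY_ID,
	  PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0 },

	{ }	/* terminate list */
};

MODULE_DEVICE_TABLE(pci, example_pci_tbl);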
@@ -77,13 +82,11 @@ static int cards_found; | |||
77 | * VLB I/O addresses | 82 | * VLB I/O addresses |
78 | */ | 83 | */ |
79 | static unsigned int pcnet32_portlist[] __initdata = | 84 | static unsigned int pcnet32_portlist[] __initdata = |
80 | { 0x300, 0x320, 0x340, 0x360, 0 }; | 85 | { 0x300, 0x320, 0x340, 0x360, 0 }; |
81 | |||
82 | |||
83 | 86 | ||
84 | static int pcnet32_debug = 0; | 87 | static int pcnet32_debug = 0; |
85 | static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ | 88 | static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ |
86 | static int pcnet32vlb; /* check for VLB cards ? */ | 89 | static int pcnet32vlb; /* check for VLB cards ? */ |
87 | 90 | ||
88 | static struct net_device *pcnet32_dev; | 91 | static struct net_device *pcnet32_dev; |
89 | 92 | ||
@@ -110,32 +113,34 @@ static int rx_copybreak = 200; | |||
110 | * to internal options | 113 | * to internal options |
111 | */ | 114 | */ |
112 | static const unsigned char options_mapping[] = { | 115 | static const unsigned char options_mapping[] = { |
113 | PCNET32_PORT_ASEL, /* 0 Auto-select */ | 116 | PCNET32_PORT_ASEL, /* 0 Auto-select */ |
114 | PCNET32_PORT_AUI, /* 1 BNC/AUI */ | 117 | PCNET32_PORT_AUI, /* 1 BNC/AUI */ |
115 | PCNET32_PORT_AUI, /* 2 AUI/BNC */ | 118 | PCNET32_PORT_AUI, /* 2 AUI/BNC */ |
116 | PCNET32_PORT_ASEL, /* 3 not supported */ | 119 | PCNET32_PORT_ASEL, /* 3 not supported */ |
117 | PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ | 120 | PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ |
118 | PCNET32_PORT_ASEL, /* 5 not supported */ | 121 | PCNET32_PORT_ASEL, /* 5 not supported */ |
119 | PCNET32_PORT_ASEL, /* 6 not supported */ | 122 | PCNET32_PORT_ASEL, /* 6 not supported */ |
120 | PCNET32_PORT_ASEL, /* 7 not supported */ | 123 | PCNET32_PORT_ASEL, /* 7 not supported */ |
121 | PCNET32_PORT_ASEL, /* 8 not supported */ | 124 | PCNET32_PORT_ASEL, /* 8 not supported */ |
122 | PCNET32_PORT_MII, /* 9 MII 10baseT */ | 125 | PCNET32_PORT_MII, /* 9 MII 10baseT */ |
123 | PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ | 126 | PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ |
124 | PCNET32_PORT_MII, /* 11 MII (autosel) */ | 127 | PCNET32_PORT_MII, /* 11 MII (autosel) */ |
125 | PCNET32_PORT_10BT, /* 12 10BaseT */ | 128 | PCNET32_PORT_10BT, /* 12 10BaseT */ |
126 | PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ | 129 | PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ |
127 | PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD, /* 14 MII 100BaseTx-FD */ | 130 | /* 14 MII 100BaseTx-FD */ |
128 | PCNET32_PORT_ASEL /* 15 not supported */ | 131 | PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD, |
132 | PCNET32_PORT_ASEL /* 15 not supported */ | ||
129 | }; | 133 | }; |
130 | 134 | ||
131 | static const char pcnet32_gstrings_test[][ETH_GSTRING_LEN] = { | 135 | static const char pcnet32_gstrings_test[][ETH_GSTRING_LEN] = { |
132 | "Loopback test (offline)" | 136 | "Loopback test (offline)" |
133 | }; | 137 | }; |
138 | |||
134 | #define PCNET32_TEST_LEN (sizeof(pcnet32_gstrings_test) / ETH_GSTRING_LEN) | 139 | #define PCNET32_TEST_LEN (sizeof(pcnet32_gstrings_test) / ETH_GSTRING_LEN) |
135 | 140 | ||
136 | #define PCNET32_NUM_REGS 168 | 141 | #define PCNET32_NUM_REGS 136 |
137 | 142 | ||
138 | #define MAX_UNITS 8 /* More are supported, limit only on options */ | 143 | #define MAX_UNITS 8 /* More are supported, limit only on options */ |
139 | static int options[MAX_UNITS]; | 144 | static int options[MAX_UNITS]; |
140 | static int full_duplex[MAX_UNITS]; | 145 | static int full_duplex[MAX_UNITS]; |
141 | static int homepna[MAX_UNITS]; | 146 | static int homepna[MAX_UNITS]; |
@@ -151,124 +156,6 @@ static int homepna[MAX_UNITS]; | |||
151 | */ | 156 | */ |
152 | 157 | ||
153 | /* | 158 | /* |
154 | * History: | ||
155 | * v0.01: Initial version | ||
156 | * only tested on Alpha Noname Board | ||
157 | * v0.02: changed IRQ handling for new interrupt scheme (dev_id) | ||
158 | * tested on a ASUS SP3G | ||
159 | * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL | ||
160 | * looks like the 974 doesn't like stopping and restarting in a | ||
161 | * short period of time; now we do a reinit of the lance; the | ||
162 | * bug was triggered by doing ifconfig eth0 <ip> broadcast <addr> | ||
163 | * and hangs the machine (thanks to Klaus Liedl for debugging) | ||
164 | * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32, | ||
165 | * made it standalone (no need for lance.c) | ||
166 | * v0.13: added additional PCI detecting for special PCI devices (Compaq) | ||
167 | * v0.14: stripped down additional PCI probe (thanks to David C Niemi | ||
168 | * and sveneric@xs4all.nl for testing this on their Compaq boxes) | ||
169 | * v0.15: added 79C965 (VLB) probe | ||
170 | * added interrupt sharing for PCI chips | ||
171 | * v0.16: fixed set_multicast_list on Alpha machines | ||
172 | * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c | ||
173 | * v0.19: changed setting of autoselect bit | ||
174 | * v0.20: removed additional Compaq PCI probe; there is now a working one | ||
175 | * in arch/i386/bios32.c | ||
176 | * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu | ||
177 | * v0.22: added printing of status to ring dump | ||
178 | * v0.23: changed enet_statistics to net_devive_stats | ||
179 | * v0.90: added multicast filter | ||
180 | * added module support | ||
181 | * changed irq probe to new style | ||
182 | * added PCnetFast chip id | ||
183 | * added fix for receive stalls with Intel saturn chipsets | ||
184 | * added in-place rx skbs like in the tulip driver | ||
185 | * minor cleanups | ||
186 | * v0.91: added PCnetFast+ chip id | ||
187 | * back port to 2.0.x | ||
188 | * v1.00: added some stuff from Donald Becker's 2.0.34 version | ||
189 | * added support for byte counters in net_dev_stats | ||
190 | * v1.01: do ring dumps, only when debugging the driver | ||
191 | * increased the transmit timeout | ||
192 | * v1.02: fixed memory leak in pcnet32_init_ring() | ||
193 | * v1.10: workaround for stopped transmitter | ||
194 | * added port selection for modules | ||
195 | * detect special T1/E1 WAN card and setup port selection | ||
196 | * v1.11: fixed wrong checking of Tx errors | ||
197 | * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu) | ||
198 | * added save original kmalloc addr for freeing (mcr@solidum.com) | ||
199 | * added support for PCnetHome chip (joe@MIT.EDU) | ||
200 | * rewritten PCI card detection | ||
201 | * added dwio mode to get driver working on some PPC machines | ||
202 | * v1.21: added mii selection and mii ioctl | ||
203 | * v1.22: changed pci scanning code to make PPC people happy | ||
204 | * fixed switching to 32bit mode in pcnet32_open() (thanks | ||
205 | * to Michael Richard <mcr@solidum.com> for noticing this one) | ||
206 | * added sub vendor/device id matching (thanks again to | ||
207 | * Michael Richard <mcr@solidum.com>) | ||
208 | * added chip id for 79c973/975 (thanks to Zach Brown <zab@zabbo.net>) | ||
209 | * v1.23 fixed small bug, when manual selecting MII speed/duplex | ||
210 | * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO | ||
211 | * underflows. Added tx_start_pt module parameter. Increased | ||
212 | * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO | ||
213 | * for FAST[+] chipsets. <kaf@fc.hp.com> | ||
214 | * v1.24ac Added SMP spinlocking - Alan Cox <alan@redhat.com> | ||
215 | * v1.25kf Added No Interrupt on successful Tx for some Tx's <kaf@fc.hp.com> | ||
216 | * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France | ||
217 | * <jamey@crl.dec.com> | ||
218 | * - Fixed a few bugs, related to running the controller in 32bit mode. | ||
219 | * 23 Oct, 2000. Carsten Langgaard, carstenl@mips.com | ||
220 | * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. | ||
221 | * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker | ||
222 | * v1.27 improved CSR/PROM address detection, lots of cleanups, | ||
223 | * new pcnet32vlb module option, HP-PARISC support, | ||
224 | * added module parameter descriptions, | ||
225 | * initial ethtool support - Helge Deller <deller@gmx.de> | ||
226 | * v1.27a Sun Feb 10 2002 Go Taniguchi <go@turbolinux.co.jp> | ||
227 | * use alloc_etherdev and register_netdev | ||
228 | * fix pci probe not increment cards_found | ||
229 | * FD auto negotiate error workaround for xSeries250 | ||
230 | * clean up and using new mii module | ||
231 | * v1.27b Sep 30 2002 Kent Yoder <yoder1@us.ibm.com> | ||
232 | * Added timer for cable connection state changes. | ||
233 | * v1.28 20 Feb 2004 Don Fry <brazilnut@us.ibm.com> | ||
234 | * Jon Mason <jonmason@us.ibm.com>, Chinmay Albal <albal@in.ibm.com> | ||
235 | * Now uses ethtool_ops, netif_msg_* and generic_mii_ioctl. | ||
236 | * Fixes bogus 'Bus master arbitration failure', pci_[un]map_single | ||
237 | * length errors, and transmit hangs. Cleans up after errors in open. | ||
238 | * Jim Lewis <jklewis@us.ibm.com> added ethernet loopback test. | ||
239 | * Thomas Munck Steenholdt <tmus@tmus.dk> non-mii ioctl corrections. | ||
240 | * v1.29 6 Apr 2004 Jim Lewis <jklewis@us.ibm.com> added physical | ||
241 | * identification code (blink led's) and register dump. | ||
242 | * Don Fry added timer for 971/972 so skbufs don't remain on tx ring | ||
243 | * forever. | ||
244 | * v1.30 18 May 2004 Don Fry removed timer and Last Transmit Interrupt | ||
245 | * (ltint) as they added complexity and didn't give good throughput. | ||
246 | * v1.30a 22 May 2004 Don Fry limit frames received during interrupt. | ||
247 | * v1.30b 24 May 2004 Don Fry fix bogus tx carrier errors with 79c973, | ||
248 | * assisted by Bruce Penrod <bmpenrod@endruntechnologies.com>. | ||
249 | * v1.30c 25 May 2004 Don Fry added netif_wake_queue after pcnet32_restart. | ||
250 | * v1.30d 01 Jun 2004 Don Fry discard oversize rx packets. | ||
251 | * v1.30e 11 Jun 2004 Don Fry recover after fifo error and rx hang. | ||
252 | * v1.30f 16 Jun 2004 Don Fry cleanup IRQ to allow 0 and 1 for PCI, | ||
253 | * expanding on suggestions from Ralf Baechle <ralf@linux-mips.org>, | ||
254 | * and Brian Murphy <brian@murphy.dk>. | ||
255 | * v1.30g 22 Jun 2004 Patrick Simmons <psimmons@flash.net> added option | ||
256 | * homepna for selecting HomePNA mode for PCNet/Home 79C978. | ||
257 | * v1.30h 24 Jun 2004 Don Fry correctly select auto, speed, duplex in bcr32. | ||
258 | * v1.30i 28 Jun 2004 Don Fry change to use module_param. | ||
259 | * v1.30j 29 Apr 2005 Don Fry fix skb/map leak with loopback test. | ||
260 | * v1.31 02 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> added set_ringparam(). | ||
261 | * v1.31a 12 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> set min ring size to 4 | ||
262 | * to allow loopback test to work unchanged. | ||
263 | * v1.31b 06 Oct 2005 Don Fry changed alloc_ring to show name of device | ||
264 | * if allocation fails | ||
265 | * v1.31c 01 Nov 2005 Don Fry Allied Telesyn 2700/2701 FX are 100Mbit only. | ||
266 | * Force 100Mbit FD if Auto (ASEL) is selected. | ||
267 | * See Bugzilla 2669 and 4551. | ||
268 | */ | ||
269 | |||
270 | |||
271 | /* | ||
272 | * Set the number of Tx and Rx buffers, using Log_2(# buffers). | 159 | * Set the number of Tx and Rx buffers, using Log_2(# buffers). |
273 | * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. | 160 | * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. |
274 | * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). | 161 | * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). |
@@ -303,42 +190,42 @@ static int homepna[MAX_UNITS]; | |||
303 | 190 | ||
304 | /* The PCNET32 Rx and Tx ring descriptors. */ | 191 | /* The PCNET32 Rx and Tx ring descriptors. */ |
305 | struct pcnet32_rx_head { | 192 | struct pcnet32_rx_head { |
306 | u32 base; | 193 | u32 base; |
307 | s16 buf_length; | 194 | s16 buf_length; |
308 | s16 status; | 195 | s16 status; |
309 | u32 msg_length; | 196 | u32 msg_length; |
310 | u32 reserved; | 197 | u32 reserved; |
311 | }; | 198 | }; |
312 | 199 | ||
313 | struct pcnet32_tx_head { | 200 | struct pcnet32_tx_head { |
314 | u32 base; | 201 | u32 base; |
315 | s16 length; | 202 | s16 length; |
316 | s16 status; | 203 | s16 status; |
317 | u32 misc; | 204 | u32 misc; |
318 | u32 reserved; | 205 | u32 reserved; |
319 | }; | 206 | }; |
320 | 207 | ||
321 | /* The PCNET32 32-Bit initialization block, described in databook. */ | 208 | /* The PCNET32 32-Bit initialization block, described in databook. */ |
322 | struct pcnet32_init_block { | 209 | struct pcnet32_init_block { |
323 | u16 mode; | 210 | u16 mode; |
324 | u16 tlen_rlen; | 211 | u16 tlen_rlen; |
325 | u8 phys_addr[6]; | 212 | u8 phys_addr[6]; |
326 | u16 reserved; | 213 | u16 reserved; |
327 | u32 filter[2]; | 214 | u32 filter[2]; |
328 | /* Receive and transmit ring base, along with extra bits. */ | 215 | /* Receive and transmit ring base, along with extra bits. */ |
329 | u32 rx_ring; | 216 | u32 rx_ring; |
330 | u32 tx_ring; | 217 | u32 tx_ring; |
331 | }; | 218 | }; |
332 | 219 | ||
333 | /* PCnet32 access functions */ | 220 | /* PCnet32 access functions */ |
334 | struct pcnet32_access { | 221 | struct pcnet32_access { |
335 | u16 (*read_csr)(unsigned long, int); | 222 | u16 (*read_csr) (unsigned long, int); |
336 | void (*write_csr)(unsigned long, int, u16); | 223 | void (*write_csr) (unsigned long, int, u16); |
337 | u16 (*read_bcr)(unsigned long, int); | 224 | u16 (*read_bcr) (unsigned long, int); |
338 | void (*write_bcr)(unsigned long, int, u16); | 225 | void (*write_bcr) (unsigned long, int, u16); |
339 | u16 (*read_rap)(unsigned long); | 226 | u16 (*read_rap) (unsigned long); |
340 | void (*write_rap)(unsigned long, u16); | 227 | void (*write_rap) (unsigned long, u16); |
341 | void (*reset)(unsigned long); | 228 | void (*reset) (unsigned long); |
342 | }; | 229 | }; |
343 | 230 | ||
344 | /* | 231 | /* |
@@ -346,760 +233,794 @@ struct pcnet32_access { | |||
346 | * so the structure should be allocated using pci_alloc_consistent(). | 233 | * so the structure should be allocated using pci_alloc_consistent(). |
347 | */ | 234 | */ |
348 | struct pcnet32_private { | 235 | struct pcnet32_private { |
349 | struct pcnet32_init_block init_block; | 236 | struct pcnet32_init_block init_block; |
350 | /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ | 237 | /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ |
351 | struct pcnet32_rx_head *rx_ring; | 238 | struct pcnet32_rx_head *rx_ring; |
352 | struct pcnet32_tx_head *tx_ring; | 239 | struct pcnet32_tx_head *tx_ring; |
353 | dma_addr_t dma_addr; /* DMA address of beginning of this | 240 | dma_addr_t dma_addr;/* DMA address of beginning of this |
354 | object, returned by | 241 | object, returned by pci_alloc_consistent */ |
355 | pci_alloc_consistent */ | 242 | struct pci_dev *pci_dev; |
356 | struct pci_dev *pci_dev; /* Pointer to the associated pci device | 243 | const char *name; |
357 | structure */ | 244 | /* The saved address of a sent-in-place packet/buffer, for skfree(). */ |
358 | const char *name; | 245 | struct sk_buff **tx_skbuff; |
359 | /* The saved address of a sent-in-place packet/buffer, for skfree(). */ | 246 | struct sk_buff **rx_skbuff; |
360 | struct sk_buff **tx_skbuff; | 247 | dma_addr_t *tx_dma_addr; |
361 | struct sk_buff **rx_skbuff; | 248 | dma_addr_t *rx_dma_addr; |
362 | dma_addr_t *tx_dma_addr; | 249 | struct pcnet32_access a; |
363 | dma_addr_t *rx_dma_addr; | 250 | spinlock_t lock; /* Guard lock */ |
364 | struct pcnet32_access a; | 251 | unsigned int cur_rx, cur_tx; /* The next free ring entry */ |
365 | spinlock_t lock; /* Guard lock */ | 252 | unsigned int rx_ring_size; /* current rx ring size */ |
366 | unsigned int cur_rx, cur_tx; /* The next free ring entry */ | 253 | unsigned int tx_ring_size; /* current tx ring size */ |
367 | unsigned int rx_ring_size; /* current rx ring size */ | 254 | unsigned int rx_mod_mask; /* rx ring modular mask */ |
368 | unsigned int tx_ring_size; /* current tx ring size */ | 255 | unsigned int tx_mod_mask; /* tx ring modular mask */ |
369 | unsigned int rx_mod_mask; /* rx ring modular mask */ | 256 | unsigned short rx_len_bits; |
370 | unsigned int tx_mod_mask; /* tx ring modular mask */ | 257 | unsigned short tx_len_bits; |
371 | unsigned short rx_len_bits; | 258 | dma_addr_t rx_ring_dma_addr; |
372 | unsigned short tx_len_bits; | 259 | dma_addr_t tx_ring_dma_addr; |
373 | dma_addr_t rx_ring_dma_addr; | 260 | unsigned int dirty_rx, /* ring entries to be freed. */ |
374 | dma_addr_t tx_ring_dma_addr; | 261 | dirty_tx; |
375 | unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ | 262 | |
376 | struct net_device_stats stats; | 263 | struct net_device_stats stats; |
377 | char tx_full; | 264 | char tx_full; |
378 | int options; | 265 | char phycount; /* number of phys found */ |
379 | unsigned int shared_irq:1, /* shared irq possible */ | 266 | int options; |
380 | dxsuflo:1, /* disable transmit stop on uflo */ | 267 | unsigned int shared_irq:1, /* shared irq possible */ |
381 | mii:1; /* mii port available */ | 268 | dxsuflo:1, /* disable transmit stop on uflo */ |
382 | struct net_device *next; | 269 | mii:1; /* mii port available */ |
383 | struct mii_if_info mii_if; | 270 | struct net_device *next; |
384 | struct timer_list watchdog_timer; | 271 | struct mii_if_info mii_if; |
385 | struct timer_list blink_timer; | 272 | struct timer_list watchdog_timer; |
386 | u32 msg_enable; /* debug message level */ | 273 | struct timer_list blink_timer; |
274 | u32 msg_enable; /* debug message level */ | ||
275 | |||
276 | /* each bit indicates an available PHY */ | ||
277 | u32 phymask; | ||
387 | }; | 278 | }; |
388 | 279 | ||
389 | static void pcnet32_probe_vlbus(void); | 280 | static void pcnet32_probe_vlbus(void); |
390 | static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); | 281 | static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); |
391 | static int pcnet32_probe1(unsigned long, int, struct pci_dev *); | 282 | static int pcnet32_probe1(unsigned long, int, struct pci_dev *); |
392 | static int pcnet32_open(struct net_device *); | 283 | static int pcnet32_open(struct net_device *); |
393 | static int pcnet32_init_ring(struct net_device *); | 284 | static int pcnet32_init_ring(struct net_device *); |
394 | static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); | 285 | static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); |
395 | static int pcnet32_rx(struct net_device *); | 286 | static int pcnet32_rx(struct net_device *); |
396 | static void pcnet32_tx_timeout (struct net_device *dev); | 287 | static void pcnet32_tx_timeout(struct net_device *dev); |
397 | static irqreturn_t pcnet32_interrupt(int, void *, struct pt_regs *); | 288 | static irqreturn_t pcnet32_interrupt(int, void *, struct pt_regs *); |
398 | static int pcnet32_close(struct net_device *); | 289 | static int pcnet32_close(struct net_device *); |
399 | static struct net_device_stats *pcnet32_get_stats(struct net_device *); | 290 | static struct net_device_stats *pcnet32_get_stats(struct net_device *); |
400 | static void pcnet32_load_multicast(struct net_device *dev); | 291 | static void pcnet32_load_multicast(struct net_device *dev); |
401 | static void pcnet32_set_multicast_list(struct net_device *); | 292 | static void pcnet32_set_multicast_list(struct net_device *); |
402 | static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); | 293 | static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); |
403 | static void pcnet32_watchdog(struct net_device *); | 294 | static void pcnet32_watchdog(struct net_device *); |
404 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num); | 295 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num); |
405 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); | 296 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, |
297 | int val); | ||
406 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits); | 298 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits); |
407 | static void pcnet32_ethtool_test(struct net_device *dev, | 299 | static void pcnet32_ethtool_test(struct net_device *dev, |
408 | struct ethtool_test *eth_test, u64 *data); | 300 | struct ethtool_test *eth_test, u64 * data); |
409 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1); | 301 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1); |
410 | static int pcnet32_phys_id(struct net_device *dev, u32 data); | 302 | static int pcnet32_phys_id(struct net_device *dev, u32 data); |
411 | static void pcnet32_led_blink_callback(struct net_device *dev); | 303 | static void pcnet32_led_blink_callback(struct net_device *dev); |
412 | static int pcnet32_get_regs_len(struct net_device *dev); | 304 | static int pcnet32_get_regs_len(struct net_device *dev); |
413 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, | 305 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, |
414 | void *ptr); | 306 | void *ptr); |
415 | static void pcnet32_purge_tx_ring(struct net_device *dev); | 307 | static void pcnet32_purge_tx_ring(struct net_device *dev); |
416 | static int pcnet32_alloc_ring(struct net_device *dev, char *name); | 308 | static int pcnet32_alloc_ring(struct net_device *dev, char *name); |
417 | static void pcnet32_free_ring(struct net_device *dev); | 309 | static void pcnet32_free_ring(struct net_device *dev); |
418 | 310 | static void pcnet32_check_media(struct net_device *dev, int verbose); | |
419 | 311 | ||
420 | enum pci_flags_bit { | 312 | enum pci_flags_bit { |
421 | PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, | 313 | PCI_USES_IO = 1, PCI_USES_MEM = 2, PCI_USES_MASTER = 4, |
422 | PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, | 314 | PCI_ADDR0 = 0x10 << 0, PCI_ADDR1 = 0x10 << 1, PCI_ADDR2 = |
315 | 0x10 << 2, PCI_ADDR3 = 0x10 << 3, | ||
423 | }; | 316 | }; |
424 | 317 | ||
425 | 318 | static u16 pcnet32_wio_read_csr(unsigned long addr, int index) | |
426 | static u16 pcnet32_wio_read_csr (unsigned long addr, int index) | ||
427 | { | 319 | { |
428 | outw (index, addr+PCNET32_WIO_RAP); | 320 | outw(index, addr + PCNET32_WIO_RAP); |
429 | return inw (addr+PCNET32_WIO_RDP); | 321 | return inw(addr + PCNET32_WIO_RDP); |
430 | } | 322 | } |
431 | 323 | ||
432 | static void pcnet32_wio_write_csr (unsigned long addr, int index, u16 val) | 324 | static void pcnet32_wio_write_csr(unsigned long addr, int index, u16 val) |
433 | { | 325 | { |
434 | outw (index, addr+PCNET32_WIO_RAP); | 326 | outw(index, addr + PCNET32_WIO_RAP); |
435 | outw (val, addr+PCNET32_WIO_RDP); | 327 | outw(val, addr + PCNET32_WIO_RDP); |
436 | } | 328 | } |
437 | 329 | ||
438 | static u16 pcnet32_wio_read_bcr (unsigned long addr, int index) | 330 | static u16 pcnet32_wio_read_bcr(unsigned long addr, int index) |
439 | { | 331 | { |
440 | outw (index, addr+PCNET32_WIO_RAP); | 332 | outw(index, addr + PCNET32_WIO_RAP); |
441 | return inw (addr+PCNET32_WIO_BDP); | 333 | return inw(addr + PCNET32_WIO_BDP); |
442 | } | 334 | } |
443 | 335 | ||
444 | static void pcnet32_wio_write_bcr (unsigned long addr, int index, u16 val) | 336 | static void pcnet32_wio_write_bcr(unsigned long addr, int index, u16 val) |
445 | { | 337 | { |
446 | outw (index, addr+PCNET32_WIO_RAP); | 338 | outw(index, addr + PCNET32_WIO_RAP); |
447 | outw (val, addr+PCNET32_WIO_BDP); | 339 | outw(val, addr + PCNET32_WIO_BDP); |
448 | } | 340 | } |
449 | 341 | ||
450 | static u16 pcnet32_wio_read_rap (unsigned long addr) | 342 | static u16 pcnet32_wio_read_rap(unsigned long addr) |
451 | { | 343 | { |
452 | return inw (addr+PCNET32_WIO_RAP); | 344 | return inw(addr + PCNET32_WIO_RAP); |
453 | } | 345 | } |
454 | 346 | ||
455 | static void pcnet32_wio_write_rap (unsigned long addr, u16 val) | 347 | static void pcnet32_wio_write_rap(unsigned long addr, u16 val) |
456 | { | 348 | { |
457 | outw (val, addr+PCNET32_WIO_RAP); | 349 | outw(val, addr + PCNET32_WIO_RAP); |
458 | } | 350 | } |
459 | 351 | ||
460 | static void pcnet32_wio_reset (unsigned long addr) | 352 | static void pcnet32_wio_reset(unsigned long addr) |
461 | { | 353 | { |
462 | inw (addr+PCNET32_WIO_RESET); | 354 | inw(addr + PCNET32_WIO_RESET); |
463 | } | 355 | } |
464 | 356 | ||
465 | static int pcnet32_wio_check (unsigned long addr) | 357 | static int pcnet32_wio_check(unsigned long addr) |
466 | { | 358 | { |
467 | outw (88, addr+PCNET32_WIO_RAP); | 359 | outw(88, addr + PCNET32_WIO_RAP); |
468 | return (inw (addr+PCNET32_WIO_RAP) == 88); | 360 | return (inw(addr + PCNET32_WIO_RAP) == 88); |
469 | } | 361 | } |
470 | 362 | ||
471 | static struct pcnet32_access pcnet32_wio = { | 363 | static struct pcnet32_access pcnet32_wio = { |
472 | .read_csr = pcnet32_wio_read_csr, | 364 | .read_csr = pcnet32_wio_read_csr, |
473 | .write_csr = pcnet32_wio_write_csr, | 365 | .write_csr = pcnet32_wio_write_csr, |
474 | .read_bcr = pcnet32_wio_read_bcr, | 366 | .read_bcr = pcnet32_wio_read_bcr, |
475 | .write_bcr = pcnet32_wio_write_bcr, | 367 | .write_bcr = pcnet32_wio_write_bcr, |
476 | .read_rap = pcnet32_wio_read_rap, | 368 | .read_rap = pcnet32_wio_read_rap, |
477 | .write_rap = pcnet32_wio_write_rap, | 369 | .write_rap = pcnet32_wio_write_rap, |
478 | .reset = pcnet32_wio_reset | 370 | .reset = pcnet32_wio_reset |
479 | }; | 371 | }; |
480 | 372 | ||
481 | static u16 pcnet32_dwio_read_csr (unsigned long addr, int index) | 373 | static u16 pcnet32_dwio_read_csr(unsigned long addr, int index) |
482 | { | 374 | { |
483 | outl (index, addr+PCNET32_DWIO_RAP); | 375 | outl(index, addr + PCNET32_DWIO_RAP); |
484 | return (inl (addr+PCNET32_DWIO_RDP) & 0xffff); | 376 | return (inl(addr + PCNET32_DWIO_RDP) & 0xffff); |
485 | } | 377 | } |
486 | 378 | ||
487 | static void pcnet32_dwio_write_csr (unsigned long addr, int index, u16 val) | 379 | static void pcnet32_dwio_write_csr(unsigned long addr, int index, u16 val) |
488 | { | 380 | { |
489 | outl (index, addr+PCNET32_DWIO_RAP); | 381 | outl(index, addr + PCNET32_DWIO_RAP); |
490 | outl (val, addr+PCNET32_DWIO_RDP); | 382 | outl(val, addr + PCNET32_DWIO_RDP); |
491 | } | 383 | } |
492 | 384 | ||
493 | static u16 pcnet32_dwio_read_bcr (unsigned long addr, int index) | 385 | static u16 pcnet32_dwio_read_bcr(unsigned long addr, int index) |
494 | { | 386 | { |
495 | outl (index, addr+PCNET32_DWIO_RAP); | 387 | outl(index, addr + PCNET32_DWIO_RAP); |
496 | return (inl (addr+PCNET32_DWIO_BDP) & 0xffff); | 388 | return (inl(addr + PCNET32_DWIO_BDP) & 0xffff); |
497 | } | 389 | } |
498 | 390 | ||
499 | static void pcnet32_dwio_write_bcr (unsigned long addr, int index, u16 val) | 391 | static void pcnet32_dwio_write_bcr(unsigned long addr, int index, u16 val) |
500 | { | 392 | { |
501 | outl (index, addr+PCNET32_DWIO_RAP); | 393 | outl(index, addr + PCNET32_DWIO_RAP); |
502 | outl (val, addr+PCNET32_DWIO_BDP); | 394 | outl(val, addr + PCNET32_DWIO_BDP); |
503 | } | 395 | } |
504 | 396 | ||
505 | static u16 pcnet32_dwio_read_rap (unsigned long addr) | 397 | static u16 pcnet32_dwio_read_rap(unsigned long addr) |
506 | { | 398 | { |
507 | return (inl (addr+PCNET32_DWIO_RAP) & 0xffff); | 399 | return (inl(addr + PCNET32_DWIO_RAP) & 0xffff); |
508 | } | 400 | } |
509 | 401 | ||
510 | static void pcnet32_dwio_write_rap (unsigned long addr, u16 val) | 402 | static void pcnet32_dwio_write_rap(unsigned long addr, u16 val) |
511 | { | 403 | { |
512 | outl (val, addr+PCNET32_DWIO_RAP); | 404 | outl(val, addr + PCNET32_DWIO_RAP); |
513 | } | 405 | } |
514 | 406 | ||
515 | static void pcnet32_dwio_reset (unsigned long addr) | 407 | static void pcnet32_dwio_reset(unsigned long addr) |
516 | { | 408 | { |
517 | inl (addr+PCNET32_DWIO_RESET); | 409 | inl(addr + PCNET32_DWIO_RESET); |
518 | } | 410 | } |
519 | 411 | ||
520 | static int pcnet32_dwio_check (unsigned long addr) | 412 | static int pcnet32_dwio_check(unsigned long addr) |
521 | { | 413 | { |
522 | outl (88, addr+PCNET32_DWIO_RAP); | 414 | outl(88, addr + PCNET32_DWIO_RAP); |
523 | return ((inl (addr+PCNET32_DWIO_RAP) & 0xffff) == 88); | 415 | return ((inl(addr + PCNET32_DWIO_RAP) & 0xffff) == 88); |
524 | } | 416 | } |
525 | 417 | ||
526 | static struct pcnet32_access pcnet32_dwio = { | 418 | static struct pcnet32_access pcnet32_dwio = { |
527 | .read_csr = pcnet32_dwio_read_csr, | 419 | .read_csr = pcnet32_dwio_read_csr, |
528 | .write_csr = pcnet32_dwio_write_csr, | 420 | .write_csr = pcnet32_dwio_write_csr, |
529 | .read_bcr = pcnet32_dwio_read_bcr, | 421 | .read_bcr = pcnet32_dwio_read_bcr, |
530 | .write_bcr = pcnet32_dwio_write_bcr, | 422 | .write_bcr = pcnet32_dwio_write_bcr, |
531 | .read_rap = pcnet32_dwio_read_rap, | 423 | .read_rap = pcnet32_dwio_read_rap, |
532 | .write_rap = pcnet32_dwio_write_rap, | 424 | .write_rap = pcnet32_dwio_write_rap, |
533 | .reset = pcnet32_dwio_reset | 425 | .reset = pcnet32_dwio_reset |
534 | }; | 426 | }; |
535 | 427 | ||
536 | #ifdef CONFIG_NET_POLL_CONTROLLER | 428 | #ifdef CONFIG_NET_POLL_CONTROLLER |
537 | static void pcnet32_poll_controller(struct net_device *dev) | 429 | static void pcnet32_poll_controller(struct net_device *dev) |
538 | { | 430 | { |
539 | disable_irq(dev->irq); | 431 | disable_irq(dev->irq); |
540 | pcnet32_interrupt(0, dev, NULL); | 432 | pcnet32_interrupt(0, dev, NULL); |
541 | enable_irq(dev->irq); | 433 | enable_irq(dev->irq); |
542 | } | 434 | } |
543 | #endif | 435 | #endif |
544 | 436 | ||
545 | |||
546 | static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) | 437 | static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) |
547 | { | 438 | { |
548 | struct pcnet32_private *lp = dev->priv; | 439 | struct pcnet32_private *lp = dev->priv; |
549 | unsigned long flags; | 440 | unsigned long flags; |
550 | int r = -EOPNOTSUPP; | 441 | int r = -EOPNOTSUPP; |
551 | 442 | ||
552 | if (lp->mii) { | 443 | if (lp->mii) { |
553 | spin_lock_irqsave(&lp->lock, flags); | 444 | spin_lock_irqsave(&lp->lock, flags); |
554 | mii_ethtool_gset(&lp->mii_if, cmd); | 445 | mii_ethtool_gset(&lp->mii_if, cmd); |
555 | spin_unlock_irqrestore(&lp->lock, flags); | 446 | spin_unlock_irqrestore(&lp->lock, flags); |
556 | r = 0; | 447 | r = 0; |
557 | } | 448 | } |
558 | return r; | 449 | return r; |
559 | } | 450 | } |
560 | 451 | ||
561 | static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) | 452 | static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) |
562 | { | 453 | { |
563 | struct pcnet32_private *lp = dev->priv; | 454 | struct pcnet32_private *lp = dev->priv; |
564 | unsigned long flags; | 455 | unsigned long flags; |
565 | int r = -EOPNOTSUPP; | 456 | int r = -EOPNOTSUPP; |
566 | 457 | ||
567 | if (lp->mii) { | 458 | if (lp->mii) { |
568 | spin_lock_irqsave(&lp->lock, flags); | 459 | spin_lock_irqsave(&lp->lock, flags); |
569 | r = mii_ethtool_sset(&lp->mii_if, cmd); | 460 | r = mii_ethtool_sset(&lp->mii_if, cmd); |
570 | spin_unlock_irqrestore(&lp->lock, flags); | 461 | spin_unlock_irqrestore(&lp->lock, flags); |
571 | } | 462 | } |
572 | return r; | 463 | return r; |
573 | } | 464 | } |
574 | 465 | ||
575 | static void pcnet32_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) | 466 | static void pcnet32_get_drvinfo(struct net_device *dev, |
467 | struct ethtool_drvinfo *info) | ||
576 | { | 468 | { |
577 | struct pcnet32_private *lp = dev->priv; | 469 | struct pcnet32_private *lp = dev->priv; |
578 | 470 | ||
579 | strcpy (info->driver, DRV_NAME); | 471 | strcpy(info->driver, DRV_NAME); |
580 | strcpy (info->version, DRV_VERSION); | 472 | strcpy(info->version, DRV_VERSION); |
581 | if (lp->pci_dev) | 473 | if (lp->pci_dev) |
582 | strcpy (info->bus_info, pci_name(lp->pci_dev)); | 474 | strcpy(info->bus_info, pci_name(lp->pci_dev)); |
583 | else | 475 | else |
584 | sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr); | 476 | sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr); |
585 | } | 477 | } |
586 | 478 | ||
587 | static u32 pcnet32_get_link(struct net_device *dev) | 479 | static u32 pcnet32_get_link(struct net_device *dev) |
588 | { | 480 | { |
589 | struct pcnet32_private *lp = dev->priv; | 481 | struct pcnet32_private *lp = dev->priv; |
590 | unsigned long flags; | 482 | unsigned long flags; |
591 | int r; | 483 | int r; |
592 | |||
593 | spin_lock_irqsave(&lp->lock, flags); | ||
594 | if (lp->mii) { | ||
595 | r = mii_link_ok(&lp->mii_if); | ||
596 | } else { | ||
597 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
598 | r = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
599 | } | ||
600 | spin_unlock_irqrestore(&lp->lock, flags); | ||
601 | 484 | ||
602 | return r; | 485 | spin_lock_irqsave(&lp->lock, flags); |
486 | if (lp->mii) { | ||
487 | r = mii_link_ok(&lp->mii_if); | ||
488 | } else { | ||
489 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
490 | r = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
491 | } | ||
492 | spin_unlock_irqrestore(&lp->lock, flags); | ||
493 | |||
494 | return r; | ||
603 | } | 495 | } |
604 | 496 | ||
605 | static u32 pcnet32_get_msglevel(struct net_device *dev) | 497 | static u32 pcnet32_get_msglevel(struct net_device *dev) |
606 | { | 498 | { |
607 | struct pcnet32_private *lp = dev->priv; | 499 | struct pcnet32_private *lp = dev->priv; |
608 | return lp->msg_enable; | 500 | return lp->msg_enable; |
609 | } | 501 | } |
610 | 502 | ||
611 | static void pcnet32_set_msglevel(struct net_device *dev, u32 value) | 503 | static void pcnet32_set_msglevel(struct net_device *dev, u32 value) |
612 | { | 504 | { |
613 | struct pcnet32_private *lp = dev->priv; | 505 | struct pcnet32_private *lp = dev->priv; |
614 | lp->msg_enable = value; | 506 | lp->msg_enable = value; |
615 | } | 507 | } |
616 | 508 | ||
617 | static int pcnet32_nway_reset(struct net_device *dev) | 509 | static int pcnet32_nway_reset(struct net_device *dev) |
618 | { | 510 | { |
619 | struct pcnet32_private *lp = dev->priv; | 511 | struct pcnet32_private *lp = dev->priv; |
620 | unsigned long flags; | 512 | unsigned long flags; |
621 | int r = -EOPNOTSUPP; | 513 | int r = -EOPNOTSUPP; |
622 | 514 | ||
623 | if (lp->mii) { | 515 | if (lp->mii) { |
624 | spin_lock_irqsave(&lp->lock, flags); | 516 | spin_lock_irqsave(&lp->lock, flags); |
625 | r = mii_nway_restart(&lp->mii_if); | 517 | r = mii_nway_restart(&lp->mii_if); |
626 | spin_unlock_irqrestore(&lp->lock, flags); | 518 | spin_unlock_irqrestore(&lp->lock, flags); |
627 | } | 519 | } |
628 | return r; | 520 | return r; |
629 | } | 521 | } |
630 | 522 | ||
631 | static void pcnet32_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) | 523 | static void pcnet32_get_ringparam(struct net_device *dev, |
524 | struct ethtool_ringparam *ering) | ||
632 | { | 525 | { |
633 | struct pcnet32_private *lp = dev->priv; | 526 | struct pcnet32_private *lp = dev->priv; |
634 | 527 | ||
635 | ering->tx_max_pending = TX_MAX_RING_SIZE - 1; | 528 | ering->tx_max_pending = TX_MAX_RING_SIZE - 1; |
636 | ering->tx_pending = lp->tx_ring_size - 1; | 529 | ering->tx_pending = lp->tx_ring_size - 1; |
637 | ering->rx_max_pending = RX_MAX_RING_SIZE - 1; | 530 | ering->rx_max_pending = RX_MAX_RING_SIZE - 1; |
638 | ering->rx_pending = lp->rx_ring_size - 1; | 531 | ering->rx_pending = lp->rx_ring_size - 1; |
639 | } | 532 | } |
640 | 533 | ||
641 | static int pcnet32_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) | 534 | static int pcnet32_set_ringparam(struct net_device *dev, |
535 | struct ethtool_ringparam *ering) | ||
642 | { | 536 | { |
643 | struct pcnet32_private *lp = dev->priv; | 537 | struct pcnet32_private *lp = dev->priv; |
644 | unsigned long flags; | 538 | unsigned long flags; |
645 | int i; | 539 | int i; |
646 | 540 | ||
647 | if (ering->rx_mini_pending || ering->rx_jumbo_pending) | 541 | if (ering->rx_mini_pending || ering->rx_jumbo_pending) |
648 | return -EINVAL; | 542 | return -EINVAL; |
649 | 543 | ||
650 | if (netif_running(dev)) | 544 | if (netif_running(dev)) |
651 | pcnet32_close(dev); | 545 | pcnet32_close(dev); |
652 | 546 | ||
653 | spin_lock_irqsave(&lp->lock, flags); | 547 | spin_lock_irqsave(&lp->lock, flags); |
654 | pcnet32_free_ring(dev); | ||
655 | lp->tx_ring_size = min(ering->tx_pending, (unsigned int) TX_MAX_RING_SIZE); | ||
656 | lp->rx_ring_size = min(ering->rx_pending, (unsigned int) RX_MAX_RING_SIZE); | ||
657 | |||
658 | /* set the minimum ring size to 4, to allow the loopback test to work | ||
659 | * unchanged. | ||
660 | */ | ||
661 | for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) { | ||
662 | if (lp->tx_ring_size <= (1 << i)) | ||
663 | break; | ||
664 | } | ||
665 | lp->tx_ring_size = (1 << i); | ||
666 | lp->tx_mod_mask = lp->tx_ring_size - 1; | ||
667 | lp->tx_len_bits = (i << 12); | ||
668 | |||
669 | for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) { | ||
670 | if (lp->rx_ring_size <= (1 << i)) | ||
671 | break; | ||
672 | } | ||
673 | lp->rx_ring_size = (1 << i); | ||
674 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
675 | lp->rx_len_bits = (i << 4); | ||
676 | |||
677 | if (pcnet32_alloc_ring(dev, dev->name)) { | ||
678 | pcnet32_free_ring(dev); | 548 | pcnet32_free_ring(dev); |
679 | spin_unlock_irqrestore(&lp->lock, flags); | 549 | lp->tx_ring_size = |
680 | return -ENOMEM; | 550 | min(ering->tx_pending, (unsigned int)TX_MAX_RING_SIZE); |
681 | } | 551 | lp->rx_ring_size = |
552 | min(ering->rx_pending, (unsigned int)RX_MAX_RING_SIZE); | ||
553 | |||
554 | /* set the minimum ring size to 4, to allow the loopback test to work | ||
555 | * unchanged. | ||
556 | */ | ||
557 | for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) { | ||
558 | if (lp->tx_ring_size <= (1 << i)) | ||
559 | break; | ||
560 | } | ||
561 | lp->tx_ring_size = (1 << i); | ||
562 | lp->tx_mod_mask = lp->tx_ring_size - 1; | ||
563 | lp->tx_len_bits = (i << 12); | ||
682 | 564 | ||
683 | spin_unlock_irqrestore(&lp->lock, flags); | 565 | for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) { |
566 | if (lp->rx_ring_size <= (1 << i)) | ||
567 | break; | ||
568 | } | ||
569 | lp->rx_ring_size = (1 << i); | ||
570 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
571 | lp->rx_len_bits = (i << 4); | ||
572 | |||
573 | if (pcnet32_alloc_ring(dev, dev->name)) { | ||
574 | pcnet32_free_ring(dev); | ||
575 | spin_unlock_irqrestore(&lp->lock, flags); | ||
576 | return -ENOMEM; | ||
577 | } | ||
684 | 578 | ||
685 | if (pcnet32_debug & NETIF_MSG_DRV) | 579 | spin_unlock_irqrestore(&lp->lock, flags); |
686 | printk(KERN_INFO PFX "%s: Ring Param Settings: RX: %d, TX: %d\n", | ||
687 | dev->name, lp->rx_ring_size, lp->tx_ring_size); | ||
688 | 580 | ||
689 | if (netif_running(dev)) | 581 | if (pcnet32_debug & NETIF_MSG_DRV) |
690 | pcnet32_open(dev); | 582 | printk(KERN_INFO PFX |
583 | "%s: Ring Param Settings: RX: %d, TX: %d\n", dev->name, | ||
584 | lp->rx_ring_size, lp->tx_ring_size); | ||
691 | 585 | ||
692 | return 0; | 586 | if (netif_running(dev)) |
587 | pcnet32_open(dev); | ||
588 | |||
589 | return 0; | ||
693 | } | 590 | } |
694 | 591 | ||
695 | static void pcnet32_get_strings(struct net_device *dev, u32 stringset, u8 *data) | 592 | static void pcnet32_get_strings(struct net_device *dev, u32 stringset, |
593 | u8 * data) | ||
696 | { | 594 | { |
697 | memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test)); | 595 | memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test)); |
698 | } | 596 | } |
699 | 597 | ||
700 | static int pcnet32_self_test_count(struct net_device *dev) | 598 | static int pcnet32_self_test_count(struct net_device *dev) |
701 | { | 599 | { |
702 | return PCNET32_TEST_LEN; | 600 | return PCNET32_TEST_LEN; |
703 | } | 601 | } |
704 | 602 | ||
705 | static void pcnet32_ethtool_test(struct net_device *dev, | 603 | static void pcnet32_ethtool_test(struct net_device *dev, |
706 | struct ethtool_test *test, u64 *data) | 604 | struct ethtool_test *test, u64 * data) |
707 | { | 605 | { |
708 | struct pcnet32_private *lp = dev->priv; | 606 | struct pcnet32_private *lp = dev->priv; |
709 | int rc; | 607 | int rc; |
710 | 608 | ||
711 | if (test->flags == ETH_TEST_FL_OFFLINE) { | 609 | if (test->flags == ETH_TEST_FL_OFFLINE) { |
712 | rc = pcnet32_loopback_test(dev, data); | 610 | rc = pcnet32_loopback_test(dev, data); |
713 | if (rc) { | 611 | if (rc) { |
714 | if (netif_msg_hw(lp)) | 612 | if (netif_msg_hw(lp)) |
715 | printk(KERN_DEBUG "%s: Loopback test failed.\n", dev->name); | 613 | printk(KERN_DEBUG "%s: Loopback test failed.\n", |
716 | test->flags |= ETH_TEST_FL_FAILED; | 614 | dev->name); |
615 | test->flags |= ETH_TEST_FL_FAILED; | ||
616 | } else if (netif_msg_hw(lp)) | ||
617 | printk(KERN_DEBUG "%s: Loopback test passed.\n", | ||
618 | dev->name); | ||
717 | } else if (netif_msg_hw(lp)) | 619 | } else if (netif_msg_hw(lp)) |
718 | printk(KERN_DEBUG "%s: Loopback test passed.\n", dev->name); | 620 | printk(KERN_DEBUG |
719 | } else if (netif_msg_hw(lp)) | 621 | "%s: No tests to run (specify 'Offline' on ethtool).", |
720 | printk(KERN_DEBUG "%s: No tests to run (specify 'Offline' on ethtool).", dev->name); | 622 | dev->name); |
721 | } /* end pcnet32_ethtool_test */ | 623 | } /* end pcnet32_ethtool_test */ |
722 | 624 | ||
723 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1) | 625 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1) |
724 | { | 626 | { |
725 | struct pcnet32_private *lp = dev->priv; | 627 | struct pcnet32_private *lp = dev->priv; |
726 | struct pcnet32_access *a = &lp->a; /* access to registers */ | 628 | struct pcnet32_access *a = &lp->a; /* access to registers */ |
727 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | 629 | ulong ioaddr = dev->base_addr; /* card base I/O address */ |
728 | struct sk_buff *skb; /* sk buff */ | 630 | struct sk_buff *skb; /* sk buff */ |
729 | int x, i; /* counters */ | 631 | int x, i; /* counters */ |
730 | int numbuffs = 4; /* number of TX/RX buffers and descs */ | 632 | int numbuffs = 4; /* number of TX/RX buffers and descs */ |
731 | u16 status = 0x8300; /* TX ring status */ | 633 | u16 status = 0x8300; /* TX ring status */ |
732 | u16 teststatus; /* test of ring status */ | 634 | u16 teststatus; /* test of ring status */ |
733 | int rc; /* return code */ | 635 | int rc; /* return code */ |
734 | int size; /* size of packets */ | 636 | int size; /* size of packets */ |
735 | unsigned char *packet; /* source packet data */ | 637 | unsigned char *packet; /* source packet data */ |
736 | static const int data_len = 60; /* length of source packets */ | 638 | static const int data_len = 60; /* length of source packets */ |
737 | unsigned long flags; | 639 | unsigned long flags; |
738 | unsigned long ticks; | 640 | unsigned long ticks; |
739 | 641 | ||
740 | *data1 = 1; /* status of test, default to fail */ | 642 | *data1 = 1; /* status of test, default to fail */ |
741 | rc = 1; /* default to fail */ | 643 | rc = 1; /* default to fail */ |
742 | 644 | ||
743 | if (netif_running(dev)) | 645 | if (netif_running(dev)) |
744 | pcnet32_close(dev); | 646 | pcnet32_close(dev); |
745 | 647 | ||
746 | spin_lock_irqsave(&lp->lock, flags); | 648 | spin_lock_irqsave(&lp->lock, flags); |
747 | 649 | ||
748 | /* Reset the PCNET32 */ | 650 | /* Reset the PCNET32 */ |
749 | lp->a.reset (ioaddr); | 651 | lp->a.reset(ioaddr); |
750 | 652 | ||
751 | /* switch pcnet32 to 32bit mode */ | 653 | /* switch pcnet32 to 32bit mode */ |
752 | lp->a.write_bcr (ioaddr, 20, 2); | 654 | lp->a.write_bcr(ioaddr, 20, 2); |
753 | 655 | ||
754 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 656 | lp->init_block.mode = |
755 | lp->init_block.filter[0] = 0; | 657 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
756 | lp->init_block.filter[1] = 0; | 658 | lp->init_block.filter[0] = 0; |
757 | 659 | lp->init_block.filter[1] = 0; | |
758 | /* purge & init rings but don't actually restart */ | 660 | |
759 | pcnet32_restart(dev, 0x0000); | 661 | /* purge & init rings but don't actually restart */ |
760 | 662 | pcnet32_restart(dev, 0x0000); | |
761 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | 663 | |
762 | 664 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | |
763 | /* Initialize Transmit buffers. */ | 665 | |
764 | size = data_len + 15; | 666 | /* Initialize Transmit buffers. */ |
765 | for (x=0; x<numbuffs; x++) { | 667 | size = data_len + 15; |
766 | if (!(skb = dev_alloc_skb(size))) { | 668 | for (x = 0; x < numbuffs; x++) { |
767 | if (netif_msg_hw(lp)) | 669 | if (!(skb = dev_alloc_skb(size))) { |
768 | printk(KERN_DEBUG "%s: Cannot allocate skb at line: %d!\n", | 670 | if (netif_msg_hw(lp)) |
769 | dev->name, __LINE__); | 671 | printk(KERN_DEBUG |
770 | goto clean_up; | 672 | "%s: Cannot allocate skb at line: %d!\n", |
771 | } else { | 673 | dev->name, __LINE__); |
772 | packet = skb->data; | 674 | goto clean_up; |
773 | skb_put(skb, size); /* create space for data */ | 675 | } else { |
774 | lp->tx_skbuff[x] = skb; | 676 | packet = skb->data; |
775 | lp->tx_ring[x].length = le16_to_cpu(-skb->len); | 677 | skb_put(skb, size); /* create space for data */ |
776 | lp->tx_ring[x].misc = 0; | 678 | lp->tx_skbuff[x] = skb; |
777 | 679 | lp->tx_ring[x].length = le16_to_cpu(-skb->len); | |
778 | /* put DA and SA into the skb */ | 680 | lp->tx_ring[x].misc = 0; |
779 | for (i=0; i<6; i++) | 681 | |
780 | *packet++ = dev->dev_addr[i]; | 682 | /* put DA and SA into the skb */ |
781 | for (i=0; i<6; i++) | 683 | for (i = 0; i < 6; i++) |
782 | *packet++ = dev->dev_addr[i]; | 684 | *packet++ = dev->dev_addr[i]; |
783 | /* type */ | 685 | for (i = 0; i < 6; i++) |
784 | *packet++ = 0x08; | 686 | *packet++ = dev->dev_addr[i]; |
785 | *packet++ = 0x06; | 687 | /* type */ |
786 | /* packet number */ | 688 | *packet++ = 0x08; |
787 | *packet++ = x; | 689 | *packet++ = 0x06; |
788 | /* fill packet with data */ | 690 | /* packet number */ |
789 | for (i=0; i<data_len; i++) | 691 | *packet++ = x; |
790 | *packet++ = i; | 692 | /* fill packet with data */ |
791 | 693 | for (i = 0; i < data_len; i++) | |
792 | lp->tx_dma_addr[x] = pci_map_single(lp->pci_dev, skb->data, | 694 | *packet++ = i; |
793 | skb->len, PCI_DMA_TODEVICE); | 695 | |
794 | lp->tx_ring[x].base = (u32)le32_to_cpu(lp->tx_dma_addr[x]); | 696 | lp->tx_dma_addr[x] = |
795 | wmb(); /* Make sure owner changes after all others are visible */ | 697 | pci_map_single(lp->pci_dev, skb->data, skb->len, |
796 | lp->tx_ring[x].status = le16_to_cpu(status); | 698 | PCI_DMA_TODEVICE); |
797 | } | 699 | lp->tx_ring[x].base = |
798 | } | 700 | (u32) le32_to_cpu(lp->tx_dma_addr[x]); |
799 | 701 | wmb(); /* Make sure owner changes after all others are visible */ | |
800 | x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */ | 702 | lp->tx_ring[x].status = le16_to_cpu(status); |
801 | x = x | 0x0002; | 703 | } |
802 | a->write_bcr(ioaddr, 32, x); | 704 | } |
803 | 705 | ||
804 | lp->a.write_csr (ioaddr, 15, 0x0044); /* set int loopback in CSR15 */ | 706 | x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */ |
805 | 707 | x = x | 0x0002; | |
806 | teststatus = le16_to_cpu(0x8000); | 708 | a->write_bcr(ioaddr, 32, x); |
807 | lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */ | 709 | |
808 | 710 | lp->a.write_csr(ioaddr, 15, 0x0044); /* set int loopback in CSR15 */ | |
809 | /* Check status of descriptors */ | 711 | |
810 | for (x=0; x<numbuffs; x++) { | 712 | teststatus = le16_to_cpu(0x8000); |
811 | ticks = 0; | 713 | lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */ |
812 | rmb(); | 714 | |
813 | while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) { | 715 | /* Check status of descriptors */ |
814 | spin_unlock_irqrestore(&lp->lock, flags); | 716 | for (x = 0; x < numbuffs; x++) { |
815 | mdelay(1); | 717 | ticks = 0; |
816 | spin_lock_irqsave(&lp->lock, flags); | 718 | rmb(); |
817 | rmb(); | 719 | while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) { |
818 | ticks++; | 720 | spin_unlock_irqrestore(&lp->lock, flags); |
819 | } | 721 | mdelay(1); |
820 | if (ticks == 200) { | 722 | spin_lock_irqsave(&lp->lock, flags); |
821 | if (netif_msg_hw(lp)) | 723 | rmb(); |
822 | printk("%s: Desc %d failed to reset!\n",dev->name,x); | 724 | ticks++; |
823 | break; | 725 | } |
824 | } | 726 | if (ticks == 200) { |
825 | } | 727 | if (netif_msg_hw(lp)) |
826 | 728 | printk("%s: Desc %d failed to reset!\n", | |
827 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | 729 | dev->name, x); |
828 | wmb(); | 730 | break; |
829 | if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) { | 731 | } |
830 | printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name); | 732 | } |
831 | 733 | ||
832 | for (x=0; x<numbuffs; x++) { | 734 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ |
833 | printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x); | 735 | wmb(); |
834 | skb = lp->rx_skbuff[x]; | 736 | if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) { |
835 | for (i=0; i<size; i++) { | 737 | printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name); |
836 | printk("%02x ", *(skb->data+i)); | 738 | |
837 | } | 739 | for (x = 0; x < numbuffs; x++) { |
838 | printk("\n"); | 740 | printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x); |
839 | } | 741 | skb = lp->rx_skbuff[x]; |
840 | } | 742 | for (i = 0; i < size; i++) { |
841 | 743 | printk("%02x ", *(skb->data + i)); | |
842 | x = 0; | 744 | } |
843 | rc = 0; | 745 | printk("\n"); |
844 | while (x<numbuffs && !rc) { | 746 | } |
845 | skb = lp->rx_skbuff[x]; | 747 | } |
846 | packet = lp->tx_skbuff[x]->data; | 748 | |
847 | for (i=0; i<size; i++) { | 749 | x = 0; |
848 | if (*(skb->data+i) != packet[i]) { | 750 | rc = 0; |
849 | if (netif_msg_hw(lp)) | 751 | while (x < numbuffs && !rc) { |
850 | printk(KERN_DEBUG "%s: Error in compare! %2x - %02x %02x\n", | 752 | skb = lp->rx_skbuff[x]; |
851 | dev->name, i, *(skb->data+i), packet[i]); | 753 | packet = lp->tx_skbuff[x]->data; |
852 | rc = 1; | 754 | for (i = 0; i < size; i++) { |
853 | break; | 755 | if (*(skb->data + i) != packet[i]) { |
854 | } | 756 | if (netif_msg_hw(lp)) |
757 | printk(KERN_DEBUG | ||
758 | "%s: Error in compare! %2x - %02x %02x\n", | ||
759 | dev->name, i, *(skb->data + i), | ||
760 | packet[i]); | ||
761 | rc = 1; | ||
762 | break; | ||
763 | } | ||
764 | } | ||
765 | x++; | ||
766 | } | ||
767 | if (!rc) { | ||
768 | *data1 = 0; | ||
855 | } | 769 | } |
856 | x++; | ||
857 | } | ||
858 | if (!rc) { | ||
859 | *data1 = 0; | ||
860 | } | ||
861 | 770 | ||
862 | clean_up: | 771 | clean_up: |
863 | pcnet32_purge_tx_ring(dev); | 772 | pcnet32_purge_tx_ring(dev); |
864 | x = a->read_csr(ioaddr, 15) & 0xFFFF; | 773 | x = a->read_csr(ioaddr, 15) & 0xFFFF; |
865 | a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */ | 774 | a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */ |
866 | 775 | ||
867 | x = a->read_bcr(ioaddr, 32); /* reset internal loopback */ | 776 | x = a->read_bcr(ioaddr, 32); /* reset internal loopback */ |
868 | x = x & ~0x0002; | 777 | x = x & ~0x0002; |
869 | a->write_bcr(ioaddr, 32, x); | 778 | a->write_bcr(ioaddr, 32, x); |
870 | 779 | ||
871 | spin_unlock_irqrestore(&lp->lock, flags); | 780 | spin_unlock_irqrestore(&lp->lock, flags); |
872 | 781 | ||
873 | if (netif_running(dev)) { | 782 | if (netif_running(dev)) { |
874 | pcnet32_open(dev); | 783 | pcnet32_open(dev); |
875 | } else { | 784 | } else { |
876 | lp->a.write_bcr (ioaddr, 20, 4); /* return to 16bit mode */ | 785 | lp->a.write_bcr(ioaddr, 20, 4); /* return to 16bit mode */ |
877 | } | 786 | } |
878 | 787 | ||
879 | return(rc); | 788 | return (rc); |
880 | } /* end pcnet32_loopback_test */ | 789 | } /* end pcnet32_loopback_test */ |
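
For reference, the offline loopback test above is what userspace reaches through the standard ETHTOOL_TEST ioctl, roughly what "ethtool -t ethX offline" does. Below is a minimal, illustrative sketch of such a caller; it is not part of this patch, run_offline_selftest is a hypothetical helper name, and error handling is abbreviated. The number of result words is taken from the driver's testinfo_len rather than assumed.

/*
 * Illustrative sketch: trigger the driver's offline self-test via
 * SIOCETHTOOL / ETHTOOL_TEST.  Not part of this patch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int run_offline_selftest(const char *ifname)
{
	struct ifreq ifr;
	struct ethtool_drvinfo drvinfo;
	struct ethtool_test *test;
	unsigned int i;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

	/* how many u64 results will the driver report? (self_test_count) */
	memset(&drvinfo, 0, sizeof(drvinfo));
	drvinfo.cmd = ETHTOOL_GDRVINFO;
	ifr.ifr_data = (void *)&drvinfo;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	test = calloc(1, sizeof(*test) + drvinfo.testinfo_len * sizeof(__u64));
	if (!test)
		return -1;
	test->cmd = ETHTOOL_TEST;
	test->flags = ETH_TEST_FL_OFFLINE;	/* request the offline path */
	test->len = drvinfo.testinfo_len;
	ifr.ifr_data = (void *)test;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	for (i = 0; i < test->len; i++)
		printf("test result %u: %llu\n", i,
		       (unsigned long long)test->data[i]);
	printf("overall: %s\n",
	       (test->flags & ETH_TEST_FL_FAILED) ? "FAILED" : "PASSED");
	free(test);
	return 0;
}

As in pcnet32_loopback_test above, the driver reports 1 in the result word on failure and 0 on success, and sets ETH_TEST_FL_FAILED in the returned flags when the loopback comparison fails.
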
881 | 790 | ||
882 | static void pcnet32_led_blink_callback(struct net_device *dev) | 791 | static void pcnet32_led_blink_callback(struct net_device *dev) |
883 | { | 792 | { |
884 | struct pcnet32_private *lp = dev->priv; | 793 | struct pcnet32_private *lp = dev->priv; |
885 | struct pcnet32_access *a = &lp->a; | 794 | struct pcnet32_access *a = &lp->a; |
886 | ulong ioaddr = dev->base_addr; | 795 | ulong ioaddr = dev->base_addr; |
887 | unsigned long flags; | 796 | unsigned long flags; |
888 | int i; | 797 | int i; |
889 | 798 | ||
890 | spin_lock_irqsave(&lp->lock, flags); | 799 | spin_lock_irqsave(&lp->lock, flags); |
891 | for (i=4; i<8; i++) { | 800 | for (i = 4; i < 8; i++) { |
892 | a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000); | 801 | a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000); |
893 | } | 802 | } |
894 | spin_unlock_irqrestore(&lp->lock, flags); | 803 | spin_unlock_irqrestore(&lp->lock, flags); |
895 | 804 | ||
896 | mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT); | 805 | mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT); |
897 | } | 806 | } |
898 | 807 | ||
899 | static int pcnet32_phys_id(struct net_device *dev, u32 data) | 808 | static int pcnet32_phys_id(struct net_device *dev, u32 data) |
900 | { | 809 | { |
901 | struct pcnet32_private *lp = dev->priv; | 810 | struct pcnet32_private *lp = dev->priv; |
902 | struct pcnet32_access *a = &lp->a; | 811 | struct pcnet32_access *a = &lp->a; |
903 | ulong ioaddr = dev->base_addr; | 812 | ulong ioaddr = dev->base_addr; |
904 | unsigned long flags; | 813 | unsigned long flags; |
905 | int i, regs[4]; | 814 | int i, regs[4]; |
906 | 815 | ||
907 | if (!lp->blink_timer.function) { | 816 | if (!lp->blink_timer.function) { |
908 | init_timer(&lp->blink_timer); | 817 | init_timer(&lp->blink_timer); |
909 | lp->blink_timer.function = (void *) pcnet32_led_blink_callback; | 818 | lp->blink_timer.function = (void *)pcnet32_led_blink_callback; |
910 | lp->blink_timer.data = (unsigned long) dev; | 819 | lp->blink_timer.data = (unsigned long)dev; |
911 | } | 820 | } |
912 | 821 | ||
913 | /* Save the current value of the bcrs */ | 822 | /* Save the current value of the bcrs */ |
914 | spin_lock_irqsave(&lp->lock, flags); | 823 | spin_lock_irqsave(&lp->lock, flags); |
915 | for (i=4; i<8; i++) { | 824 | for (i = 4; i < 8; i++) { |
916 | regs[i-4] = a->read_bcr(ioaddr, i); | 825 | regs[i - 4] = a->read_bcr(ioaddr, i); |
917 | } | 826 | } |
918 | spin_unlock_irqrestore(&lp->lock, flags); | 827 | spin_unlock_irqrestore(&lp->lock, flags); |
919 | 828 | ||
920 | mod_timer(&lp->blink_timer, jiffies); | 829 | mod_timer(&lp->blink_timer, jiffies); |
921 | set_current_state(TASK_INTERRUPTIBLE); | 830 | set_current_state(TASK_INTERRUPTIBLE); |
922 | 831 | ||
923 | if ((!data) || (data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))) | 832 | if ((!data) || (data > (u32) (MAX_SCHEDULE_TIMEOUT / HZ))) |
924 | data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ); | 833 | data = (u32) (MAX_SCHEDULE_TIMEOUT / HZ); |
925 | 834 | ||
926 | msleep_interruptible(data * 1000); | 835 | msleep_interruptible(data * 1000); |
927 | del_timer_sync(&lp->blink_timer); | 836 | del_timer_sync(&lp->blink_timer); |
928 | 837 | ||
929 | /* Restore the original value of the bcrs */ | 838 | /* Restore the original value of the bcrs */ |
930 | spin_lock_irqsave(&lp->lock, flags); | 839 | spin_lock_irqsave(&lp->lock, flags); |
931 | for (i=4; i<8; i++) { | 840 | for (i = 4; i < 8; i++) { |
932 | a->write_bcr(ioaddr, i, regs[i-4]); | 841 | a->write_bcr(ioaddr, i, regs[i - 4]); |
933 | } | 842 | } |
934 | spin_unlock_irqrestore(&lp->lock, flags); | 843 | spin_unlock_irqrestore(&lp->lock, flags); |
935 | 844 | ||
936 | return 0; | 845 | return 0; |
937 | } | 846 | } |
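
pcnet32_phys_id above is reached through the ETHTOOL_PHYS_ID ioctl, roughly what "ethtool -p ethX N" does; the driver blinks the LEDs by toggling bit 0x4000 in BCR4-BCR7 on a timer, and a duration of 0 blinks for the maximum schedulable timeout. A small illustrative sketch of such a caller (blink_leds is a hypothetical helper name, error handling abbreviated):

/* Illustrative sketch: blink the adapter LEDs for 'seconds' seconds. */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int blink_leds(const char *ifname, unsigned int seconds)
{
	struct ifreq ifr;
	struct ethtool_value id = { .cmd = ETHTOOL_PHYS_ID, .data = seconds };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&id;
	return ioctl(fd, SIOCETHTOOL, &ifr);	/* returns when blinking stops */
}
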
938 | 847 | ||
848 | #define PCNET32_REGS_PER_PHY 32 | ||
849 | #define PCNET32_MAX_PHYS 32 | ||
939 | static int pcnet32_get_regs_len(struct net_device *dev) | 850 | static int pcnet32_get_regs_len(struct net_device *dev) |
940 | { | 851 | { |
941 | return(PCNET32_NUM_REGS * sizeof(u16)); | 852 | struct pcnet32_private *lp = dev->priv; |
853 | int j = lp->phycount * PCNET32_REGS_PER_PHY; | ||
854 | |||
855 | return ((PCNET32_NUM_REGS + j) * sizeof(u16)); | ||
942 | } | 856 | } |
943 | 857 | ||
944 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, | 858 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, |
945 | void *ptr) | 859 | void *ptr) |
946 | { | 860 | { |
947 | int i, csr0; | 861 | int i, csr0; |
948 | u16 *buff = ptr; | 862 | u16 *buff = ptr; |
949 | struct pcnet32_private *lp = dev->priv; | 863 | struct pcnet32_private *lp = dev->priv; |
950 | struct pcnet32_access *a = &lp->a; | 864 | struct pcnet32_access *a = &lp->a; |
951 | ulong ioaddr = dev->base_addr; | 865 | ulong ioaddr = dev->base_addr; |
952 | int ticks; | 866 | int ticks; |
953 | unsigned long flags; | 867 | unsigned long flags; |
954 | |||
955 | spin_lock_irqsave(&lp->lock, flags); | ||
956 | |||
957 | csr0 = a->read_csr(ioaddr, 0); | ||
958 | if (!(csr0 & 0x0004)) { /* If not stopped */ | ||
959 | /* set SUSPEND (SPND) - CSR5 bit 0 */ | ||
960 | a->write_csr(ioaddr, 5, 0x0001); | ||
961 | |||
962 | /* poll waiting for bit to be set */ | ||
963 | ticks = 0; | ||
964 | while (!(a->read_csr(ioaddr, 5) & 0x0001)) { | ||
965 | spin_unlock_irqrestore(&lp->lock, flags); | ||
966 | mdelay(1); | ||
967 | spin_lock_irqsave(&lp->lock, flags); | ||
968 | ticks++; | ||
969 | if (ticks > 200) { | ||
970 | if (netif_msg_hw(lp)) | ||
971 | printk(KERN_DEBUG "%s: Error getting into suspend!\n", | ||
972 | dev->name); | ||
973 | break; | ||
974 | } | ||
975 | } | ||
976 | } | ||
977 | 868 | ||
978 | /* read address PROM */ | 869 | spin_lock_irqsave(&lp->lock, flags); |
979 | for (i=0; i<16; i += 2) | ||
980 | *buff++ = inw(ioaddr + i); | ||
981 | 870 | ||
982 | /* read control and status registers */ | 871 | csr0 = a->read_csr(ioaddr, 0); |
983 | for (i=0; i<90; i++) { | 872 | if (!(csr0 & 0x0004)) { /* If not stopped */ |
984 | *buff++ = a->read_csr(ioaddr, i); | 873 | /* set SUSPEND (SPND) - CSR5 bit 0 */ |
985 | } | 874 | a->write_csr(ioaddr, 5, 0x0001); |
875 | |||
876 | /* poll waiting for bit to be set */ | ||
877 | ticks = 0; | ||
878 | while (!(a->read_csr(ioaddr, 5) & 0x0001)) { | ||
879 | spin_unlock_irqrestore(&lp->lock, flags); | ||
880 | mdelay(1); | ||
881 | spin_lock_irqsave(&lp->lock, flags); | ||
882 | ticks++; | ||
883 | if (ticks > 200) { | ||
884 | if (netif_msg_hw(lp)) | ||
885 | printk(KERN_DEBUG | ||
886 | "%s: Error getting into suspend!\n", | ||
887 | dev->name); | ||
888 | break; | ||
889 | } | ||
890 | } | ||
891 | } | ||
986 | 892 | ||
987 | *buff++ = a->read_csr(ioaddr, 112); | 893 | /* read address PROM */ |
988 | *buff++ = a->read_csr(ioaddr, 114); | 894 | for (i = 0; i < 16; i += 2) |
895 | *buff++ = inw(ioaddr + i); | ||
989 | 896 | ||
990 | /* read bus configuration registers */ | 897 | /* read control and status registers */ |
991 | for (i=0; i<30; i++) { | 898 | for (i = 0; i < 90; i++) { |
992 | *buff++ = a->read_bcr(ioaddr, i); | 899 | *buff++ = a->read_csr(ioaddr, i); |
993 | } | 900 | } |
994 | *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */ | 901 | |
995 | for (i=31; i<36; i++) { | 902 | *buff++ = a->read_csr(ioaddr, 112); |
996 | *buff++ = a->read_bcr(ioaddr, i); | 903 | *buff++ = a->read_csr(ioaddr, 114); |
997 | } | ||
998 | 904 | ||
999 | /* read mii phy registers */ | 905 | /* read bus configuration registers */ |
1000 | if (lp->mii) { | 906 | for (i = 0; i < 30; i++) { |
1001 | for (i=0; i<32; i++) { | 907 | *buff++ = a->read_bcr(ioaddr, i); |
1002 | lp->a.write_bcr(ioaddr, 33, ((lp->mii_if.phy_id) << 5) | i); | 908 | } |
1003 | *buff++ = lp->a.read_bcr(ioaddr, 34); | 909 | *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */ |
910 | for (i = 31; i < 36; i++) { | ||
911 | *buff++ = a->read_bcr(ioaddr, i); | ||
1004 | } | 912 | } |
1005 | } | ||
1006 | 913 | ||
1007 | if (!(csr0 & 0x0004)) { /* If not stopped */ | 914 | /* read mii phy registers */ |
1008 | /* clear SUSPEND (SPND) - CSR5 bit 0 */ | 915 | if (lp->mii) { |
1009 | a->write_csr(ioaddr, 5, 0x0000); | 916 | int j; |
1010 | } | 917 | for (j = 0; j < PCNET32_MAX_PHYS; j++) { |
918 | if (lp->phymask & (1 << j)) { | ||
919 | for (i = 0; i < PCNET32_REGS_PER_PHY; i++) { | ||
920 | lp->a.write_bcr(ioaddr, 33, | ||
921 | (j << 5) | i); | ||
922 | *buff++ = lp->a.read_bcr(ioaddr, 34); | ||
923 | } | ||
924 | } | ||
925 | } | ||
926 | } | ||
1011 | 927 | ||
1012 | i = buff - (u16 *)ptr; | 928 | if (!(csr0 & 0x0004)) { /* If not stopped */ |
1013 | for (; i < PCNET32_NUM_REGS; i++) | 929 | /* clear SUSPEND (SPND) - CSR5 bit 0 */ |
1014 | *buff++ = 0; | 930 | a->write_csr(ioaddr, 5, 0x0000); |
931 | } | ||
1015 | 932 | ||
1016 | spin_unlock_irqrestore(&lp->lock, flags); | 933 | spin_unlock_irqrestore(&lp->lock, flags); |
1017 | } | 934 | } |
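
With this patch, pcnet32_get_regs_len reports (PCNET32_NUM_REGS + phycount * PCNET32_REGS_PER_PHY) 16-bit words, so the register dump now grows with the number of PHYs found on the board. A userspace reader that sizes its buffer from regdump_len picks this up automatically. The sketch below illustrates that pattern via ETHTOOL_GDRVINFO and ETHTOOL_GREGS; it is not part of this patch, dump_regs is a hypothetical helper name, and error handling is abbreviated.

/*
 * Illustrative sketch: dump the register set exposed by
 * pcnet32_get_regs_len / pcnet32_get_regs.  Not part of this patch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int dump_regs(const char *ifname)
{
	struct ifreq ifr;
	struct ethtool_drvinfo drvinfo;
	struct ethtool_regs *regs;
	unsigned int i;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

	/* ask the driver how large its dump is (pcnet32_get_regs_len) */
	memset(&drvinfo, 0, sizeof(drvinfo));
	drvinfo.cmd = ETHTOOL_GDRVINFO;
	ifr.ifr_data = (void *)&drvinfo;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	regs = calloc(1, sizeof(*regs) + drvinfo.regdump_len);
	if (!regs)
		return -1;
	regs->cmd = ETHTOOL_GREGS;
	regs->len = drvinfo.regdump_len;
	ifr.ifr_data = (void *)regs;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	/* pcnet32_get_regs fills the buffer with 16-bit register values */
	for (i = 0; i < regs->len / 2; i++)
		printf("%04x%c", ((unsigned short *)regs->data)[i],
		       (i % 8 == 7) ? '\n' : ' ');
	printf("\n");
	free(regs);
	return 0;
}
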
1018 | 935 | ||
1019 | static struct ethtool_ops pcnet32_ethtool_ops = { | 936 | static struct ethtool_ops pcnet32_ethtool_ops = { |
1020 | .get_settings = pcnet32_get_settings, | 937 | .get_settings = pcnet32_get_settings, |
1021 | .set_settings = pcnet32_set_settings, | 938 | .set_settings = pcnet32_set_settings, |
1022 | .get_drvinfo = pcnet32_get_drvinfo, | 939 | .get_drvinfo = pcnet32_get_drvinfo, |
1023 | .get_msglevel = pcnet32_get_msglevel, | 940 | .get_msglevel = pcnet32_get_msglevel, |
1024 | .set_msglevel = pcnet32_set_msglevel, | 941 | .set_msglevel = pcnet32_set_msglevel, |
1025 | .nway_reset = pcnet32_nway_reset, | 942 | .nway_reset = pcnet32_nway_reset, |
1026 | .get_link = pcnet32_get_link, | 943 | .get_link = pcnet32_get_link, |
1027 | .get_ringparam = pcnet32_get_ringparam, | 944 | .get_ringparam = pcnet32_get_ringparam, |
1028 | .set_ringparam = pcnet32_set_ringparam, | 945 | .set_ringparam = pcnet32_set_ringparam, |
1029 | .get_tx_csum = ethtool_op_get_tx_csum, | 946 | .get_tx_csum = ethtool_op_get_tx_csum, |
1030 | .get_sg = ethtool_op_get_sg, | 947 | .get_sg = ethtool_op_get_sg, |
1031 | .get_tso = ethtool_op_get_tso, | 948 | .get_tso = ethtool_op_get_tso, |
1032 | .get_strings = pcnet32_get_strings, | 949 | .get_strings = pcnet32_get_strings, |
1033 | .self_test_count = pcnet32_self_test_count, | 950 | .self_test_count = pcnet32_self_test_count, |
1034 | .self_test = pcnet32_ethtool_test, | 951 | .self_test = pcnet32_ethtool_test, |
1035 | .phys_id = pcnet32_phys_id, | 952 | .phys_id = pcnet32_phys_id, |
1036 | .get_regs_len = pcnet32_get_regs_len, | 953 | .get_regs_len = pcnet32_get_regs_len, |
1037 | .get_regs = pcnet32_get_regs, | 954 | .get_regs = pcnet32_get_regs, |
1038 | .get_perm_addr = ethtool_op_get_perm_addr, | 955 | .get_perm_addr = ethtool_op_get_perm_addr, |
1039 | }; | 956 | }; |
1040 | 957 | ||
1041 | /* only probes for non-PCI devices, the rest are handled by | 958 | /* only probes for non-PCI devices, the rest are handled by |
1042 | * pci_register_driver via pcnet32_probe_pci */ | 959 | * pci_register_driver via pcnet32_probe_pci */ |
1043 | 960 | ||
1044 | static void __devinit | 961 | static void __devinit pcnet32_probe_vlbus(void) |
1045 | pcnet32_probe_vlbus(void) | ||
1046 | { | 962 | { |
1047 | unsigned int *port, ioaddr; | 963 | unsigned int *port, ioaddr; |
1048 | 964 | ||
1049 | /* search for PCnet32 VLB cards at known addresses */ | 965 | /* search for PCnet32 VLB cards at known addresses */ |
1050 | for (port = pcnet32_portlist; (ioaddr = *port); port++) { | 966 | for (port = pcnet32_portlist; (ioaddr = *port); port++) { |
1051 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) { | 967 | if (request_region |
1052 | /* check if there is really a pcnet chip on that ioaddr */ | 968 | (ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) { |
1053 | if ((inb(ioaddr + 14) == 0x57) && (inb(ioaddr + 15) == 0x57)) { | 969 | /* check if there is really a pcnet chip on that ioaddr */ |
1054 | pcnet32_probe1(ioaddr, 0, NULL); | 970 | if ((inb(ioaddr + 14) == 0x57) |
1055 | } else { | 971 | && (inb(ioaddr + 15) == 0x57)) { |
1056 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | 972 | pcnet32_probe1(ioaddr, 0, NULL); |
1057 | } | 973 | } else { |
1058 | } | 974 | release_region(ioaddr, PCNET32_TOTAL_SIZE); |
1059 | } | 975 | } |
976 | } | ||
977 | } | ||
1060 | } | 978 | } |
1061 | 979 | ||
1062 | |||
1063 | static int __devinit | 980 | static int __devinit |
1064 | pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) | 981 | pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) |
1065 | { | 982 | { |
1066 | unsigned long ioaddr; | 983 | unsigned long ioaddr; |
1067 | int err; | 984 | int err; |
1068 | 985 | ||
1069 | err = pci_enable_device(pdev); | 986 | err = pci_enable_device(pdev); |
1070 | if (err < 0) { | 987 | if (err < 0) { |
1071 | if (pcnet32_debug & NETIF_MSG_PROBE) | 988 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1072 | printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); | 989 | printk(KERN_ERR PFX |
1073 | return err; | 990 | "failed to enable device -- err=%d\n", err); |
1074 | } | 991 | return err; |
1075 | pci_set_master(pdev); | 992 | } |
993 | pci_set_master(pdev); | ||
994 | |||
995 | ioaddr = pci_resource_start(pdev, 0); | ||
996 | if (!ioaddr) { | ||
997 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
998 | printk(KERN_ERR PFX | ||
999 | "card has no PCI IO resources, aborting\n"); | ||
1000 | return -ENODEV; | ||
1001 | } | ||
1076 | 1002 | ||
1077 | ioaddr = pci_resource_start (pdev, 0); | 1003 | if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { |
1078 | if (!ioaddr) { | 1004 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1079 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1005 | printk(KERN_ERR PFX |
1080 | printk (KERN_ERR PFX "card has no PCI IO resources, aborting\n"); | 1006 | "architecture does not support 32bit PCI busmaster DMA\n"); |
1081 | return -ENODEV; | 1007 | return -ENODEV; |
1082 | } | 1008 | } |
1009 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") == | ||
1010 | NULL) { | ||
1011 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1012 | printk(KERN_ERR PFX | ||
1013 | "io address range already allocated\n"); | ||
1014 | return -EBUSY; | ||
1015 | } | ||
1083 | 1016 | ||
1084 | if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { | 1017 | err = pcnet32_probe1(ioaddr, 1, pdev); |
1085 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1018 | if (err < 0) { |
1086 | printk(KERN_ERR PFX "architecture does not support 32bit PCI busmaster DMA\n"); | 1019 | pci_disable_device(pdev); |
1087 | return -ENODEV; | 1020 | } |
1088 | } | 1021 | return err; |
1089 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") == NULL) { | ||
1090 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1091 | printk(KERN_ERR PFX "io address range already allocated\n"); | ||
1092 | return -EBUSY; | ||
1093 | } | ||
1094 | |||
1095 | err = pcnet32_probe1(ioaddr, 1, pdev); | ||
1096 | if (err < 0) { | ||
1097 | pci_disable_device(pdev); | ||
1098 | } | ||
1099 | return err; | ||
1100 | } | 1022 | } |
1101 | 1023 | ||
1102 | |||
1103 | /* pcnet32_probe1 | 1024 | /* pcnet32_probe1 |
1104 | * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. | 1025 | * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. |
1105 | * pdev will be NULL when called from pcnet32_probe_vlbus. | 1026 | * pdev will be NULL when called from pcnet32_probe_vlbus. |
@@ -1107,630 +1028,764 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
1107 | static int __devinit | 1028 | static int __devinit |
1108 | pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | 1029 | pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) |
1109 | { | 1030 | { |
1110 | struct pcnet32_private *lp; | 1031 | struct pcnet32_private *lp; |
1111 | dma_addr_t lp_dma_addr; | 1032 | dma_addr_t lp_dma_addr; |
1112 | int i, media; | 1033 | int i, media; |
1113 | int fdx, mii, fset, dxsuflo; | 1034 | int fdx, mii, fset, dxsuflo; |
1114 | int chip_version; | 1035 | int chip_version; |
1115 | char *chipname; | 1036 | char *chipname; |
1116 | struct net_device *dev; | 1037 | struct net_device *dev; |
1117 | struct pcnet32_access *a = NULL; | 1038 | struct pcnet32_access *a = NULL; |
1118 | u8 promaddr[6]; | 1039 | u8 promaddr[6]; |
1119 | int ret = -ENODEV; | 1040 | int ret = -ENODEV; |
1120 | 1041 | ||
1121 | /* reset the chip */ | 1042 | /* reset the chip */ |
1122 | pcnet32_wio_reset(ioaddr); | 1043 | pcnet32_wio_reset(ioaddr); |
1123 | 1044 | ||
1124 | /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ | 1045 | /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ |
1125 | if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { | 1046 | if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { |
1126 | a = &pcnet32_wio; | 1047 | a = &pcnet32_wio; |
1127 | } else { | 1048 | } else { |
1128 | pcnet32_dwio_reset(ioaddr); | 1049 | pcnet32_dwio_reset(ioaddr); |
1129 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { | 1050 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 |
1130 | a = &pcnet32_dwio; | 1051 | && pcnet32_dwio_check(ioaddr)) { |
1131 | } else | 1052 | a = &pcnet32_dwio; |
1132 | goto err_release_region; | 1053 | } else |
1133 | } | 1054 | goto err_release_region; |
1134 | 1055 | } | |
1135 | chip_version = a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr,89) << 16); | 1056 | |
1136 | if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW)) | 1057 | chip_version = |
1137 | printk(KERN_INFO " PCnet chip version is %#x.\n", chip_version); | 1058 | a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16); |
1138 | if ((chip_version & 0xfff) != 0x003) { | 1059 | if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW)) |
1139 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1060 | printk(KERN_INFO " PCnet chip version is %#x.\n", |
1140 | printk(KERN_INFO PFX "Unsupported chip version.\n"); | 1061 | chip_version); |
1141 | goto err_release_region; | 1062 | if ((chip_version & 0xfff) != 0x003) { |
1142 | } | 1063 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1143 | 1064 | printk(KERN_INFO PFX "Unsupported chip version.\n"); | |
1144 | /* initialize variables */ | 1065 | goto err_release_region; |
1145 | fdx = mii = fset = dxsuflo = 0; | 1066 | } |
1146 | chip_version = (chip_version >> 12) & 0xffff; | 1067 | |
1147 | 1068 | /* initialize variables */ | |
1148 | switch (chip_version) { | 1069 | fdx = mii = fset = dxsuflo = 0; |
1149 | case 0x2420: | 1070 | chip_version = (chip_version >> 12) & 0xffff; |
1150 | chipname = "PCnet/PCI 79C970"; /* PCI */ | 1071 | |
1151 | break; | 1072 | switch (chip_version) { |
1152 | case 0x2430: | 1073 | case 0x2420: |
1153 | if (shared) | 1074 | chipname = "PCnet/PCI 79C970"; /* PCI */ |
1154 | chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ | 1075 | break; |
1155 | else | 1076 | case 0x2430: |
1156 | chipname = "PCnet/32 79C965"; /* 486/VL bus */ | 1077 | if (shared) |
1157 | break; | 1078 | chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ |
1158 | case 0x2621: | 1079 | else |
1159 | chipname = "PCnet/PCI II 79C970A"; /* PCI */ | 1080 | chipname = "PCnet/32 79C965"; /* 486/VL bus */ |
1160 | fdx = 1; | 1081 | break; |
1161 | break; | 1082 | case 0x2621: |
1162 | case 0x2623: | 1083 | chipname = "PCnet/PCI II 79C970A"; /* PCI */ |
1163 | chipname = "PCnet/FAST 79C971"; /* PCI */ | 1084 | fdx = 1; |
1164 | fdx = 1; mii = 1; fset = 1; | 1085 | break; |
1165 | break; | 1086 | case 0x2623: |
1166 | case 0x2624: | 1087 | chipname = "PCnet/FAST 79C971"; /* PCI */ |
1167 | chipname = "PCnet/FAST+ 79C972"; /* PCI */ | 1088 | fdx = 1; |
1168 | fdx = 1; mii = 1; fset = 1; | 1089 | mii = 1; |
1169 | break; | 1090 | fset = 1; |
1170 | case 0x2625: | 1091 | break; |
1171 | chipname = "PCnet/FAST III 79C973"; /* PCI */ | 1092 | case 0x2624: |
1172 | fdx = 1; mii = 1; | 1093 | chipname = "PCnet/FAST+ 79C972"; /* PCI */ |
1173 | break; | 1094 | fdx = 1; |
1174 | case 0x2626: | 1095 | mii = 1; |
1175 | chipname = "PCnet/Home 79C978"; /* PCI */ | 1096 | fset = 1; |
1176 | fdx = 1; | 1097 | break; |
1098 | case 0x2625: | ||
1099 | chipname = "PCnet/FAST III 79C973"; /* PCI */ | ||
1100 | fdx = 1; | ||
1101 | mii = 1; | ||
1102 | break; | ||
1103 | case 0x2626: | ||
1104 | chipname = "PCnet/Home 79C978"; /* PCI */ | ||
1105 | fdx = 1; | ||
1106 | /* | ||
1107 | * This is based on specs published at www.amd.com. This section | ||
1108 | * assumes that a card with a 79C978 wants to go into standard | ||
1109 | * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode, | ||
1110 | * and the module option homepna=1 can select this instead. | ||
1111 | */ | ||
1112 | media = a->read_bcr(ioaddr, 49); | ||
1113 | media &= ~3; /* default to 10Mb ethernet */ | ||
1114 | if (cards_found < MAX_UNITS && homepna[cards_found]) | ||
1115 | media |= 1; /* switch to home wiring mode */ | ||
1116 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1117 | printk(KERN_DEBUG PFX "media set to %sMbit mode.\n", | ||
1118 | (media & 1) ? "1" : "10"); | ||
1119 | a->write_bcr(ioaddr, 49, media); | ||
1120 | break; | ||
1121 | case 0x2627: | ||
1122 | chipname = "PCnet/FAST III 79C975"; /* PCI */ | ||
1123 | fdx = 1; | ||
1124 | mii = 1; | ||
1125 | break; | ||
1126 | case 0x2628: | ||
1127 | chipname = "PCnet/PRO 79C976"; | ||
1128 | fdx = 1; | ||
1129 | mii = 1; | ||
1130 | break; | ||
1131 | default: | ||
1132 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1133 | printk(KERN_INFO PFX | ||
1134 | "PCnet version %#x, no PCnet32 chip.\n", | ||
1135 | chip_version); | ||
1136 | goto err_release_region; | ||
1137 | } | ||
1138 | |||
1177 | /* | 1139 | /* |
1178 | * This is based on specs published at www.amd.com. This section | 1140 | * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit |
1179 | * assumes that a card with a 79C978 wants to go into standard | 1141 | * starting until the packet is loaded. Strike one for reliability, lose |
1180 | * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode, | 1142 | * one for latency - although on PCI this isnt a big loss. Older chips |
1181 | * and the module option homepna=1 can select this instead. | 1143 | * have FIFO's smaller than a packet, so you can't do this. |
1144 | * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn. | ||
1182 | */ | 1145 | */ |
1183 | media = a->read_bcr(ioaddr, 49); | 1146 | |
1184 | media &= ~3; /* default to 10Mb ethernet */ | 1147 | if (fset) { |
1185 | if (cards_found < MAX_UNITS && homepna[cards_found]) | 1148 | a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860)); |
1186 | media |= 1; /* switch to home wiring mode */ | 1149 | a->write_csr(ioaddr, 80, |
1187 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1150 | (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); |
1188 | printk(KERN_DEBUG PFX "media set to %sMbit mode.\n", | 1151 | dxsuflo = 1; |
1189 | (media & 1) ? "1" : "10"); | 1152 | } |
1190 | a->write_bcr(ioaddr, 49, media); | 1153 | |
1191 | break; | 1154 | dev = alloc_etherdev(0); |
1192 | case 0x2627: | 1155 | if (!dev) { |
1193 | chipname = "PCnet/FAST III 79C975"; /* PCI */ | 1156 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1194 | fdx = 1; mii = 1; | 1157 | printk(KERN_ERR PFX "Memory allocation failed.\n"); |
1195 | break; | 1158 | ret = -ENOMEM; |
1196 | case 0x2628: | 1159 | goto err_release_region; |
1197 | chipname = "PCnet/PRO 79C976"; | 1160 | } |
1198 | fdx = 1; mii = 1; | 1161 | SET_NETDEV_DEV(dev, &pdev->dev); |
1199 | break; | 1162 | |
1200 | default: | ||
1201 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1202 | printk(KERN_INFO PFX "PCnet version %#x, no PCnet32 chip.\n", | ||
1203 | chip_version); | ||
1204 | goto err_release_region; | ||
1205 | } | ||
1206 | |||
1207 | /* | ||
1208 | * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit | ||
1209 | * starting until the packet is loaded. Strike one for reliability, lose | ||
1210 | * one for latency - although on PCI this isnt a big loss. Older chips | ||
1211 | * have FIFO's smaller than a packet, so you can't do this. | ||
1212 | * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn. | ||
1213 | */ | ||
1214 | |||
1215 | if (fset) { | ||
1216 | a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860)); | ||
1217 | a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); | ||
1218 | dxsuflo = 1; | ||
1219 | } | ||
1220 | |||
1221 | dev = alloc_etherdev(0); | ||
1222 | if (!dev) { | ||
1223 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1163 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1224 | printk(KERN_ERR PFX "Memory allocation failed.\n"); | 1164 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); |
1225 | ret = -ENOMEM; | 1165 | |
1226 | goto err_release_region; | 1166 | /* In most chips, after a chip reset, the ethernet address is read from the |
1227 | } | 1167 | * station address PROM at the base address and programmed into the |
1228 | SET_NETDEV_DEV(dev, &pdev->dev); | 1168 | * "Physical Address Registers" CSR12-14. |
1229 | 1169 | * As a precautionary measure, we read the PROM values and complain if | |
1230 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1170 | * they disagree with the CSRs. Either way, we use the CSR values, and |
1231 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); | 1171 | * double check that they are valid. |
1232 | 1172 | */ | |
1233 | /* In most chips, after a chip reset, the ethernet address is read from the | 1173 | for (i = 0; i < 3; i++) { |
1234 | * station address PROM at the base address and programmed into the | 1174 | unsigned int val; |
1235 | * "Physical Address Registers" CSR12-14. | 1175 | val = a->read_csr(ioaddr, i + 12) & 0x0ffff; |
1236 | * As a precautionary measure, we read the PROM values and complain if | 1176 | /* There may be endianness issues here. */ |
1237 | * they disagree with the CSRs. Either way, we use the CSR values, and | 1177 | dev->dev_addr[2 * i] = val & 0x0ff; |
1238 | * double check that they are valid. | 1178 | dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff; |
1239 | */ | 1179 | } |
1240 | for (i = 0; i < 3; i++) { | 1180 | |
1241 | unsigned int val; | 1181 | /* read PROM address and compare with CSR address */ |
1242 | val = a->read_csr(ioaddr, i+12) & 0x0ffff; | ||
1243 | /* There may be endianness issues here. */ | ||
1244 | dev->dev_addr[2*i] = val & 0x0ff; | ||
1245 | dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff; | ||
1246 | } | ||
1247 | |||
1248 | /* read PROM address and compare with CSR address */ | ||
1249 | for (i = 0; i < 6; i++) | ||
1250 | promaddr[i] = inb(ioaddr + i); | ||
1251 | |||
1252 | if (memcmp(promaddr, dev->dev_addr, 6) | ||
1253 | || !is_valid_ether_addr(dev->dev_addr)) { | ||
1254 | if (is_valid_ether_addr(promaddr)) { | ||
1255 | if (pcnet32_debug & NETIF_MSG_PROBE) { | ||
1256 | printk(" warning: CSR address invalid,\n"); | ||
1257 | printk(KERN_INFO " using instead PROM address of"); | ||
1258 | } | ||
1259 | memcpy(dev->dev_addr, promaddr, 6); | ||
1260 | } | ||
1261 | } | ||
1262 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); | ||
1263 | |||
1264 | /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ | ||
1265 | if (!is_valid_ether_addr(dev->perm_addr)) | ||
1266 | memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); | ||
1267 | |||
1268 | if (pcnet32_debug & NETIF_MSG_PROBE) { | ||
1269 | for (i = 0; i < 6; i++) | 1182 | for (i = 0; i < 6; i++) |
1270 | printk(" %2.2x", dev->dev_addr[i]); | 1183 | promaddr[i] = inb(ioaddr + i); |
1271 | 1184 | ||
1272 | /* Version 0x2623 and 0x2624 */ | 1185 | if (memcmp(promaddr, dev->dev_addr, 6) |
1273 | if (((chip_version + 1) & 0xfffe) == 0x2624) { | 1186 | || !is_valid_ether_addr(dev->dev_addr)) { |
1274 | i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ | 1187 | if (is_valid_ether_addr(promaddr)) { |
1275 | printk("\n" KERN_INFO " tx_start_pt(0x%04x):",i); | 1188 | if (pcnet32_debug & NETIF_MSG_PROBE) { |
1276 | switch(i>>10) { | 1189 | printk(" warning: CSR address invalid,\n"); |
1277 | case 0: printk(" 20 bytes,"); break; | 1190 | printk(KERN_INFO |
1278 | case 1: printk(" 64 bytes,"); break; | 1191 | " using instead PROM address of"); |
1279 | case 2: printk(" 128 bytes,"); break; | 1192 | } |
1280 | case 3: printk("~220 bytes,"); break; | 1193 | memcpy(dev->dev_addr, promaddr, 6); |
1281 | } | 1194 | } |
1282 | i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ | 1195 | } |
1283 | printk(" BCR18(%x):",i&0xffff); | 1196 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); |
1284 | if (i & (1<<5)) printk("BurstWrEn "); | 1197 | |
1285 | if (i & (1<<6)) printk("BurstRdEn "); | 1198 | /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ |
1286 | if (i & (1<<7)) printk("DWordIO "); | 1199 | if (!is_valid_ether_addr(dev->perm_addr)) |
1287 | if (i & (1<<11)) printk("NoUFlow "); | 1200 | memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); |
1288 | i = a->read_bcr(ioaddr, 25); | 1201 | |
1289 | printk("\n" KERN_INFO " SRAMSIZE=0x%04x,",i<<8); | 1202 | if (pcnet32_debug & NETIF_MSG_PROBE) { |
1290 | i = a->read_bcr(ioaddr, 26); | 1203 | for (i = 0; i < 6; i++) |
1291 | printk(" SRAM_BND=0x%04x,",i<<8); | 1204 | printk(" %2.2x", dev->dev_addr[i]); |
1292 | i = a->read_bcr(ioaddr, 27); | 1205 | |
1293 | if (i & (1<<14)) printk("LowLatRx"); | 1206 | /* Version 0x2623 and 0x2624 */ |
1294 | } | 1207 | if (((chip_version + 1) & 0xfffe) == 0x2624) { |
1295 | } | 1208 | i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ |
1296 | 1209 | printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i); | |
1297 | dev->base_addr = ioaddr; | 1210 | switch (i >> 10) { |
1298 | /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ | 1211 | case 0: |
1299 | if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) { | 1212 | printk(" 20 bytes,"); |
1300 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1213 | break; |
1301 | printk(KERN_ERR PFX "Consistent memory allocation failed.\n"); | 1214 | case 1: |
1302 | ret = -ENOMEM; | 1215 | printk(" 64 bytes,"); |
1303 | goto err_free_netdev; | 1216 | break; |
1304 | } | 1217 | case 2: |
1305 | 1218 | printk(" 128 bytes,"); | |
1306 | memset(lp, 0, sizeof(*lp)); | 1219 | break; |
1307 | lp->dma_addr = lp_dma_addr; | 1220 | case 3: |
1308 | lp->pci_dev = pdev; | 1221 | printk("~220 bytes,"); |
1309 | 1222 | break; | |
1310 | spin_lock_init(&lp->lock); | 1223 | } |
1311 | 1224 | i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ | |
1312 | SET_MODULE_OWNER(dev); | 1225 | printk(" BCR18(%x):", i & 0xffff); |
1313 | SET_NETDEV_DEV(dev, &pdev->dev); | 1226 | if (i & (1 << 5)) |
1314 | dev->priv = lp; | 1227 | printk("BurstWrEn "); |
1315 | lp->name = chipname; | 1228 | if (i & (1 << 6)) |
1316 | lp->shared_irq = shared; | 1229 | printk("BurstRdEn "); |
1317 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ | 1230 | if (i & (1 << 7)) |
1318 | lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */ | 1231 | printk("DWordIO "); |
1319 | lp->tx_mod_mask = lp->tx_ring_size - 1; | 1232 | if (i & (1 << 11)) |
1320 | lp->rx_mod_mask = lp->rx_ring_size - 1; | 1233 | printk("NoUFlow "); |
1321 | lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12); | 1234 | i = a->read_bcr(ioaddr, 25); |
1322 | lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4); | 1235 | printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8); |
1323 | lp->mii_if.full_duplex = fdx; | 1236 | i = a->read_bcr(ioaddr, 26); |
1324 | lp->mii_if.phy_id_mask = 0x1f; | 1237 | printk(" SRAM_BND=0x%04x,", i << 8); |
1325 | lp->mii_if.reg_num_mask = 0x1f; | 1238 | i = a->read_bcr(ioaddr, 27); |
1326 | lp->dxsuflo = dxsuflo; | 1239 | if (i & (1 << 14)) |
1327 | lp->mii = mii; | 1240 | printk("LowLatRx"); |
1328 | lp->msg_enable = pcnet32_debug; | 1241 | } |
1329 | if ((cards_found >= MAX_UNITS) || (options[cards_found] > sizeof(options_mapping))) | 1242 | } |
1330 | lp->options = PCNET32_PORT_ASEL; | 1243 | |
1331 | else | 1244 | dev->base_addr = ioaddr; |
1332 | lp->options = options_mapping[options[cards_found]]; | 1245 | /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ |
1333 | lp->mii_if.dev = dev; | 1246 | if ((lp = |
1334 | lp->mii_if.mdio_read = mdio_read; | 1247 | pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) { |
1335 | lp->mii_if.mdio_write = mdio_write; | 1248 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1336 | 1249 | printk(KERN_ERR PFX | |
1337 | if (fdx && !(lp->options & PCNET32_PORT_ASEL) && | 1250 | "Consistent memory allocation failed.\n"); |
1338 | ((cards_found>=MAX_UNITS) || full_duplex[cards_found])) | 1251 | ret = -ENOMEM; |
1339 | lp->options |= PCNET32_PORT_FD; | 1252 | goto err_free_netdev; |
1340 | 1253 | } | |
1341 | if (!a) { | 1254 | |
1342 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1255 | memset(lp, 0, sizeof(*lp)); |
1343 | printk(KERN_ERR PFX "No access methods\n"); | 1256 | lp->dma_addr = lp_dma_addr; |
1344 | ret = -ENODEV; | 1257 | lp->pci_dev = pdev; |
1345 | goto err_free_consistent; | 1258 | |
1346 | } | 1259 | spin_lock_init(&lp->lock); |
1347 | lp->a = *a; | 1260 | |
1348 | 1261 | SET_MODULE_OWNER(dev); | |
1349 | /* prior to register_netdev, dev->name is not yet correct */ | 1262 | SET_NETDEV_DEV(dev, &pdev->dev); |
1350 | if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) { | 1263 | dev->priv = lp; |
1351 | ret = -ENOMEM; | 1264 | lp->name = chipname; |
1352 | goto err_free_ring; | 1265 | lp->shared_irq = shared; |
1353 | } | 1266 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ |
1354 | /* detect special T1/E1 WAN card by checking for MAC address */ | 1267 | lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */ |
1355 | if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 | 1268 | lp->tx_mod_mask = lp->tx_ring_size - 1; |
1269 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
1270 | lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12); | ||
1271 | lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4); | ||
1272 | lp->mii_if.full_duplex = fdx; | ||
1273 | lp->mii_if.phy_id_mask = 0x1f; | ||
1274 | lp->mii_if.reg_num_mask = 0x1f; | ||
1275 | lp->dxsuflo = dxsuflo; | ||
1276 | lp->mii = mii; | ||
1277 | lp->msg_enable = pcnet32_debug; | ||
1278 | if ((cards_found >= MAX_UNITS) | ||
1279 | || (options[cards_found] > sizeof(options_mapping))) | ||
1280 | lp->options = PCNET32_PORT_ASEL; | ||
1281 | else | ||
1282 | lp->options = options_mapping[options[cards_found]]; | ||
1283 | lp->mii_if.dev = dev; | ||
1284 | lp->mii_if.mdio_read = mdio_read; | ||
1285 | lp->mii_if.mdio_write = mdio_write; | ||
1286 | |||
1287 | if (fdx && !(lp->options & PCNET32_PORT_ASEL) && | ||
1288 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) | ||
1289 | lp->options |= PCNET32_PORT_FD; | ||
1290 | |||
1291 | if (!a) { | ||
1292 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1293 | printk(KERN_ERR PFX "No access methods\n"); | ||
1294 | ret = -ENODEV; | ||
1295 | goto err_free_consistent; | ||
1296 | } | ||
1297 | lp->a = *a; | ||
1298 | |||
1299 | /* prior to register_netdev, dev->name is not yet correct */ | ||
1300 | if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) { | ||
1301 | ret = -ENOMEM; | ||
1302 | goto err_free_ring; | ||
1303 | } | ||
1304 | /* detect special T1/E1 WAN card by checking for MAC address */ | ||
1305 | if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 | ||
1356 | && dev->dev_addr[2] == 0x75) | 1306 | && dev->dev_addr[2] == 0x75) |
1357 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; | 1307 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; |
1358 | |||
1359 | lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ | ||
1360 | lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); | ||
1361 | for (i = 0; i < 6; i++) | ||
1362 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | ||
1363 | lp->init_block.filter[0] = 0x00000000; | ||
1364 | lp->init_block.filter[1] = 0x00000000; | ||
1365 | lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr); | ||
1366 | lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr); | ||
1367 | |||
1368 | /* switch pcnet32 to 32bit mode */ | ||
1369 | a->write_bcr(ioaddr, 20, 2); | ||
1370 | |||
1371 | a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
1372 | init_block)) & 0xffff); | ||
1373 | a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
1374 | init_block)) >> 16); | ||
1375 | |||
1376 | if (pdev) { /* use the IRQ provided by PCI */ | ||
1377 | dev->irq = pdev->irq; | ||
1378 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1379 | printk(" assigned IRQ %d.\n", dev->irq); | ||
1380 | } else { | ||
1381 | unsigned long irq_mask = probe_irq_on(); | ||
1382 | 1308 | ||
1383 | /* | 1309 | lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ |
1384 | * To auto-IRQ we enable the initialization-done and DMA error | 1310 | lp->init_block.tlen_rlen = |
1385 | * interrupts. For ISA boards we get a DMA error, but VLB and PCI | 1311 | le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); |
1386 | * boards will work. | 1312 | for (i = 0; i < 6; i++) |
1387 | */ | 1313 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; |
1388 | /* Trigger an initialization just for the interrupt. */ | 1314 | lp->init_block.filter[0] = 0x00000000; |
1389 | a->write_csr (ioaddr, 0, 0x41); | 1315 | lp->init_block.filter[1] = 0x00000000; |
1390 | mdelay (1); | 1316 | lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr); |
1317 | lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr); | ||
1318 | |||
1319 | /* switch pcnet32 to 32bit mode */ | ||
1320 | a->write_bcr(ioaddr, 20, 2); | ||
1321 | |||
1322 | a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
1323 | init_block)) & 0xffff); | ||
1324 | a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
1325 | init_block)) >> 16); | ||
1326 | |||
1327 | if (pdev) { /* use the IRQ provided by PCI */ | ||
1328 | dev->irq = pdev->irq; | ||
1329 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1330 | printk(" assigned IRQ %d.\n", dev->irq); | ||
1331 | } else { | ||
1332 | unsigned long irq_mask = probe_irq_on(); | ||
1333 | |||
1334 | /* | ||
1335 | * To auto-IRQ we enable the initialization-done and DMA error | ||
1336 | * interrupts. For ISA boards we get a DMA error, but VLB and PCI | ||
1337 | * boards will work. | ||
1338 | */ | ||
1339 | /* Trigger an initialization just for the interrupt. */ | ||
1340 | a->write_csr(ioaddr, 0, 0x41); | ||
1341 | mdelay(1); | ||
1342 | |||
1343 | dev->irq = probe_irq_off(irq_mask); | ||
1344 | if (!dev->irq) { | ||
1345 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1346 | printk(", failed to detect IRQ line.\n"); | ||
1347 | ret = -ENODEV; | ||
1348 | goto err_free_ring; | ||
1349 | } | ||
1350 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1351 | printk(", probed IRQ %d.\n", dev->irq); | ||
1352 | } | ||
1391 | 1353 | ||
1392 | dev->irq = probe_irq_off (irq_mask); | 1354 | /* Set the mii phy_id so that we can query the link state */ |
1393 | if (!dev->irq) { | 1355 | if (lp->mii) { |
1394 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1356 | /* lp->phycount and lp->phymask are set to 0 by memset above */ |
1395 | printk(", failed to detect IRQ line.\n"); | 1357 | |
1396 | ret = -ENODEV; | 1358 | lp->mii_if.phy_id = ((lp->a.read_bcr(ioaddr, 33)) >> 5) & 0x1f; |
1397 | goto err_free_ring; | 1359 | /* scan for PHYs */ |
1360 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { | ||
1361 | unsigned short id1, id2; | ||
1362 | |||
1363 | id1 = mdio_read(dev, i, MII_PHYSID1); | ||
1364 | if (id1 == 0xffff) | ||
1365 | continue; | ||
1366 | id2 = mdio_read(dev, i, MII_PHYSID2); | ||
1367 | if (id2 == 0xffff) | ||
1368 | continue; | ||
1369 | if (i == 31 && ((chip_version + 1) & 0xfffe) == 0x2624) | ||
1370 | continue; /* 79C971 & 79C972 have phantom phy at id 31 */ | ||
1371 | lp->phycount++; | ||
1372 | lp->phymask |= (1 << i); | ||
1373 | lp->mii_if.phy_id = i; | ||
1374 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1375 | printk(KERN_INFO PFX | ||
1376 | "Found PHY %04x:%04x at address %d.\n", | ||
1377 | id1, id2, i); | ||
1378 | } | ||
1379 | lp->a.write_bcr(ioaddr, 33, (lp->mii_if.phy_id) << 5); | ||
1380 | if (lp->phycount > 1) { | ||
1381 | lp->options |= PCNET32_PORT_MII; | ||
1382 | } | ||
1398 | } | 1383 | } |
1399 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1384 | |
1400 | printk(", probed IRQ %d.\n", dev->irq); | 1385 | init_timer(&lp->watchdog_timer); |
1401 | } | 1386 | lp->watchdog_timer.data = (unsigned long)dev; |
1402 | 1387 | lp->watchdog_timer.function = (void *)&pcnet32_watchdog; | |
1403 | /* Set the mii phy_id so that we can query the link state */ | 1388 | |
1404 | if (lp->mii) | 1389 | /* The PCNET32-specific entries in the device structure. */ |
1405 | lp->mii_if.phy_id = ((lp->a.read_bcr (ioaddr, 33)) >> 5) & 0x1f; | 1390 | dev->open = &pcnet32_open; |
1406 | 1391 | dev->hard_start_xmit = &pcnet32_start_xmit; | |
1407 | init_timer (&lp->watchdog_timer); | 1392 | dev->stop = &pcnet32_close; |
1408 | lp->watchdog_timer.data = (unsigned long) dev; | 1393 | dev->get_stats = &pcnet32_get_stats; |
1409 | lp->watchdog_timer.function = (void *) &pcnet32_watchdog; | 1394 | dev->set_multicast_list = &pcnet32_set_multicast_list; |
1410 | 1395 | dev->do_ioctl = &pcnet32_ioctl; | |
1411 | /* The PCNET32-specific entries in the device structure. */ | 1396 | dev->ethtool_ops = &pcnet32_ethtool_ops; |
1412 | dev->open = &pcnet32_open; | 1397 | dev->tx_timeout = pcnet32_tx_timeout; |
1413 | dev->hard_start_xmit = &pcnet32_start_xmit; | 1398 | dev->watchdog_timeo = (5 * HZ); |
1414 | dev->stop = &pcnet32_close; | ||
1415 | dev->get_stats = &pcnet32_get_stats; | ||
1416 | dev->set_multicast_list = &pcnet32_set_multicast_list; | ||
1417 | dev->do_ioctl = &pcnet32_ioctl; | ||
1418 | dev->ethtool_ops = &pcnet32_ethtool_ops; | ||
1419 | dev->tx_timeout = pcnet32_tx_timeout; | ||
1420 | dev->watchdog_timeo = (5*HZ); | ||
1421 | 1399 | ||
1422 | #ifdef CONFIG_NET_POLL_CONTROLLER | 1400 | #ifdef CONFIG_NET_POLL_CONTROLLER |
1423 | dev->poll_controller = pcnet32_poll_controller; | 1401 | dev->poll_controller = pcnet32_poll_controller; |
1424 | #endif | 1402 | #endif |
1425 | 1403 | ||
1426 | /* Fill in the generic fields of the device structure. */ | 1404 | /* Fill in the generic fields of the device structure. */ |
1427 | if (register_netdev(dev)) | 1405 | if (register_netdev(dev)) |
1428 | goto err_free_ring; | 1406 | goto err_free_ring; |
1429 | 1407 | ||
1430 | if (pdev) { | 1408 | if (pdev) { |
1431 | pci_set_drvdata(pdev, dev); | 1409 | pci_set_drvdata(pdev, dev); |
1432 | } else { | 1410 | } else { |
1433 | lp->next = pcnet32_dev; | 1411 | lp->next = pcnet32_dev; |
1434 | pcnet32_dev = dev; | 1412 | pcnet32_dev = dev; |
1435 | } | 1413 | } |
1436 | |||
1437 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1438 | printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); | ||
1439 | cards_found++; | ||
1440 | |||
1441 | /* enable LED writes */ | ||
1442 | a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000); | ||
1443 | |||
1444 | return 0; | ||
1445 | |||
1446 | err_free_ring: | ||
1447 | pcnet32_free_ring(dev); | ||
1448 | err_free_consistent: | ||
1449 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | ||
1450 | err_free_netdev: | ||
1451 | free_netdev(dev); | ||
1452 | err_release_region: | ||
1453 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | ||
1454 | return ret; | ||
1455 | } | ||
1456 | 1414 | ||
1415 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1416 | printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); | ||
1417 | cards_found++; | ||
1418 | |||
1419 | /* enable LED writes */ | ||
1420 | a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000); | ||
1421 | |||
1422 | return 0; | ||
1423 | |||
1424 | err_free_ring: | ||
1425 | pcnet32_free_ring(dev); | ||
1426 | err_free_consistent: | ||
1427 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | ||
1428 | err_free_netdev: | ||
1429 | free_netdev(dev); | ||
1430 | err_release_region: | ||
1431 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | ||
1432 | return ret; | ||
1433 | } | ||
1457 | 1434 | ||
1458 | /* if any allocation fails, caller must also call pcnet32_free_ring */ | 1435 | /* if any allocation fails, caller must also call pcnet32_free_ring */ |
1459 | static int pcnet32_alloc_ring(struct net_device *dev, char *name) | 1436 | static int pcnet32_alloc_ring(struct net_device *dev, char *name) |
1460 | { | 1437 | { |
1461 | struct pcnet32_private *lp = dev->priv; | 1438 | struct pcnet32_private *lp = dev->priv; |
1462 | 1439 | ||
1463 | lp->tx_ring = pci_alloc_consistent(lp->pci_dev, | 1440 | lp->tx_ring = pci_alloc_consistent(lp->pci_dev, |
1464 | sizeof(struct pcnet32_tx_head) * lp->tx_ring_size, | 1441 | sizeof(struct pcnet32_tx_head) * |
1465 | &lp->tx_ring_dma_addr); | 1442 | lp->tx_ring_size, |
1466 | if (lp->tx_ring == NULL) { | 1443 | &lp->tx_ring_dma_addr); |
1467 | if (pcnet32_debug & NETIF_MSG_DRV) | 1444 | if (lp->tx_ring == NULL) { |
1468 | printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n", | 1445 | if (pcnet32_debug & NETIF_MSG_DRV) |
1469 | name); | 1446 | printk("\n" KERN_ERR PFX |
1470 | return -ENOMEM; | 1447 | "%s: Consistent memory allocation failed.\n", |
1471 | } | 1448 | name); |
1472 | 1449 | return -ENOMEM; | |
1473 | lp->rx_ring = pci_alloc_consistent(lp->pci_dev, | 1450 | } |
1474 | sizeof(struct pcnet32_rx_head) * lp->rx_ring_size, | ||
1475 | &lp->rx_ring_dma_addr); | ||
1476 | if (lp->rx_ring == NULL) { | ||
1477 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1478 | printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n", | ||
1479 | name); | ||
1480 | return -ENOMEM; | ||
1481 | } | ||
1482 | |||
1483 | lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size, | ||
1484 | GFP_ATOMIC); | ||
1485 | if (!lp->tx_dma_addr) { | ||
1486 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1487 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
1488 | return -ENOMEM; | ||
1489 | } | ||
1490 | memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size); | ||
1491 | |||
1492 | lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size, | ||
1493 | GFP_ATOMIC); | ||
1494 | if (!lp->rx_dma_addr) { | ||
1495 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1496 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
1497 | return -ENOMEM; | ||
1498 | } | ||
1499 | memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size); | ||
1500 | |||
1501 | lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size, | ||
1502 | GFP_ATOMIC); | ||
1503 | if (!lp->tx_skbuff) { | ||
1504 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1505 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
1506 | return -ENOMEM; | ||
1507 | } | ||
1508 | memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size); | ||
1509 | |||
1510 | lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size, | ||
1511 | GFP_ATOMIC); | ||
1512 | if (!lp->rx_skbuff) { | ||
1513 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1514 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
1515 | return -ENOMEM; | ||
1516 | } | ||
1517 | memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size); | ||
1518 | 1451 | ||
1519 | return 0; | 1452 | lp->rx_ring = pci_alloc_consistent(lp->pci_dev, |
1520 | } | 1453 | sizeof(struct pcnet32_rx_head) * |
1454 | lp->rx_ring_size, | ||
1455 | &lp->rx_ring_dma_addr); | ||
1456 | if (lp->rx_ring == NULL) { | ||
1457 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1458 | printk("\n" KERN_ERR PFX | ||
1459 | "%s: Consistent memory allocation failed.\n", | ||
1460 | name); | ||
1461 | return -ENOMEM; | ||
1462 | } | ||
1521 | 1463 | ||
1464 | lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size, | ||
1465 | GFP_ATOMIC); | ||
1466 | if (!lp->tx_dma_addr) { | ||
1467 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1468 | printk("\n" KERN_ERR PFX | ||
1469 | "%s: Memory allocation failed.\n", name); | ||
1470 | return -ENOMEM; | ||
1471 | } | ||
1472 | memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size); | ||
1473 | |||
1474 | lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size, | ||
1475 | GFP_ATOMIC); | ||
1476 | if (!lp->rx_dma_addr) { | ||
1477 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1478 | printk("\n" KERN_ERR PFX | ||
1479 | "%s: Memory allocation failed.\n", name); | ||
1480 | return -ENOMEM; | ||
1481 | } | ||
1482 | memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size); | ||
1483 | |||
1484 | lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size, | ||
1485 | GFP_ATOMIC); | ||
1486 | if (!lp->tx_skbuff) { | ||
1487 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1488 | printk("\n" KERN_ERR PFX | ||
1489 | "%s: Memory allocation failed.\n", name); | ||
1490 | return -ENOMEM; | ||
1491 | } | ||
1492 | memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size); | ||
1493 | |||
1494 | lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size, | ||
1495 | GFP_ATOMIC); | ||
1496 | if (!lp->rx_skbuff) { | ||
1497 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
1498 | printk("\n" KERN_ERR PFX | ||
1499 | "%s: Memory allocation failed.\n", name); | ||
1500 | return -ENOMEM; | ||
1501 | } | ||
1502 | memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size); | ||
1503 | |||
1504 | return 0; | ||
1505 | } | ||
1522 | 1506 | ||
1523 | static void pcnet32_free_ring(struct net_device *dev) | 1507 | static void pcnet32_free_ring(struct net_device *dev) |
1524 | { | 1508 | { |
1525 | struct pcnet32_private *lp = dev->priv; | 1509 | struct pcnet32_private *lp = dev->priv; |
1526 | 1510 | ||
1527 | kfree(lp->tx_skbuff); | 1511 | kfree(lp->tx_skbuff); |
1528 | lp->tx_skbuff = NULL; | 1512 | lp->tx_skbuff = NULL; |
1529 | 1513 | ||
1530 | kfree(lp->rx_skbuff); | 1514 | kfree(lp->rx_skbuff); |
1531 | lp->rx_skbuff = NULL; | 1515 | lp->rx_skbuff = NULL; |
1532 | 1516 | ||
1533 | kfree(lp->tx_dma_addr); | 1517 | kfree(lp->tx_dma_addr); |
1534 | lp->tx_dma_addr = NULL; | 1518 | lp->tx_dma_addr = NULL; |
1535 | 1519 | ||
1536 | kfree(lp->rx_dma_addr); | 1520 | kfree(lp->rx_dma_addr); |
1537 | lp->rx_dma_addr = NULL; | 1521 | lp->rx_dma_addr = NULL; |
1538 | 1522 | ||
1539 | if (lp->tx_ring) { | 1523 | if (lp->tx_ring) { |
1540 | pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_tx_head) * lp->tx_ring_size, | 1524 | pci_free_consistent(lp->pci_dev, |
1541 | lp->tx_ring, lp->tx_ring_dma_addr); | 1525 | sizeof(struct pcnet32_tx_head) * |
1542 | lp->tx_ring = NULL; | 1526 | lp->tx_ring_size, lp->tx_ring, |
1543 | } | 1527 | lp->tx_ring_dma_addr); |
1528 | lp->tx_ring = NULL; | ||
1529 | } | ||
1544 | 1530 | ||
1545 | if (lp->rx_ring) { | 1531 | if (lp->rx_ring) { |
1546 | pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_rx_head) * lp->rx_ring_size, | 1532 | pci_free_consistent(lp->pci_dev, |
1547 | lp->rx_ring, lp->rx_ring_dma_addr); | 1533 | sizeof(struct pcnet32_rx_head) * |
1548 | lp->rx_ring = NULL; | 1534 | lp->rx_ring_size, lp->rx_ring, |
1549 | } | 1535 | lp->rx_ring_dma_addr); |
1536 | lp->rx_ring = NULL; | ||
1537 | } | ||
1550 | } | 1538 | } |
1551 | 1539 | ||
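pcnet32_alloc_ring() can return -ENOMEM at the first failed allocation without undoing anything because, as its comment says, the caller must then invoke pcnet32_free_ring(), and that function tolerates a partially built state: kfree(NULL) is a no-op, the descriptor rings are released only when their pointers are non-NULL, and every pointer is reset to NULL so a repeated call stays harmless. A small standalone model of that allocate/free contract, with hypothetical field names:

#include <stdio.h>
#include <stdlib.h>

struct rings {
	void *tx_ring, *rx_ring;        /* hypothetical stand-ins for the DMA rings */
	void **tx_skbuff, **rx_skbuff;  /* hypothetical bookkeeping arrays */
};

/* Safe on partially built state: free(NULL) is a no-op, pointers are cleared. */
static void rings_free(struct rings *r)
{
	free(r->tx_skbuff); r->tx_skbuff = NULL;
	free(r->rx_skbuff); r->rx_skbuff = NULL;
	free(r->tx_ring);   r->tx_ring = NULL;
	free(r->rx_ring);   r->rx_ring = NULL;
}

/* Return early on the first failure; the caller must call rings_free(). */
static int rings_alloc(struct rings *r, size_t n)
{
	if (!(r->tx_ring = calloc(n, 16)))
		return -1;
	if (!(r->rx_ring = calloc(n, 16)))
		return -1;
	if (!(r->tx_skbuff = calloc(n, sizeof(void *))))
		return -1;
	if (!(r->rx_skbuff = calloc(n, sizeof(void *))))
		return -1;
	return 0;
}

int main(void)
{
	struct rings r = { 0 };

	if (rings_alloc(&r, 16))
		fprintf(stderr, "allocation failed\n");
	rings_free(&r);         /* correct whether rings_alloc() succeeded or not */
	return 0;
}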
1552 | 1540 | static int pcnet32_open(struct net_device *dev) | |
1553 | static int | ||
1554 | pcnet32_open(struct net_device *dev) | ||
1555 | { | 1541 | { |
1556 | struct pcnet32_private *lp = dev->priv; | 1542 | struct pcnet32_private *lp = dev->priv; |
1557 | unsigned long ioaddr = dev->base_addr; | 1543 | unsigned long ioaddr = dev->base_addr; |
1558 | u16 val; | 1544 | u16 val; |
1559 | int i; | 1545 | int i; |
1560 | int rc; | 1546 | int rc; |
1561 | unsigned long flags; | 1547 | unsigned long flags; |
1562 | 1548 | ||
1563 | if (request_irq(dev->irq, &pcnet32_interrupt, | 1549 | if (request_irq(dev->irq, &pcnet32_interrupt, |
1564 | lp->shared_irq ? SA_SHIRQ : 0, dev->name, (void *)dev)) { | 1550 | lp->shared_irq ? SA_SHIRQ : 0, dev->name, |
1565 | return -EAGAIN; | 1551 | (void *)dev)) { |
1566 | } | 1552 | return -EAGAIN; |
1567 | 1553 | } | |
1568 | spin_lock_irqsave(&lp->lock, flags); | 1554 | |
1569 | /* Check for a valid station address */ | 1555 | spin_lock_irqsave(&lp->lock, flags); |
1570 | if (!is_valid_ether_addr(dev->dev_addr)) { | 1556 | /* Check for a valid station address */ |
1571 | rc = -EINVAL; | 1557 | if (!is_valid_ether_addr(dev->dev_addr)) { |
1572 | goto err_free_irq; | 1558 | rc = -EINVAL; |
1573 | } | 1559 | goto err_free_irq; |
1574 | 1560 | } | |
1575 | /* Reset the PCNET32 */ | 1561 | |
1576 | lp->a.reset (ioaddr); | 1562 | /* Reset the PCNET32 */ |
1577 | 1563 | lp->a.reset(ioaddr); | |
1578 | /* switch pcnet32 to 32bit mode */ | 1564 | |
1579 | lp->a.write_bcr (ioaddr, 20, 2); | 1565 | /* switch pcnet32 to 32bit mode */ |
1580 | 1566 | lp->a.write_bcr(ioaddr, 20, 2); | |
1581 | if (netif_msg_ifup(lp)) | 1567 | |
1582 | printk(KERN_DEBUG "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", | 1568 | if (netif_msg_ifup(lp)) |
1583 | dev->name, dev->irq, | 1569 | printk(KERN_DEBUG |
1584 | (u32) (lp->tx_ring_dma_addr), | 1570 | "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", |
1585 | (u32) (lp->rx_ring_dma_addr), | 1571 | dev->name, dev->irq, (u32) (lp->tx_ring_dma_addr), |
1586 | (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block))); | 1572 | (u32) (lp->rx_ring_dma_addr), |
1587 | 1573 | (u32) (lp->dma_addr + | |
1588 | /* set/reset autoselect bit */ | 1574 | offsetof(struct pcnet32_private, init_block))); |
1589 | val = lp->a.read_bcr (ioaddr, 2) & ~2; | 1575 | |
1590 | if (lp->options & PCNET32_PORT_ASEL) | 1576 | /* set/reset autoselect bit */ |
1591 | val |= 2; | 1577 | val = lp->a.read_bcr(ioaddr, 2) & ~2; |
1592 | lp->a.write_bcr (ioaddr, 2, val); | 1578 | if (lp->options & PCNET32_PORT_ASEL) |
1593 | |||
1594 | /* handle full duplex setting */ | ||
1595 | if (lp->mii_if.full_duplex) { | ||
1596 | val = lp->a.read_bcr (ioaddr, 9) & ~3; | ||
1597 | if (lp->options & PCNET32_PORT_FD) { | ||
1598 | val |= 1; | ||
1599 | if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) | ||
1600 | val |= 2; | 1579 | val |= 2; |
1601 | } else if (lp->options & PCNET32_PORT_ASEL) { | 1580 | lp->a.write_bcr(ioaddr, 2, val); |
1602 | /* workaround of xSeries250, turn on for 79C975 only */ | 1581 | |
1603 | i = ((lp->a.read_csr(ioaddr, 88) | | 1582 | /* handle full duplex setting */ |
1604 | (lp->a.read_csr(ioaddr,89) << 16)) >> 12) & 0xffff; | 1583 | if (lp->mii_if.full_duplex) { |
1605 | if (i == 0x2627) | 1584 | val = lp->a.read_bcr(ioaddr, 9) & ~3; |
1606 | val |= 3; | 1585 | if (lp->options & PCNET32_PORT_FD) { |
1607 | } | 1586 | val |= 1; |
1608 | lp->a.write_bcr (ioaddr, 9, val); | 1587 | if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) |
1609 | } | 1588 | val |= 2; |
1610 | 1589 | } else if (lp->options & PCNET32_PORT_ASEL) { | |
1611 | /* set/reset GPSI bit in test register */ | 1590 | /* workaround of xSeries250, turn on for 79C975 only */ |
1612 | val = lp->a.read_csr (ioaddr, 124) & ~0x10; | 1591 | i = ((lp->a.read_csr(ioaddr, 88) | |
1613 | if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) | 1592 | (lp->a. |
1614 | val |= 0x10; | 1593 | read_csr(ioaddr, 89) << 16)) >> 12) & 0xffff; |
1615 | lp->a.write_csr (ioaddr, 124, val); | 1594 | if (i == 0x2627) |
1616 | 1595 | val |= 3; | |
1617 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | 1596 | } |
1618 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | 1597 | lp->a.write_bcr(ioaddr, 9, val); |
1598 | } | ||
1599 | |||
1600 | /* set/reset GPSI bit in test register */ | ||
1601 | val = lp->a.read_csr(ioaddr, 124) & ~0x10; | ||
1602 | if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) | ||
1603 | val |= 0x10; | ||
1604 | lp->a.write_csr(ioaddr, 124, val); | ||
1605 | |||
1606 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | ||
1607 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | ||
1619 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || | 1608 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || |
1620 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { | 1609 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { |
1621 | if (lp->options & PCNET32_PORT_ASEL) { | 1610 | if (lp->options & PCNET32_PORT_ASEL) { |
1622 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; | 1611 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; |
1623 | if (netif_msg_link(lp)) | 1612 | if (netif_msg_link(lp)) |
1624 | printk(KERN_DEBUG "%s: Setting 100Mb-Full Duplex.\n", | 1613 | printk(KERN_DEBUG |
1625 | dev->name); | 1614 | "%s: Setting 100Mb-Full Duplex.\n", |
1626 | } | 1615 | dev->name); |
1627 | } | 1616 | } |
1628 | { | 1617 | } |
1629 | /* | 1618 | if (lp->phycount < 2) { |
1630 | * 24 Jun 2004 according to AMD, in order to change the PHY, | 1619 | /* |
1631 | * DANAS (or DISPM for 79C976) must be set; then select the speed, | 1620 | * 24 Jun 2004 according to AMD, in order to change the PHY, |
1632 | * duplex, and/or enable auto negotiation, and clear DANAS | 1621 | * DANAS (or DISPM for 79C976) must be set; then select the speed, |
1633 | */ | 1622 | * duplex, and/or enable auto negotiation, and clear DANAS |
1634 | if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { | 1623 | */ |
1635 | lp->a.write_bcr(ioaddr, 32, | 1624 | if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { |
1636 | lp->a.read_bcr(ioaddr, 32) | 0x0080); | 1625 | lp->a.write_bcr(ioaddr, 32, |
1637 | /* disable Auto Negotiation, set 10Mbps, HD */ | 1626 | lp->a.read_bcr(ioaddr, 32) | 0x0080); |
1638 | val = lp->a.read_bcr(ioaddr, 32) & ~0xb8; | 1627 | /* disable Auto Negotiation, set 10Mbps, HD */ |
1639 | if (lp->options & PCNET32_PORT_FD) | 1628 | val = lp->a.read_bcr(ioaddr, 32) & ~0xb8; |
1640 | val |= 0x10; | 1629 | if (lp->options & PCNET32_PORT_FD) |
1641 | if (lp->options & PCNET32_PORT_100) | 1630 | val |= 0x10; |
1642 | val |= 0x08; | 1631 | if (lp->options & PCNET32_PORT_100) |
1643 | lp->a.write_bcr (ioaddr, 32, val); | 1632 | val |= 0x08; |
1633 | lp->a.write_bcr(ioaddr, 32, val); | ||
1634 | } else { | ||
1635 | if (lp->options & PCNET32_PORT_ASEL) { | ||
1636 | lp->a.write_bcr(ioaddr, 32, | ||
1637 | lp->a.read_bcr(ioaddr, | ||
1638 | 32) | 0x0080); | ||
1639 | /* enable auto negotiate, setup, disable fd */ | ||
1640 | val = lp->a.read_bcr(ioaddr, 32) & ~0x98; | ||
1641 | val |= 0x20; | ||
1642 | lp->a.write_bcr(ioaddr, 32, val); | ||
1643 | } | ||
1644 | } | ||
1644 | } else { | 1645 | } else { |
1645 | if (lp->options & PCNET32_PORT_ASEL) { | 1646 | int first_phy = -1; |
1646 | lp->a.write_bcr(ioaddr, 32, | 1647 | u16 bmcr; |
1647 | lp->a.read_bcr(ioaddr, 32) | 0x0080); | 1648 | u32 bcr9; |
1648 | /* enable auto negotiate, setup, disable fd */ | 1649 | struct ethtool_cmd ecmd; |
1649 | val = lp->a.read_bcr(ioaddr, 32) & ~0x98; | 1650 | |
1650 | val |= 0x20; | 1651 | /* |
1651 | lp->a.write_bcr(ioaddr, 32, val); | 1652 | * There is really no good other way to handle multiple PHYs |
1652 | } | 1653 | * other than turning off all automatics |
1654 | */ | ||
1655 | val = lp->a.read_bcr(ioaddr, 2); | ||
1656 | lp->a.write_bcr(ioaddr, 2, val & ~2); | ||
1657 | val = lp->a.read_bcr(ioaddr, 32); | ||
1658 | lp->a.write_bcr(ioaddr, 32, val & ~(1 << 7)); /* stop MII manager */ | ||
1659 | |||
1660 | if (!(lp->options & PCNET32_PORT_ASEL)) { | ||
1661 | /* setup ecmd */ | ||
1662 | ecmd.port = PORT_MII; | ||
1663 | ecmd.transceiver = XCVR_INTERNAL; | ||
1664 | ecmd.autoneg = AUTONEG_DISABLE; | ||
1665 | ecmd.speed = | ||
1666 | lp-> | ||
1667 | options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10; | ||
1668 | bcr9 = lp->a.read_bcr(ioaddr, 9); | ||
1669 | |||
1670 | if (lp->options & PCNET32_PORT_FD) { | ||
1671 | ecmd.duplex = DUPLEX_FULL; | ||
1672 | bcr9 |= (1 << 0); | ||
1673 | } else { | ||
1674 | ecmd.duplex = DUPLEX_HALF; | ||
1675 | bcr9 |= ~(1 << 0); | ||
1676 | } | ||
1677 | lp->a.write_bcr(ioaddr, 9, bcr9); | ||
1678 | } | ||
1679 | |||
1680 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { | ||
1681 | if (lp->phymask & (1 << i)) { | ||
1682 | /* isolate all but the first PHY */ | ||
1683 | bmcr = mdio_read(dev, i, MII_BMCR); | ||
1684 | if (first_phy == -1) { | ||
1685 | first_phy = i; | ||
1686 | mdio_write(dev, i, MII_BMCR, | ||
1687 | bmcr & ~BMCR_ISOLATE); | ||
1688 | } else { | ||
1689 | mdio_write(dev, i, MII_BMCR, | ||
1690 | bmcr | BMCR_ISOLATE); | ||
1691 | } | ||
1692 | /* use mii_ethtool_sset to setup PHY */ | ||
1693 | lp->mii_if.phy_id = i; | ||
1694 | ecmd.phy_address = i; | ||
1695 | if (lp->options & PCNET32_PORT_ASEL) { | ||
1696 | mii_ethtool_gset(&lp->mii_if, &ecmd); | ||
1697 | ecmd.autoneg = AUTONEG_ENABLE; | ||
1698 | } | ||
1699 | mii_ethtool_sset(&lp->mii_if, &ecmd); | ||
1700 | } | ||
1701 | } | ||
1702 | lp->mii_if.phy_id = first_phy; | ||
1703 | if (netif_msg_link(lp)) | ||
1704 | printk(KERN_INFO "%s: Using PHY number %d.\n", | ||
1705 | dev->name, first_phy); | ||
1653 | } | 1706 | } |
1654 | } | ||
1655 | 1707 | ||
1656 | #ifdef DO_DXSUFLO | 1708 | #ifdef DO_DXSUFLO |
1657 | if (lp->dxsuflo) { /* Disable transmit stop on underflow */ | 1709 | if (lp->dxsuflo) { /* Disable transmit stop on underflow */ |
1658 | val = lp->a.read_csr (ioaddr, 3); | 1710 | val = lp->a.read_csr(ioaddr, 3); |
1659 | val |= 0x40; | 1711 | val |= 0x40; |
1660 | lp->a.write_csr (ioaddr, 3, val); | 1712 | lp->a.write_csr(ioaddr, 3, val); |
1661 | } | 1713 | } |
1662 | #endif | 1714 | #endif |
1663 | 1715 | ||
1664 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 1716 | lp->init_block.mode = |
1665 | pcnet32_load_multicast(dev); | 1717 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
1666 | 1718 | pcnet32_load_multicast(dev); | |
1667 | if (pcnet32_init_ring(dev)) { | 1719 | |
1668 | rc = -ENOMEM; | 1720 | if (pcnet32_init_ring(dev)) { |
1669 | goto err_free_ring; | 1721 | rc = -ENOMEM; |
1670 | } | 1722 | goto err_free_ring; |
1671 | 1723 | } | |
1672 | /* Re-initialize the PCNET32, and start it when done. */ | 1724 | |
1673 | lp->a.write_csr (ioaddr, 1, (lp->dma_addr + | 1725 | /* Re-initialize the PCNET32, and start it when done. */ |
1674 | offsetof(struct pcnet32_private, init_block)) & 0xffff); | 1726 | lp->a.write_csr(ioaddr, 1, (lp->dma_addr + |
1675 | lp->a.write_csr (ioaddr, 2, (lp->dma_addr + | 1727 | offsetof(struct pcnet32_private, |
1676 | offsetof(struct pcnet32_private, init_block)) >> 16); | 1728 | init_block)) & 0xffff); |
1677 | 1729 | lp->a.write_csr(ioaddr, 2, | |
1678 | lp->a.write_csr (ioaddr, 4, 0x0915); | 1730 | (lp->dma_addr + |
1679 | lp->a.write_csr (ioaddr, 0, 0x0001); | 1731 | offsetof(struct pcnet32_private, init_block)) >> 16); |
1680 | 1732 | ||
1681 | netif_start_queue(dev); | 1733 | lp->a.write_csr(ioaddr, 4, 0x0915); |
1682 | 1734 | lp->a.write_csr(ioaddr, 0, 0x0001); | |
1683 | /* If we have mii, print the link status and start the watchdog */ | 1735 | |
1684 | if (lp->mii) { | 1736 | netif_start_queue(dev); |
1685 | mii_check_media (&lp->mii_if, netif_msg_link(lp), 1); | 1737 | |
1686 | mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | 1738 | /* Print the link status and start the watchdog */ |
1687 | } | 1739 | pcnet32_check_media(dev, 1); |
1688 | 1740 | mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | |
1689 | i = 0; | 1741 | |
1690 | while (i++ < 100) | 1742 | i = 0; |
1691 | if (lp->a.read_csr (ioaddr, 0) & 0x0100) | 1743 | while (i++ < 100) |
1692 | break; | 1744 | if (lp->a.read_csr(ioaddr, 0) & 0x0100) |
1693 | /* | 1745 | break; |
1694 | * We used to clear the InitDone bit, 0x0100, here but Mark Stockton | 1746 | /* |
1695 | * reports that doing so triggers a bug in the '974. | 1747 | * We used to clear the InitDone bit, 0x0100, here but Mark Stockton |
1696 | */ | 1748 | * reports that doing so triggers a bug in the '974. |
1697 | lp->a.write_csr (ioaddr, 0, 0x0042); | 1749 | */ |
1698 | 1750 | lp->a.write_csr(ioaddr, 0, 0x0042); | |
1699 | if (netif_msg_ifup(lp)) | 1751 | |
1700 | printk(KERN_DEBUG "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", | 1752 | if (netif_msg_ifup(lp)) |
1701 | dev->name, i, (u32) (lp->dma_addr + | 1753 | printk(KERN_DEBUG |
1702 | offsetof(struct pcnet32_private, init_block)), | 1754 | "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", |
1703 | lp->a.read_csr(ioaddr, 0)); | 1755 | dev->name, i, |
1704 | 1756 | (u32) (lp->dma_addr + | |
1705 | spin_unlock_irqrestore(&lp->lock, flags); | 1757 | offsetof(struct pcnet32_private, init_block)), |
1706 | 1758 | lp->a.read_csr(ioaddr, 0)); | |
1707 | return 0; /* Always succeed */ | 1759 | |
1708 | 1760 | spin_unlock_irqrestore(&lp->lock, flags); | |
1709 | err_free_ring: | 1761 | |
1710 | /* free any allocated skbuffs */ | 1762 | return 0; /* Always succeed */ |
1711 | for (i = 0; i < lp->rx_ring_size; i++) { | 1763 | |
1712 | lp->rx_ring[i].status = 0; | 1764 | err_free_ring: |
1713 | if (lp->rx_skbuff[i]) { | 1765 | /* free any allocated skbuffs */ |
1714 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, | 1766 | for (i = 0; i < lp->rx_ring_size; i++) { |
1715 | PCI_DMA_FROMDEVICE); | 1767 | lp->rx_ring[i].status = 0; |
1716 | dev_kfree_skb(lp->rx_skbuff[i]); | 1768 | if (lp->rx_skbuff[i]) { |
1717 | } | 1769 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], |
1718 | lp->rx_skbuff[i] = NULL; | 1770 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
1719 | lp->rx_dma_addr[i] = 0; | 1771 | dev_kfree_skb(lp->rx_skbuff[i]); |
1720 | } | 1772 | } |
1721 | 1773 | lp->rx_skbuff[i] = NULL; | |
1722 | pcnet32_free_ring(dev); | 1774 | lp->rx_dma_addr[i] = 0; |
1723 | 1775 | } | |
1724 | /* | 1776 | |
1725 | * Switch back to 16bit mode to avoid problems with dumb | 1777 | pcnet32_free_ring(dev); |
1726 | * DOS packet driver after a warm reboot | 1778 | |
1727 | */ | 1779 | /* |
1728 | lp->a.write_bcr (ioaddr, 20, 4); | 1780 | * Switch back to 16bit mode to avoid problems with dumb |
1729 | 1781 | * DOS packet driver after a warm reboot | |
1730 | err_free_irq: | 1782 | */ |
1731 | spin_unlock_irqrestore(&lp->lock, flags); | 1783 | lp->a.write_bcr(ioaddr, 20, 4); |
1732 | free_irq(dev->irq, dev); | 1784 | |
1733 | return rc; | 1785 | err_free_irq: |
1786 | spin_unlock_irqrestore(&lp->lock, flags); | ||
1787 | free_irq(dev->irq, dev); | ||
1788 | return rc; | ||
1734 | } | 1789 | } |
1735 | 1790 | ||
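When more than one PHY is present, the open path above gives up on the chip's automatics altogether: it clears the ASEL bit in BCR2, stops the MII auto-poll manager in BCR32, isolates every PHY except the first one found in lp->phymask by setting BMCR_ISOLATE, and then programs each PHY individually through mii_ethtool_sset(). Picking the active PHY is just a scan for the lowest set bit in the mask; a standalone sketch of that scan, with a hypothetical mask value:

#include <stdio.h>

#define MAX_PHYS 32

int main(void)
{
	unsigned int phymask = 0x12;    /* hypothetical: PHYs at addresses 1 and 4 */
	int first_phy = -1, i;

	for (i = 0; i < MAX_PHYS; i++) {
		if (!(phymask & (1u << i)))
			continue;
		if (first_phy == -1) {
			first_phy = i;                  /* stays active (not isolated) */
			printf("PHY %d: selected\n", i);
		} else {
			printf("PHY %d: isolated\n", i); /* would get BMCR_ISOLATE */
		}
	}
	printf("using PHY number %d\n", first_phy);
	return 0;
}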
1736 | /* | 1791 | /* |
@@ -1746,727 +1801,893 @@ err_free_irq: | |||
1746 | * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com | 1801 | * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com |
1747 | */ | 1802 | */ |
1748 | 1803 | ||
1749 | static void | 1804 | static void pcnet32_purge_tx_ring(struct net_device *dev) |
1750 | pcnet32_purge_tx_ring(struct net_device *dev) | ||
1751 | { | 1805 | { |
1752 | struct pcnet32_private *lp = dev->priv; | 1806 | struct pcnet32_private *lp = dev->priv; |
1753 | int i; | 1807 | int i; |
1754 | |||
1755 | for (i = 0; i < lp->tx_ring_size; i++) { | ||
1756 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | ||
1757 | wmb(); /* Make sure adapter sees owner change */ | ||
1758 | if (lp->tx_skbuff[i]) { | ||
1759 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | ||
1760 | lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); | ||
1761 | dev_kfree_skb_any(lp->tx_skbuff[i]); | ||
1762 | } | ||
1763 | lp->tx_skbuff[i] = NULL; | ||
1764 | lp->tx_dma_addr[i] = 0; | ||
1765 | } | ||
1766 | } | ||
1767 | 1808 | ||
1809 | for (i = 0; i < lp->tx_ring_size; i++) { | ||
1810 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | ||
1811 | wmb(); /* Make sure adapter sees owner change */ | ||
1812 | if (lp->tx_skbuff[i]) { | ||
1813 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | ||
1814 | lp->tx_skbuff[i]->len, | ||
1815 | PCI_DMA_TODEVICE); | ||
1816 | dev_kfree_skb_any(lp->tx_skbuff[i]); | ||
1817 | } | ||
1818 | lp->tx_skbuff[i] = NULL; | ||
1819 | lp->tx_dma_addr[i] = 0; | ||
1820 | } | ||
1821 | } | ||
1768 | 1822 | ||
1769 | /* Initialize the PCNET32 Rx and Tx rings. */ | 1823 | /* Initialize the PCNET32 Rx and Tx rings. */ |
1770 | static int | 1824 | static int pcnet32_init_ring(struct net_device *dev) |
1771 | pcnet32_init_ring(struct net_device *dev) | ||
1772 | { | 1825 | { |
1773 | struct pcnet32_private *lp = dev->priv; | 1826 | struct pcnet32_private *lp = dev->priv; |
1774 | int i; | 1827 | int i; |
1775 | 1828 | ||
1776 | lp->tx_full = 0; | 1829 | lp->tx_full = 0; |
1777 | lp->cur_rx = lp->cur_tx = 0; | 1830 | lp->cur_rx = lp->cur_tx = 0; |
1778 | lp->dirty_rx = lp->dirty_tx = 0; | 1831 | lp->dirty_rx = lp->dirty_tx = 0; |
1779 | 1832 | ||
1780 | for (i = 0; i < lp->rx_ring_size; i++) { | 1833 | for (i = 0; i < lp->rx_ring_size; i++) { |
1781 | struct sk_buff *rx_skbuff = lp->rx_skbuff[i]; | 1834 | struct sk_buff *rx_skbuff = lp->rx_skbuff[i]; |
1782 | if (rx_skbuff == NULL) { | 1835 | if (rx_skbuff == NULL) { |
1783 | if (!(rx_skbuff = lp->rx_skbuff[i] = dev_alloc_skb (PKT_BUF_SZ))) { | 1836 | if (! |
1784 | /* there is not much, we can do at this point */ | 1837 | (rx_skbuff = lp->rx_skbuff[i] = |
1785 | if (pcnet32_debug & NETIF_MSG_DRV) | 1838 | dev_alloc_skb(PKT_BUF_SZ))) { |
1786 | printk(KERN_ERR "%s: pcnet32_init_ring dev_alloc_skb failed.\n", | 1839 | /* there is not much, we can do at this point */ |
1787 | dev->name); | 1840 | if (pcnet32_debug & NETIF_MSG_DRV) |
1788 | return -1; | 1841 | printk(KERN_ERR |
1789 | } | 1842 | "%s: pcnet32_init_ring dev_alloc_skb failed.\n", |
1790 | skb_reserve (rx_skbuff, 2); | 1843 | dev->name); |
1791 | } | 1844 | return -1; |
1792 | 1845 | } | |
1793 | rmb(); | 1846 | skb_reserve(rx_skbuff, 2); |
1794 | if (lp->rx_dma_addr[i] == 0) | 1847 | } |
1795 | lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->data, | 1848 | |
1796 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | 1849 | rmb(); |
1797 | lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); | 1850 | if (lp->rx_dma_addr[i] == 0) |
1798 | lp->rx_ring[i].buf_length = le16_to_cpu(2-PKT_BUF_SZ); | 1851 | lp->rx_dma_addr[i] = |
1799 | wmb(); /* Make sure owner changes after all others are visible */ | 1852 | pci_map_single(lp->pci_dev, rx_skbuff->data, |
1800 | lp->rx_ring[i].status = le16_to_cpu(0x8000); | 1853 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
1801 | } | 1854 | lp->rx_ring[i].base = (u32) le32_to_cpu(lp->rx_dma_addr[i]); |
1802 | /* The Tx buffer address is filled in as needed, but we do need to clear | 1855 | lp->rx_ring[i].buf_length = le16_to_cpu(2 - PKT_BUF_SZ); |
1803 | * the upper ownership bit. */ | 1856 | wmb(); /* Make sure owner changes after all others are visible */ |
1804 | for (i = 0; i < lp->tx_ring_size; i++) { | 1857 | lp->rx_ring[i].status = le16_to_cpu(0x8000); |
1805 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | 1858 | } |
1806 | wmb(); /* Make sure adapter sees owner change */ | 1859 | /* The Tx buffer address is filled in as needed, but we do need to clear |
1807 | lp->tx_ring[i].base = 0; | 1860 | * the upper ownership bit. */ |
1808 | lp->tx_dma_addr[i] = 0; | 1861 | for (i = 0; i < lp->tx_ring_size; i++) { |
1809 | } | 1862 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ |
1810 | 1863 | wmb(); /* Make sure adapter sees owner change */ | |
1811 | lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); | 1864 | lp->tx_ring[i].base = 0; |
1812 | for (i = 0; i < 6; i++) | 1865 | lp->tx_dma_addr[i] = 0; |
1813 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | 1866 | } |
1814 | lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr); | 1867 | |
1815 | lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr); | 1868 | lp->init_block.tlen_rlen = |
1816 | wmb(); /* Make sure all changes are visible */ | 1869 | le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); |
1817 | return 0; | 1870 | for (i = 0; i < 6; i++) |
1871 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | ||
1872 | lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr); | ||
1873 | lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr); | ||
1874 | wmb(); /* Make sure all changes are visible */ | ||
1875 | return 0; | ||
1818 | } | 1876 | } |
1819 | 1877 | ||
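pcnet32_init_ring() hands each receive descriptor to the chip in a fixed order, spelled out by the comments above: fill in the buffer address and the (negated) buffer length first, issue wmb(), and only then set the ownership bit (0x8000) in the status word, so the adapter can never see a descriptor it owns whose other fields are still stale. A userspace sketch of that ordering, with the GCC full-barrier builtin standing in for wmb(), the byte-order conversions omitted, and a hypothetical buffer size:

#include <stdio.h>
#include <stdint.h>

struct rx_desc {
	uint32_t base;          /* DMA address of the receive buffer */
	int16_t  buf_length;    /* negative buffer size, two's complement */
	uint16_t status;        /* bit 15 = OWN: set once the device may use it */
};

static void give_to_device(struct rx_desc *d, uint32_t dma, int16_t size)
{
	d->base = dma;
	d->buf_length = (int16_t)-size;   /* the chip expects minus the length */
	__sync_synchronize();             /* stands in for wmb(): fields before OWN */
	d->status = 0x8000;               /* ownership bit changes last */
}

int main(void)
{
	struct rx_desc d = { 0, 0, 0 };

	give_to_device(&d, 0x12345678u, 1542);
	printf("base=%#x len=%d status=%#x\n",
	       (unsigned)d.base, (int)d.buf_length, (unsigned)d.status);
	return 0;
}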
1820 | /* the pcnet32 has been issued a stop or reset. Wait for the stop bit | 1878 | /* the pcnet32 has been issued a stop or reset. Wait for the stop bit |
1821 | * then flush the pending transmit operations, re-initialize the ring, | 1879 | * then flush the pending transmit operations, re-initialize the ring, |
1822 | * and tell the chip to initialize. | 1880 | * and tell the chip to initialize. |
1823 | */ | 1881 | */ |
1824 | static void | 1882 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) |
1825 | pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) | ||
1826 | { | 1883 | { |
1827 | struct pcnet32_private *lp = dev->priv; | 1884 | struct pcnet32_private *lp = dev->priv; |
1828 | unsigned long ioaddr = dev->base_addr; | 1885 | unsigned long ioaddr = dev->base_addr; |
1829 | int i; | 1886 | int i; |
1830 | 1887 | ||
1831 | /* wait for stop */ | 1888 | /* wait for stop */ |
1832 | for (i=0; i<100; i++) | 1889 | for (i = 0; i < 100; i++) |
1833 | if (lp->a.read_csr(ioaddr, 0) & 0x0004) | 1890 | if (lp->a.read_csr(ioaddr, 0) & 0x0004) |
1834 | break; | 1891 | break; |
1835 | 1892 | ||
1836 | if (i >= 100 && netif_msg_drv(lp)) | 1893 | if (i >= 100 && netif_msg_drv(lp)) |
1837 | printk(KERN_ERR "%s: pcnet32_restart timed out waiting for stop.\n", | 1894 | printk(KERN_ERR |
1838 | dev->name); | 1895 | "%s: pcnet32_restart timed out waiting for stop.\n", |
1896 | dev->name); | ||
1839 | 1897 | ||
1840 | pcnet32_purge_tx_ring(dev); | 1898 | pcnet32_purge_tx_ring(dev); |
1841 | if (pcnet32_init_ring(dev)) | 1899 | if (pcnet32_init_ring(dev)) |
1842 | return; | 1900 | return; |
1843 | 1901 | ||
1844 | /* ReInit Ring */ | 1902 | /* ReInit Ring */ |
1845 | lp->a.write_csr (ioaddr, 0, 1); | 1903 | lp->a.write_csr(ioaddr, 0, 1); |
1846 | i = 0; | 1904 | i = 0; |
1847 | while (i++ < 1000) | 1905 | while (i++ < 1000) |
1848 | if (lp->a.read_csr (ioaddr, 0) & 0x0100) | 1906 | if (lp->a.read_csr(ioaddr, 0) & 0x0100) |
1849 | break; | 1907 | break; |
1850 | 1908 | ||
1851 | lp->a.write_csr (ioaddr, 0, csr0_bits); | 1909 | lp->a.write_csr(ioaddr, 0, csr0_bits); |
1852 | } | 1910 | } |
1853 | 1911 | ||
1854 | 1912 | static void pcnet32_tx_timeout(struct net_device *dev) | |
1855 | static void | ||
1856 | pcnet32_tx_timeout (struct net_device *dev) | ||
1857 | { | 1913 | { |
1858 | struct pcnet32_private *lp = dev->priv; | 1914 | struct pcnet32_private *lp = dev->priv; |
1859 | unsigned long ioaddr = dev->base_addr, flags; | 1915 | unsigned long ioaddr = dev->base_addr, flags; |
1860 | 1916 | ||
1861 | spin_lock_irqsave(&lp->lock, flags); | 1917 | spin_lock_irqsave(&lp->lock, flags); |
1862 | /* Transmitter timeout, serious problems. */ | 1918 | /* Transmitter timeout, serious problems. */ |
1863 | if (pcnet32_debug & NETIF_MSG_DRV) | 1919 | if (pcnet32_debug & NETIF_MSG_DRV) |
1864 | printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n", | 1920 | printk(KERN_ERR |
1865 | dev->name, lp->a.read_csr(ioaddr, 0)); | 1921 | "%s: transmit timed out, status %4.4x, resetting.\n", |
1866 | lp->a.write_csr (ioaddr, 0, 0x0004); | 1922 | dev->name, lp->a.read_csr(ioaddr, 0)); |
1867 | lp->stats.tx_errors++; | 1923 | lp->a.write_csr(ioaddr, 0, 0x0004); |
1868 | if (netif_msg_tx_err(lp)) { | 1924 | lp->stats.tx_errors++; |
1869 | int i; | 1925 | if (netif_msg_tx_err(lp)) { |
1870 | printk(KERN_DEBUG " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.", | 1926 | int i; |
1871 | lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "", | 1927 | printk(KERN_DEBUG |
1872 | lp->cur_rx); | 1928 | " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.", |
1873 | for (i = 0 ; i < lp->rx_ring_size; i++) | 1929 | lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "", |
1874 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", | 1930 | lp->cur_rx); |
1875 | le32_to_cpu(lp->rx_ring[i].base), | 1931 | for (i = 0; i < lp->rx_ring_size; i++) |
1876 | (-le16_to_cpu(lp->rx_ring[i].buf_length)) & 0xffff, | 1932 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", |
1877 | le32_to_cpu(lp->rx_ring[i].msg_length), | 1933 | le32_to_cpu(lp->rx_ring[i].base), |
1878 | le16_to_cpu(lp->rx_ring[i].status)); | 1934 | (-le16_to_cpu(lp->rx_ring[i].buf_length)) & |
1879 | for (i = 0 ; i < lp->tx_ring_size; i++) | 1935 | 0xffff, le32_to_cpu(lp->rx_ring[i].msg_length), |
1880 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", | 1936 | le16_to_cpu(lp->rx_ring[i].status)); |
1881 | le32_to_cpu(lp->tx_ring[i].base), | 1937 | for (i = 0; i < lp->tx_ring_size; i++) |
1882 | (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff, | 1938 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", |
1883 | le32_to_cpu(lp->tx_ring[i].misc), | 1939 | le32_to_cpu(lp->tx_ring[i].base), |
1884 | le16_to_cpu(lp->tx_ring[i].status)); | 1940 | (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff, |
1885 | printk("\n"); | 1941 | le32_to_cpu(lp->tx_ring[i].misc), |
1886 | } | 1942 | le16_to_cpu(lp->tx_ring[i].status)); |
1887 | pcnet32_restart(dev, 0x0042); | 1943 | printk("\n"); |
1888 | 1944 | } | |
1889 | dev->trans_start = jiffies; | 1945 | pcnet32_restart(dev, 0x0042); |
1890 | netif_wake_queue(dev); | 1946 | |
1891 | 1947 | dev->trans_start = jiffies; | |
1892 | spin_unlock_irqrestore(&lp->lock, flags); | 1948 | netif_wake_queue(dev); |
1893 | } | ||
1894 | 1949 | ||
1950 | spin_unlock_irqrestore(&lp->lock, flags); | ||
1951 | } | ||
1895 | 1952 | ||
1896 | static int | 1953 | static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) |
1897 | pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
1898 | { | 1954 | { |
1899 | struct pcnet32_private *lp = dev->priv; | 1955 | struct pcnet32_private *lp = dev->priv; |
1900 | unsigned long ioaddr = dev->base_addr; | 1956 | unsigned long ioaddr = dev->base_addr; |
1901 | u16 status; | 1957 | u16 status; |
1902 | int entry; | 1958 | int entry; |
1903 | unsigned long flags; | 1959 | unsigned long flags; |
1904 | 1960 | ||
1905 | spin_lock_irqsave(&lp->lock, flags); | 1961 | spin_lock_irqsave(&lp->lock, flags); |
1906 | 1962 | ||
1907 | if (netif_msg_tx_queued(lp)) { | 1963 | if (netif_msg_tx_queued(lp)) { |
1908 | printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", | 1964 | printk(KERN_DEBUG |
1909 | dev->name, lp->a.read_csr(ioaddr, 0)); | 1965 | "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", |
1910 | } | 1966 | dev->name, lp->a.read_csr(ioaddr, 0)); |
1967 | } | ||
1911 | 1968 | ||
1912 | /* Default status -- will not enable Successful-TxDone | 1969 | /* Default status -- will not enable Successful-TxDone |
1913 | * interrupt when that option is available to us. | 1970 | * interrupt when that option is available to us. |
1914 | */ | 1971 | */ |
1915 | status = 0x8300; | 1972 | status = 0x8300; |
1916 | 1973 | ||
1917 | /* Fill in a Tx ring entry */ | 1974 | /* Fill in a Tx ring entry */ |
1918 | 1975 | ||
1919 | /* Mask to ring buffer boundary. */ | 1976 | /* Mask to ring buffer boundary. */ |
1920 | entry = lp->cur_tx & lp->tx_mod_mask; | 1977 | entry = lp->cur_tx & lp->tx_mod_mask; |
1921 | 1978 | ||
1922 | /* Caution: the write order is important here, set the status | 1979 | /* Caution: the write order is important here, set the status |
1923 | * with the "ownership" bits last. */ | 1980 | * with the "ownership" bits last. */ |
1924 | 1981 | ||
1925 | lp->tx_ring[entry].length = le16_to_cpu(-skb->len); | 1982 | lp->tx_ring[entry].length = le16_to_cpu(-skb->len); |
1926 | 1983 | ||
1927 | lp->tx_ring[entry].misc = 0x00000000; | 1984 | lp->tx_ring[entry].misc = 0x00000000; |
1928 | 1985 | ||
1929 | lp->tx_skbuff[entry] = skb; | 1986 | lp->tx_skbuff[entry] = skb; |
1930 | lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len, | 1987 | lp->tx_dma_addr[entry] = |
1931 | PCI_DMA_TODEVICE); | 1988 | pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); |
1932 | lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); | 1989 | lp->tx_ring[entry].base = (u32) le32_to_cpu(lp->tx_dma_addr[entry]); |
1933 | wmb(); /* Make sure owner changes after all others are visible */ | 1990 | wmb(); /* Make sure owner changes after all others are visible */ |
1934 | lp->tx_ring[entry].status = le16_to_cpu(status); | 1991 | lp->tx_ring[entry].status = le16_to_cpu(status); |
1935 | 1992 | ||
1936 | lp->cur_tx++; | 1993 | lp->cur_tx++; |
1937 | lp->stats.tx_bytes += skb->len; | 1994 | lp->stats.tx_bytes += skb->len; |
1938 | 1995 | ||
1939 | /* Trigger an immediate send poll. */ | 1996 | /* Trigger an immediate send poll. */ |
1940 | lp->a.write_csr (ioaddr, 0, 0x0048); | 1997 | lp->a.write_csr(ioaddr, 0, 0x0048); |
1941 | 1998 | ||
1942 | dev->trans_start = jiffies; | 1999 | dev->trans_start = jiffies; |
1943 | 2000 | ||
1944 | if (lp->tx_ring[(entry+1) & lp->tx_mod_mask].base != 0) { | 2001 | if (lp->tx_ring[(entry + 1) & lp->tx_mod_mask].base != 0) { |
1945 | lp->tx_full = 1; | 2002 | lp->tx_full = 1; |
1946 | netif_stop_queue(dev); | 2003 | netif_stop_queue(dev); |
1947 | } | 2004 | } |
1948 | spin_unlock_irqrestore(&lp->lock, flags); | 2005 | spin_unlock_irqrestore(&lp->lock, flags); |
1949 | return 0; | 2006 | return 0; |
1950 | } | 2007 | } |
1951 | 2008 | ||
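pcnet32_start_xmit() never wraps lp->cur_tx itself: the counter runs free and is masked with lp->tx_mod_mask (the ring size minus one, so the ring size must be a power of two) whenever a slot index is needed, and the queue is stopped as "full" simply when the slot after the one just filled still has a non-zero buffer address. A standalone sketch of that free-running-counter indexing, with hypothetical names and a toy reclaim step in place of the real Tx-done interrupt:

#include <stdio.h>

#define TX_RING_SIZE 16                 /* must be a power of two */
#define TX_MOD_MASK  (TX_RING_SIZE - 1)

int main(void)
{
	unsigned int cur_tx = 0, dirty_tx = 0;  /* free-running producer/consumer counters */
	unsigned int base[TX_RING_SIZE] = { 0 };
	unsigned int i;

	for (i = 0; i < 40; i++) {              /* queue 40 packets through a 16-slot ring */
		unsigned int entry = cur_tx & TX_MOD_MASK;

		base[entry] = 0x1000 + i;       /* pretend we mapped a buffer here */
		cur_tx++;

		/* "Full" when the slot after this one has not been reclaimed yet. */
		if (base[(entry + 1) & TX_MOD_MASK] != 0) {
			unsigned int done = dirty_tx & TX_MOD_MASK;

			base[done] = 0;         /* pretend the device finished this one */
			dirty_tx++;
		}
	}
	printf("queued=%u reclaimed=%u outstanding=%u\n",
	       cur_tx, dirty_tx, cur_tx - dirty_tx);
	return 0;
}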
1952 | /* The PCNET32 interrupt handler. */ | 2009 | /* The PCNET32 interrupt handler. */ |
1953 | static irqreturn_t | 2010 | static irqreturn_t |
1954 | pcnet32_interrupt(int irq, void *dev_id, struct pt_regs * regs) | 2011 | pcnet32_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
1955 | { | 2012 | { |
1956 | struct net_device *dev = dev_id; | 2013 | struct net_device *dev = dev_id; |
1957 | struct pcnet32_private *lp; | 2014 | struct pcnet32_private *lp; |
1958 | unsigned long ioaddr; | 2015 | unsigned long ioaddr; |
1959 | u16 csr0,rap; | 2016 | u16 csr0, rap; |
1960 | int boguscnt = max_interrupt_work; | 2017 | int boguscnt = max_interrupt_work; |
1961 | int must_restart; | 2018 | int must_restart; |
1962 | 2019 | ||
1963 | if (!dev) { | 2020 | if (!dev) { |
1964 | if (pcnet32_debug & NETIF_MSG_INTR) | 2021 | if (pcnet32_debug & NETIF_MSG_INTR) |
1965 | printk (KERN_DEBUG "%s(): irq %d for unknown device\n", | 2022 | printk(KERN_DEBUG "%s(): irq %d for unknown device\n", |
1966 | __FUNCTION__, irq); | 2023 | __FUNCTION__, irq); |
1967 | return IRQ_NONE; | 2024 | return IRQ_NONE; |
1968 | } | ||
1969 | |||
1970 | ioaddr = dev->base_addr; | ||
1971 | lp = dev->priv; | ||
1972 | |||
1973 | spin_lock(&lp->lock); | ||
1974 | |||
1975 | rap = lp->a.read_rap(ioaddr); | ||
1976 | while ((csr0 = lp->a.read_csr (ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) { | ||
1977 | if (csr0 == 0xffff) { | ||
1978 | break; /* PCMCIA remove happened */ | ||
1979 | } | 2025 | } |
1980 | /* Acknowledge all of the current interrupt sources ASAP. */ | ||
1981 | lp->a.write_csr (ioaddr, 0, csr0 & ~0x004f); | ||
1982 | 2026 | ||
1983 | must_restart = 0; | 2027 | ioaddr = dev->base_addr; |
2028 | lp = dev->priv; | ||
1984 | 2029 | ||
1985 | if (netif_msg_intr(lp)) | 2030 | spin_lock(&lp->lock); |
1986 | printk(KERN_DEBUG "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", | 2031 | |
1987 | dev->name, csr0, lp->a.read_csr (ioaddr, 0)); | 2032 | rap = lp->a.read_rap(ioaddr); |
1988 | 2033 | while ((csr0 = lp->a.read_csr(ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) { | |
1989 | if (csr0 & 0x0400) /* Rx interrupt */ | 2034 | if (csr0 == 0xffff) { |
1990 | pcnet32_rx(dev); | 2035 | break; /* PCMCIA remove happened */ |
1991 | 2036 | } | |
1992 | if (csr0 & 0x0200) { /* Tx-done interrupt */ | 2037 | /* Acknowledge all of the current interrupt sources ASAP. */ |
1993 | unsigned int dirty_tx = lp->dirty_tx; | 2038 | lp->a.write_csr(ioaddr, 0, csr0 & ~0x004f); |
1994 | int delta; | 2039 | |
1995 | 2040 | must_restart = 0; | |
1996 | while (dirty_tx != lp->cur_tx) { | 2041 | |
1997 | int entry = dirty_tx & lp->tx_mod_mask; | 2042 | if (netif_msg_intr(lp)) |
1998 | int status = (short)le16_to_cpu(lp->tx_ring[entry].status); | 2043 | printk(KERN_DEBUG |
1999 | 2044 | "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", | |
2000 | if (status < 0) | 2045 | dev->name, csr0, lp->a.read_csr(ioaddr, 0)); |
2001 | break; /* It still hasn't been Txed */ | 2046 | |
2002 | 2047 | if (csr0 & 0x0400) /* Rx interrupt */ | |
2003 | lp->tx_ring[entry].base = 0; | 2048 | pcnet32_rx(dev); |
2004 | 2049 | ||
2005 | if (status & 0x4000) { | 2050 | if (csr0 & 0x0200) { /* Tx-done interrupt */ |
2006 | /* There was an major error, log it. */ | 2051 | unsigned int dirty_tx = lp->dirty_tx; |
2007 | int err_status = le32_to_cpu(lp->tx_ring[entry].misc); | 2052 | int delta; |
2008 | lp->stats.tx_errors++; | 2053 | |
2009 | if (netif_msg_tx_err(lp)) | 2054 | while (dirty_tx != lp->cur_tx) { |
2010 | printk(KERN_ERR "%s: Tx error status=%04x err_status=%08x\n", | 2055 | int entry = dirty_tx & lp->tx_mod_mask; |
2011 | dev->name, status, err_status); | 2056 | int status = |
2012 | if (err_status & 0x04000000) lp->stats.tx_aborted_errors++; | 2057 | (short)le16_to_cpu(lp->tx_ring[entry]. |
2013 | if (err_status & 0x08000000) lp->stats.tx_carrier_errors++; | 2058 | status); |
2014 | if (err_status & 0x10000000) lp->stats.tx_window_errors++; | 2059 | |
2060 | if (status < 0) | ||
2061 | break; /* It still hasn't been Txed */ | ||
2062 | |||
2063 | lp->tx_ring[entry].base = 0; | ||
2064 | |||
2065 | if (status & 0x4000) { | ||
2066 | /* There was a major error, log it. */ | ||
2067 | int err_status = | ||
2068 | le32_to_cpu(lp->tx_ring[entry]. | ||
2069 | misc); | ||
2070 | lp->stats.tx_errors++; | ||
2071 | if (netif_msg_tx_err(lp)) | ||
2072 | printk(KERN_ERR | ||
2073 | "%s: Tx error status=%04x err_status=%08x\n", | ||
2074 | dev->name, status, | ||
2075 | err_status); | ||
2076 | if (err_status & 0x04000000) | ||
2077 | lp->stats.tx_aborted_errors++; | ||
2078 | if (err_status & 0x08000000) | ||
2079 | lp->stats.tx_carrier_errors++; | ||
2080 | if (err_status & 0x10000000) | ||
2081 | lp->stats.tx_window_errors++; | ||
2015 | #ifndef DO_DXSUFLO | 2082 | #ifndef DO_DXSUFLO |
2016 | if (err_status & 0x40000000) { | 2083 | if (err_status & 0x40000000) { |
2017 | lp->stats.tx_fifo_errors++; | 2084 | lp->stats.tx_fifo_errors++; |
2018 | /* Ackk! On FIFO errors the Tx unit is turned off! */ | 2085 | /* Ackk! On FIFO errors the Tx unit is turned off! */ |
2019 | /* Remove this verbosity later! */ | 2086 | /* Remove this verbosity later! */ |
2020 | if (netif_msg_tx_err(lp)) | 2087 | if (netif_msg_tx_err(lp)) |
2021 | printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", | 2088 | printk(KERN_ERR |
2022 | dev->name, csr0); | 2089 | "%s: Tx FIFO error! CSR0=%4.4x\n", |
2023 | must_restart = 1; | 2090 | dev->name, csr0); |
2024 | } | 2091 | must_restart = 1; |
2092 | } | ||
2025 | #else | 2093 | #else |
2026 | if (err_status & 0x40000000) { | 2094 | if (err_status & 0x40000000) { |
2027 | lp->stats.tx_fifo_errors++; | 2095 | lp->stats.tx_fifo_errors++; |
2028 | if (! lp->dxsuflo) { /* If controller doesn't recover ... */ | 2096 | if (!lp->dxsuflo) { /* If controller doesn't recover ... */ |
2029 | /* Ackk! On FIFO errors the Tx unit is turned off! */ | 2097 | /* Ackk! On FIFO errors the Tx unit is turned off! */ |
2030 | /* Remove this verbosity later! */ | 2098 | /* Remove this verbosity later! */ |
2031 | if (netif_msg_tx_err(lp)) | 2099 | if (netif_msg_tx_err |
2032 | printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", | 2100 | (lp)) |
2033 | dev->name, csr0); | 2101 | printk(KERN_ERR |
2034 | must_restart = 1; | 2102 | "%s: Tx FIFO error! CSR0=%4.4x\n", |
2035 | } | 2103 | dev-> |
2036 | } | 2104 | name, |
2105 | csr0); | ||
2106 | must_restart = 1; | ||
2107 | } | ||
2108 | } | ||
2037 | #endif | 2109 | #endif |
2038 | } else { | 2110 | } else { |
2039 | if (status & 0x1800) | 2111 | if (status & 0x1800) |
2040 | lp->stats.collisions++; | 2112 | lp->stats.collisions++; |
2041 | lp->stats.tx_packets++; | 2113 | lp->stats.tx_packets++; |
2114 | } | ||
2115 | |||
2116 | /* We must free the original skb */ | ||
2117 | if (lp->tx_skbuff[entry]) { | ||
2118 | pci_unmap_single(lp->pci_dev, | ||
2119 | lp->tx_dma_addr[entry], | ||
2120 | lp->tx_skbuff[entry]-> | ||
2121 | len, PCI_DMA_TODEVICE); | ||
2122 | dev_kfree_skb_irq(lp->tx_skbuff[entry]); | ||
2123 | lp->tx_skbuff[entry] = NULL; | ||
2124 | lp->tx_dma_addr[entry] = 0; | ||
2125 | } | ||
2126 | dirty_tx++; | ||
2127 | } | ||
2128 | |||
2129 | delta = | ||
2130 | (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask + | ||
2131 | lp->tx_ring_size); | ||
2132 | if (delta > lp->tx_ring_size) { | ||
2133 | if (netif_msg_drv(lp)) | ||
2134 | printk(KERN_ERR | ||
2135 | "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", | ||
2136 | dev->name, dirty_tx, lp->cur_tx, | ||
2137 | lp->tx_full); | ||
2138 | dirty_tx += lp->tx_ring_size; | ||
2139 | delta -= lp->tx_ring_size; | ||
2140 | } | ||
2141 | |||
2142 | if (lp->tx_full && | ||
2143 | netif_queue_stopped(dev) && | ||
2144 | delta < lp->tx_ring_size - 2) { | ||
2145 | /* The ring is no longer full, clear tbusy. */ | ||
2146 | lp->tx_full = 0; | ||
2147 | netif_wake_queue(dev); | ||
2148 | } | ||
2149 | lp->dirty_tx = dirty_tx; | ||
2150 | } | ||
2151 | |||
2152 | /* Log misc errors. */ | ||
2153 | if (csr0 & 0x4000) | ||
2154 | lp->stats.tx_errors++; /* Tx babble. */ | ||
2155 | if (csr0 & 0x1000) { | ||
2156 | /* | ||
2157 | * this happens when our receive ring is full. This shouldn't | ||
2158 | * be a problem as we will see normal rx interrupts for the frames | ||
2159 | * in the receive ring. But there are some PCI chipsets (I can | ||
2160 | * reproduce this on SP3G with Intel saturn chipset) which have | ||
2161 | * sometimes problems and will fill up the receive ring with | ||
2162 | * error descriptors. In this situation we don't get a rx | ||
2163 | * interrupt, but a missed frame interrupt sooner or later. | ||
2164 | * So we try to clean up our receive ring here. | ||
2165 | */ | ||
2166 | pcnet32_rx(dev); | ||
2167 | lp->stats.rx_errors++; /* Missed a Rx frame. */ | ||
2168 | } | ||
2169 | if (csr0 & 0x0800) { | ||
2170 | if (netif_msg_drv(lp)) | ||
2171 | printk(KERN_ERR | ||
2172 | "%s: Bus master arbitration failure, status %4.4x.\n", | ||
2173 | dev->name, csr0); | ||
2174 | /* unlike for the lance, there is no restart needed */ | ||
2042 | } | 2175 | } |
2043 | 2176 | ||
2044 | /* We must free the original skb */ | 2177 | if (must_restart) { |
2045 | if (lp->tx_skbuff[entry]) { | 2178 | /* reset the chip to clear the error condition, then restart */ |
2046 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[entry], | 2179 | lp->a.reset(ioaddr); |
2047 | lp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); | 2180 | lp->a.write_csr(ioaddr, 4, 0x0915); |
2048 | dev_kfree_skb_irq(lp->tx_skbuff[entry]); | 2181 | pcnet32_restart(dev, 0x0002); |
2049 | lp->tx_skbuff[entry] = NULL; | 2182 | netif_wake_queue(dev); |
2050 | lp->tx_dma_addr[entry] = 0; | ||
2051 | } | 2183 | } |
2052 | dirty_tx++; | 2184 | } |
2053 | } | 2185 | |
2054 | 2186 | /* Set interrupt enable. */ | |
2055 | delta = (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask + lp->tx_ring_size); | 2187 | lp->a.write_csr(ioaddr, 0, 0x0040); |
2056 | if (delta > lp->tx_ring_size) { | 2188 | lp->a.write_rap(ioaddr, rap); |
2057 | if (netif_msg_drv(lp)) | 2189 | |
2058 | printk(KERN_ERR "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", | 2190 | if (netif_msg_intr(lp)) |
2059 | dev->name, dirty_tx, lp->cur_tx, lp->tx_full); | 2191 | printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", |
2060 | dirty_tx += lp->tx_ring_size; | 2192 | dev->name, lp->a.read_csr(ioaddr, 0)); |
2061 | delta -= lp->tx_ring_size; | 2193 | |
2062 | } | 2194 | spin_unlock(&lp->lock); |
2063 | 2195 | ||
2064 | if (lp->tx_full && | 2196 | return IRQ_HANDLED; |
2065 | netif_queue_stopped(dev) && | ||
2066 | delta < lp->tx_ring_size - 2) { | ||
2067 | /* The ring is no longer full, clear tbusy. */ | ||
2068 | lp->tx_full = 0; | ||
2069 | netif_wake_queue (dev); | ||
2070 | } | ||
2071 | lp->dirty_tx = dirty_tx; | ||
2072 | } | ||
2073 | |||
2074 | /* Log misc errors. */ | ||
2075 | if (csr0 & 0x4000) lp->stats.tx_errors++; /* Tx babble. */ | ||
2076 | if (csr0 & 0x1000) { | ||
2077 | /* | ||
2078 | * this happens when our receive ring is full. This shouldn't | ||
2079 | * be a problem as we will see normal rx interrupts for the frames | ||
2080 | * in the receive ring. But there are some PCI chipsets (I can | ||
2081 | * reproduce this on SP3G with Intel saturn chipset) which have | ||
2082 | * sometimes problems and will fill up the receive ring with | ||
2083 | * error descriptors. In this situation we don't get a rx | ||
2084 | * interrupt, but a missed frame interrupt sooner or later. | ||
2085 | * So we try to clean up our receive ring here. | ||
2086 | */ | ||
2087 | pcnet32_rx(dev); | ||
2088 | lp->stats.rx_errors++; /* Missed a Rx frame. */ | ||
2089 | } | ||
2090 | if (csr0 & 0x0800) { | ||
2091 | if (netif_msg_drv(lp)) | ||
2092 | printk(KERN_ERR "%s: Bus master arbitration failure, status %4.4x.\n", | ||
2093 | dev->name, csr0); | ||
2094 | /* unlike for the lance, there is no restart needed */ | ||
2095 | } | ||
2096 | |||
2097 | if (must_restart) { | ||
2098 | /* reset the chip to clear the error condition, then restart */ | ||
2099 | lp->a.reset(ioaddr); | ||
2100 | lp->a.write_csr(ioaddr, 4, 0x0915); | ||
2101 | pcnet32_restart(dev, 0x0002); | ||
2102 | netif_wake_queue(dev); | ||
2103 | } | ||
2104 | } | ||
2105 | |||
2106 | /* Set interrupt enable. */ | ||
2107 | lp->a.write_csr (ioaddr, 0, 0x0040); | ||
2108 | lp->a.write_rap (ioaddr,rap); | ||
2109 | |||
2110 | if (netif_msg_intr(lp)) | ||
2111 | printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", | ||
2112 | dev->name, lp->a.read_csr (ioaddr, 0)); | ||
2113 | |||
2114 | spin_unlock(&lp->lock); | ||
2115 | |||
2116 | return IRQ_HANDLED; | ||
2117 | } | 2197 | } |
2118 | 2198 | ||
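The handler above bounds how long it will stay in interrupt context: it keeps re-reading CSR0 and servicing events only while --boguscnt, seeded from the max_interrupt_work module parameter, stays non-negative, and pcnet32_rx() applies its own budget of rx_ring_size/2 packets per call. A minimal standalone sketch of that budgeted drain pattern, with a hypothetical event source and budget value:

#include <stdio.h>

#define MAX_INTERRUPT_WORK 5    /* hypothetical budget, like the module parameter */

static int pending_events = 12; /* hypothetical device that keeps raising events */

static int read_status(void)
{
	return pending_events > 0;      /* "status register" says work is pending */
}

static void service_one_event(void)
{
	pending_events--;
	printf("serviced one event, %d left\n", pending_events);
}

int main(void)
{
	int boguscnt = MAX_INTERRUPT_WORK;

	/* Drain events, but never do more than the budget in one "interrupt". */
	while (read_status() && --boguscnt >= 0)
		service_one_event();

	if (read_status())
		printf("budget exhausted, remaining work left for the next interrupt\n");
	return 0;
}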
2119 | static int | 2199 | static int pcnet32_rx(struct net_device *dev) |
2120 | pcnet32_rx(struct net_device *dev) | ||
2121 | { | 2200 | { |
2122 | struct pcnet32_private *lp = dev->priv; | 2201 | struct pcnet32_private *lp = dev->priv; |
2123 | int entry = lp->cur_rx & lp->rx_mod_mask; | 2202 | int entry = lp->cur_rx & lp->rx_mod_mask; |
2124 | int boguscnt = lp->rx_ring_size / 2; | 2203 | int boguscnt = lp->rx_ring_size / 2; |
2125 | 2204 | ||
2126 | /* If we own the next entry, it's a new packet. Send it up. */ | 2205 | /* If we own the next entry, it's a new packet. Send it up. */ |
2127 | while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { | 2206 | while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { |
2128 | int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; | 2207 | int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; |
2129 | 2208 | ||
2130 | if (status != 0x03) { /* There was an error. */ | 2209 | if (status != 0x03) { /* There was an error. */ |
2131 | /* | 2210 | /* |
2132 | * There is a tricky error noted by John Murphy, | 2211 | * There is a tricky error noted by John Murphy, |
2133 | * <murf@perftech.com> to Russ Nelson: Even with full-sized | 2212 | * <murf@perftech.com> to Russ Nelson: Even with full-sized |
2134 | * buffers it's possible for a jabber packet to use two | 2213 | * buffers it's possible for a jabber packet to use two |
2135 | * buffers, with only the last correctly noting the error. | 2214 | * buffers, with only the last correctly noting the error. |
2136 | */ | 2215 | */ |
2137 | if (status & 0x01) /* Only count a general error at the */ | 2216 | if (status & 0x01) /* Only count a general error at the */ |
2138 | lp->stats.rx_errors++; /* end of a packet.*/ | 2217 | lp->stats.rx_errors++; /* end of a packet. */ |
2139 | if (status & 0x20) lp->stats.rx_frame_errors++; | 2218 | if (status & 0x20) |
2140 | if (status & 0x10) lp->stats.rx_over_errors++; | 2219 | lp->stats.rx_frame_errors++; |
2141 | if (status & 0x08) lp->stats.rx_crc_errors++; | 2220 | if (status & 0x10) |
2142 | if (status & 0x04) lp->stats.rx_fifo_errors++; | 2221 | lp->stats.rx_over_errors++; |
2143 | lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); | 2222 | if (status & 0x08) |
2144 | } else { | 2223 | lp->stats.rx_crc_errors++; |
2145 | /* Malloc up new buffer, compatible with net-2e. */ | 2224 | if (status & 0x04) |
2146 | short pkt_len = (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)-4; | 2225 | lp->stats.rx_fifo_errors++; |
2147 | struct sk_buff *skb; | 2226 | lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); |
2148 | |||
2149 | /* Discard oversize frames. */ | ||
2150 | if (unlikely(pkt_len > PKT_BUF_SZ - 2)) { | ||
2151 | if (netif_msg_drv(lp)) | ||
2152 | printk(KERN_ERR "%s: Impossible packet size %d!\n", | ||
2153 | dev->name, pkt_len); | ||
2154 | lp->stats.rx_errors++; | ||
2155 | } else if (pkt_len < 60) { | ||
2156 | if (netif_msg_rx_err(lp)) | ||
2157 | printk(KERN_ERR "%s: Runt packet!\n", dev->name); | ||
2158 | lp->stats.rx_errors++; | ||
2159 | } else { | ||
2160 | int rx_in_place = 0; | ||
2161 | |||
2162 | if (pkt_len > rx_copybreak) { | ||
2163 | struct sk_buff *newskb; | ||
2164 | |||
2165 | if ((newskb = dev_alloc_skb(PKT_BUF_SZ))) { | ||
2166 | skb_reserve (newskb, 2); | ||
2167 | skb = lp->rx_skbuff[entry]; | ||
2168 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry], | ||
2169 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | ||
2170 | skb_put (skb, pkt_len); | ||
2171 | lp->rx_skbuff[entry] = newskb; | ||
2172 | newskb->dev = dev; | ||
2173 | lp->rx_dma_addr[entry] = | ||
2174 | pci_map_single(lp->pci_dev, newskb->data, | ||
2175 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | ||
2176 | lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]); | ||
2177 | rx_in_place = 1; | ||
2178 | } else | ||
2179 | skb = NULL; | ||
2180 | } else { | 2227 | } else { |
2181 | skb = dev_alloc_skb(pkt_len+2); | 2228 | /* Malloc up new buffer, compatible with net-2e. */ |
2182 | } | 2229 | short pkt_len = |
2183 | 2230 | (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff) | |
2184 | if (skb == NULL) { | 2231 | - 4; |
2185 | int i; | 2232 | struct sk_buff *skb; |
2186 | if (netif_msg_drv(lp)) | 2233 | |
2187 | printk(KERN_ERR "%s: Memory squeeze, deferring packet.\n", | 2234 | /* Discard oversize frames. */ |
2188 | dev->name); | 2235 | if (unlikely(pkt_len > PKT_BUF_SZ - 2)) { |
2189 | for (i = 0; i < lp->rx_ring_size; i++) | 2236 | if (netif_msg_drv(lp)) |
2190 | if ((short)le16_to_cpu(lp->rx_ring[(entry+i) | 2237 | printk(KERN_ERR |
2191 | & lp->rx_mod_mask].status) < 0) | 2238 | "%s: Impossible packet size %d!\n", |
2192 | break; | 2239 | dev->name, pkt_len); |
2193 | 2240 | lp->stats.rx_errors++; | |
2194 | if (i > lp->rx_ring_size -2) { | 2241 | } else if (pkt_len < 60) { |
2195 | lp->stats.rx_dropped++; | 2242 | if (netif_msg_rx_err(lp)) |
2196 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | 2243 | printk(KERN_ERR "%s: Runt packet!\n", |
2197 | wmb(); /* Make sure adapter sees owner change */ | 2244 | dev->name); |
2198 | lp->cur_rx++; | 2245 | lp->stats.rx_errors++; |
2199 | } | 2246 | } else { |
2200 | break; | 2247 | int rx_in_place = 0; |
2201 | } | 2248 | |
2202 | skb->dev = dev; | 2249 | if (pkt_len > rx_copybreak) { |
2203 | if (!rx_in_place) { | 2250 | struct sk_buff *newskb; |
2204 | skb_reserve(skb,2); /* 16 byte align */ | 2251 | |
2205 | skb_put(skb,pkt_len); /* Make room */ | 2252 | if ((newskb = |
2206 | pci_dma_sync_single_for_cpu(lp->pci_dev, | 2253 | dev_alloc_skb(PKT_BUF_SZ))) { |
2207 | lp->rx_dma_addr[entry], | 2254 | skb_reserve(newskb, 2); |
2208 | PKT_BUF_SZ-2, | 2255 | skb = lp->rx_skbuff[entry]; |
2209 | PCI_DMA_FROMDEVICE); | 2256 | pci_unmap_single(lp->pci_dev, |
2210 | eth_copy_and_sum(skb, | 2257 | lp-> |
2211 | (unsigned char *)(lp->rx_skbuff[entry]->data), | 2258 | rx_dma_addr |
2212 | pkt_len,0); | 2259 | [entry], |
2213 | pci_dma_sync_single_for_device(lp->pci_dev, | 2260 | PKT_BUF_SZ - 2, |
2214 | lp->rx_dma_addr[entry], | 2261 | PCI_DMA_FROMDEVICE); |
2215 | PKT_BUF_SZ-2, | 2262 | skb_put(skb, pkt_len); |
2216 | PCI_DMA_FROMDEVICE); | 2263 | lp->rx_skbuff[entry] = newskb; |
2264 | newskb->dev = dev; | ||
2265 | lp->rx_dma_addr[entry] = | ||
2266 | pci_map_single(lp->pci_dev, | ||
2267 | newskb->data, | ||
2268 | PKT_BUF_SZ - | ||
2269 | 2, | ||
2270 | PCI_DMA_FROMDEVICE); | ||
2271 | lp->rx_ring[entry].base = | ||
2272 | le32_to_cpu(lp-> | ||
2273 | rx_dma_addr | ||
2274 | [entry]); | ||
2275 | rx_in_place = 1; | ||
2276 | } else | ||
2277 | skb = NULL; | ||
2278 | } else { | ||
2279 | skb = dev_alloc_skb(pkt_len + 2); | ||
2280 | } | ||
2281 | |||
2282 | if (skb == NULL) { | ||
2283 | int i; | ||
2284 | if (netif_msg_drv(lp)) | ||
2285 | printk(KERN_ERR | ||
2286 | "%s: Memory squeeze, deferring packet.\n", | ||
2287 | dev->name); | ||
2288 | for (i = 0; i < lp->rx_ring_size; i++) | ||
2289 | if ((short) | ||
2290 | le16_to_cpu(lp-> | ||
2291 | rx_ring[(entry + | ||
2292 | i) | ||
2293 | & lp-> | ||
2294 | rx_mod_mask]. | ||
2295 | status) < 0) | ||
2296 | break; | ||
2297 | |||
2298 | if (i > lp->rx_ring_size - 2) { | ||
2299 | lp->stats.rx_dropped++; | ||
2300 | lp->rx_ring[entry].status |= | ||
2301 | le16_to_cpu(0x8000); | ||
2302 | wmb(); /* Make sure adapter sees owner change */ | ||
2303 | lp->cur_rx++; | ||
2304 | } | ||
2305 | break; | ||
2306 | } | ||
2307 | skb->dev = dev; | ||
2308 | if (!rx_in_place) { | ||
2309 | skb_reserve(skb, 2); /* 16 byte align */ | ||
2310 | skb_put(skb, pkt_len); /* Make room */ | ||
2311 | pci_dma_sync_single_for_cpu(lp->pci_dev, | ||
2312 | lp-> | ||
2313 | rx_dma_addr | ||
2314 | [entry], | ||
2315 | PKT_BUF_SZ - | ||
2316 | 2, | ||
2317 | PCI_DMA_FROMDEVICE); | ||
2318 | eth_copy_and_sum(skb, | ||
2319 | (unsigned char *)(lp-> | ||
2320 | rx_skbuff | ||
2321 | [entry]-> | ||
2322 | data), | ||
2323 | pkt_len, 0); | ||
2324 | pci_dma_sync_single_for_device(lp-> | ||
2325 | pci_dev, | ||
2326 | lp-> | ||
2327 | rx_dma_addr | ||
2328 | [entry], | ||
2329 | PKT_BUF_SZ | ||
2330 | - 2, | ||
2331 | PCI_DMA_FROMDEVICE); | ||
2332 | } | ||
2333 | lp->stats.rx_bytes += skb->len; | ||
2334 | skb->protocol = eth_type_trans(skb, dev); | ||
2335 | netif_rx(skb); | ||
2336 | dev->last_rx = jiffies; | ||
2337 | lp->stats.rx_packets++; | ||
2338 | } | ||
2217 | } | 2339 | } |
2218 | lp->stats.rx_bytes += skb->len; | 2340 | /* |
2219 | skb->protocol=eth_type_trans(skb,dev); | 2341 | * The docs say that the buffer length isn't touched, but Andrew Boyd |
2220 | netif_rx(skb); | 2342 | * of QNX reports that some revs of the 79C965 clear it. |
2221 | dev->last_rx = jiffies; | 2343 | */ |
2222 | lp->stats.rx_packets++; | 2344 | lp->rx_ring[entry].buf_length = le16_to_cpu(2 - PKT_BUF_SZ); |
2223 | } | 2345 | wmb(); /* Make sure owner changes after all others are visible */ |
2346 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | ||
2347 | entry = (++lp->cur_rx) & lp->rx_mod_mask; | ||
2348 | if (--boguscnt <= 0) | ||
2349 | break; /* don't stay in loop forever */ | ||
2224 | } | 2350 | } |
2225 | /* | 2351 | |
2226 | * The docs say that the buffer length isn't touched, but Andrew Boyd | 2352 | return 0; |
2227 | * of QNX reports that some revs of the 79C965 clear it. | ||
2228 | */ | ||
2229 | lp->rx_ring[entry].buf_length = le16_to_cpu(2-PKT_BUF_SZ); | ||
2230 | wmb(); /* Make sure owner changes after all others are visible */ | ||
2231 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | ||
2232 | entry = (++lp->cur_rx) & lp->rx_mod_mask; | ||
2233 | if (--boguscnt <= 0) break; /* don't stay in loop forever */ | ||
2234 | } | ||
2235 | |||
2236 | return 0; | ||
2237 | } | 2353 | } |
2238 | 2354 | ||
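pcnet32_rx() applies the usual rx_copybreak trade-off: a frame longer than rx_copybreak is passed up in the ring's own full-sized buffer while a freshly allocated skb is mapped into the descriptor in its place, whereas a short frame is copied into a new right-sized skb so the large ring buffer never leaves the ring. A standalone sketch of just that copy-versus-swap decision, with a hypothetical threshold and plain malloc()/memcpy() in place of the skb and DMA-mapping calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SZ       1544       /* hypothetical size of every ring buffer */
#define RX_COPYBREAK 200        /* hypothetical copy-vs-swap threshold */

/* Deliver one received frame of pkt_len bytes sitting in *ring_buf.
 * Returns the buffer handed to the stack; *ring_buf is replaced if swapped. */
static char *deliver(char **ring_buf, int pkt_len)
{
	if (pkt_len > RX_COPYBREAK) {
		char *up = *ring_buf;           /* pass the full-sized buffer up */
		*ring_buf = malloc(BUF_SZ);     /* new buffer takes its place in the ring */
		return up;
	} else {
		char *copy = malloc(pkt_len);   /* right-sized copy for small frames */
		memcpy(copy, *ring_buf, pkt_len);
		return copy;                    /* ring buffer is reused as-is */
	}
}

int main(void)
{
	char *ring_buf = malloc(BUF_SZ);
	char *a, *b;

	memset(ring_buf, 0xab, BUF_SZ);
	a = deliver(&ring_buf, 64);     /* small frame: copied */
	b = deliver(&ring_buf, 1200);   /* large frame: buffer swapped out of the ring */
	printf("small frame copied (%p), large frame swapped (%p)\n",
	       (void *)a, (void *)b);
	free(a);
	free(b);
	free(ring_buf);
	return 0;
}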
2239 | static int | 2355 | static int pcnet32_close(struct net_device *dev) |
2240 | pcnet32_close(struct net_device *dev) | ||
2241 | { | 2356 | { |
2242 | unsigned long ioaddr = dev->base_addr; | 2357 | unsigned long ioaddr = dev->base_addr; |
2243 | struct pcnet32_private *lp = dev->priv; | 2358 | struct pcnet32_private *lp = dev->priv; |
2244 | int i; | 2359 | int i; |
2245 | unsigned long flags; | 2360 | unsigned long flags; |
2246 | 2361 | ||
2247 | del_timer_sync(&lp->watchdog_timer); | 2362 | del_timer_sync(&lp->watchdog_timer); |
2248 | 2363 | ||
2249 | netif_stop_queue(dev); | 2364 | netif_stop_queue(dev); |
2250 | 2365 | ||
2251 | spin_lock_irqsave(&lp->lock, flags); | 2366 | spin_lock_irqsave(&lp->lock, flags); |
2252 | 2367 | ||
2253 | lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); | 2368 | lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); |
2254 | 2369 | ||
2255 | if (netif_msg_ifdown(lp)) | 2370 | if (netif_msg_ifdown(lp)) |
2256 | printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", | 2371 | printk(KERN_DEBUG |
2257 | dev->name, lp->a.read_csr (ioaddr, 0)); | 2372 | "%s: Shutting down ethercard, status was %2.2x.\n", |
2373 | dev->name, lp->a.read_csr(ioaddr, 0)); | ||
2258 | 2374 | ||
2259 | /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */ | 2375 | /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */ |
2260 | lp->a.write_csr (ioaddr, 0, 0x0004); | 2376 | lp->a.write_csr(ioaddr, 0, 0x0004); |
2261 | 2377 | ||
2262 | /* | 2378 | /* |
2263 | * Switch back to 16bit mode to avoid problems with dumb | 2379 | * Switch back to 16bit mode to avoid problems with dumb |
2264 | * DOS packet driver after a warm reboot | 2380 | * DOS packet driver after a warm reboot |
2265 | */ | 2381 | */ |
2266 | lp->a.write_bcr (ioaddr, 20, 4); | 2382 | lp->a.write_bcr(ioaddr, 20, 4); |
2267 | 2383 | ||
2268 | spin_unlock_irqrestore(&lp->lock, flags); | 2384 | spin_unlock_irqrestore(&lp->lock, flags); |
2269 | 2385 | ||
2270 | free_irq(dev->irq, dev); | 2386 | free_irq(dev->irq, dev); |
2271 | 2387 | ||
2272 | spin_lock_irqsave(&lp->lock, flags); | 2388 | spin_lock_irqsave(&lp->lock, flags); |
2273 | 2389 | ||
2274 | /* free all allocated skbuffs */ | 2390 | /* free all allocated skbuffs */ |
2275 | for (i = 0; i < lp->rx_ring_size; i++) { | 2391 | for (i = 0; i < lp->rx_ring_size; i++) { |
2276 | lp->rx_ring[i].status = 0; | 2392 | lp->rx_ring[i].status = 0; |
2277 | wmb(); /* Make sure adapter sees owner change */ | 2393 | wmb(); /* Make sure adapter sees owner change */ |
2278 | if (lp->rx_skbuff[i]) { | 2394 | if (lp->rx_skbuff[i]) { |
2279 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, | 2395 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], |
2280 | PCI_DMA_FROMDEVICE); | 2396 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
2281 | dev_kfree_skb(lp->rx_skbuff[i]); | 2397 | dev_kfree_skb(lp->rx_skbuff[i]); |
2398 | } | ||
2399 | lp->rx_skbuff[i] = NULL; | ||
2400 | lp->rx_dma_addr[i] = 0; | ||
2282 | } | 2401 | } |
2283 | lp->rx_skbuff[i] = NULL; | ||
2284 | lp->rx_dma_addr[i] = 0; | ||
2285 | } | ||
2286 | 2402 | ||
2287 | for (i = 0; i < lp->tx_ring_size; i++) { | 2403 | for (i = 0; i < lp->tx_ring_size; i++) { |
2288 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | 2404 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ |
2289 | wmb(); /* Make sure adapter sees owner change */ | 2405 | wmb(); /* Make sure adapter sees owner change */ |
2290 | if (lp->tx_skbuff[i]) { | 2406 | if (lp->tx_skbuff[i]) { |
2291 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | 2407 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], |
2292 | lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); | 2408 | lp->tx_skbuff[i]->len, |
2293 | dev_kfree_skb(lp->tx_skbuff[i]); | 2409 | PCI_DMA_TODEVICE); |
2410 | dev_kfree_skb(lp->tx_skbuff[i]); | ||
2411 | } | ||
2412 | lp->tx_skbuff[i] = NULL; | ||
2413 | lp->tx_dma_addr[i] = 0; | ||
2294 | } | 2414 | } |
2295 | lp->tx_skbuff[i] = NULL; | ||
2296 | lp->tx_dma_addr[i] = 0; | ||
2297 | } | ||
2298 | 2415 | ||
2299 | spin_unlock_irqrestore(&lp->lock, flags); | 2416 | spin_unlock_irqrestore(&lp->lock, flags); |
2300 | 2417 | ||
2301 | return 0; | 2418 | return 0; |
2302 | } | 2419 | } |
2303 | 2420 | ||
2304 | static struct net_device_stats * | 2421 | static struct net_device_stats *pcnet32_get_stats(struct net_device *dev) |
2305 | pcnet32_get_stats(struct net_device *dev) | ||
2306 | { | 2422 | { |
2307 | struct pcnet32_private *lp = dev->priv; | 2423 | struct pcnet32_private *lp = dev->priv; |
2308 | unsigned long ioaddr = dev->base_addr; | 2424 | unsigned long ioaddr = dev->base_addr; |
2309 | u16 saved_addr; | 2425 | u16 saved_addr; |
2310 | unsigned long flags; | 2426 | unsigned long flags; |
2311 | 2427 | ||
2312 | spin_lock_irqsave(&lp->lock, flags); | 2428 | spin_lock_irqsave(&lp->lock, flags); |
2313 | saved_addr = lp->a.read_rap(ioaddr); | 2429 | saved_addr = lp->a.read_rap(ioaddr); |
2314 | lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); | 2430 | lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); |
2315 | lp->a.write_rap(ioaddr, saved_addr); | 2431 | lp->a.write_rap(ioaddr, saved_addr); |
2316 | spin_unlock_irqrestore(&lp->lock, flags); | 2432 | spin_unlock_irqrestore(&lp->lock, flags); |
2317 | 2433 | ||
2318 | return &lp->stats; | 2434 | return &lp->stats; |
2319 | } | 2435 | } |
2320 | 2436 | ||
2321 | /* taken from the sunlance driver, which it took from the depca driver */ | 2437 | /* taken from the sunlance driver, which it took from the depca driver */ |
2322 | static void pcnet32_load_multicast (struct net_device *dev) | 2438 | static void pcnet32_load_multicast(struct net_device *dev) |
2323 | { | 2439 | { |
2324 | struct pcnet32_private *lp = dev->priv; | 2440 | struct pcnet32_private *lp = dev->priv; |
2325 | volatile struct pcnet32_init_block *ib = &lp->init_block; | 2441 | volatile struct pcnet32_init_block *ib = &lp->init_block; |
2326 | volatile u16 *mcast_table = (u16 *)&ib->filter; | 2442 | volatile u16 *mcast_table = (u16 *) & ib->filter; |
2327 | struct dev_mc_list *dmi=dev->mc_list; | 2443 | struct dev_mc_list *dmi = dev->mc_list; |
2328 | char *addrs; | 2444 | char *addrs; |
2329 | int i; | 2445 | int i; |
2330 | u32 crc; | 2446 | u32 crc; |
2331 | 2447 | ||
2332 | /* set all multicast bits */ | 2448 | /* set all multicast bits */ |
2333 | if (dev->flags & IFF_ALLMULTI) { | 2449 | if (dev->flags & IFF_ALLMULTI) { |
2334 | ib->filter[0] = 0xffffffff; | 2450 | ib->filter[0] = 0xffffffff; |
2335 | ib->filter[1] = 0xffffffff; | 2451 | ib->filter[1] = 0xffffffff; |
2452 | return; | ||
2453 | } | ||
2454 | /* clear the multicast filter */ | ||
2455 | ib->filter[0] = 0; | ||
2456 | ib->filter[1] = 0; | ||
2457 | |||
2458 | /* Add addresses */ | ||
2459 | for (i = 0; i < dev->mc_count; i++) { | ||
2460 | addrs = dmi->dmi_addr; | ||
2461 | dmi = dmi->next; | ||
2462 | |||
2463 | /* multicast address? */ | ||
2464 | if (!(*addrs & 1)) | ||
2465 | continue; | ||
2466 | |||
2467 | crc = ether_crc_le(6, addrs); | ||
2468 | crc = crc >> 26; | ||
2469 | mcast_table[crc >> 4] = | ||
2470 | le16_to_cpu(le16_to_cpu(mcast_table[crc >> 4]) | | ||
2471 | (1 << (crc & 0xf))); | ||
2472 | } | ||
2336 | return; | 2473 | return; |
2337 | } | ||
2338 | /* clear the multicast filter */ | ||
2339 | ib->filter[0] = 0; | ||
2340 | ib->filter[1] = 0; | ||
2341 | |||
2342 | /* Add addresses */ | ||
2343 | for (i = 0; i < dev->mc_count; i++) { | ||
2344 | addrs = dmi->dmi_addr; | ||
2345 | dmi = dmi->next; | ||
2346 | |||
2347 | /* multicast address? */ | ||
2348 | if (!(*addrs & 1)) | ||
2349 | continue; | ||
2350 | |||
2351 | crc = ether_crc_le(6, addrs); | ||
2352 | crc = crc >> 26; | ||
2353 | mcast_table [crc >> 4] = le16_to_cpu( | ||
2354 | le16_to_cpu(mcast_table [crc >> 4]) | (1 << (crc & 0xf))); | ||
2355 | } | ||
2356 | return; | ||
2357 | } | 2474 | } |
2358 | 2475 | ||
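pcnet32_load_multicast() above builds the 64-bit logical address filter the same way the sunlance and depca drivers do: each multicast address is hashed with the little-endian Ethernet CRC, and the top six bits of that CRC select one of 64 filter bits. A self-contained sketch of the bit selection (illustrative only, not part of the patch):

	/* Illustrative only: map one 6-byte multicast address to a filter bit. */
	u32 crc  = ether_crc_le(6, addr);	/* addr: the 6-byte MAC       */
	u32 bit  = crc >> 26;			/* top 6 bits -> 0..63        */
	u16 word = bit >> 4;			/* which of four 16-bit words */
	u16 mask = 1 << (bit & 0xf);		/* which bit inside that word */
	/* the driver then ORs 'mask' into mcast_table[word]                */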
2359 | |||
2360 | /* | 2476 | /* |
2361 | * Set or clear the multicast filter for this adaptor. | 2477 | * Set or clear the multicast filter for this adaptor. |
2362 | */ | 2478 | */ |
2363 | static void pcnet32_set_multicast_list(struct net_device *dev) | 2479 | static void pcnet32_set_multicast_list(struct net_device *dev) |
2364 | { | 2480 | { |
2365 | unsigned long ioaddr = dev->base_addr, flags; | 2481 | unsigned long ioaddr = dev->base_addr, flags; |
2366 | struct pcnet32_private *lp = dev->priv; | 2482 | struct pcnet32_private *lp = dev->priv; |
2367 | 2483 | ||
2368 | spin_lock_irqsave(&lp->lock, flags); | 2484 | spin_lock_irqsave(&lp->lock, flags); |
2369 | if (dev->flags&IFF_PROMISC) { | 2485 | if (dev->flags & IFF_PROMISC) { |
2370 | /* Log any net taps. */ | 2486 | /* Log any net taps. */ |
2371 | if (netif_msg_hw(lp)) | 2487 | if (netif_msg_hw(lp)) |
2372 | printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name); | 2488 | printk(KERN_INFO "%s: Promiscuous mode enabled.\n", |
2373 | lp->init_block.mode = le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << 7); | 2489 | dev->name); |
2374 | } else { | 2490 | lp->init_block.mode = |
2375 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 2491 | le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << |
2376 | pcnet32_load_multicast (dev); | 2492 | 7); |
2377 | } | 2493 | } else { |
2378 | 2494 | lp->init_block.mode = | |
2379 | lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ | 2495 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
2380 | pcnet32_restart(dev, 0x0042); /* Resume normal operation */ | 2496 | pcnet32_load_multicast(dev); |
2381 | netif_wake_queue(dev); | 2497 | } |
2382 | 2498 | ||
2383 | spin_unlock_irqrestore(&lp->lock, flags); | 2499 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ |
2500 | pcnet32_restart(dev, 0x0042); /* Resume normal operation */ | ||
2501 | netif_wake_queue(dev); | ||
2502 | |||
2503 | spin_unlock_irqrestore(&lp->lock, flags); | ||
2384 | } | 2504 | } |
2385 | 2505 | ||
2386 | /* This routine assumes that the lp->lock is held */ | 2506 | /* This routine assumes that the lp->lock is held */ |
2387 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num) | 2507 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num) |
2388 | { | 2508 | { |
2389 | struct pcnet32_private *lp = dev->priv; | 2509 | struct pcnet32_private *lp = dev->priv; |
2390 | unsigned long ioaddr = dev->base_addr; | 2510 | unsigned long ioaddr = dev->base_addr; |
2391 | u16 val_out; | 2511 | u16 val_out; |
2392 | 2512 | ||
2393 | if (!lp->mii) | 2513 | if (!lp->mii) |
2394 | return 0; | 2514 | return 0; |
2395 | 2515 | ||
2396 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); | 2516 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); |
2397 | val_out = lp->a.read_bcr(ioaddr, 34); | 2517 | val_out = lp->a.read_bcr(ioaddr, 34); |
2398 | 2518 | ||
2399 | return val_out; | 2519 | return val_out; |
2400 | } | 2520 | } |
2401 | 2521 | ||
2402 | /* This routine assumes that the lp->lock is held */ | 2522 | /* This routine assumes that the lp->lock is held */ |
2403 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val) | 2523 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val) |
2404 | { | 2524 | { |
2405 | struct pcnet32_private *lp = dev->priv; | 2525 | struct pcnet32_private *lp = dev->priv; |
2406 | unsigned long ioaddr = dev->base_addr; | 2526 | unsigned long ioaddr = dev->base_addr; |
2407 | 2527 | ||
2408 | if (!lp->mii) | 2528 | if (!lp->mii) |
2409 | return; | 2529 | return; |
2410 | 2530 | ||
2411 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); | 2531 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); |
2412 | lp->a.write_bcr(ioaddr, 34, val); | 2532 | lp->a.write_bcr(ioaddr, 34, val); |
2413 | } | 2533 | } |
2414 | 2534 | ||
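mdio_read() and mdio_write() above are the whole MII access scheme on this chip: BCR33 selects the PHY and register (PHY id in bits 9:5, register number in bits 4:0) and BCR34 carries the data. A hedged example of how the rest of the driver can use them, say to poll link state through the standard BMSR register; as the comments note, the caller must already hold lp->lock:

	/* Illustrative only: read the PHY's Basic Mode Status Register. */
	int bmsr = mdio_read(dev, lp->mii_if.phy_id, MII_BMSR);
	if (bmsr & BMSR_LSTATUS)
		;	/* link is up */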
2415 | static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | 2535 | static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) |
2416 | { | 2536 | { |
2417 | struct pcnet32_private *lp = dev->priv; | 2537 | struct pcnet32_private *lp = dev->priv; |
2418 | int rc; | 2538 | int rc; |
2419 | unsigned long flags; | 2539 | unsigned long flags; |
2540 | |||
2541 | /* SIOC[GS]MIIxxx ioctls */ | ||
2542 | if (lp->mii) { | ||
2543 | spin_lock_irqsave(&lp->lock, flags); | ||
2544 | rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL); | ||
2545 | spin_unlock_irqrestore(&lp->lock, flags); | ||
2546 | } else { | ||
2547 | rc = -EOPNOTSUPP; | ||
2548 | } | ||
2549 | |||
2550 | return rc; | ||
2551 | } | ||
2552 | |||
2553 | static int pcnet32_check_otherphy(struct net_device *dev) | ||
2554 | { | ||
2555 | struct pcnet32_private *lp = dev->priv; | ||
2556 | struct mii_if_info mii = lp->mii_if; | ||
2557 | u16 bmcr; | ||
2558 | int i; | ||
2420 | 2559 | ||
2421 | /* SIOC[GS]MIIxxx ioctls */ | 2560 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { |
2422 | if (lp->mii) { | 2561 | if (i == lp->mii_if.phy_id) |
2423 | spin_lock_irqsave(&lp->lock, flags); | 2562 | continue; /* skip active phy */ |
2424 | rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL); | 2563 | if (lp->phymask & (1 << i)) { |
2425 | spin_unlock_irqrestore(&lp->lock, flags); | 2564 | mii.phy_id = i; |
2426 | } else { | 2565 | if (mii_link_ok(&mii)) { |
2427 | rc = -EOPNOTSUPP; | 2566 | /* found PHY with active link */ |
2428 | } | 2567 | if (netif_msg_link(lp)) |
2568 | printk(KERN_INFO | ||
2569 | "%s: Using PHY number %d.\n", | ||
2570 | dev->name, i); | ||
2571 | |||
2572 | /* isolate inactive phy */ | ||
2573 | bmcr = | ||
2574 | mdio_read(dev, lp->mii_if.phy_id, MII_BMCR); | ||
2575 | mdio_write(dev, lp->mii_if.phy_id, MII_BMCR, | ||
2576 | bmcr | BMCR_ISOLATE); | ||
2577 | |||
2578 | /* de-isolate new phy */ | ||
2579 | bmcr = mdio_read(dev, i, MII_BMCR); | ||
2580 | mdio_write(dev, i, MII_BMCR, | ||
2581 | bmcr & ~BMCR_ISOLATE); | ||
2582 | |||
2583 | /* set new phy address */ | ||
2584 | lp->mii_if.phy_id = i; | ||
2585 | return 1; | ||
2586 | } | ||
2587 | } | ||
2588 | } | ||
2589 | return 0; | ||
2590 | } | ||
2591 | |||
2592 | /* | ||
2593 | * Show the status of the media. Similar to mii_check_media, but it | ||
2594 | * correctly shows the link speed for all (tested) pcnet32 variants. | ||
2595 | * Devices with no mii just report link state without speed. | ||
2596 | * | ||
2597 | * Caller is assumed to hold and release the lp->lock. | ||
2598 | */ | ||
2429 | 2599 | ||
2430 | return rc; | 2600 | static void pcnet32_check_media(struct net_device *dev, int verbose) |
2601 | { | ||
2602 | struct pcnet32_private *lp = dev->priv; | ||
2603 | int curr_link; | ||
2604 | int prev_link = netif_carrier_ok(dev) ? 1 : 0; | ||
2605 | u32 bcr9; | ||
2606 | |||
2607 | if (lp->mii) { | ||
2608 | curr_link = mii_link_ok(&lp->mii_if); | ||
2609 | } else { | ||
2610 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
2611 | curr_link = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
2612 | } | ||
2613 | if (!curr_link) { | ||
2614 | if (prev_link || verbose) { | ||
2615 | netif_carrier_off(dev); | ||
2616 | if (netif_msg_link(lp)) | ||
2617 | printk(KERN_INFO "%s: link down\n", dev->name); | ||
2618 | } | ||
2619 | if (lp->phycount > 1) { | ||
2620 | curr_link = pcnet32_check_otherphy(dev); | ||
2621 | prev_link = 0; | ||
2622 | } | ||
2623 | } else if (verbose || !prev_link) { | ||
2624 | netif_carrier_on(dev); | ||
2625 | if (lp->mii) { | ||
2626 | if (netif_msg_link(lp)) { | ||
2627 | struct ethtool_cmd ecmd; | ||
2628 | mii_ethtool_gset(&lp->mii_if, &ecmd); | ||
2629 | printk(KERN_INFO | ||
2630 | "%s: link up, %sMbps, %s-duplex\n", | ||
2631 | dev->name, | ||
2632 | (ecmd.speed == SPEED_100) ? "100" : "10", | ||
2633 | (ecmd.duplex == | ||
2634 | DUPLEX_FULL) ? "full" : "half"); | ||
2635 | } | ||
2636 | bcr9 = lp->a.read_bcr(dev->base_addr, 9); | ||
2637 | if ((bcr9 & (1 << 0)) != lp->mii_if.full_duplex) { | ||
2638 | if (lp->mii_if.full_duplex) | ||
2639 | bcr9 |= (1 << 0); | ||
2640 | else | ||
2641 | bcr9 &= ~(1 << 0); | ||
2642 | lp->a.write_bcr(dev->base_addr, 9, bcr9); | ||
2643 | } | ||
2644 | } else { | ||
2645 | if (netif_msg_link(lp)) | ||
2646 | printk(KERN_INFO "%s: link up\n", dev->name); | ||
2647 | } | ||
2648 | } | ||
2431 | } | 2649 | } |
2432 | 2650 | ||
2651 | /* | ||
2652 | * Check for loss of link and link establishment. | ||
2653 | * Cannot use mii_check_media because it does nothing if the mode is forced. | ||
2654 | */ | ||
2655 | |||
2433 | static void pcnet32_watchdog(struct net_device *dev) | 2656 | static void pcnet32_watchdog(struct net_device *dev) |
2434 | { | 2657 | { |
2435 | struct pcnet32_private *lp = dev->priv; | 2658 | struct pcnet32_private *lp = dev->priv; |
2436 | unsigned long flags; | 2659 | unsigned long flags; |
2437 | 2660 | ||
2438 | /* Print the link status if it has changed */ | 2661 | /* Print the link status if it has changed */ |
2439 | if (lp->mii) { | ||
2440 | spin_lock_irqsave(&lp->lock, flags); | 2662 | spin_lock_irqsave(&lp->lock, flags); |
2441 | mii_check_media (&lp->mii_if, netif_msg_link(lp), 0); | 2663 | pcnet32_check_media(dev, 0); |
2442 | spin_unlock_irqrestore(&lp->lock, flags); | 2664 | spin_unlock_irqrestore(&lp->lock, flags); |
2443 | } | ||
2444 | 2665 | ||
2445 | mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | 2666 | mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); |
2446 | } | 2667 | } |
2447 | 2668 | ||
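pcnet32_watchdog() re-arms itself with mod_timer() every time it runs, so the link is polled periodically even when the PHY mode is forced and mii_check_media() would stay silent. A minimal sketch of how such a timer is typically armed at open time (2.6-era timer API; the exact initialization is not part of this hunk, so treat the details as an assumption):

	/* Illustrative setup, assuming the 2.6-era timer interface. */
	init_timer(&lp->watchdog_timer);
	lp->watchdog_timer.data = (unsigned long)dev;
	lp->watchdog_timer.function = (void *)&pcnet32_watchdog;
	mod_timer(&lp->watchdog_timer, PCNET32_WATCHDOG_TIMEOUT);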
2448 | static void __devexit pcnet32_remove_one(struct pci_dev *pdev) | 2669 | static void __devexit pcnet32_remove_one(struct pci_dev *pdev) |
2449 | { | 2670 | { |
2450 | struct net_device *dev = pci_get_drvdata(pdev); | 2671 | struct net_device *dev = pci_get_drvdata(pdev); |
2451 | 2672 | ||
2452 | if (dev) { | 2673 | if (dev) { |
2453 | struct pcnet32_private *lp = dev->priv; | 2674 | struct pcnet32_private *lp = dev->priv; |
2454 | 2675 | ||
2455 | unregister_netdev(dev); | 2676 | unregister_netdev(dev); |
2456 | pcnet32_free_ring(dev); | 2677 | pcnet32_free_ring(dev); |
2457 | release_region(dev->base_addr, PCNET32_TOTAL_SIZE); | 2678 | release_region(dev->base_addr, PCNET32_TOTAL_SIZE); |
2458 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | 2679 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); |
2459 | free_netdev(dev); | 2680 | free_netdev(dev); |
2460 | pci_disable_device(pdev); | 2681 | pci_disable_device(pdev); |
2461 | pci_set_drvdata(pdev, NULL); | 2682 | pci_set_drvdata(pdev, NULL); |
2462 | } | 2683 | } |
2463 | } | 2684 | } |
2464 | 2685 | ||
2465 | static struct pci_driver pcnet32_driver = { | 2686 | static struct pci_driver pcnet32_driver = { |
2466 | .name = DRV_NAME, | 2687 | .name = DRV_NAME, |
2467 | .probe = pcnet32_probe_pci, | 2688 | .probe = pcnet32_probe_pci, |
2468 | .remove = __devexit_p(pcnet32_remove_one), | 2689 | .remove = __devexit_p(pcnet32_remove_one), |
2469 | .id_table = pcnet32_pci_tbl, | 2690 | .id_table = pcnet32_pci_tbl, |
2470 | }; | 2691 | }; |
2471 | 2692 | ||
2472 | /* An additional parameter that may be passed in... */ | 2693 | /* An additional parameter that may be passed in... */ |
@@ -2477,9 +2698,11 @@ static int pcnet32_have_pci; | |||
2477 | module_param(debug, int, 0); | 2698 | module_param(debug, int, 0); |
2478 | MODULE_PARM_DESC(debug, DRV_NAME " debug level"); | 2699 | MODULE_PARM_DESC(debug, DRV_NAME " debug level"); |
2479 | module_param(max_interrupt_work, int, 0); | 2700 | module_param(max_interrupt_work, int, 0); |
2480 | MODULE_PARM_DESC(max_interrupt_work, DRV_NAME " maximum events handled per interrupt"); | 2701 | MODULE_PARM_DESC(max_interrupt_work, |
2702 | DRV_NAME " maximum events handled per interrupt"); | ||
2481 | module_param(rx_copybreak, int, 0); | 2703 | module_param(rx_copybreak, int, 0); |
2482 | MODULE_PARM_DESC(rx_copybreak, DRV_NAME " copy breakpoint for copy-only-tiny-frames"); | 2704 | MODULE_PARM_DESC(rx_copybreak, |
2705 | DRV_NAME " copy breakpoint for copy-only-tiny-frames"); | ||
2483 | module_param(tx_start_pt, int, 0); | 2706 | module_param(tx_start_pt, int, 0); |
2484 | MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); | 2707 | MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); |
2485 | module_param(pcnet32vlb, int, 0); | 2708 | module_param(pcnet32vlb, int, 0); |
@@ -2490,7 +2713,9 @@ module_param_array(full_duplex, int, NULL, 0); | |||
2490 | MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); | 2713 | MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); |
2491 | /* Module Parameter for HomePNA cards added by Patrick Simmons, 2004 */ | 2714 | /* Module Parameter for HomePNA cards added by Patrick Simmons, 2004 */ |
2492 | module_param_array(homepna, int, NULL, 0); | 2715 | module_param_array(homepna, int, NULL, 0); |
2493 | MODULE_PARM_DESC(homepna, DRV_NAME " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet)"); | 2716 | MODULE_PARM_DESC(homepna,
2717 | DRV_NAME | ||
2718 | " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet"); | ||
2494 | 2719 | ||
2495 | MODULE_AUTHOR("Thomas Bogendoerfer"); | 2720 | MODULE_AUTHOR("Thomas Bogendoerfer"); |
2496 | MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards"); | 2721 | MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards"); |
@@ -2500,44 +2725,44 @@ MODULE_LICENSE("GPL"); | |||
2500 | 2725 | ||
2501 | static int __init pcnet32_init_module(void) | 2726 | static int __init pcnet32_init_module(void) |
2502 | { | 2727 | { |
2503 | printk(KERN_INFO "%s", version); | 2728 | printk(KERN_INFO "%s", version); |
2504 | 2729 | ||
2505 | pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT); | 2730 | pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT); |
2506 | 2731 | ||
2507 | if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) | 2732 | if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) |
2508 | tx_start = tx_start_pt; | 2733 | tx_start = tx_start_pt; |
2509 | 2734 | ||
2510 | /* find the PCI devices */ | 2735 | /* find the PCI devices */ |
2511 | if (!pci_module_init(&pcnet32_driver)) | 2736 | if (!pci_module_init(&pcnet32_driver)) |
2512 | pcnet32_have_pci = 1; | 2737 | pcnet32_have_pci = 1; |
2513 | 2738 | ||
2514 | /* should we find any remaining VLbus devices ? */ | 2739 | /* should we find any remaining VLbus devices ? */ |
2515 | if (pcnet32vlb) | 2740 | if (pcnet32vlb) |
2516 | pcnet32_probe_vlbus(); | 2741 | pcnet32_probe_vlbus(); |
2517 | 2742 | ||
2518 | if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE)) | 2743 | if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE)) |
2519 | printk(KERN_INFO PFX "%d cards_found.\n", cards_found); | 2744 | printk(KERN_INFO PFX "%d cards_found.\n", cards_found); |
2520 | 2745 | ||
2521 | return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV; | 2746 | return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV; |
2522 | } | 2747 | } |
2523 | 2748 | ||
2524 | static void __exit pcnet32_cleanup_module(void) | 2749 | static void __exit pcnet32_cleanup_module(void) |
2525 | { | 2750 | { |
2526 | struct net_device *next_dev; | 2751 | struct net_device *next_dev; |
2527 | 2752 | ||
2528 | while (pcnet32_dev) { | 2753 | while (pcnet32_dev) { |
2529 | struct pcnet32_private *lp = pcnet32_dev->priv; | 2754 | struct pcnet32_private *lp = pcnet32_dev->priv; |
2530 | next_dev = lp->next; | 2755 | next_dev = lp->next; |
2531 | unregister_netdev(pcnet32_dev); | 2756 | unregister_netdev(pcnet32_dev); |
2532 | pcnet32_free_ring(pcnet32_dev); | 2757 | pcnet32_free_ring(pcnet32_dev); |
2533 | release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); | 2758 | release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); |
2534 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | 2759 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); |
2535 | free_netdev(pcnet32_dev); | 2760 | free_netdev(pcnet32_dev); |
2536 | pcnet32_dev = next_dev; | 2761 | pcnet32_dev = next_dev; |
2537 | } | 2762 | } |
2538 | 2763 | ||
2539 | if (pcnet32_have_pci) | 2764 | if (pcnet32_have_pci) |
2540 | pci_unregister_driver(&pcnet32_driver); | 2765 | pci_unregister_driver(&pcnet32_driver); |
2541 | } | 2766 | } |
2542 | 2767 | ||
2543 | module_init(pcnet32_init_module); | 2768 | module_init(pcnet32_init_module); |
diff --git a/drivers/net/skfp/fplustm.c b/drivers/net/skfp/fplustm.c index a4b2b6975d6c..0784f558ca9a 100644 --- a/drivers/net/skfp/fplustm.c +++ b/drivers/net/skfp/fplustm.c | |||
@@ -549,12 +549,12 @@ void formac_tx_restart(struct s_smc *smc) | |||
549 | static void enable_formac(struct s_smc *smc) | 549 | static void enable_formac(struct s_smc *smc) |
550 | { | 550 | { |
551 | /* set formac IMSK : 0 enables irq */ | 551 | /* set formac IMSK : 0 enables irq */ |
552 | outpw(FM_A(FM_IMSK1U),~mac_imsk1u) ; | 552 | outpw(FM_A(FM_IMSK1U),(unsigned short)~mac_imsk1u); |
553 | outpw(FM_A(FM_IMSK1L),~mac_imsk1l) ; | 553 | outpw(FM_A(FM_IMSK1L),(unsigned short)~mac_imsk1l); |
554 | outpw(FM_A(FM_IMSK2U),~mac_imsk2u) ; | 554 | outpw(FM_A(FM_IMSK2U),(unsigned short)~mac_imsk2u); |
555 | outpw(FM_A(FM_IMSK2L),~mac_imsk2l) ; | 555 | outpw(FM_A(FM_IMSK2L),(unsigned short)~mac_imsk2l); |
556 | outpw(FM_A(FM_IMSK3U),~mac_imsk3u) ; | 556 | outpw(FM_A(FM_IMSK3U),(unsigned short)~mac_imsk3u); |
557 | outpw(FM_A(FM_IMSK3L),~mac_imsk3l) ; | 557 | outpw(FM_A(FM_IMSK3L),(unsigned short)~mac_imsk3l); |
558 | } | 558 | } |
559 | 559 | ||
560 | #if 0 /* Removed because the driver should use the ASICs TX complete IRQ. */ | 560 | #if 0 /* Removed because the driver should use the ASICs TX complete IRQ. */ |
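The casts added in enable_formac() above matter because of integer promotion: ~ is applied after an unsigned short operand has been promoted to int, so ~mac_imsk1u is a sign-extended 32-bit value, and handing it to a 16-bit port write can draw truncation warnings. The explicit (unsigned short) cast states the intended 16-bit result. A tiny stand-alone illustration (the value is an example only):

	unsigned short m = 0x0010;
	int promoted = ~m;			/* 0xffffffef: promoted to int   */
	unsigned short w = (unsigned short)~m;	/* 0xffef: what outpw() receives */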
diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 25e028b7ce48..4eda81d41b10 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c | |||
@@ -44,7 +44,7 @@ | |||
44 | #include "skge.h" | 44 | #include "skge.h" |
45 | 45 | ||
46 | #define DRV_NAME "skge" | 46 | #define DRV_NAME "skge" |
47 | #define DRV_VERSION "1.3" | 47 | #define DRV_VERSION "1.4" |
48 | #define PFX DRV_NAME " " | 48 | #define PFX DRV_NAME " " |
49 | 49 | ||
50 | #define DEFAULT_TX_RING_SIZE 128 | 50 | #define DEFAULT_TX_RING_SIZE 128 |
@@ -104,7 +104,6 @@ static const int txqaddr[] = { Q_XA1, Q_XA2 }; | |||
104 | static const int rxqaddr[] = { Q_R1, Q_R2 }; | 104 | static const int rxqaddr[] = { Q_R1, Q_R2 }; |
105 | static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F }; | 105 | static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F }; |
106 | static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F }; | 106 | static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F }; |
107 | static const u32 portirqmask[] = { IS_PORT_1, IS_PORT_2 }; | ||
108 | 107 | ||
109 | static int skge_get_regs_len(struct net_device *dev) | 108 | static int skge_get_regs_len(struct net_device *dev) |
110 | { | 109 | { |
@@ -728,19 +727,18 @@ static struct ethtool_ops skge_ethtool_ops = { | |||
728 | * Allocate ring elements and chain them together | 727 | * Allocate ring elements and chain them together |
729 | * One-to-one association of board descriptors with ring elements | 728 | * One-to-one association of board descriptors with ring elements |
730 | */ | 729 | */ |
731 | static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u64 base) | 730 | static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u32 base) |
732 | { | 731 | { |
733 | struct skge_tx_desc *d; | 732 | struct skge_tx_desc *d; |
734 | struct skge_element *e; | 733 | struct skge_element *e; |
735 | int i; | 734 | int i; |
736 | 735 | ||
737 | ring->start = kmalloc(sizeof(*e)*ring->count, GFP_KERNEL); | 736 | ring->start = kcalloc(sizeof(*e), ring->count, GFP_KERNEL); |
738 | if (!ring->start) | 737 | if (!ring->start) |
739 | return -ENOMEM; | 738 | return -ENOMEM; |
740 | 739 | ||
741 | for (i = 0, e = ring->start, d = vaddr; i < ring->count; i++, e++, d++) { | 740 | for (i = 0, e = ring->start, d = vaddr; i < ring->count; i++, e++, d++) { |
742 | e->desc = d; | 741 | e->desc = d; |
743 | e->skb = NULL; | ||
744 | if (i == ring->count - 1) { | 742 | if (i == ring->count - 1) { |
745 | e->next = ring->start; | 743 | e->next = ring->start; |
746 | d->next_offset = base; | 744 | d->next_offset = base; |
@@ -2169,27 +2167,31 @@ static int skge_up(struct net_device *dev) | |||
2169 | if (!skge->mem) | 2167 | if (!skge->mem) |
2170 | return -ENOMEM; | 2168 | return -ENOMEM; |
2171 | 2169 | ||
2170 | BUG_ON(skge->dma & 7); | ||
2171 | |||
2172 | if ((u64)skge->dma >> 32 != ((u64) skge->dma + skge->mem_size) >> 32) { | ||
2173 | printk(KERN_ERR PFX "pci_alloc_consistent region crosses 4G boundary\n"); | ||
2174 | err = -EINVAL; | ||
2175 | goto free_pci_mem; | ||
2176 | } | ||
2177 | |||
2172 | memset(skge->mem, 0, skge->mem_size); | 2178 | memset(skge->mem, 0, skge->mem_size); |
2173 | 2179 | ||
2174 | if ((err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma))) | 2180 | err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma); |
2181 | if (err) | ||
2175 | goto free_pci_mem; | 2182 | goto free_pci_mem; |
2176 | 2183 | ||
2177 | err = skge_rx_fill(skge); | 2184 | err = skge_rx_fill(skge); |
2178 | if (err) | 2185 | if (err) |
2179 | goto free_rx_ring; | 2186 | goto free_rx_ring; |
2180 | 2187 | ||
2181 | if ((err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size, | 2188 | err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size, |
2182 | skge->dma + rx_size))) | 2189 | skge->dma + rx_size); |
2190 | if (err) | ||
2183 | goto free_rx_ring; | 2191 | goto free_rx_ring; |
2184 | 2192 | ||
2185 | skge->tx_avail = skge->tx_ring.count - 1; | 2193 | skge->tx_avail = skge->tx_ring.count - 1; |
2186 | 2194 | ||
2187 | /* Enable IRQ from port */ | ||
2188 | spin_lock_irq(&hw->hw_lock); | ||
2189 | hw->intr_mask |= portirqmask[port]; | ||
2190 | skge_write32(hw, B0_IMSK, hw->intr_mask); | ||
2191 | spin_unlock_irq(&hw->hw_lock); | ||
2192 | |||
2193 | /* Initialize MAC */ | 2195 | /* Initialize MAC */ |
2194 | spin_lock_bh(&hw->phy_lock); | 2196 | spin_lock_bh(&hw->phy_lock); |
2195 | if (hw->chip_id == CHIP_ID_GENESIS) | 2197 | if (hw->chip_id == CHIP_ID_GENESIS) |
@@ -2246,11 +2248,6 @@ static int skge_down(struct net_device *dev) | |||
2246 | else | 2248 | else |
2247 | yukon_stop(skge); | 2249 | yukon_stop(skge); |
2248 | 2250 | ||
2249 | spin_lock_irq(&hw->hw_lock); | ||
2250 | hw->intr_mask &= ~portirqmask[skge->port]; | ||
2251 | skge_write32(hw, B0_IMSK, hw->intr_mask); | ||
2252 | spin_unlock_irq(&hw->hw_lock); | ||
2253 | |||
2254 | /* Stop transmitter */ | 2251 | /* Stop transmitter */ |
2255 | skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP); | 2252 | skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP); |
2256 | skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), | 2253 | skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), |
@@ -2307,18 +2304,15 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
2307 | int i; | 2304 | int i; |
2308 | u32 control, len; | 2305 | u32 control, len; |
2309 | u64 map; | 2306 | u64 map; |
2310 | unsigned long flags; | ||
2311 | 2307 | ||
2312 | skb = skb_padto(skb, ETH_ZLEN); | 2308 | skb = skb_padto(skb, ETH_ZLEN); |
2313 | if (!skb) | 2309 | if (!skb) |
2314 | return NETDEV_TX_OK; | 2310 | return NETDEV_TX_OK; |
2315 | 2311 | ||
2316 | local_irq_save(flags); | ||
2317 | if (!spin_trylock(&skge->tx_lock)) { | 2312 | if (!spin_trylock(&skge->tx_lock)) { |
2318 | /* Collision - tell upper layer to requeue */ | 2313 | /* Collision - tell upper layer to requeue */ |
2319 | local_irq_restore(flags); | 2314 | return NETDEV_TX_LOCKED; |
2320 | return NETDEV_TX_LOCKED; | 2315 | } |
2321 | } | ||
2322 | 2316 | ||
2323 | if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) { | 2317 | if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) { |
2324 | if (!netif_queue_stopped(dev)) { | 2318 | if (!netif_queue_stopped(dev)) { |
@@ -2327,7 +2321,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
2327 | printk(KERN_WARNING PFX "%s: ring full when queue awake!\n", | 2321 | printk(KERN_WARNING PFX "%s: ring full when queue awake!\n", |
2328 | dev->name); | 2322 | dev->name); |
2329 | } | 2323 | } |
2330 | spin_unlock_irqrestore(&skge->tx_lock, flags); | 2324 | spin_unlock(&skge->tx_lock); |
2331 | return NETDEV_TX_BUSY; | 2325 | return NETDEV_TX_BUSY; |
2332 | } | 2326 | } |
2333 | 2327 | ||
@@ -2402,8 +2396,10 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
2402 | netif_stop_queue(dev); | 2396 | netif_stop_queue(dev); |
2403 | } | 2397 | } |
2404 | 2398 | ||
2399 | mmiowb(); | ||
2400 | spin_unlock(&skge->tx_lock); | ||
2401 | |||
2405 | dev->trans_start = jiffies; | 2402 | dev->trans_start = jiffies; |
2406 | spin_unlock_irqrestore(&skge->tx_lock, flags); | ||
2407 | 2403 | ||
2408 | return NETDEV_TX_OK; | 2404 | return NETDEV_TX_OK; |
2409 | } | 2405 | } |
@@ -2416,7 +2412,7 @@ static inline void skge_tx_free(struct skge_hw *hw, struct skge_element *e) | |||
2416 | pci_unmap_addr(e, mapaddr), | 2412 | pci_unmap_addr(e, mapaddr), |
2417 | pci_unmap_len(e, maplen), | 2413 | pci_unmap_len(e, maplen), |
2418 | PCI_DMA_TODEVICE); | 2414 | PCI_DMA_TODEVICE); |
2419 | dev_kfree_skb_any(e->skb); | 2415 | dev_kfree_skb(e->skb); |
2420 | e->skb = NULL; | 2416 | e->skb = NULL; |
2421 | } else { | 2417 | } else { |
2422 | pci_unmap_page(hw->pdev, | 2418 | pci_unmap_page(hw->pdev, |
@@ -2430,15 +2426,14 @@ static void skge_tx_clean(struct skge_port *skge) | |||
2430 | { | 2426 | { |
2431 | struct skge_ring *ring = &skge->tx_ring; | 2427 | struct skge_ring *ring = &skge->tx_ring; |
2432 | struct skge_element *e; | 2428 | struct skge_element *e; |
2433 | unsigned long flags; | ||
2434 | 2429 | ||
2435 | spin_lock_irqsave(&skge->tx_lock, flags); | 2430 | spin_lock_bh(&skge->tx_lock); |
2436 | for (e = ring->to_clean; e != ring->to_use; e = e->next) { | 2431 | for (e = ring->to_clean; e != ring->to_use; e = e->next) { |
2437 | ++skge->tx_avail; | 2432 | ++skge->tx_avail; |
2438 | skge_tx_free(skge->hw, e); | 2433 | skge_tx_free(skge->hw, e); |
2439 | } | 2434 | } |
2440 | ring->to_clean = e; | 2435 | ring->to_clean = e; |
2441 | spin_unlock_irqrestore(&skge->tx_lock, flags); | 2436 | spin_unlock_bh(&skge->tx_lock); |
2442 | } | 2437 | } |
2443 | 2438 | ||
2444 | static void skge_tx_timeout(struct net_device *dev) | 2439 | static void skge_tx_timeout(struct net_device *dev) |
@@ -2663,6 +2658,37 @@ resubmit: | |||
2663 | return NULL; | 2658 | return NULL; |
2664 | } | 2659 | } |
2665 | 2660 | ||
2661 | static void skge_tx_done(struct skge_port *skge) | ||
2662 | { | ||
2663 | struct skge_ring *ring = &skge->tx_ring; | ||
2664 | struct skge_element *e; | ||
2665 | |||
2666 | spin_lock(&skge->tx_lock); | ||
2667 | for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) { | ||
2668 | struct skge_tx_desc *td = e->desc; | ||
2669 | u32 control; | ||
2670 | |||
2671 | rmb(); | ||
2672 | control = td->control; | ||
2673 | if (control & BMU_OWN) | ||
2674 | break; | ||
2675 | |||
2676 | if (unlikely(netif_msg_tx_done(skge))) | ||
2677 | printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n", | ||
2678 | skge->netdev->name, e - ring->start, td->status); | ||
2679 | |||
2680 | skge_tx_free(skge->hw, e); | ||
2681 | e->skb = NULL; | ||
2682 | ++skge->tx_avail; | ||
2683 | } | ||
2684 | ring->to_clean = e; | ||
2685 | skge_write8(skge->hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F); | ||
2686 | |||
2687 | if (skge->tx_avail > MAX_SKB_FRAGS + 1) | ||
2688 | netif_wake_queue(skge->netdev); | ||
2689 | |||
2690 | spin_unlock(&skge->tx_lock); | ||
2691 | } | ||
2666 | 2692 | ||
2667 | static int skge_poll(struct net_device *dev, int *budget) | 2693 | static int skge_poll(struct net_device *dev, int *budget) |
2668 | { | 2694 | { |
@@ -2670,8 +2696,10 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
2670 | struct skge_hw *hw = skge->hw; | 2696 | struct skge_hw *hw = skge->hw; |
2671 | struct skge_ring *ring = &skge->rx_ring; | 2697 | struct skge_ring *ring = &skge->rx_ring; |
2672 | struct skge_element *e; | 2698 | struct skge_element *e; |
2673 | unsigned int to_do = min(dev->quota, *budget); | 2699 | int to_do = min(dev->quota, *budget); |
2674 | unsigned int work_done = 0; | 2700 | int work_done = 0; |
2701 | |||
2702 | skge_tx_done(skge); | ||
2675 | 2703 | ||
2676 | for (e = ring->to_clean; prefetch(e->next), work_done < to_do; e = e->next) { | 2704 | for (e = ring->to_clean; prefetch(e->next), work_done < to_do; e = e->next) { |
2677 | struct skge_rx_desc *rd = e->desc; | 2705 | struct skge_rx_desc *rd = e->desc; |
@@ -2683,8 +2711,8 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
2683 | if (control & BMU_OWN) | 2711 | if (control & BMU_OWN) |
2684 | break; | 2712 | break; |
2685 | 2713 | ||
2686 | skb = skge_rx_get(skge, e, control, rd->status, | 2714 | skb = skge_rx_get(skge, e, control, rd->status, |
2687 | le16_to_cpu(rd->csum2)); | 2715 | le16_to_cpu(rd->csum2)); |
2688 | if (likely(skb)) { | 2716 | if (likely(skb)) { |
2689 | dev->last_rx = jiffies; | 2717 | dev->last_rx = jiffies; |
2690 | netif_receive_skb(skb); | 2718 | netif_receive_skb(skb); |
@@ -2705,49 +2733,15 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
2705 | if (work_done >= to_do) | 2733 | if (work_done >= to_do) |
2706 | return 1; /* not done */ | 2734 | return 1; /* not done */ |
2707 | 2735 | ||
2708 | spin_lock_irq(&hw->hw_lock); | 2736 | netif_rx_complete(dev); |
2709 | __netif_rx_complete(dev); | 2737 | mmiowb(); |
2710 | hw->intr_mask |= portirqmask[skge->port]; | 2738 | |
2739 | hw->intr_mask |= skge->port == 0 ? (IS_R1_F|IS_XA1_F) : (IS_R2_F|IS_XA2_F); | ||
2711 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2740 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
2712 | spin_unlock_irq(&hw->hw_lock); | ||
2713 | 2741 | ||
2714 | return 0; | 2742 | return 0; |
2715 | } | 2743 | } |
2716 | 2744 | ||
2717 | static inline void skge_tx_intr(struct net_device *dev) | ||
2718 | { | ||
2719 | struct skge_port *skge = netdev_priv(dev); | ||
2720 | struct skge_hw *hw = skge->hw; | ||
2721 | struct skge_ring *ring = &skge->tx_ring; | ||
2722 | struct skge_element *e; | ||
2723 | |||
2724 | spin_lock(&skge->tx_lock); | ||
2725 | for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) { | ||
2726 | struct skge_tx_desc *td = e->desc; | ||
2727 | u32 control; | ||
2728 | |||
2729 | rmb(); | ||
2730 | control = td->control; | ||
2731 | if (control & BMU_OWN) | ||
2732 | break; | ||
2733 | |||
2734 | if (unlikely(netif_msg_tx_done(skge))) | ||
2735 | printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n", | ||
2736 | dev->name, e - ring->start, td->status); | ||
2737 | |||
2738 | skge_tx_free(hw, e); | ||
2739 | e->skb = NULL; | ||
2740 | ++skge->tx_avail; | ||
2741 | } | ||
2742 | ring->to_clean = e; | ||
2743 | skge_write8(hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F); | ||
2744 | |||
2745 | if (skge->tx_avail > MAX_SKB_FRAGS + 1) | ||
2746 | netif_wake_queue(dev); | ||
2747 | |||
2748 | spin_unlock(&skge->tx_lock); | ||
2749 | } | ||
2750 | |||
2751 | /* Parity errors seem to happen when Genesis is connected to a switch | 2745 | /* Parity errors seem to happen when Genesis is connected to a switch |
2752 | * with no other ports present. Heartbeat error?? | 2746 | * with no other ports present. Heartbeat error?? |
2753 | */ | 2747 | */ |
@@ -2770,17 +2764,6 @@ static void skge_mac_parity(struct skge_hw *hw, int port) | |||
2770 | ? GMF_CLI_TX_FC : GMF_CLI_TX_PE); | 2764 | ? GMF_CLI_TX_FC : GMF_CLI_TX_PE); |
2771 | } | 2765 | } |
2772 | 2766 | ||
2773 | static void skge_pci_clear(struct skge_hw *hw) | ||
2774 | { | ||
2775 | u16 status; | ||
2776 | |||
2777 | pci_read_config_word(hw->pdev, PCI_STATUS, &status); | ||
2778 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); | ||
2779 | pci_write_config_word(hw->pdev, PCI_STATUS, | ||
2780 | status | PCI_STATUS_ERROR_BITS); | ||
2781 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
2782 | } | ||
2783 | |||
2784 | static void skge_mac_intr(struct skge_hw *hw, int port) | 2767 | static void skge_mac_intr(struct skge_hw *hw, int port) |
2785 | { | 2768 | { |
2786 | if (hw->chip_id == CHIP_ID_GENESIS) | 2769 | if (hw->chip_id == CHIP_ID_GENESIS) |
@@ -2822,23 +2805,39 @@ static void skge_error_irq(struct skge_hw *hw) | |||
2822 | if (hwstatus & IS_M2_PAR_ERR) | 2805 | if (hwstatus & IS_M2_PAR_ERR) |
2823 | skge_mac_parity(hw, 1); | 2806 | skge_mac_parity(hw, 1); |
2824 | 2807 | ||
2825 | if (hwstatus & IS_R1_PAR_ERR) | 2808 | if (hwstatus & IS_R1_PAR_ERR) { |
2809 | printk(KERN_ERR PFX "%s: receive queue parity error\n", | ||
2810 | hw->dev[0]->name); | ||
2826 | skge_write32(hw, B0_R1_CSR, CSR_IRQ_CL_P); | 2811 | skge_write32(hw, B0_R1_CSR, CSR_IRQ_CL_P); |
2812 | } | ||
2827 | 2813 | ||
2828 | if (hwstatus & IS_R2_PAR_ERR) | 2814 | if (hwstatus & IS_R2_PAR_ERR) { |
2815 | printk(KERN_ERR PFX "%s: receive queue parity error\n", | ||
2816 | hw->dev[1]->name); | ||
2829 | skge_write32(hw, B0_R2_CSR, CSR_IRQ_CL_P); | 2817 | skge_write32(hw, B0_R2_CSR, CSR_IRQ_CL_P); |
2818 | } | ||
2830 | 2819 | ||
2831 | if (hwstatus & (IS_IRQ_MST_ERR|IS_IRQ_STAT)) { | 2820 | if (hwstatus & (IS_IRQ_MST_ERR|IS_IRQ_STAT)) { |
2832 | printk(KERN_ERR PFX "hardware error detected (status 0x%x)\n", | 2821 | u16 pci_status, pci_cmd; |
2833 | hwstatus); | 2822 | |
2823 | pci_read_config_word(hw->pdev, PCI_COMMAND, &pci_cmd); | ||
2824 | pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status); | ||
2834 | 2825 | ||
2835 | skge_pci_clear(hw); | 2826 | printk(KERN_ERR PFX "%s: PCI error cmd=%#x status=%#x\n", |
2827 | pci_name(hw->pdev), pci_cmd, pci_status); | ||
2828 | |||
2829 | /* Write the error bits back to clear them. */ | ||
2830 | pci_status &= PCI_STATUS_ERROR_BITS; | ||
2831 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); | ||
2832 | pci_write_config_word(hw->pdev, PCI_COMMAND, | ||
2833 | pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); | ||
2834 | pci_write_config_word(hw->pdev, PCI_STATUS, pci_status); | ||
2835 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
2836 | 2836 | ||
2837 | /* if error still set then just ignore it */ | 2837 | /* if error still set then just ignore it */ |
2838 | hwstatus = skge_read32(hw, B0_HWE_ISRC); | 2838 | hwstatus = skge_read32(hw, B0_HWE_ISRC); |
2839 | if (hwstatus & IS_IRQ_STAT) { | 2839 | if (hwstatus & IS_IRQ_STAT) { |
2840 | pr_debug("IRQ status %x: still set ignoring hardware errors\n", | 2840 | printk(KERN_INFO PFX "unable to clear errors (so ignoring them)\n");
2841 | hwstatus); | ||
2842 | hw->intr_mask &= ~IS_HW_ERR; | 2841 | hw->intr_mask &= ~IS_HW_ERR; |
2843 | } | 2842 | } |
2844 | } | 2843 | } |
@@ -2855,12 +2854,11 @@ static void skge_extirq(unsigned long data) | |||
2855 | int port; | 2854 | int port; |
2856 | 2855 | ||
2857 | spin_lock(&hw->phy_lock); | 2856 | spin_lock(&hw->phy_lock); |
2858 | for (port = 0; port < 2; port++) { | 2857 | for (port = 0; port < hw->ports; port++) { |
2859 | struct net_device *dev = hw->dev[port]; | 2858 | struct net_device *dev = hw->dev[port]; |
2859 | struct skge_port *skge = netdev_priv(dev); | ||
2860 | 2860 | ||
2861 | if (dev && netif_running(dev)) { | 2861 | if (netif_running(dev)) { |
2862 | struct skge_port *skge = netdev_priv(dev); | ||
2863 | |||
2864 | if (hw->chip_id != CHIP_ID_GENESIS) | 2862 | if (hw->chip_id != CHIP_ID_GENESIS) |
2865 | yukon_phy_intr(skge); | 2863 | yukon_phy_intr(skge); |
2866 | else | 2864 | else |
@@ -2869,38 +2867,39 @@ static void skge_extirq(unsigned long data) | |||
2869 | } | 2867 | } |
2870 | spin_unlock(&hw->phy_lock); | 2868 | spin_unlock(&hw->phy_lock); |
2871 | 2869 | ||
2872 | spin_lock_irq(&hw->hw_lock); | ||
2873 | hw->intr_mask |= IS_EXT_REG; | 2870 | hw->intr_mask |= IS_EXT_REG; |
2874 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2871 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
2875 | spin_unlock_irq(&hw->hw_lock); | ||
2876 | } | 2872 | } |
2877 | 2873 | ||
2878 | static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) | 2874 | static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) |
2879 | { | 2875 | { |
2880 | struct skge_hw *hw = dev_id; | 2876 | struct skge_hw *hw = dev_id; |
2881 | u32 status = skge_read32(hw, B0_SP_ISRC); | 2877 | u32 status; |
2882 | 2878 | ||
2883 | if (status == 0 || status == ~0) /* hotplug or shared irq */ | 2879 | /* Reading this register masks IRQ */ |
2880 | status = skge_read32(hw, B0_SP_ISRC); | ||
2881 | if (status == 0) | ||
2884 | return IRQ_NONE; | 2882 | return IRQ_NONE; |
2885 | 2883 | ||
2886 | spin_lock(&hw->hw_lock); | 2884 | if (status & IS_EXT_REG) { |
2887 | if (status & IS_R1_F) { | 2885 | hw->intr_mask &= ~IS_EXT_REG; |
2886 | tasklet_schedule(&hw->ext_tasklet); | ||
2887 | } | ||
2888 | |||
2889 | if (status & (IS_R1_F|IS_XA1_F)) { | ||
2888 | skge_write8(hw, Q_ADDR(Q_R1, Q_CSR), CSR_IRQ_CL_F); | 2890 | skge_write8(hw, Q_ADDR(Q_R1, Q_CSR), CSR_IRQ_CL_F); |
2889 | hw->intr_mask &= ~IS_R1_F; | 2891 | hw->intr_mask &= ~(IS_R1_F|IS_XA1_F); |
2890 | netif_rx_schedule(hw->dev[0]); | 2892 | netif_rx_schedule(hw->dev[0]); |
2891 | } | 2893 | } |
2892 | 2894 | ||
2893 | if (status & IS_R2_F) { | 2895 | if (status & (IS_R2_F|IS_XA2_F)) { |
2894 | skge_write8(hw, Q_ADDR(Q_R2, Q_CSR), CSR_IRQ_CL_F); | 2896 | skge_write8(hw, Q_ADDR(Q_R2, Q_CSR), CSR_IRQ_CL_F); |
2895 | hw->intr_mask &= ~IS_R2_F; | 2897 | hw->intr_mask &= ~(IS_R2_F|IS_XA2_F); |
2896 | netif_rx_schedule(hw->dev[1]); | 2898 | netif_rx_schedule(hw->dev[1]); |
2897 | } | 2899 | } |
2898 | 2900 | ||
2899 | if (status & IS_XA1_F) | 2901 | if (likely((status & hw->intr_mask) == 0)) |
2900 | skge_tx_intr(hw->dev[0]); | 2902 | return IRQ_HANDLED; |
2901 | |||
2902 | if (status & IS_XA2_F) | ||
2903 | skge_tx_intr(hw->dev[1]); | ||
2904 | 2903 | ||
2905 | if (status & IS_PA_TO_RX1) { | 2904 | if (status & IS_PA_TO_RX1) { |
2906 | struct skge_port *skge = netdev_priv(hw->dev[0]); | 2905 | struct skge_port *skge = netdev_priv(hw->dev[0]); |
@@ -2929,13 +2928,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) | |||
2929 | if (status & IS_HW_ERR) | 2928 | if (status & IS_HW_ERR) |
2930 | skge_error_irq(hw); | 2929 | skge_error_irq(hw); |
2931 | 2930 | ||
2932 | if (status & IS_EXT_REG) { | ||
2933 | hw->intr_mask &= ~IS_EXT_REG; | ||
2934 | tasklet_schedule(&hw->ext_tasklet); | ||
2935 | } | ||
2936 | |||
2937 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2931 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
2938 | spin_unlock(&hw->hw_lock); | ||
2939 | 2932 | ||
2940 | return IRQ_HANDLED; | 2933 | return IRQ_HANDLED; |
2941 | } | 2934 | } |
@@ -3010,7 +3003,7 @@ static const char *skge_board_name(const struct skge_hw *hw) | |||
3010 | static int skge_reset(struct skge_hw *hw) | 3003 | static int skge_reset(struct skge_hw *hw) |
3011 | { | 3004 | { |
3012 | u32 reg; | 3005 | u32 reg; |
3013 | u16 ctst; | 3006 | u16 ctst, pci_status; |
3014 | u8 t8, mac_cfg, pmd_type, phy_type; | 3007 | u8 t8, mac_cfg, pmd_type, phy_type; |
3015 | int i; | 3008 | int i; |
3016 | 3009 | ||
@@ -3021,8 +3014,13 @@ static int skge_reset(struct skge_hw *hw) | |||
3021 | skge_write8(hw, B0_CTST, CS_RST_CLR); | 3014 | skge_write8(hw, B0_CTST, CS_RST_CLR); |
3022 | 3015 | ||
3023 | /* clear PCI errors, if any */ | 3016 | /* clear PCI errors, if any */ |
3024 | skge_pci_clear(hw); | 3017 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); |
3018 | skge_write8(hw, B2_TST_CTRL2, 0); | ||
3025 | 3019 | ||
3020 | pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status); | ||
3021 | pci_write_config_word(hw->pdev, PCI_STATUS, | ||
3022 | pci_status | PCI_STATUS_ERROR_BITS); | ||
3023 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
3026 | skge_write8(hw, B0_CTST, CS_MRST_CLR); | 3024 | skge_write8(hw, B0_CTST, CS_MRST_CLR); |
3027 | 3025 | ||
3028 | /* restore CLK_RUN bits (for Yukon-Lite) */ | 3026 | /* restore CLK_RUN bits (for Yukon-Lite) */ |
@@ -3081,7 +3079,10 @@ static int skge_reset(struct skge_hw *hw) | |||
3081 | else | 3079 | else |
3082 | hw->ram_size = t8 * 4096; | 3080 | hw->ram_size = t8 * 4096; |
3083 | 3081 | ||
3084 | hw->intr_mask = IS_HW_ERR | IS_EXT_REG; | 3082 | hw->intr_mask = IS_HW_ERR | IS_EXT_REG | IS_PORT_1; |
3083 | if (hw->ports > 1) | ||
3084 | hw->intr_mask |= IS_PORT_2; | ||
3085 | |||
3085 | if (hw->chip_id == CHIP_ID_GENESIS) | 3086 | if (hw->chip_id == CHIP_ID_GENESIS) |
3086 | genesis_init(hw); | 3087 | genesis_init(hw); |
3087 | else { | 3088 | else { |
@@ -3251,13 +3252,15 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
3251 | struct skge_hw *hw; | 3252 | struct skge_hw *hw; |
3252 | int err, using_dac = 0; | 3253 | int err, using_dac = 0; |
3253 | 3254 | ||
3254 | if ((err = pci_enable_device(pdev))) { | 3255 | err = pci_enable_device(pdev); |
3256 | if (err) { | ||
3255 | printk(KERN_ERR PFX "%s cannot enable PCI device\n", | 3257 | printk(KERN_ERR PFX "%s cannot enable PCI device\n", |
3256 | pci_name(pdev)); | 3258 | pci_name(pdev)); |
3257 | goto err_out; | 3259 | goto err_out; |
3258 | } | 3260 | } |
3259 | 3261 | ||
3260 | if ((err = pci_request_regions(pdev, DRV_NAME))) { | 3262 | err = pci_request_regions(pdev, DRV_NAME); |
3263 | if (err) { | ||
3261 | printk(KERN_ERR PFX "%s cannot obtain PCI resources\n", | 3264 | printk(KERN_ERR PFX "%s cannot obtain PCI resources\n", |
3262 | pci_name(pdev)); | 3265 | pci_name(pdev)); |
3263 | goto err_out_disable_pdev; | 3266 | goto err_out_disable_pdev; |
@@ -3265,22 +3268,18 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
3265 | 3268 | ||
3266 | pci_set_master(pdev); | 3269 | pci_set_master(pdev); |
3267 | 3270 | ||
3268 | if (sizeof(dma_addr_t) > sizeof(u32) && | 3271 | if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { |
3269 | !(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { | ||
3270 | using_dac = 1; | 3272 | using_dac = 1; |
3271 | err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); | 3273 | err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); |
3272 | if (err < 0) { | 3274 | } else if (!(err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { |
3273 | printk(KERN_ERR PFX "%s unable to obtain 64 bit DMA " | 3275 | using_dac = 0; |
3274 | "for consistent allocations\n", pci_name(pdev)); | 3276 | err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); |
3275 | goto err_out_free_regions; | 3277 | } |
3276 | } | 3278 | |
3277 | } else { | 3279 | if (err) { |
3278 | err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); | 3280 | printk(KERN_ERR PFX "%s no usable DMA configuration\n", |
3279 | if (err) { | 3281 | pci_name(pdev)); |
3280 | printk(KERN_ERR PFX "%s no usable DMA configuration\n", | 3282 | goto err_out_free_regions; |
3281 | pci_name(pdev)); | ||
3282 | goto err_out_free_regions; | ||
3283 | } | ||
3284 | } | 3283 | } |
3285 | 3284 | ||
3286 | #ifdef __BIG_ENDIAN | 3285 | #ifdef __BIG_ENDIAN |
@@ -3304,7 +3303,6 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
3304 | 3303 | ||
3305 | hw->pdev = pdev; | 3304 | hw->pdev = pdev; |
3306 | spin_lock_init(&hw->phy_lock); | 3305 | spin_lock_init(&hw->phy_lock); |
3307 | spin_lock_init(&hw->hw_lock); | ||
3308 | tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw); | 3306 | tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw); |
3309 | 3307 | ||
3310 | hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); | 3308 | hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); |
@@ -3314,7 +3312,8 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
3314 | goto err_out_free_hw; | 3312 | goto err_out_free_hw; |
3315 | } | 3313 | } |
3316 | 3314 | ||
3317 | if ((err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw))) { | 3315 | err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw); |
3316 | if (err) { | ||
3318 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | 3317 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", |
3319 | pci_name(pdev), pdev->irq); | 3318 | pci_name(pdev), pdev->irq); |
3320 | goto err_out_iounmap; | 3319 | goto err_out_iounmap; |
@@ -3332,7 +3331,8 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
3332 | if ((dev = skge_devinit(hw, 0, using_dac)) == NULL) | 3331 | if ((dev = skge_devinit(hw, 0, using_dac)) == NULL) |
3333 | goto err_out_led_off; | 3332 | goto err_out_led_off; |
3334 | 3333 | ||
3335 | if ((err = register_netdev(dev))) { | 3334 | err = register_netdev(dev); |
3335 | if (err) { | ||
3336 | printk(KERN_ERR PFX "%s: cannot register net device\n", | 3336 | printk(KERN_ERR PFX "%s: cannot register net device\n", |
3337 | pci_name(pdev)); | 3337 | pci_name(pdev)); |
3338 | goto err_out_free_netdev; | 3338 | goto err_out_free_netdev; |
@@ -3387,7 +3387,6 @@ static void __devexit skge_remove(struct pci_dev *pdev) | |||
3387 | 3387 | ||
3388 | skge_write32(hw, B0_IMSK, 0); | 3388 | skge_write32(hw, B0_IMSK, 0); |
3389 | skge_write16(hw, B0_LED, LED_STAT_OFF); | 3389 | skge_write16(hw, B0_LED, LED_STAT_OFF); |
3390 | skge_pci_clear(hw); | ||
3391 | skge_write8(hw, B0_CTST, CS_RST_SET); | 3390 | skge_write8(hw, B0_CTST, CS_RST_SET); |
3392 | 3391 | ||
3393 | tasklet_kill(&hw->ext_tasklet); | 3392 | tasklet_kill(&hw->ext_tasklet); |
diff --git a/drivers/net/skge.h b/drivers/net/skge.h index 941f12a333b6..2efdacc290e5 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h | |||
@@ -2402,7 +2402,6 @@ struct skge_hw { | |||
2402 | 2402 | ||
2403 | struct tasklet_struct ext_tasklet; | 2403 | struct tasklet_struct ext_tasklet; |
2404 | spinlock_t phy_lock; | 2404 | spinlock_t phy_lock; |
2405 | spinlock_t hw_lock; | ||
2406 | }; | 2405 | }; |
2407 | 2406 | ||
2408 | enum { | 2407 | enum { |
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 73260364cba3..f08fe6c884b2 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #include "sky2.h" | 51 | #include "sky2.h" |
52 | 52 | ||
53 | #define DRV_NAME "sky2" | 53 | #define DRV_NAME "sky2" |
54 | #define DRV_VERSION "0.15" | 54 | #define DRV_VERSION "1.1" |
55 | #define PFX DRV_NAME " " | 55 | #define PFX DRV_NAME " " |
56 | 56 | ||
57 | /* | 57 | /* |
@@ -61,10 +61,6 @@ | |||
61 | * a receive requires one (or two if using 64 bit dma). | 61 | * a receive requires one (or two if using 64 bit dma). |
62 | */ | 62 | */ |
63 | 63 | ||
64 | #define is_ec_a1(hw) \ | ||
65 | unlikely((hw)->chip_id == CHIP_ID_YUKON_EC && \ | ||
66 | (hw)->chip_rev == CHIP_REV_YU_EC_A1) | ||
67 | |||
68 | #define RX_LE_SIZE 512 | 64 | #define RX_LE_SIZE 512 |
69 | #define RX_LE_BYTES (RX_LE_SIZE*sizeof(struct sky2_rx_le)) | 65 | #define RX_LE_BYTES (RX_LE_SIZE*sizeof(struct sky2_rx_le)) |
70 | #define RX_MAX_PENDING (RX_LE_SIZE/2 - 2) | 66 | #define RX_MAX_PENDING (RX_LE_SIZE/2 - 2) |
@@ -96,6 +92,10 @@ static int copybreak __read_mostly = 256; | |||
96 | module_param(copybreak, int, 0); | 92 | module_param(copybreak, int, 0); |
97 | MODULE_PARM_DESC(copybreak, "Receive copy threshold"); | 93 | MODULE_PARM_DESC(copybreak, "Receive copy threshold"); |
98 | 94 | ||
95 | static int disable_msi = 0; | ||
96 | module_param(disable_msi, int, 0); | ||
97 | MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); | ||
98 | |||
99 | static const struct pci_device_id sky2_id_table[] = { | 99 | static const struct pci_device_id sky2_id_table[] = { |
100 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, | 100 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, |
101 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, | 101 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, |
@@ -504,9 +504,9 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) | |||
504 | /* Force a renegotiation */ | 504 | /* Force a renegotiation */ |
505 | static void sky2_phy_reinit(struct sky2_port *sky2) | 505 | static void sky2_phy_reinit(struct sky2_port *sky2) |
506 | { | 506 | { |
507 | down(&sky2->phy_sema); | 507 | spin_lock_bh(&sky2->phy_lock); |
508 | sky2_phy_init(sky2->hw, sky2->port); | 508 | sky2_phy_init(sky2->hw, sky2->port); |
509 | up(&sky2->phy_sema); | 509 | spin_unlock_bh(&sky2->phy_lock); |
510 | } | 510 | } |
511 | 511 | ||
512 | static void sky2_mac_init(struct sky2_hw *hw, unsigned port) | 512 | static void sky2_mac_init(struct sky2_hw *hw, unsigned port) |
@@ -571,9 +571,9 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) | |||
571 | 571 | ||
572 | sky2_read16(hw, SK_REG(port, GMAC_IRQ_SRC)); | 572 | sky2_read16(hw, SK_REG(port, GMAC_IRQ_SRC)); |
573 | 573 | ||
574 | down(&sky2->phy_sema); | 574 | spin_lock_bh(&sky2->phy_lock); |
575 | sky2_phy_init(hw, port); | 575 | sky2_phy_init(hw, port); |
576 | up(&sky2->phy_sema); | 576 | spin_unlock_bh(&sky2->phy_lock); |
577 | 577 | ||
578 | /* MIB clear */ | 578 | /* MIB clear */ |
579 | reg = gma_read16(hw, port, GM_PHY_ADDR); | 579 | reg = gma_read16(hw, port, GM_PHY_ADDR); |
@@ -725,37 +725,11 @@ static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2) | |||
725 | return le; | 725 | return le; |
726 | } | 726 | } |
727 | 727 | ||
728 | /* | 728 | /* Update chip's next pointer */ |
729 | * This is a workaround code taken from SysKonnect sk98lin driver | 729 | static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx) |
730 | * to deal with chip bug on Yukon EC rev 0 in the wraparound case. | ||
731 | */ | ||
732 | static void sky2_put_idx(struct sky2_hw *hw, unsigned q, | ||
733 | u16 idx, u16 *last, u16 size) | ||
734 | { | 730 | { |
735 | wmb(); | 731 | wmb(); |
736 | if (is_ec_a1(hw) && idx < *last) { | 732 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); |
737 | u16 hwget = sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_GET_IDX)); | ||
738 | |||
739 | if (hwget == 0) { | ||
740 | /* Start prefetching again */ | ||
741 | sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 0xe0); | ||
742 | goto setnew; | ||
743 | } | ||
744 | |||
745 | if (hwget == size - 1) { | ||
746 | /* set watermark to one list element */ | ||
747 | sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 8); | ||
748 | |||
749 | /* set put index to first list element */ | ||
750 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), 0); | ||
751 | } else /* have hardware go to end of list */ | ||
752 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), | ||
753 | size - 1); | ||
754 | } else { | ||
755 | setnew: | ||
756 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); | ||
757 | } | ||
758 | *last = idx; | ||
759 | mmiowb(); | 733 | mmiowb(); |
760 | } | 734 | } |
761 | 735 | ||
@@ -878,7 +852,7 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
878 | if (!netif_running(dev)) | 852 | if (!netif_running(dev)) |
879 | return -ENODEV; /* Phy still in reset */ | 853 | return -ENODEV; /* Phy still in reset */ |
880 | 854 | ||
881 | switch(cmd) { | 855 | switch (cmd) { |
882 | case SIOCGMIIPHY: | 856 | case SIOCGMIIPHY: |
883 | data->phy_id = PHY_ADDR_MARV; | 857 | data->phy_id = PHY_ADDR_MARV; |
884 | 858 | ||
@@ -886,9 +860,9 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
886 | case SIOCGMIIREG: { | 860 | case SIOCGMIIREG: { |
887 | u16 val = 0; | 861 | u16 val = 0; |
888 | 862 | ||
889 | down(&sky2->phy_sema); | 863 | spin_lock_bh(&sky2->phy_lock); |
890 | err = __gm_phy_read(hw, sky2->port, data->reg_num & 0x1f, &val); | 864 | err = __gm_phy_read(hw, sky2->port, data->reg_num & 0x1f, &val); |
891 | up(&sky2->phy_sema); | 865 | spin_unlock_bh(&sky2->phy_lock); |
892 | 866 | ||
893 | data->val_out = val; | 867 | data->val_out = val; |
894 | break; | 868 | break; |
@@ -898,10 +872,10 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
898 | if (!capable(CAP_NET_ADMIN)) | 872 | if (!capable(CAP_NET_ADMIN)) |
899 | return -EPERM; | 873 | return -EPERM; |
900 | 874 | ||
901 | down(&sky2->phy_sema); | 875 | spin_lock_bh(&sky2->phy_lock); |
902 | err = gm_phy_write(hw, sky2->port, data->reg_num & 0x1f, | 876 | err = gm_phy_write(hw, sky2->port, data->reg_num & 0x1f, |
903 | data->val_in); | 877 | data->val_in); |
904 | up(&sky2->phy_sema); | 878 | spin_unlock_bh(&sky2->phy_lock); |
905 | break; | 879 | break; |
906 | } | 880 | } |
907 | return err; | 881 | return err; |
@@ -1001,7 +975,6 @@ static int sky2_rx_start(struct sky2_port *sky2) | |||
1001 | 975 | ||
1002 | /* Tell chip about available buffers */ | 976 | /* Tell chip about available buffers */ |
1003 | sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2->rx_put); | 977 | sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2->rx_put); |
1004 | sky2->rx_last_put = sky2_read16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX)); | ||
1005 | return 0; | 978 | return 0; |
1006 | nomem: | 979 | nomem: |
1007 | sky2_rx_clean(sky2); | 980 | sky2_rx_clean(sky2); |
@@ -1014,7 +987,7 @@ static int sky2_up(struct net_device *dev) | |||
1014 | struct sky2_port *sky2 = netdev_priv(dev); | 987 | struct sky2_port *sky2 = netdev_priv(dev); |
1015 | struct sky2_hw *hw = sky2->hw; | 988 | struct sky2_hw *hw = sky2->hw; |
1016 | unsigned port = sky2->port; | 989 | unsigned port = sky2->port; |
1017 | u32 ramsize, rxspace; | 990 | u32 ramsize, rxspace, imask; |
1018 | int err = -ENOMEM; | 991 | int err = -ENOMEM; |
1019 | 992 | ||
1020 | if (netif_msg_ifup(sky2)) | 993 | if (netif_msg_ifup(sky2)) |
@@ -1079,10 +1052,10 @@ static int sky2_up(struct net_device *dev) | |||
1079 | goto err_out; | 1052 | goto err_out; |
1080 | 1053 | ||
1081 | /* Enable interrupts from phy/mac for port */ | 1054 | /* Enable interrupts from phy/mac for port */ |
1082 | spin_lock_irq(&hw->hw_lock); | 1055 | imask = sky2_read32(hw, B0_IMSK); |
1083 | hw->intr_mask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2; | 1056 | imask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2; |
1084 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 1057 | sky2_write32(hw, B0_IMSK, imask); |
1085 | spin_unlock_irq(&hw->hw_lock); | 1058 | |
1086 | return 0; | 1059 | return 0; |
1087 | 1060 | ||
1088 | err_out: | 1061 | err_out: |
@@ -1299,8 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
1299 | netif_stop_queue(dev); | 1272 | netif_stop_queue(dev); |
1300 | } | 1273 | } |
1301 | 1274 | ||
1302 | sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod, | 1275 | sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod); |
1303 | &sky2->tx_last_put, TX_RING_SIZE); | ||
1304 | 1276 | ||
1305 | out_unlock: | 1277 | out_unlock: |
1306 | spin_unlock(&sky2->tx_lock); | 1278 | spin_unlock(&sky2->tx_lock); |
@@ -1332,7 +1304,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
1332 | struct tx_ring_info *re = sky2->tx_ring + put; | 1304 | struct tx_ring_info *re = sky2->tx_ring + put; |
1333 | struct sk_buff *skb = re->skb; | 1305 | struct sk_buff *skb = re->skb; |
1334 | 1306 | ||
1335 | nxt = re->idx; | 1307 | nxt = re->idx; |
1336 | BUG_ON(nxt >= TX_RING_SIZE); | 1308 | BUG_ON(nxt >= TX_RING_SIZE); |
1337 | prefetch(sky2->tx_ring + nxt); | 1309 | prefetch(sky2->tx_ring + nxt); |
1338 | 1310 | ||
@@ -1348,7 +1320,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
1348 | struct tx_ring_info *fre; | 1320 | struct tx_ring_info *fre; |
1349 | fre = sky2->tx_ring + (put + i + 1) % TX_RING_SIZE; | 1321 | fre = sky2->tx_ring + (put + i + 1) % TX_RING_SIZE; |
1350 | pci_unmap_page(pdev, pci_unmap_addr(fre, mapaddr), | 1322 | pci_unmap_page(pdev, pci_unmap_addr(fre, mapaddr), |
1351 | skb_shinfo(skb)->frags[i].size, | 1323 | skb_shinfo(skb)->frags[i].size, |
1352 | PCI_DMA_TODEVICE); | 1324 | PCI_DMA_TODEVICE); |
1353 | } | 1325 | } |
1354 | 1326 | ||
@@ -1356,7 +1328,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
1356 | } | 1328 | } |
1357 | 1329 | ||
1358 | sky2->tx_cons = put; | 1330 | sky2->tx_cons = put; |
1359 | if (netif_queue_stopped(dev) && tx_avail(sky2) > MAX_SKB_TX_LE) | 1331 | if (tx_avail(sky2) > MAX_SKB_TX_LE) |
1360 | netif_wake_queue(dev); | 1332 | netif_wake_queue(dev); |
1361 | } | 1333 | } |
1362 | 1334 | ||
@@ -1375,6 +1347,7 @@ static int sky2_down(struct net_device *dev) | |||
1375 | struct sky2_hw *hw = sky2->hw; | 1347 | struct sky2_hw *hw = sky2->hw; |
1376 | unsigned port = sky2->port; | 1348 | unsigned port = sky2->port; |
1377 | u16 ctrl; | 1349 | u16 ctrl; |
1350 | u32 imask; | ||
1378 | 1351 | ||
1379 | /* Never really got started! */ | 1352 | /* Never really got started! */ |
1380 | if (!sky2->tx_le) | 1353 | if (!sky2->tx_le) |
@@ -1386,14 +1359,6 @@ static int sky2_down(struct net_device *dev) | |||
1386 | /* Stop more packets from being queued */ | 1359 | /* Stop more packets from being queued */ |
1387 | netif_stop_queue(dev); | 1360 | netif_stop_queue(dev); |
1388 | 1361 | ||
1389 | /* Disable port IRQ */ | ||
1390 | spin_lock_irq(&hw->hw_lock); | ||
1391 | hw->intr_mask &= ~((sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2); | ||
1392 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
1393 | spin_unlock_irq(&hw->hw_lock); | ||
1394 | |||
1395 | flush_scheduled_work(); | ||
1396 | |||
1397 | sky2_phy_reset(hw, port); | 1362 | sky2_phy_reset(hw, port); |
1398 | 1363 | ||
1399 | /* Stop transmitter */ | 1364 | /* Stop transmitter */ |
@@ -1437,6 +1402,11 @@ static int sky2_down(struct net_device *dev) | |||
1437 | sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); | 1402 | sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); |
1438 | sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET); | 1403 | sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET); |
1439 | 1404 | ||
1405 | /* Disable port IRQ */ | ||
1406 | imask = sky2_read32(hw, B0_IMSK); | ||
1407 | imask &= ~((sky2->port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2); | ||
1408 | sky2_write32(hw, B0_IMSK, imask); | ||
1409 | |||
1440 | /* turn off LED's */ | 1410 | /* turn off LED's */ |
1441 | sky2_write16(hw, B0_Y2LED, LED_STAT_OFF); | 1411 | sky2_write16(hw, B0_Y2LED, LED_STAT_OFF); |
1442 | 1412 | ||
@@ -1631,20 +1601,19 @@ static int sky2_autoneg_done(struct sky2_port *sky2, u16 aux) | |||
1631 | return 0; | 1601 | return 0; |
1632 | } | 1602 | } |
1633 | 1603 | ||
1634 | /* | 1604 | /* Interrupt from PHY */ |
1635 | * Interrupt from PHY are handled outside of interrupt context | 1605 | static void sky2_phy_intr(struct sky2_hw *hw, unsigned port) |
1636 | * because accessing phy registers requires spin wait which might | ||
1637 | * cause excess interrupt latency. | ||
1638 | */ | ||
1639 | static void sky2_phy_task(void *arg) | ||
1640 | { | 1606 | { |
1641 | struct sky2_port *sky2 = arg; | 1607 | struct net_device *dev = hw->dev[port]; |
1642 | struct sky2_hw *hw = sky2->hw; | 1608 | struct sky2_port *sky2 = netdev_priv(dev); |
1643 | u16 istatus, phystat; | 1609 | u16 istatus, phystat; |
1644 | 1610 | ||
1645 | down(&sky2->phy_sema); | 1611 | spin_lock(&sky2->phy_lock); |
1646 | istatus = gm_phy_read(hw, sky2->port, PHY_MARV_INT_STAT); | 1612 | istatus = gm_phy_read(hw, port, PHY_MARV_INT_STAT); |
1647 | phystat = gm_phy_read(hw, sky2->port, PHY_MARV_PHY_STAT); | 1613 | phystat = gm_phy_read(hw, port, PHY_MARV_PHY_STAT); |
1614 | |||
1615 | if (!netif_running(dev)) | ||
1616 | goto out; | ||
1648 | 1617 | ||
1649 | if (netif_msg_intr(sky2)) | 1618 | if (netif_msg_intr(sky2)) |
1650 | printk(KERN_INFO PFX "%s: phy interrupt status 0x%x 0x%x\n", | 1619 | printk(KERN_INFO PFX "%s: phy interrupt status 0x%x 0x%x\n", |
@@ -1670,12 +1639,7 @@ static void sky2_phy_task(void *arg) | |||
1670 | sky2_link_down(sky2); | 1639 | sky2_link_down(sky2); |
1671 | } | 1640 | } |
1672 | out: | 1641 | out: |
1673 | up(&sky2->phy_sema); | 1642 | spin_unlock(&sky2->phy_lock); |
1674 | |||
1675 | spin_lock_irq(&hw->hw_lock); | ||
1676 | hw->intr_mask |= (sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2; | ||
1677 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
1678 | spin_unlock_irq(&hw->hw_lock); | ||
1679 | } | 1643 | } |
1680 | 1644 | ||
1681 | 1645 | ||
@@ -1687,31 +1651,40 @@ static void sky2_tx_timeout(struct net_device *dev) | |||
1687 | struct sky2_port *sky2 = netdev_priv(dev); | 1651 | struct sky2_port *sky2 = netdev_priv(dev); |
1688 | struct sky2_hw *hw = sky2->hw; | 1652 | struct sky2_hw *hw = sky2->hw; |
1689 | unsigned txq = txqaddr[sky2->port]; | 1653 | unsigned txq = txqaddr[sky2->port]; |
1690 | u16 ridx; | 1654 | u16 report, done; |
1691 | |||
1692 | /* Maybe we just missed an status interrupt */ | ||
1693 | spin_lock(&sky2->tx_lock); | ||
1694 | ridx = sky2_read16(hw, | ||
1695 | sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX); | ||
1696 | sky2_tx_complete(sky2, ridx); | ||
1697 | spin_unlock(&sky2->tx_lock); | ||
1698 | |||
1699 | if (!netif_queue_stopped(dev)) { | ||
1700 | if (net_ratelimit()) | ||
1701 | pr_info(PFX "transmit interrupt missed? recovered\n"); | ||
1702 | return; | ||
1703 | } | ||
1704 | 1655 | ||
1705 | if (netif_msg_timer(sky2)) | 1656 | if (netif_msg_timer(sky2)) |
1706 | printk(KERN_ERR PFX "%s: tx timeout\n", dev->name); | 1657 | printk(KERN_ERR PFX "%s: tx timeout\n", dev->name); |
1707 | 1658 | ||
1708 | sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP); | 1659 | report = sky2_read16(hw, sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX); |
1709 | sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); | 1660 | done = sky2_read16(hw, Q_ADDR(txq, Q_DONE)); |
1710 | 1661 | ||
1711 | sky2_tx_clean(sky2); | 1662 | printk(KERN_DEBUG PFX "%s: transmit ring %u .. %u report=%u done=%u\n", |
1663 | dev->name, | ||
1664 | sky2->tx_cons, sky2->tx_prod, report, done); | ||
1712 | 1665 | ||
1713 | sky2_qset(hw, txq); | 1666 | if (report != done) { |
1714 | sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1); | 1667 | printk(KERN_INFO PFX "status burst pending (irq moderation?)\n"); |
1668 | |||
1669 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); | ||
1670 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); | ||
1671 | } else if (report != sky2->tx_cons) { | ||
1672 | printk(KERN_INFO PFX "status report lost?\n"); | ||
1673 | |||
1674 | spin_lock_bh(&sky2->tx_lock); | ||
1675 | sky2_tx_complete(sky2, report); | ||
1676 | spin_unlock_bh(&sky2->tx_lock); | ||
1677 | } else { | ||
1678 | printk(KERN_INFO PFX "hardware hung? flushing\n"); | ||
1679 | |||
1680 | sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP); | ||
1681 | sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); | ||
1682 | |||
1683 | sky2_tx_clean(sky2); | ||
1684 | |||
1685 | sky2_qset(hw, txq); | ||
1686 | sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1); | ||
1687 | } | ||
1715 | } | 1688 | } |
1716 | 1689 | ||
1717 | 1690 | ||
@@ -1730,6 +1703,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
1730 | struct sky2_hw *hw = sky2->hw; | 1703 | struct sky2_hw *hw = sky2->hw; |
1731 | int err; | 1704 | int err; |
1732 | u16 ctl, mode; | 1705 | u16 ctl, mode; |
1706 | u32 imask; | ||
1733 | 1707 | ||
1734 | if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) | 1708 | if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) |
1735 | return -EINVAL; | 1709 | return -EINVAL; |
@@ -1742,12 +1716,15 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
1742 | return 0; | 1716 | return 0; |
1743 | } | 1717 | } |
1744 | 1718 | ||
1719 | imask = sky2_read32(hw, B0_IMSK); | ||
1745 | sky2_write32(hw, B0_IMSK, 0); | 1720 | sky2_write32(hw, B0_IMSK, 0); |
1746 | 1721 | ||
1747 | dev->trans_start = jiffies; /* prevent tx timeout */ | 1722 | dev->trans_start = jiffies; /* prevent tx timeout */ |
1748 | netif_stop_queue(dev); | 1723 | netif_stop_queue(dev); |
1749 | netif_poll_disable(hw->dev[0]); | 1724 | netif_poll_disable(hw->dev[0]); |
1750 | 1725 | ||
1726 | synchronize_irq(hw->pdev->irq); | ||
1727 | |||
1751 | ctl = gma_read16(hw, sky2->port, GM_GP_CTRL); | 1728 | ctl = gma_read16(hw, sky2->port, GM_GP_CTRL); |
1752 | gma_write16(hw, sky2->port, GM_GP_CTRL, ctl & ~GM_GPCR_RX_ENA); | 1729 | gma_write16(hw, sky2->port, GM_GP_CTRL, ctl & ~GM_GPCR_RX_ENA); |
1753 | sky2_rx_stop(sky2); | 1730 | sky2_rx_stop(sky2); |
@@ -1766,7 +1743,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
1766 | sky2_write8(hw, RB_ADDR(rxqaddr[sky2->port], RB_CTRL), RB_ENA_OP_MD); | 1743 | sky2_write8(hw, RB_ADDR(rxqaddr[sky2->port], RB_CTRL), RB_ENA_OP_MD); |
1767 | 1744 | ||
1768 | err = sky2_rx_start(sky2); | 1745 | err = sky2_rx_start(sky2); |
1769 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 1746 | sky2_write32(hw, B0_IMSK, imask); |
1770 | 1747 | ||
1771 | if (err) | 1748 | if (err) |
1772 | dev_close(dev); | 1749 | dev_close(dev); |
@@ -1843,8 +1820,7 @@ resubmit: | |||
1843 | sky2_rx_add(sky2, re->mapaddr); | 1820 | sky2_rx_add(sky2, re->mapaddr); |
1844 | 1821 | ||
1845 | /* Tell receiver about new buffers. */ | 1822 | /* Tell receiver about new buffers. */ |
1846 | sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put, | 1823 | sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put); |
1847 | &sky2->rx_last_put, RX_LE_SIZE); | ||
1848 | 1824 | ||
1849 | return skb; | 1825 | return skb; |
1850 | 1826 | ||
@@ -1871,76 +1847,51 @@ error: | |||
1871 | goto resubmit; | 1847 | goto resubmit; |
1872 | } | 1848 | } |
1873 | 1849 | ||
1874 | /* | 1850 | /* Transmit complete */ |
1875 | * Check for transmit complete | 1851 | static inline void sky2_tx_done(struct net_device *dev, u16 last) |
1876 | */ | ||
1877 | #define TX_NO_STATUS 0xffff | ||
1878 | |||
1879 | static void sky2_tx_check(struct sky2_hw *hw, int port, u16 last) | ||
1880 | { | 1852 | { |
1881 | if (last != TX_NO_STATUS) { | 1853 | struct sky2_port *sky2 = netdev_priv(dev); |
1882 | struct net_device *dev = hw->dev[port]; | ||
1883 | if (dev && netif_running(dev)) { | ||
1884 | struct sky2_port *sky2 = netdev_priv(dev); | ||
1885 | 1854 | ||
1886 | spin_lock(&sky2->tx_lock); | 1855 | if (netif_running(dev)) { |
1887 | sky2_tx_complete(sky2, last); | 1856 | spin_lock(&sky2->tx_lock); |
1888 | spin_unlock(&sky2->tx_lock); | 1857 | sky2_tx_complete(sky2, last); |
1889 | } | 1858 | spin_unlock(&sky2->tx_lock); |
1890 | } | 1859 | } |
1891 | } | 1860 | } |
1892 | 1861 | ||
1893 | /* | 1862 | /* Process status response ring */ |
1894 | * Both ports share the same status interrupt, therefore there is only | 1863 | static int sky2_status_intr(struct sky2_hw *hw, int to_do) |
1895 | * one poll routine. | ||
1896 | */ | ||
1897 | static int sky2_poll(struct net_device *dev0, int *budget) | ||
1898 | { | 1864 | { |
1899 | struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw; | 1865 | int work_done = 0; |
1900 | unsigned int to_do = min(dev0->quota, *budget); | ||
1901 | unsigned int work_done = 0; | ||
1902 | u16 hwidx; | ||
1903 | u16 tx_done[2] = { TX_NO_STATUS, TX_NO_STATUS }; | ||
1904 | |||
1905 | sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ); | ||
1906 | |||
1907 | /* | ||
1908 | * Kick the STAT_LEV_TIMER_CTRL timer. | ||
1909 | * This fixes my hangs on Yukon-EC (0xb6) rev 1. | ||
1910 | * The if clause is there to start the timer only if it has been | ||
1911 | * configured correctly and not been disabled via ethtool. | ||
1912 | */ | ||
1913 | if (sky2_read8(hw, STAT_LEV_TIMER_CTRL) == TIM_START) { | ||
1914 | sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_STOP); | ||
1915 | sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_START); | ||
1916 | } | ||
1917 | 1866 | ||
1918 | hwidx = sky2_read16(hw, STAT_PUT_IDX); | ||
1919 | BUG_ON(hwidx >= STATUS_RING_SIZE); | ||
1920 | rmb(); | 1867 | rmb(); |
1921 | 1868 | ||
1922 | while (hwidx != hw->st_idx) { | 1869 | for(;;) { |
1923 | struct sky2_status_le *le = hw->st_le + hw->st_idx; | 1870 | struct sky2_status_le *le = hw->st_le + hw->st_idx; |
1924 | struct net_device *dev; | 1871 | struct net_device *dev; |
1925 | struct sky2_port *sky2; | 1872 | struct sky2_port *sky2; |
1926 | struct sk_buff *skb; | 1873 | struct sk_buff *skb; |
1927 | u32 status; | 1874 | u32 status; |
1928 | u16 length; | 1875 | u16 length; |
1876 | u8 link, opcode; | ||
1877 | |||
1878 | opcode = le->opcode; | ||
1879 | if (!opcode) | ||
1880 | break; | ||
1881 | opcode &= ~HW_OWNER; | ||
1929 | 1882 | ||
1930 | le = hw->st_le + hw->st_idx; | ||
1931 | hw->st_idx = (hw->st_idx + 1) % STATUS_RING_SIZE; | 1883 | hw->st_idx = (hw->st_idx + 1) % STATUS_RING_SIZE; |
1932 | prefetch(hw->st_le + hw->st_idx); | 1884 | le->opcode = 0; |
1933 | 1885 | ||
1934 | BUG_ON(le->link >= 2); | 1886 | link = le->link; |
1935 | dev = hw->dev[le->link]; | 1887 | BUG_ON(link >= 2); |
1936 | if (dev == NULL || !netif_running(dev)) | 1888 | dev = hw->dev[link]; |
1937 | continue; | ||
1938 | 1889 | ||
1939 | sky2 = netdev_priv(dev); | 1890 | sky2 = netdev_priv(dev); |
1940 | status = le32_to_cpu(le->status); | 1891 | length = le->length; |
1941 | length = le16_to_cpu(le->length); | 1892 | status = le->status; |
1942 | 1893 | ||
1943 | switch (le->opcode & ~HW_OWNER) { | 1894 | switch (opcode) { |
1944 | case OP_RXSTAT: | 1895 | case OP_RXSTAT: |
1945 | skb = sky2_receive(sky2, length, status); | 1896 | skb = sky2_receive(sky2, length, status); |
1946 | if (!skb) | 1897 | if (!skb) |
@@ -1980,42 +1931,23 @@ static int sky2_poll(struct net_device *dev0, int *budget) | |||
1980 | 1931 | ||
1981 | case OP_TXINDEXLE: | 1932 | case OP_TXINDEXLE: |
1982 | /* TX index reports status for both ports */ | 1933 | /* TX index reports status for both ports */ |
1983 | tx_done[0] = status & 0xffff; | 1934 | sky2_tx_done(hw->dev[0], status & 0xffff); |
1984 | tx_done[1] = ((status >> 24) & 0xff) | 1935 | if (hw->dev[1]) |
1985 | | (u16)(length & 0xf) << 8; | 1936 | sky2_tx_done(hw->dev[1], |
1937 | ((status >> 24) & 0xff) | ||
1938 | | (u16)(length & 0xf) << 8); | ||
1986 | break; | 1939 | break; |
1987 | 1940 | ||
1988 | default: | 1941 | default: |
1989 | if (net_ratelimit()) | 1942 | if (net_ratelimit()) |
1990 | printk(KERN_WARNING PFX | 1943 | printk(KERN_WARNING PFX |
1991 | "unknown status opcode 0x%x\n", le->opcode); | 1944 | "unknown status opcode 0x%x\n", opcode); |
1992 | break; | 1945 | break; |
1993 | } | 1946 | } |
1994 | } | 1947 | } |
1995 | 1948 | ||
1996 | exit_loop: | 1949 | exit_loop: |
1997 | sky2_tx_check(hw, 0, tx_done[0]); | 1950 | return work_done; |
1998 | sky2_tx_check(hw, 1, tx_done[1]); | ||
1999 | |||
2000 | if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) { | ||
2001 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); | ||
2002 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); | ||
2003 | } | ||
2004 | |||
2005 | if (likely(work_done < to_do)) { | ||
2006 | spin_lock_irq(&hw->hw_lock); | ||
2007 | __netif_rx_complete(dev0); | ||
2008 | |||
2009 | hw->intr_mask |= Y2_IS_STAT_BMU; | ||
2010 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
2011 | spin_unlock_irq(&hw->hw_lock); | ||
2012 | |||
2013 | return 0; | ||
2014 | } else { | ||
2015 | *budget -= work_done; | ||
2016 | dev0->quota -= work_done; | ||
2017 | return 1; | ||
2018 | } | ||
2019 | } | 1951 | } |
2020 | 1952 | ||
2021 | static void sky2_hw_error(struct sky2_hw *hw, unsigned port, u32 status) | 1953 | static void sky2_hw_error(struct sky2_hw *hw, unsigned port, u32 status) |
@@ -2134,57 +2066,97 @@ static void sky2_mac_intr(struct sky2_hw *hw, unsigned port) | |||
2134 | } | 2066 | } |
2135 | } | 2067 | } |
2136 | 2068 | ||
2137 | static void sky2_phy_intr(struct sky2_hw *hw, unsigned port) | 2069 | /* This should never happen; it is a fatal situation */ |
2070 | static void sky2_descriptor_error(struct sky2_hw *hw, unsigned port, | ||
2071 | const char *rxtx, u32 mask) | ||
2138 | { | 2072 | { |
2139 | struct net_device *dev = hw->dev[port]; | 2073 | struct net_device *dev = hw->dev[port]; |
2140 | struct sky2_port *sky2 = netdev_priv(dev); | 2074 | struct sky2_port *sky2 = netdev_priv(dev); |
2075 | u32 imask; | ||
2076 | |||
2077 | printk(KERN_ERR PFX "%s: %s descriptor error (hardware problem)\n", | ||
2078 | dev ? dev->name : "<not registered>", rxtx); | ||
2141 | 2079 | ||
2142 | hw->intr_mask &= ~(port == 0 ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2); | 2080 | imask = sky2_read32(hw, B0_IMSK); |
2143 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 2081 | imask &= ~mask; |
2082 | sky2_write32(hw, B0_IMSK, imask); | ||
2144 | 2083 | ||
2145 | schedule_work(&sky2->phy_task); | 2084 | if (dev) { |
2085 | spin_lock(&sky2->phy_lock); | ||
2086 | sky2_link_down(sky2); | ||
2087 | spin_unlock(&sky2->phy_lock); | ||
2088 | } | ||
2146 | } | 2089 | } |
2147 | 2090 | ||
2148 | static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) | 2091 | static int sky2_poll(struct net_device *dev0, int *budget) |
2149 | { | 2092 | { |
2150 | struct sky2_hw *hw = dev_id; | 2093 | struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw; |
2151 | struct net_device *dev0 = hw->dev[0]; | 2094 | int work_limit = min(dev0->quota, *budget); |
2152 | u32 status; | 2095 | int work_done = 0; |
2096 | u32 status = sky2_read32(hw, B0_Y2_SP_EISR); | ||
2153 | 2097 | ||
2154 | status = sky2_read32(hw, B0_Y2_SP_ISRC2); | 2098 | if (unlikely(status & ~Y2_IS_STAT_BMU)) { |
2155 | if (status == 0 || status == ~0) | 2099 | if (status & Y2_IS_HW_ERR) |
2156 | return IRQ_NONE; | 2100 | sky2_hw_intr(hw); |
2157 | 2101 | ||
2158 | spin_lock(&hw->hw_lock); | 2102 | if (status & Y2_IS_IRQ_PHY1) |
2159 | if (status & Y2_IS_HW_ERR) | 2103 | sky2_phy_intr(hw, 0); |
2160 | sky2_hw_intr(hw); | ||
2161 | 2104 | ||
2162 | /* Do NAPI for Rx and Tx status */ | 2105 | if (status & Y2_IS_IRQ_PHY2) |
2163 | if (status & Y2_IS_STAT_BMU) { | 2106 | sky2_phy_intr(hw, 1); |
2164 | hw->intr_mask &= ~Y2_IS_STAT_BMU; | ||
2165 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
2166 | 2107 | ||
2167 | if (likely(__netif_rx_schedule_prep(dev0))) { | 2108 | if (status & Y2_IS_IRQ_MAC1) |
2168 | prefetch(&hw->st_le[hw->st_idx]); | 2109 | sky2_mac_intr(hw, 0); |
2169 | __netif_rx_schedule(dev0); | 2110 | |
2170 | } | 2111 | if (status & Y2_IS_IRQ_MAC2) |
2112 | sky2_mac_intr(hw, 1); | ||
2113 | |||
2114 | if (status & Y2_IS_CHK_RX1) | ||
2115 | sky2_descriptor_error(hw, 0, "receive", Y2_IS_CHK_RX1); | ||
2116 | |||
2117 | if (status & Y2_IS_CHK_RX2) | ||
2118 | sky2_descriptor_error(hw, 1, "receive", Y2_IS_CHK_RX2); | ||
2119 | |||
2120 | if (status & Y2_IS_CHK_TXA1) | ||
2121 | sky2_descriptor_error(hw, 0, "transmit", Y2_IS_CHK_TXA1); | ||
2122 | |||
2123 | if (status & Y2_IS_CHK_TXA2) | ||
2124 | sky2_descriptor_error(hw, 1, "transmit", Y2_IS_CHK_TXA2); | ||
2171 | } | 2125 | } |
2172 | 2126 | ||
2173 | if (status & Y2_IS_IRQ_PHY1) | 2127 | if (status & Y2_IS_STAT_BMU) { |
2174 | sky2_phy_intr(hw, 0); | 2128 | work_done = sky2_status_intr(hw, work_limit); |
2129 | *budget -= work_done; | ||
2130 | dev0->quota -= work_done; | ||
2131 | |||
2132 | if (work_done >= work_limit) | ||
2133 | return 1; | ||
2175 | 2134 | ||
2176 | if (status & Y2_IS_IRQ_PHY2) | 2135 | sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ); |
2177 | sky2_phy_intr(hw, 1); | 2136 | } |
2178 | 2137 | ||
2179 | if (status & Y2_IS_IRQ_MAC1) | 2138 | netif_rx_complete(dev0); |
2180 | sky2_mac_intr(hw, 0); | ||
2181 | 2139 | ||
2182 | if (status & Y2_IS_IRQ_MAC2) | 2140 | status = sky2_read32(hw, B0_Y2_SP_LISR); |
2183 | sky2_mac_intr(hw, 1); | 2141 | return 0; |
2142 | } | ||
2184 | 2143 | ||
2185 | sky2_write32(hw, B0_Y2_SP_ICR, 2); | 2144 | static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) |
2145 | { | ||
2146 | struct sky2_hw *hw = dev_id; | ||
2147 | struct net_device *dev0 = hw->dev[0]; | ||
2148 | u32 status; | ||
2186 | 2149 | ||
2187 | spin_unlock(&hw->hw_lock); | 2150 | /* Reading this register masks interrupts as a side effect */ |
2151 | status = sky2_read32(hw, B0_Y2_SP_ISRC2); | ||
2152 | if (status == 0 || status == ~0) | ||
2153 | return IRQ_NONE; | ||
2154 | |||
2155 | prefetch(&hw->st_le[hw->st_idx]); | ||
2156 | if (likely(__netif_rx_schedule_prep(dev0))) | ||
2157 | __netif_rx_schedule(dev0); | ||
2158 | else | ||
2159 | printk(KERN_DEBUG PFX "irq race detected\n"); | ||
2188 | 2160 | ||
2189 | return IRQ_HANDLED; | 2161 | return IRQ_HANDLED; |
2190 | } | 2162 | } |
@@ -2238,6 +2210,23 @@ static int sky2_reset(struct sky2_hw *hw) | |||
2238 | return -EOPNOTSUPP; | 2210 | return -EOPNOTSUPP; |
2239 | } | 2211 | } |
2240 | 2212 | ||
2213 | hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; | ||
2214 | |||
2215 | /* This rev is really old, and requires untested workarounds */ | ||
2216 | if (hw->chip_id == CHIP_ID_YUKON_EC && hw->chip_rev == CHIP_REV_YU_EC_A1) { | ||
2217 | printk(KERN_ERR PFX "%s: unsupported revision Yukon-%s (0x%x) rev %d\n", | ||
2218 | pci_name(hw->pdev), yukon2_name[hw->chip_id - CHIP_ID_YUKON_XL], | ||
2219 | hw->chip_id, hw->chip_rev); | ||
2220 | return -EOPNOTSUPP; | ||
2221 | } | ||
2222 | |||
2223 | /* This chip is new and not tested yet */ | ||
2224 | if (hw->chip_id == CHIP_ID_YUKON_EC_U) { | ||
2225 | pr_info(PFX "%s: is a version of the Yukon 2 chipset that has not been tested yet.\n", | ||
2226 | pci_name(hw->pdev)); | ||
2227 | pr_info("Please report success/failure to maintainer <shemminger@osdl.org>\n"); | ||
2228 | } | ||
2229 | |||
2241 | /* disable ASF */ | 2230 | /* disable ASF */ |
2242 | if (hw->chip_id <= CHIP_ID_YUKON_EC) { | 2231 | if (hw->chip_id <= CHIP_ID_YUKON_EC) { |
2243 | sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); | 2232 | sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); |
@@ -2258,7 +2247,7 @@ static int sky2_reset(struct sky2_hw *hw) | |||
2258 | sky2_write8(hw, B0_CTST, CS_MRST_CLR); | 2247 | sky2_write8(hw, B0_CTST, CS_MRST_CLR); |
2259 | 2248 | ||
2260 | /* clear any PEX errors */ | 2249 | /* clear any PEX errors */ |
2261 | if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) | 2250 | if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) |
2262 | sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); | 2251 | sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); |
2263 | 2252 | ||
2264 | 2253 | ||
@@ -2271,7 +2260,6 @@ static int sky2_reset(struct sky2_hw *hw) | |||
2271 | if (!(sky2_read8(hw, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) | 2260 | if (!(sky2_read8(hw, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) |
2272 | ++hw->ports; | 2261 | ++hw->ports; |
2273 | } | 2262 | } |
2274 | hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; | ||
2275 | 2263 | ||
2276 | sky2_set_power_state(hw, PCI_D0); | 2264 | sky2_set_power_state(hw, PCI_D0); |
2277 | 2265 | ||
@@ -2337,30 +2325,18 @@ static int sky2_reset(struct sky2_hw *hw) | |||
2337 | /* Set the list last index */ | 2325 | /* Set the list last index */ |
2338 | sky2_write16(hw, STAT_LAST_IDX, STATUS_RING_SIZE - 1); | 2326 | sky2_write16(hw, STAT_LAST_IDX, STATUS_RING_SIZE - 1); |
2339 | 2327 | ||
2340 | /* These status setup values are copied from SysKonnect's driver */ | 2328 | sky2_write16(hw, STAT_TX_IDX_TH, 10); |
2341 | if (is_ec_a1(hw)) { | 2329 | sky2_write8(hw, STAT_FIFO_WM, 16); |
2342 | /* WA for dev. #4.3 */ | ||
2343 | sky2_write16(hw, STAT_TX_IDX_TH, 0xfff); /* Tx Threshold */ | ||
2344 | |||
2345 | /* set Status-FIFO watermark */ | ||
2346 | sky2_write8(hw, STAT_FIFO_WM, 0x21); /* WA for dev. #4.18 */ | ||
2347 | 2330 | ||
2348 | /* set Status-FIFO ISR watermark */ | 2331 | /* set Status-FIFO ISR watermark */ |
2349 | sky2_write8(hw, STAT_FIFO_ISR_WM, 0x07); /* WA for dev. #4.18 */ | 2332 | if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0) |
2350 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 10000)); | 2333 | sky2_write8(hw, STAT_FIFO_ISR_WM, 4); |
2351 | } else { | 2334 | else |
2352 | sky2_write16(hw, STAT_TX_IDX_TH, 10); | 2335 | sky2_write8(hw, STAT_FIFO_ISR_WM, 16); |
2353 | sky2_write8(hw, STAT_FIFO_WM, 16); | ||
2354 | |||
2355 | /* set Status-FIFO ISR watermark */ | ||
2356 | if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0) | ||
2357 | sky2_write8(hw, STAT_FIFO_ISR_WM, 4); | ||
2358 | else | ||
2359 | sky2_write8(hw, STAT_FIFO_ISR_WM, 16); | ||
2360 | 2336 | ||
2361 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000)); | 2337 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000)); |
2362 | sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 7)); | 2338 | sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 20)); |
2363 | } | 2339 | sky2_write32(hw, STAT_LEV_TIMER_INI, sky2_us2clk(hw, 100)); |
2364 | 2340 | ||
2365 | /* enable status unit */ | 2341 | /* enable status unit */ |
2366 | sky2_write32(hw, STAT_CTRL, SC_STAT_OP_ON); | 2342 | sky2_write32(hw, STAT_CTRL, SC_STAT_OP_ON); |
@@ -2743,7 +2719,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
2743 | ms = data * 1000; | 2719 | ms = data * 1000; |
2744 | 2720 | ||
2745 | /* save initial values */ | 2721 | /* save initial values */ |
2746 | down(&sky2->phy_sema); | 2722 | spin_lock_bh(&sky2->phy_lock); |
2747 | if (hw->chip_id == CHIP_ID_YUKON_XL) { | 2723 | if (hw->chip_id == CHIP_ID_YUKON_XL) { |
2748 | u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); | 2724 | u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); |
2749 | gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 3); | 2725 | gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 3); |
@@ -2759,9 +2735,9 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
2759 | sky2_led(hw, port, onoff); | 2735 | sky2_led(hw, port, onoff); |
2760 | onoff = !onoff; | 2736 | onoff = !onoff; |
2761 | 2737 | ||
2762 | up(&sky2->phy_sema); | 2738 | spin_unlock_bh(&sky2->phy_lock); |
2763 | interrupted = msleep_interruptible(250); | 2739 | interrupted = msleep_interruptible(250); |
2764 | down(&sky2->phy_sema); | 2740 | spin_lock_bh(&sky2->phy_lock); |
2765 | 2741 | ||
2766 | ms -= 250; | 2742 | ms -= 250; |
2767 | } | 2743 | } |
@@ -2776,7 +2752,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
2776 | gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl); | 2752 | gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl); |
2777 | gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover); | 2753 | gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover); |
2778 | } | 2754 | } |
2779 | up(&sky2->phy_sema); | 2755 | spin_unlock_bh(&sky2->phy_lock); |
2780 | 2756 | ||
2781 | return 0; | 2757 | return 0; |
2782 | } | 2758 | } |
@@ -2806,38 +2782,6 @@ static int sky2_set_pauseparam(struct net_device *dev, | |||
2806 | return err; | 2782 | return err; |
2807 | } | 2783 | } |
2808 | 2784 | ||
2809 | #ifdef CONFIG_PM | ||
2810 | static void sky2_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) | ||
2811 | { | ||
2812 | struct sky2_port *sky2 = netdev_priv(dev); | ||
2813 | |||
2814 | wol->supported = WAKE_MAGIC; | ||
2815 | wol->wolopts = sky2->wol ? WAKE_MAGIC : 0; | ||
2816 | } | ||
2817 | |||
2818 | static int sky2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) | ||
2819 | { | ||
2820 | struct sky2_port *sky2 = netdev_priv(dev); | ||
2821 | struct sky2_hw *hw = sky2->hw; | ||
2822 | |||
2823 | if (wol->wolopts != WAKE_MAGIC && wol->wolopts != 0) | ||
2824 | return -EOPNOTSUPP; | ||
2825 | |||
2826 | sky2->wol = wol->wolopts == WAKE_MAGIC; | ||
2827 | |||
2828 | if (sky2->wol) { | ||
2829 | memcpy_toio(hw->regs + WOL_MAC_ADDR, dev->dev_addr, ETH_ALEN); | ||
2830 | |||
2831 | sky2_write16(hw, WOL_CTRL_STAT, | ||
2832 | WOL_CTL_ENA_PME_ON_MAGIC_PKT | | ||
2833 | WOL_CTL_ENA_MAGIC_PKT_UNIT); | ||
2834 | } else | ||
2835 | sky2_write16(hw, WOL_CTRL_STAT, WOL_CTL_DEFAULT); | ||
2836 | |||
2837 | return 0; | ||
2838 | } | ||
2839 | #endif | ||
2840 | |||
2841 | static int sky2_get_coalesce(struct net_device *dev, | 2785 | static int sky2_get_coalesce(struct net_device *dev, |
2842 | struct ethtool_coalesce *ecmd) | 2786 | struct ethtool_coalesce *ecmd) |
2843 | { | 2787 | { |
@@ -2878,19 +2822,11 @@ static int sky2_set_coalesce(struct net_device *dev, | |||
2878 | { | 2822 | { |
2879 | struct sky2_port *sky2 = netdev_priv(dev); | 2823 | struct sky2_port *sky2 = netdev_priv(dev); |
2880 | struct sky2_hw *hw = sky2->hw; | 2824 | struct sky2_hw *hw = sky2->hw; |
2881 | const u32 tmin = sky2_clk2us(hw, 1); | 2825 | const u32 tmax = sky2_clk2us(hw, 0x0ffffff); |
2882 | const u32 tmax = 5000; | ||
2883 | |||
2884 | if (ecmd->tx_coalesce_usecs != 0 && | ||
2885 | (ecmd->tx_coalesce_usecs < tmin || ecmd->tx_coalesce_usecs > tmax)) | ||
2886 | return -EINVAL; | ||
2887 | |||
2888 | if (ecmd->rx_coalesce_usecs != 0 && | ||
2889 | (ecmd->rx_coalesce_usecs < tmin || ecmd->rx_coalesce_usecs > tmax)) | ||
2890 | return -EINVAL; | ||
2891 | 2826 | ||
2892 | if (ecmd->rx_coalesce_usecs_irq != 0 && | 2827 | if (ecmd->tx_coalesce_usecs > tmax || |
2893 | (ecmd->rx_coalesce_usecs_irq < tmin || ecmd->rx_coalesce_usecs_irq > tmax)) | 2828 | ecmd->rx_coalesce_usecs > tmax || |
2829 | ecmd->rx_coalesce_usecs_irq > tmax) | ||
2894 | return -EINVAL; | 2830 | return -EINVAL; |
2895 | 2831 | ||
2896 | if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1) | 2832 | if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1) |
@@ -3025,10 +2961,6 @@ static struct ethtool_ops sky2_ethtool_ops = { | |||
3025 | .set_ringparam = sky2_set_ringparam, | 2961 | .set_ringparam = sky2_set_ringparam, |
3026 | .get_pauseparam = sky2_get_pauseparam, | 2962 | .get_pauseparam = sky2_get_pauseparam, |
3027 | .set_pauseparam = sky2_set_pauseparam, | 2963 | .set_pauseparam = sky2_set_pauseparam, |
3028 | #ifdef CONFIG_PM | ||
3029 | .get_wol = sky2_get_wol, | ||
3030 | .set_wol = sky2_set_wol, | ||
3031 | #endif | ||
3032 | .phys_id = sky2_phys_id, | 2964 | .phys_id = sky2_phys_id, |
3033 | .get_stats_count = sky2_get_stats_count, | 2965 | .get_stats_count = sky2_get_stats_count, |
3034 | .get_ethtool_stats = sky2_get_ethtool_stats, | 2966 | .get_ethtool_stats = sky2_get_ethtool_stats, |
@@ -3082,16 +3014,15 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, | |||
3082 | sky2->speed = -1; | 3014 | sky2->speed = -1; |
3083 | sky2->advertising = sky2_supported_modes(hw); | 3015 | sky2->advertising = sky2_supported_modes(hw); |
3084 | 3016 | ||
3085 | /* Receive checksum disabled for Yukon XL | 3017 | /* Receive checksum disabled for Yukon XL |
3086 | * because of observed problems with incorrect | 3018 | * because of observed problems with incorrect |
3087 | * values when multiple packets are received in one interrupt | 3019 | * values when multiple packets are received in one interrupt |
3088 | */ | 3020 | */ |
3089 | sky2->rx_csum = (hw->chip_id != CHIP_ID_YUKON_XL); | 3021 | sky2->rx_csum = (hw->chip_id != CHIP_ID_YUKON_XL); |
3090 | 3022 | ||
3091 | INIT_WORK(&sky2->phy_task, sky2_phy_task, sky2); | 3023 | spin_lock_init(&sky2->phy_lock); |
3092 | init_MUTEX(&sky2->phy_sema); | ||
3093 | sky2->tx_pending = TX_DEF_PENDING; | 3024 | sky2->tx_pending = TX_DEF_PENDING; |
3094 | sky2->rx_pending = is_ec_a1(hw) ? 8 : RX_DEF_PENDING; | 3025 | sky2->rx_pending = RX_DEF_PENDING; |
3095 | sky2->rx_bufsize = sky2_buf_size(ETH_DATA_LEN); | 3026 | sky2->rx_bufsize = sky2_buf_size(ETH_DATA_LEN); |
3096 | 3027 | ||
3097 | hw->dev[port] = dev; | 3028 | hw->dev[port] = dev; |
@@ -3133,6 +3064,66 @@ static void __devinit sky2_show_addr(struct net_device *dev) | |||
3133 | dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); | 3064 | dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); |
3134 | } | 3065 | } |
3135 | 3066 | ||
3067 | /* Handle software interrupt used during MSI test */ | ||
3068 | static irqreturn_t __devinit sky2_test_intr(int irq, void *dev_id, | ||
3069 | struct pt_regs *regs) | ||
3070 | { | ||
3071 | struct sky2_hw *hw = dev_id; | ||
3072 | u32 status = sky2_read32(hw, B0_Y2_SP_ISRC2); | ||
3073 | |||
3074 | if (status == 0) | ||
3075 | return IRQ_NONE; | ||
3076 | |||
3077 | if (status & Y2_IS_IRQ_SW) { | ||
3078 | hw->msi_detected = 1; | ||
3079 | wake_up(&hw->msi_wait); | ||
3080 | sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ); | ||
3081 | } | ||
3082 | sky2_write32(hw, B0_Y2_SP_ICR, 2); | ||
3083 | |||
3084 | return IRQ_HANDLED; | ||
3085 | } | ||
3086 | |||
3087 | /* Test interrupt path by forcing a software IRQ */ | ||
3088 | static int __devinit sky2_test_msi(struct sky2_hw *hw) | ||
3089 | { | ||
3090 | struct pci_dev *pdev = hw->pdev; | ||
3091 | int err; | ||
3092 | |||
3093 | sky2_write32(hw, B0_IMSK, Y2_IS_IRQ_SW); | ||
3094 | |||
3095 | err = request_irq(pdev->irq, sky2_test_intr, SA_SHIRQ, DRV_NAME, hw); | ||
3096 | if (err) { | ||
3097 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | ||
3098 | pci_name(pdev), pdev->irq); | ||
3099 | return err; | ||
3100 | } | ||
3101 | |||
3102 | init_waitqueue_head (&hw->msi_wait); | ||
3103 | |||
3104 | sky2_write8(hw, B0_CTST, CS_ST_SW_IRQ); | ||
3105 | wmb(); | ||
3106 | |||
3107 | wait_event_timeout(hw->msi_wait, hw->msi_detected, HZ/10); | ||
3108 | |||
3109 | if (!hw->msi_detected) { | ||
3110 | /* MSI test failed, go back to INTx mode */ | ||
3111 | printk(KERN_WARNING PFX "%s: No interrupt was generated using MSI, " | ||
3112 | "switching to INTx mode. Please report this failure to " | ||
3113 | "the PCI maintainer and include system chipset information.\n", | ||
3114 | pci_name(pdev)); | ||
3115 | |||
3116 | err = -EOPNOTSUPP; | ||
3117 | sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ); | ||
3118 | } | ||
3119 | |||
3120 | sky2_write32(hw, B0_IMSK, 0); | ||
3121 | |||
3122 | free_irq(pdev->irq, hw); | ||
3123 | |||
3124 | return err; | ||
3125 | } | ||
3126 | |||
3136 | static int __devinit sky2_probe(struct pci_dev *pdev, | 3127 | static int __devinit sky2_probe(struct pci_dev *pdev, |
3137 | const struct pci_device_id *ent) | 3128 | const struct pci_device_id *ent) |
3138 | { | 3129 | { |
@@ -3201,7 +3192,6 @@ static int __devinit sky2_probe(struct pci_dev *pdev, | |||
3201 | goto err_out_free_hw; | 3192 | goto err_out_free_hw; |
3202 | } | 3193 | } |
3203 | hw->pm_cap = pm_cap; | 3194 | hw->pm_cap = pm_cap; |
3204 | spin_lock_init(&hw->hw_lock); | ||
3205 | 3195 | ||
3206 | #ifdef __BIG_ENDIAN | 3196 | #ifdef __BIG_ENDIAN |
3207 | /* byte swap descriptors in hardware */ | 3197 | /* byte swap descriptors in hardware */ |
@@ -3254,21 +3244,29 @@ static int __devinit sky2_probe(struct pci_dev *pdev, | |||
3254 | } | 3244 | } |
3255 | } | 3245 | } |
3256 | 3246 | ||
3257 | err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw); | 3247 | if (!disable_msi && pci_enable_msi(pdev) == 0) { |
3248 | err = sky2_test_msi(hw); | ||
3249 | if (err == -EOPNOTSUPP) | ||
3250 | pci_disable_msi(pdev); | ||
3251 | else if (err) | ||
3252 | goto err_out_unregister; | ||
3253 | } | ||
3254 | |||
3255 | err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw); | ||
3258 | if (err) { | 3256 | if (err) { |
3259 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | 3257 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", |
3260 | pci_name(pdev), pdev->irq); | 3258 | pci_name(pdev), pdev->irq); |
3261 | goto err_out_unregister; | 3259 | goto err_out_unregister; |
3262 | } | 3260 | } |
3263 | 3261 | ||
3264 | hw->intr_mask = Y2_IS_BASE; | 3262 | sky2_write32(hw, B0_IMSK, Y2_IS_BASE); |
3265 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
3266 | 3263 | ||
3267 | pci_set_drvdata(pdev, hw); | 3264 | pci_set_drvdata(pdev, hw); |
3268 | 3265 | ||
3269 | return 0; | 3266 | return 0; |
3270 | 3267 | ||
3271 | err_out_unregister: | 3268 | err_out_unregister: |
3269 | pci_disable_msi(pdev); | ||
3272 | if (dev1) { | 3270 | if (dev1) { |
3273 | unregister_netdev(dev1); | 3271 | unregister_netdev(dev1); |
3274 | free_netdev(dev1); | 3272 | free_netdev(dev1); |
@@ -3311,6 +3309,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev) | |||
3311 | sky2_read8(hw, B0_CTST); | 3309 | sky2_read8(hw, B0_CTST); |
3312 | 3310 | ||
3313 | free_irq(pdev->irq, hw); | 3311 | free_irq(pdev->irq, hw); |
3312 | pci_disable_msi(pdev); | ||
3314 | pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma); | 3313 | pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma); |
3315 | pci_release_regions(pdev); | 3314 | pci_release_regions(pdev); |
3316 | pci_disable_device(pdev); | 3315 | pci_disable_device(pdev); |
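
The sky2.c changes above drop the driver's software copy of the interrupt mask (hw->intr_mask, protected by hw_lock) and instead read-modify-write the B0_IMSK register directly when a port is opened or closed. Below is a minimal user-space sketch of that pattern; reg_read32()/reg_write32() and the PORT_*_BITS values are hypothetical stand-ins for the driver's sky2_read32()/sky2_write32() accessors and the Y2_IS_PORT_* masks, not the real register layout.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the MMIO accessors; the real driver goes
     * through sky2_read32()/sky2_write32() to the B0_IMSK register. */
    static uint32_t fake_imsk;                      /* pretend mask register */
    static uint32_t reg_read32(void) { return fake_imsk; }
    static void reg_write32(uint32_t v) { fake_imsk = v; }

    #define PORT_1_BITS 0x0000000fu                 /* illustrative values only */
    #define PORT_2_BITS 0x000000f0u

    static void port_irq_enable(int port)
    {
            uint32_t imask = reg_read32();          /* read the current mask... */
            imask |= (port == 0) ? PORT_1_BITS : PORT_2_BITS;
            reg_write32(imask);                     /* ...and write it back */
    }

    static void port_irq_disable(int port)
    {
            uint32_t imask = reg_read32();
            /* parenthesize the conditional before applying ~ */
            imask &= ~((port == 0) ? PORT_1_BITS : PORT_2_BITS);
            reg_write32(imask);
    }

    int main(void)
    {
            port_irq_enable(0);
            port_irq_enable(1);
            port_irq_disable(0);
            printf("mask now 0x%08x\n", (unsigned)reg_read32());  /* 0x000000f0 */
            return 0;
    }

The trade-off is one extra register read per mask update instead of keeping a shadow copy coherent under a spinlock.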
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index dce955c76f3c..d63cd5a1b71c 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h | |||
@@ -278,13 +278,11 @@ enum { | |||
278 | Y2_IS_CHK_TXS1 = 1<<1, /* Descriptor error TXS 1 */ | 278 | Y2_IS_CHK_TXS1 = 1<<1, /* Descriptor error TXS 1 */ |
279 | Y2_IS_CHK_TXA1 = 1<<0, /* Descriptor error TXA 1 */ | 279 | Y2_IS_CHK_TXA1 = 1<<0, /* Descriptor error TXA 1 */ |
280 | 280 | ||
281 | Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU | | 281 | Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU, |
282 | Y2_IS_POLL_CHK | Y2_IS_TWSI_RDY | | 282 | Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1 |
283 | Y2_IS_IRQ_SW | Y2_IS_TIMINT, | 283 | | Y2_IS_CHK_TXA1 | Y2_IS_CHK_RX1, |
284 | Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1 | | 284 | Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2 |
285 | Y2_IS_CHK_RX1 | Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXS1, | 285 | | Y2_IS_CHK_TXA2 | Y2_IS_CHK_RX2, |
286 | Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2 | | ||
287 | Y2_IS_CHK_RX2 | Y2_IS_CHK_TXA2 | Y2_IS_CHK_TXS2, | ||
288 | }; | 286 | }; |
289 | 287 | ||
290 | /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ | 288 | /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ |
@@ -1832,6 +1830,7 @@ struct sky2_port { | |||
1832 | struct net_device *netdev; | 1830 | struct net_device *netdev; |
1833 | unsigned port; | 1831 | unsigned port; |
1834 | u32 msg_enable; | 1832 | u32 msg_enable; |
1833 | spinlock_t phy_lock; | ||
1835 | 1834 | ||
1836 | spinlock_t tx_lock ____cacheline_aligned_in_smp; | 1835 | spinlock_t tx_lock ____cacheline_aligned_in_smp; |
1837 | struct tx_ring_info *tx_ring; | 1836 | struct tx_ring_info *tx_ring; |
@@ -1840,7 +1839,6 @@ struct sky2_port { | |||
1840 | u16 tx_prod; /* next le to use */ | 1839 | u16 tx_prod; /* next le to use */ |
1841 | u32 tx_addr64; | 1840 | u32 tx_addr64; |
1842 | u16 tx_pending; | 1841 | u16 tx_pending; |
1843 | u16 tx_last_put; | ||
1844 | u16 tx_last_mss; | 1842 | u16 tx_last_mss; |
1845 | 1843 | ||
1846 | struct ring_info *rx_ring ____cacheline_aligned_in_smp; | 1844 | struct ring_info *rx_ring ____cacheline_aligned_in_smp; |
@@ -1849,7 +1847,6 @@ struct sky2_port { | |||
1849 | u16 rx_next; /* next re to check */ | 1847 | u16 rx_next; /* next re to check */ |
1850 | u16 rx_put; /* next le index to use */ | 1848 | u16 rx_put; /* next le index to use */ |
1851 | u16 rx_pending; | 1849 | u16 rx_pending; |
1852 | u16 rx_last_put; | ||
1853 | u16 rx_bufsize; | 1850 | u16 rx_bufsize; |
1854 | #ifdef SKY2_VLAN_TAG_USED | 1851 | #ifdef SKY2_VLAN_TAG_USED |
1855 | u16 rx_tag; | 1852 | u16 rx_tag; |
@@ -1865,20 +1862,15 @@ struct sky2_port { | |||
1865 | u8 rx_pause; | 1862 | u8 rx_pause; |
1866 | u8 tx_pause; | 1863 | u8 tx_pause; |
1867 | u8 rx_csum; | 1864 | u8 rx_csum; |
1868 | u8 wol; | ||
1869 | 1865 | ||
1870 | struct net_device_stats net_stats; | 1866 | struct net_device_stats net_stats; |
1871 | 1867 | ||
1872 | struct work_struct phy_task; | ||
1873 | struct semaphore phy_sema; | ||
1874 | }; | 1868 | }; |
1875 | 1869 | ||
1876 | struct sky2_hw { | 1870 | struct sky2_hw { |
1877 | void __iomem *regs; | 1871 | void __iomem *regs; |
1878 | struct pci_dev *pdev; | 1872 | struct pci_dev *pdev; |
1879 | struct net_device *dev[2]; | 1873 | struct net_device *dev[2]; |
1880 | spinlock_t hw_lock; | ||
1881 | u32 intr_mask; | ||
1882 | 1874 | ||
1883 | int pm_cap; | 1875 | int pm_cap; |
1884 | u8 chip_id; | 1876 | u8 chip_id; |
@@ -1889,6 +1881,8 @@ struct sky2_hw { | |||
1889 | struct sky2_status_le *st_le; | 1881 | struct sky2_status_le *st_le; |
1890 | u32 st_idx; | 1882 | u32 st_idx; |
1891 | dma_addr_t st_dma; | 1883 | dma_addr_t st_dma; |
1884 | int msi_detected; | ||
1885 | wait_queue_head_t msi_wait; | ||
1892 | }; | 1886 | }; |
1893 | 1887 | ||
1894 | /* Register accessor for memory mapped device */ | 1888 | /* Register accessor for memory mapped device */ |
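
The sky2.h hunk above folds each port's PHY, MAC and descriptor-check interrupt bits into a single Y2_IS_PORT_1 or Y2_IS_PORT_2 composite, so sky2_up() and sky2_down() can set or clear one value per port. A rough sketch of composing such a mask from individual flags, using made-up bit positions rather than the real Yukon-2 layout:

    #include <stdio.h>

    /* Illustrative bit positions only, not the real Yukon-2 register layout. */
    enum {
            IS_IRQ_PHY1 = 1 << 4,
            IS_IRQ_MAC1 = 1 << 3,
            IS_CHK_RX1  = 1 << 2,
            IS_CHK_TXA1 = 1 << 0,

            /* one composite value covers everything a port needs */
            IS_PORT_1   = IS_IRQ_PHY1 | IS_IRQ_MAC1 | IS_CHK_TXA1 | IS_CHK_RX1,
    };

    int main(void)
    {
            printf("port 1 composite mask: 0x%02x\n", IS_PORT_1);  /* prints 0x1d */
            return 0;
    }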
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 75e9b3b910cc..0e9833adf9fe 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c | |||
@@ -215,15 +215,12 @@ struct smc_local { | |||
215 | 215 | ||
216 | spinlock_t lock; | 216 | spinlock_t lock; |
217 | 217 | ||
218 | #ifdef SMC_CAN_USE_DATACS | ||
219 | u32 __iomem *datacs; | ||
220 | #endif | ||
221 | |||
222 | #ifdef SMC_USE_PXA_DMA | 218 | #ifdef SMC_USE_PXA_DMA |
223 | /* DMA needs the physical address of the chip */ | 219 | /* DMA needs the physical address of the chip */ |
224 | u_long physaddr; | 220 | u_long physaddr; |
225 | #endif | 221 | #endif |
226 | void __iomem *base; | 222 | void __iomem *base; |
223 | void __iomem *datacs; | ||
227 | }; | 224 | }; |
228 | 225 | ||
229 | #if SMC_DEBUG > 0 | 226 | #if SMC_DEBUG > 0 |
@@ -2104,9 +2101,8 @@ static int smc_enable_device(struct platform_device *pdev) | |||
2104 | * Set the appropriate byte/word mode. | 2101 | * Set the appropriate byte/word mode. |
2105 | */ | 2102 | */ |
2106 | ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8; | 2103 | ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8; |
2107 | #ifndef SMC_CAN_USE_16BIT | 2104 | if (!SMC_CAN_USE_16BIT) |
2108 | ecsr |= ECSR_IOIS8; | 2105 | ecsr |= ECSR_IOIS8; |
2109 | #endif | ||
2110 | writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT)); | 2106 | writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT)); |
2111 | local_irq_restore(flags); | 2107 | local_irq_restore(flags); |
2112 | 2108 | ||
@@ -2143,40 +2139,39 @@ static void smc_release_attrib(struct platform_device *pdev) | |||
2143 | release_mem_region(res->start, ATTRIB_SIZE); | 2139 | release_mem_region(res->start, ATTRIB_SIZE); |
2144 | } | 2140 | } |
2145 | 2141 | ||
2146 | #ifdef SMC_CAN_USE_DATACS | 2142 | static inline void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) |
2147 | static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) | ||
2148 | { | 2143 | { |
2149 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | 2144 | if (SMC_CAN_USE_DATACS) { |
2150 | struct smc_local *lp = netdev_priv(ndev); | 2145 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); |
2146 | struct smc_local *lp = netdev_priv(ndev); | ||
2151 | 2147 | ||
2152 | if (!res) | 2148 | if (!res) |
2153 | return; | 2149 | return; |
2154 | 2150 | ||
2155 | if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) { | 2151 | if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) { |
2156 | printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME); | 2152 | printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME); |
2157 | return; | 2153 | return; |
2158 | } | 2154 | } |
2159 | 2155 | ||
2160 | lp->datacs = ioremap(res->start, SMC_DATA_EXTENT); | 2156 | lp->datacs = ioremap(res->start, SMC_DATA_EXTENT); |
2157 | } | ||
2161 | } | 2158 | } |
2162 | 2159 | ||
2163 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) | 2160 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) |
2164 | { | 2161 | { |
2165 | struct smc_local *lp = netdev_priv(ndev); | 2162 | if (SMC_CAN_USE_DATACS) { |
2166 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | 2163 | struct smc_local *lp = netdev_priv(ndev); |
2164 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | ||
2167 | 2165 | ||
2168 | if (lp->datacs) | 2166 | if (lp->datacs) |
2169 | iounmap(lp->datacs); | 2167 | iounmap(lp->datacs); |
2170 | 2168 | ||
2171 | lp->datacs = NULL; | 2169 | lp->datacs = NULL; |
2172 | 2170 | ||
2173 | if (res) | 2171 | if (res) |
2174 | release_mem_region(res->start, SMC_DATA_EXTENT); | 2172 | release_mem_region(res->start, SMC_DATA_EXTENT); |
2173 | } | ||
2175 | } | 2174 | } |
2176 | #else | ||
2177 | static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) {} | ||
2178 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) {} | ||
2179 | #endif | ||
2180 | 2175 | ||
2181 | /* | 2176 | /* |
2182 | * smc_init(void) | 2177 | * smc_init(void) |
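
The smc91x.c hunks above replace compile-time #ifdef blocks (SMC_CAN_USE_DATACS, SMC_CAN_USE_16BIT) with ordinary if (CONSTANT) tests: the constant folds at compile time so the dead branch is removed, yet both branches are still parsed and type-checked on every build. A small stand-alone sketch of the idea, with a hypothetical CAN_USE_16BIT flag and an illustrative mode bit rather than the real ECSR_IOIS8 encoding:

    #include <stdio.h>

    /* Hypothetical capability flag; per-platform headers would set it to 0 or 1,
     * the way smc91x.h defines SMC_CAN_USE_16BIT. */
    #ifndef CAN_USE_16BIT
    #define CAN_USE_16BIT 0
    #endif

    static void set_bus_width(unsigned char *ecsr)
    {
            /* The flag is a compile-time constant, so the optimizer drops the
             * dead branch, yet both branches are still compiled and checked. */
            if (!CAN_USE_16BIT)
                    *ecsr |= 0x01;          /* force 8-bit I/O (made-up bit) */
            else
                    *ecsr &= (unsigned char)~0x01;
    }

    int main(void)
    {
            unsigned char ecsr = 0;
            set_bus_width(&ecsr);
            printf("ecsr = 0x%02x\n", ecsr);
            return 0;
    }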
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index e0efd1964e72..e1be1af51201 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h | |||
@@ -275,7 +275,10 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) | |||
275 | #define SMC_insw(a,r,p,l) readsw ((void*) ((a) + (r)), p, l) | 275 | #define SMC_insw(a,r,p,l) readsw ((void*) ((a) + (r)), p, l) |
276 | #define SMC_outw(v,a,r) ({ writew ((v), (a) + (r)); LPD7A40X_IOBARRIER; }) | 276 | #define SMC_outw(v,a,r) ({ writew ((v), (a) + (r)); LPD7A40X_IOBARRIER; }) |
277 | 277 | ||
278 | static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) | 278 | #define SMC_outsw LPD7A40X_SMC_outsw |
279 | |||
280 | static inline void LPD7A40X_SMC_outsw(unsigned long a, int r, | ||
281 | unsigned char* p, int l) | ||
279 | { | 282 | { |
280 | unsigned short* ps = (unsigned short*) p; | 283 | unsigned short* ps = (unsigned short*) p; |
281 | while (l-- > 0) { | 284 | while (l-- > 0) { |
@@ -342,10 +345,6 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) | |||
342 | 345 | ||
343 | #endif | 346 | #endif |
344 | 347 | ||
345 | #ifndef SMC_IRQ_FLAGS | ||
346 | #define SMC_IRQ_FLAGS SA_TRIGGER_RISING | ||
347 | #endif | ||
348 | |||
349 | #ifdef SMC_USE_PXA_DMA | 348 | #ifdef SMC_USE_PXA_DMA |
350 | /* | 349 | /* |
351 | * Let's use the DMA engine on the XScale PXA2xx for RX packets. This is | 350 | * Let's use the DMA engine on the XScale PXA2xx for RX packets. This is |
@@ -441,10 +440,85 @@ smc_pxa_dma_irq(int dma, void *dummy, struct pt_regs *regs) | |||
441 | #endif /* SMC_USE_PXA_DMA */ | 440 | #endif /* SMC_USE_PXA_DMA */ |
442 | 441 | ||
443 | 442 | ||
444 | /* Because of bank switching, the LAN91x uses only 16 I/O ports */ | 443 | /* |
444 | * Everything a particular hardware setup needs should have been defined | ||
445 | * at this point. Add stubs for the undefined cases, mainly to avoid | ||
446 | * compilation warnings since they'll be optimized away, or to prevent buggy | ||
447 | * use of them. | ||
448 | */ | ||
449 | |||
450 | #if ! SMC_CAN_USE_32BIT | ||
451 | #define SMC_inl(ioaddr, reg) ({ BUG(); 0; }) | ||
452 | #define SMC_outl(x, ioaddr, reg) BUG() | ||
453 | #define SMC_insl(a, r, p, l) BUG() | ||
454 | #define SMC_outsl(a, r, p, l) BUG() | ||
455 | #endif | ||
456 | |||
457 | #if !defined(SMC_insl) || !defined(SMC_outsl) | ||
458 | #define SMC_insl(a, r, p, l) BUG() | ||
459 | #define SMC_outsl(a, r, p, l) BUG() | ||
460 | #endif | ||
461 | |||
462 | #if ! SMC_CAN_USE_16BIT | ||
463 | |||
464 | /* | ||
465 | * Any 16-bit access is performed with two 8-bit accesses if the hardware | ||
466 | * can't do it directly. Most registers are 16-bit so those are mandatory. | ||
467 | */ | ||
468 | #define SMC_outw(x, ioaddr, reg) \ | ||
469 | do { \ | ||
470 | unsigned int __val16 = (x); \ | ||
471 | SMC_outb( __val16, ioaddr, reg ); \ | ||
472 | SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ | ||
473 | } while (0) | ||
474 | #define SMC_inw(ioaddr, reg) \ | ||
475 | ({ \ | ||
476 | unsigned int __val16; \ | ||
477 | __val16 = SMC_inb( ioaddr, reg ); \ | ||
478 | __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ | ||
479 | __val16; \ | ||
480 | }) | ||
481 | |||
482 | #define SMC_insw(a, r, p, l) BUG() | ||
483 | #define SMC_outsw(a, r, p, l) BUG() | ||
484 | |||
485 | #endif | ||
486 | |||
487 | #if !defined(SMC_insw) || !defined(SMC_outsw) | ||
488 | #define SMC_insw(a, r, p, l) BUG() | ||
489 | #define SMC_outsw(a, r, p, l) BUG() | ||
490 | #endif | ||
491 | |||
492 | #if ! SMC_CAN_USE_8BIT | ||
493 | #define SMC_inb(ioaddr, reg) ({ BUG(); 0; }) | ||
494 | #define SMC_outb(x, ioaddr, reg) BUG() | ||
495 | #define SMC_insb(a, r, p, l) BUG() | ||
496 | #define SMC_outsb(a, r, p, l) BUG() | ||
497 | #endif | ||
498 | |||
499 | #if !defined(SMC_insb) || !defined(SMC_outsb) | ||
500 | #define SMC_insb(a, r, p, l) BUG() | ||
501 | #define SMC_outsb(a, r, p, l) BUG() | ||
502 | #endif | ||
503 | |||
504 | #ifndef SMC_CAN_USE_DATACS | ||
505 | #define SMC_CAN_USE_DATACS 0 | ||
506 | #endif | ||
507 | |||
445 | #ifndef SMC_IO_SHIFT | 508 | #ifndef SMC_IO_SHIFT |
446 | #define SMC_IO_SHIFT 0 | 509 | #define SMC_IO_SHIFT 0 |
447 | #endif | 510 | #endif |
511 | |||
512 | #ifndef SMC_IRQ_FLAGS | ||
513 | #define SMC_IRQ_FLAGS SA_TRIGGER_RISING | ||
514 | #endif | ||
515 | |||
516 | #ifndef SMC_INTERRUPT_PREAMBLE | ||
517 | #define SMC_INTERRUPT_PREAMBLE | ||
518 | #endif | ||
519 | |||
520 | |||
521 | /* Because of bank switching, the LAN91x uses only 16 I/O ports */ | ||
448 | #define SMC_IO_EXTENT (16 << SMC_IO_SHIFT) | 522 | #define SMC_IO_EXTENT (16 << SMC_IO_SHIFT) |
449 | #define SMC_DATA_EXTENT (4) | 523 | #define SMC_DATA_EXTENT (4) |
450 | 524 | ||
@@ -817,6 +891,11 @@ static const char * chip_ids[ 16 ] = { | |||
817 | * Note: the following macros do *not* select the bank -- this must | 891 | * Note: the following macros do *not* select the bank -- this must |
818 | * be done separately as needed in the main code. The SMC_REG() macro | 892 | * be done separately as needed in the main code. The SMC_REG() macro |
819 | * only uses the bank argument for debugging purposes (when enabled). | 893 | * only uses the bank argument for debugging purposes (when enabled). |
894 | * | ||
895 | * Note: despite inline functions being safer, everything leading to this | ||
896 | * should preferably be macros to let BUG() display the line number in | ||
897 | * the core source code since we're interested in the top call site, | ||
898 | * not in any inline function location. | ||
820 | */ | 899 | */ |
821 | 900 | ||
822 | #if SMC_DEBUG > 0 | 901 | #if SMC_DEBUG > 0 |
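
The note added above keeps the register helpers as macros rather than inline functions because BUG() reports __FILE__ and __LINE__; a macro expands at the call site, so the report points at the caller, while an inline function would always report its own body. A user-space sketch of the difference, using a hypothetical FAIL() macro in place of BUG():

    #include <stdio.h>

    /* Hypothetical stand-in for BUG(): just report where it fired. */
    #define FAIL()  printf("failure at line %d\n", __LINE__)

    /* Expands at each call site, so __LINE__ is the caller's line. */
    #define CHECK_MACRO(cond)   do { if (!(cond)) FAIL(); } while (0)

    /* Expands once, here, so __LINE__ is always a line inside this function. */
    static inline void check_inline(int cond)
    {
            if (!cond)
                    FAIL();
    }

    int main(void)
    {
            CHECK_MACRO(0);         /* reports this line number */
            check_inline(0);        /* reports the line inside check_inline() */
            return 0;
    }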
@@ -834,62 +913,142 @@ static const char * chip_ids[ 16 ] = { | |||
834 | #define SMC_REG(reg, bank) (reg<<SMC_IO_SHIFT) | 913 | #define SMC_REG(reg, bank) (reg<<SMC_IO_SHIFT) |
835 | #endif | 914 | #endif |
836 | 915 | ||
837 | #if SMC_CAN_USE_8BIT | 916 | /* |
838 | #define SMC_GET_PN() SMC_inb( ioaddr, PN_REG ) | 917 | * Hack Alert: Some setups just can't write 8 or 16 bits reliably when not |
839 | #define SMC_SET_PN(x) SMC_outb( x, ioaddr, PN_REG ) | 918 | * aligned to a 32 bit boundary. I tell you that does exist! |
840 | #define SMC_GET_AR() SMC_inb( ioaddr, AR_REG ) | 919 | * Fortunately the affected register accesses can be easily worked around |
841 | #define SMC_GET_TXFIFO() SMC_inb( ioaddr, TXFIFO_REG ) | 920 | * since we can write zeroes to the preceding 16 bits without adverse |
842 | #define SMC_GET_RXFIFO() SMC_inb( ioaddr, RXFIFO_REG ) | 921 | * effects and use a 32-bit access. |
843 | #define SMC_GET_INT() SMC_inb( ioaddr, INT_REG ) | 922 | * |
844 | #define SMC_ACK_INT(x) SMC_outb( x, ioaddr, INT_REG ) | 923 | * Enforce it on any 32-bit capable setup for now. |
845 | #define SMC_GET_INT_MASK() SMC_inb( ioaddr, IM_REG ) | 924 | */ |
846 | #define SMC_SET_INT_MASK(x) SMC_outb( x, ioaddr, IM_REG ) | 925 | #define SMC_MUST_ALIGN_WRITE SMC_CAN_USE_32BIT |
847 | #else | 926 | |
848 | #define SMC_GET_PN() (SMC_inw( ioaddr, PN_REG ) & 0xFF) | 927 | #define SMC_GET_PN() \ |
849 | #define SMC_SET_PN(x) SMC_outw( x, ioaddr, PN_REG ) | 928 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, PN_REG)) \ |
850 | #define SMC_GET_AR() (SMC_inw( ioaddr, PN_REG ) >> 8) | 929 | : (SMC_inw(ioaddr, PN_REG) & 0xFF) ) |
851 | #define SMC_GET_TXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) & 0xFF) | 930 | |
852 | #define SMC_GET_RXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) >> 8) | 931 | #define SMC_SET_PN(x) \ |
853 | #define SMC_GET_INT() (SMC_inw( ioaddr, INT_REG ) & 0xFF) | 932 | do { \ |
933 | if (SMC_MUST_ALIGN_WRITE) \ | ||
934 | SMC_outl((x)<<16, ioaddr, SMC_REG(0, 2)); \ | ||
935 | else if (SMC_CAN_USE_8BIT) \ | ||
936 | SMC_outb(x, ioaddr, PN_REG); \ | ||
937 | else \ | ||
938 | SMC_outw(x, ioaddr, PN_REG); \ | ||
939 | } while (0) | ||
940 | |||
941 | #define SMC_GET_AR() \ | ||
942 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, AR_REG)) \ | ||
943 | : (SMC_inw(ioaddr, PN_REG) >> 8) ) | ||
944 | |||
945 | #define SMC_GET_TXFIFO() \ | ||
946 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, TXFIFO_REG)) \ | ||
947 | : (SMC_inw(ioaddr, TXFIFO_REG) & 0xFF) ) | ||
948 | |||
949 | #define SMC_GET_RXFIFO() \ | ||
950 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, RXFIFO_REG)) \ | ||
951 | : (SMC_inw(ioaddr, TXFIFO_REG) >> 8) ) | ||
952 | |||
953 | #define SMC_GET_INT() \ | ||
954 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, INT_REG)) \ | ||
955 | : (SMC_inw(ioaddr, INT_REG) & 0xFF) ) | ||
956 | |||
854 | #define SMC_ACK_INT(x) \ | 957 | #define SMC_ACK_INT(x) \ |
855 | do { \ | 958 | do { \ |
856 | unsigned long __flags; \ | 959 | if (SMC_CAN_USE_8BIT) \ |
857 | int __mask; \ | 960 | SMC_outb(x, ioaddr, INT_REG); \ |
858 | local_irq_save(__flags); \ | 961 | else { \ |
859 | __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \ | 962 | unsigned long __flags; \ |
860 | SMC_outw( __mask | (x), ioaddr, INT_REG ); \ | 963 | int __mask; \ |
861 | local_irq_restore(__flags); \ | 964 | local_irq_save(__flags); \ |
965 | __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \ | ||
966 | SMC_outw( __mask | (x), ioaddr, INT_REG ); \ | ||
967 | local_irq_restore(__flags); \ | ||
968 | } \ | ||
969 | } while (0) | ||
970 | |||
971 | #define SMC_GET_INT_MASK() \ | ||
972 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, IM_REG)) \ | ||
973 | : (SMC_inw( ioaddr, INT_REG ) >> 8) ) | ||
974 | |||
975 | #define SMC_SET_INT_MASK(x) \ | ||
976 | do { \ | ||
977 | if (SMC_CAN_USE_8BIT) \ | ||
978 | SMC_outb(x, ioaddr, IM_REG); \ | ||
979 | else \ | ||
980 | SMC_outw((x) << 8, ioaddr, INT_REG); \ | ||
981 | } while (0) | ||
982 | |||
983 | #define SMC_CURRENT_BANK() SMC_inw(ioaddr, BANK_SELECT) | ||
984 | |||
985 | #define SMC_SELECT_BANK(x) \ | ||
986 | do { \ | ||
987 | if (SMC_MUST_ALIGN_WRITE) \ | ||
988 | SMC_outl((x)<<16, ioaddr, 12<<SMC_IO_SHIFT); \ | ||
989 | else \ | ||
990 | SMC_outw(x, ioaddr, BANK_SELECT); \ | ||
991 | } while (0) | ||
992 | |||
993 | #define SMC_GET_BASE() SMC_inw(ioaddr, BASE_REG) | ||
994 | |||
995 | #define SMC_SET_BASE(x) SMC_outw(x, ioaddr, BASE_REG) | ||
996 | |||
997 | #define SMC_GET_CONFIG() SMC_inw(ioaddr, CONFIG_REG) | ||
998 | |||
999 | #define SMC_SET_CONFIG(x) SMC_outw(x, ioaddr, CONFIG_REG) | ||
1000 | |||
1001 | #define SMC_GET_COUNTER() SMC_inw(ioaddr, COUNTER_REG) | ||
1002 | |||
1003 | #define SMC_GET_CTL() SMC_inw(ioaddr, CTL_REG) | ||
1004 | |||
1005 | #define SMC_SET_CTL(x) SMC_outw(x, ioaddr, CTL_REG) | ||
1006 | |||
1007 | #define SMC_GET_MII() SMC_inw(ioaddr, MII_REG) | ||
1008 | |||
1009 | #define SMC_SET_MII(x) SMC_outw(x, ioaddr, MII_REG) | ||
1010 | |||
1011 | #define SMC_GET_MIR() SMC_inw(ioaddr, MIR_REG) | ||
1012 | |||
1013 | #define SMC_SET_MIR(x) SMC_outw(x, ioaddr, MIR_REG) | ||
1014 | |||
1015 | #define SMC_GET_MMU_CMD() SMC_inw(ioaddr, MMU_CMD_REG) | ||
1016 | |||
1017 | #define SMC_SET_MMU_CMD(x) SMC_outw(x, ioaddr, MMU_CMD_REG) | ||
1018 | |||
1019 | #define SMC_GET_FIFO() SMC_inw(ioaddr, FIFO_REG) | ||
1020 | |||
1021 | #define SMC_GET_PTR() SMC_inw(ioaddr, PTR_REG) | ||
1022 | |||
1023 | #define SMC_SET_PTR(x) \ | ||
1024 | do { \ | ||
1025 | if (SMC_MUST_ALIGN_WRITE) \ | ||
1026 | SMC_outl((x)<<16, ioaddr, SMC_REG(4, 2)); \ | ||
1027 | else \ | ||
1028 | SMC_outw(x, ioaddr, PTR_REG); \ | ||
862 | } while (0) | 1029 | } while (0) |
863 | #define SMC_GET_INT_MASK() (SMC_inw( ioaddr, INT_REG ) >> 8) | ||
864 | #define SMC_SET_INT_MASK(x) SMC_outw( (x) << 8, ioaddr, INT_REG ) | ||
865 | #endif | ||
866 | 1030 | ||
867 | #define SMC_CURRENT_BANK() SMC_inw( ioaddr, BANK_SELECT ) | 1031 | #define SMC_GET_EPH_STATUS() SMC_inw(ioaddr, EPH_STATUS_REG) |
868 | #define SMC_SELECT_BANK(x) SMC_outw( x, ioaddr, BANK_SELECT ) | 1032 | |
869 | #define SMC_GET_BASE() SMC_inw( ioaddr, BASE_REG ) | 1033 | #define SMC_GET_RCR() SMC_inw(ioaddr, RCR_REG) |
870 | #define SMC_SET_BASE(x) SMC_outw( x, ioaddr, BASE_REG ) | 1034 | |
871 | #define SMC_GET_CONFIG() SMC_inw( ioaddr, CONFIG_REG ) | 1035 | #define SMC_SET_RCR(x) SMC_outw(x, ioaddr, RCR_REG) |
872 | #define SMC_SET_CONFIG(x) SMC_outw( x, ioaddr, CONFIG_REG ) | 1036 | |
873 | #define SMC_GET_COUNTER() SMC_inw( ioaddr, COUNTER_REG ) | 1037 | #define SMC_GET_REV() SMC_inw(ioaddr, REV_REG) |
874 | #define SMC_GET_CTL() SMC_inw( ioaddr, CTL_REG ) | 1038 | |
875 | #define SMC_SET_CTL(x) SMC_outw( x, ioaddr, CTL_REG ) | 1039 | #define SMC_GET_RPC() SMC_inw(ioaddr, RPC_REG) |
876 | #define SMC_GET_MII() SMC_inw( ioaddr, MII_REG ) | 1040 | |
877 | #define SMC_SET_MII(x) SMC_outw( x, ioaddr, MII_REG ) | 1041 | #define SMC_SET_RPC(x) \ |
878 | #define SMC_GET_MIR() SMC_inw( ioaddr, MIR_REG ) | 1042 | do { \ |
879 | #define SMC_SET_MIR(x) SMC_outw( x, ioaddr, MIR_REG ) | 1043 | if (SMC_MUST_ALIGN_WRITE) \ |
880 | #define SMC_GET_MMU_CMD() SMC_inw( ioaddr, MMU_CMD_REG ) | 1044 | SMC_outl((x)<<16, ioaddr, SMC_REG(8, 0)); \ |
881 | #define SMC_SET_MMU_CMD(x) SMC_outw( x, ioaddr, MMU_CMD_REG ) | 1045 | else \ |
882 | #define SMC_GET_FIFO() SMC_inw( ioaddr, FIFO_REG ) | 1046 | SMC_outw(x, ioaddr, RPC_REG); \ |
883 | #define SMC_GET_PTR() SMC_inw( ioaddr, PTR_REG ) | 1047 | } while (0) |
884 | #define SMC_SET_PTR(x) SMC_outw( x, ioaddr, PTR_REG ) | 1048 | |
885 | #define SMC_GET_EPH_STATUS() SMC_inw( ioaddr, EPH_STATUS_REG ) | 1049 | #define SMC_GET_TCR() SMC_inw(ioaddr, TCR_REG) |
886 | #define SMC_GET_RCR() SMC_inw( ioaddr, RCR_REG ) | 1050 | |
887 | #define SMC_SET_RCR(x) SMC_outw( x, ioaddr, RCR_REG ) | 1051 | #define SMC_SET_TCR(x) SMC_outw(x, ioaddr, TCR_REG) |
888 | #define SMC_GET_REV() SMC_inw( ioaddr, REV_REG ) | ||
889 | #define SMC_GET_RPC() SMC_inw( ioaddr, RPC_REG ) | ||
890 | #define SMC_SET_RPC(x) SMC_outw( x, ioaddr, RPC_REG ) | ||
891 | #define SMC_GET_TCR() SMC_inw( ioaddr, TCR_REG ) | ||
892 | #define SMC_SET_TCR(x) SMC_outw( x, ioaddr, TCR_REG ) | ||
893 | 1052 | ||
894 | #ifndef SMC_GET_MAC_ADDR | 1053 | #ifndef SMC_GET_MAC_ADDR |
895 | #define SMC_GET_MAC_ADDR(addr) \ | 1054 | #define SMC_GET_MAC_ADDR(addr) \ |
@@ -920,151 +1079,84 @@ static const char * chip_ids[ 16 ] = { | |||
920 | SMC_outw( mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4 ); \ | 1079 | SMC_outw( mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4 ); \ |
921 | } while (0) | 1080 | } while (0) |
922 | 1081 | ||
923 | #if SMC_CAN_USE_32BIT | ||
924 | /* | ||
925 | * Some setups just can't write 8 or 16 bits reliably when not aligned | ||
926 | * to a 32 bit boundary. I tell you that exists! | ||
927 | * We re-do the ones here that can be easily worked around if they can have | ||
928 | * their low parts written to 0 without adverse effects. | ||
929 | */ | ||
930 | #undef SMC_SELECT_BANK | ||
931 | #define SMC_SELECT_BANK(x) SMC_outl( (x)<<16, ioaddr, 12<<SMC_IO_SHIFT ) | ||
932 | #undef SMC_SET_RPC | ||
933 | #define SMC_SET_RPC(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(8, 0) ) | ||
934 | #undef SMC_SET_PN | ||
935 | #define SMC_SET_PN(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(0, 2) ) | ||
936 | #undef SMC_SET_PTR | ||
937 | #define SMC_SET_PTR(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(4, 2) ) | ||
938 | #endif | ||
939 | |||
940 | #if SMC_CAN_USE_32BIT | ||
941 | #define SMC_PUT_PKT_HDR(status, length) \ | ||
942 | SMC_outl( (status) | (length) << 16, ioaddr, DATA_REG ) | ||
943 | #define SMC_GET_PKT_HDR(status, length) \ | ||
944 | do { \ | ||
945 | unsigned int __val = SMC_inl( ioaddr, DATA_REG ); \ | ||
946 | (status) = __val & 0xffff; \ | ||
947 | (length) = __val >> 16; \ | ||
948 | } while (0) | ||
949 | #else | ||
950 | #define SMC_PUT_PKT_HDR(status, length) \ | 1082 | #define SMC_PUT_PKT_HDR(status, length) \ |
951 | do { \ | 1083 | do { \ |
952 | SMC_outw( status, ioaddr, DATA_REG ); \ | 1084 | if (SMC_CAN_USE_32BIT) \ |
953 | SMC_outw( length, ioaddr, DATA_REG ); \ | 1085 | SMC_outl((status) | (length)<<16, ioaddr, DATA_REG); \ |
954 | } while (0) | 1086 | else { \ |
955 | #define SMC_GET_PKT_HDR(status, length) \ | 1087 | SMC_outw(status, ioaddr, DATA_REG); \ |
956 | do { \ | 1088 | SMC_outw(length, ioaddr, DATA_REG); \ |
957 | (status) = SMC_inw( ioaddr, DATA_REG ); \ | 1089 | } \ |
958 | (length) = SMC_inw( ioaddr, DATA_REG ); \ | ||
959 | } while (0) | 1090 | } while (0) |
960 | #endif | ||
961 | 1091 | ||
962 | #if SMC_CAN_USE_32BIT | 1092 | #define SMC_GET_PKT_HDR(status, length) \ |
963 | #define _SMC_PUSH_DATA(p, l) \ | ||
964 | do { \ | 1093 | do { \ |
965 | char *__ptr = (p); \ | 1094 | if (SMC_CAN_USE_32BIT) { \ |
966 | int __len = (l); \ | 1095 | unsigned int __val = SMC_inl(ioaddr, DATA_REG); \ |
967 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | 1096 | (status) = __val & 0xffff; \ |
968 | __len -= 2; \ | 1097 | (length) = __val >> 16; \ |
969 | SMC_outw( *(u16 *)__ptr, ioaddr, DATA_REG ); \ | 1098 | } else { \ |
970 | __ptr += 2; \ | 1099 | (status) = SMC_inw(ioaddr, DATA_REG); \ |
971 | } \ | 1100 | (length) = SMC_inw(ioaddr, DATA_REG); \ |
972 | SMC_outsl( ioaddr, DATA_REG, __ptr, __len >> 2); \ | ||
973 | if (__len & 2) { \ | ||
974 | __ptr += (__len & ~3); \ | ||
975 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
976 | } \ | 1101 | } \ |
977 | } while (0) | 1102 | } while (0) |
978 | #define _SMC_PULL_DATA(p, l) \ | ||
979 | do { \ | ||
980 | char *__ptr = (p); \ | ||
981 | int __len = (l); \ | ||
982 | if ((unsigned long)__ptr & 2) { \ | ||
983 | /* \ | ||
984 | * We want 32bit alignment here. \ | ||
985 | * Since some buses perform a full 32bit \ | ||
986 | * fetch even for 16bit data we can't use \ | ||
987 | * SMC_inw() here. Back both source (on chip \ | ||
988 | * and destination) pointers of 2 bytes. \ | ||
989 | */ \ | ||
990 | __ptr -= 2; \ | ||
991 | __len += 2; \ | ||
992 | SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \ | ||
993 | } \ | ||
994 | __len += 2; \ | ||
995 | SMC_insl( ioaddr, DATA_REG, __ptr, __len >> 2); \ | ||
996 | } while (0) | ||
997 | #elif SMC_CAN_USE_16BIT | ||
998 | #define _SMC_PUSH_DATA(p, l) SMC_outsw( ioaddr, DATA_REG, p, (l) >> 1 ) | ||
999 | #define _SMC_PULL_DATA(p, l) SMC_insw ( ioaddr, DATA_REG, p, (l) >> 1 ) | ||
1000 | #elif SMC_CAN_USE_8BIT | ||
1001 | #define _SMC_PUSH_DATA(p, l) SMC_outsb( ioaddr, DATA_REG, p, l ) | ||
1002 | #define _SMC_PULL_DATA(p, l) SMC_insb ( ioaddr, DATA_REG, p, l ) | ||
1003 | #endif | ||
1004 | 1103 | ||
1005 | #if ! SMC_CAN_USE_16BIT | 1104 | #define SMC_PUSH_DATA(p, l) \ |
1006 | #define SMC_outw(x, ioaddr, reg) \ | ||
1007 | do { \ | 1105 | do { \ |
1008 | unsigned int __val16 = (x); \ | 1106 | if (SMC_CAN_USE_32BIT) { \ |
1009 | SMC_outb( __val16, ioaddr, reg ); \ | 1107 | void *__ptr = (p); \ |
1010 | SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ | 1108 | int __len = (l); \ |
1109 | void *__ioaddr = ioaddr; \ | ||
1110 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | ||
1111 | __len -= 2; \ | ||
1112 | SMC_outw(*(u16 *)__ptr, ioaddr, DATA_REG); \ | ||
1113 | __ptr += 2; \ | ||
1114 | } \ | ||
1115 | if (SMC_CAN_USE_DATACS && lp->datacs) \ | ||
1116 | __ioaddr = lp->datacs; \ | ||
1117 | SMC_outsl(__ioaddr, DATA_REG, __ptr, __len>>2); \ | ||
1118 | if (__len & 2) { \ | ||
1119 | __ptr += (__len & ~3); \ | ||
1120 | SMC_outw(*((u16 *)__ptr), ioaddr, DATA_REG); \ | ||
1121 | } \ | ||
1122 | } else if (SMC_CAN_USE_16BIT) \ | ||
1123 | SMC_outsw(ioaddr, DATA_REG, p, (l) >> 1); \ | ||
1124 | else if (SMC_CAN_USE_8BIT) \ | ||
1125 | SMC_outsb(ioaddr, DATA_REG, p, l); \ | ||
1011 | } while (0) | 1126 | } while (0) |
1012 | #define SMC_inw(ioaddr, reg) \ | ||
1013 | ({ \ | ||
1014 | unsigned int __val16; \ | ||
1015 | __val16 = SMC_inb( ioaddr, reg ); \ | ||
1016 | __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ | ||
1017 | __val16; \ | ||
1018 | }) | ||
1019 | #endif | ||
1020 | |||
1021 | #ifdef SMC_CAN_USE_DATACS | ||
1022 | #define SMC_PUSH_DATA(p, l) \ | ||
1023 | if ( lp->datacs ) { \ | ||
1024 | unsigned char *__ptr = (p); \ | ||
1025 | int __len = (l); \ | ||
1026 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | ||
1027 | __len -= 2; \ | ||
1028 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
1029 | __ptr += 2; \ | ||
1030 | } \ | ||
1031 | outsl(lp->datacs, __ptr, __len >> 2); \ | ||
1032 | if (__len & 2) { \ | ||
1033 | __ptr += (__len & ~3); \ | ||
1034 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
1035 | } \ | ||
1036 | } else { \ | ||
1037 | _SMC_PUSH_DATA(p, l); \ | ||
1038 | } | ||
1039 | 1127 | ||
1040 | #define SMC_PULL_DATA(p, l) \ | 1128 | #define SMC_PULL_DATA(p, l) \ |
1041 | if ( lp->datacs ) { \ | 1129 | do { \ |
1042 | unsigned char *__ptr = (p); \ | 1130 | if (SMC_CAN_USE_32BIT) { \ |
1043 | int __len = (l); \ | 1131 | void *__ptr = (p); \ |
1044 | if ((unsigned long)__ptr & 2) { \ | 1132 | int __len = (l); \ |
1045 | /* \ | 1133 | void *__ioaddr = ioaddr; \ |
1046 | * We want 32bit alignment here. \ | 1134 | if ((unsigned long)__ptr & 2) { \ |
1047 | * Since some buses perform a full 32bit \ | 1135 | /* \ |
1048 | * fetch even for 16bit data we can't use \ | 1136 | * We want 32bit alignment here. \ |
1049 | * SMC_inw() here. Back both source (on chip \ | 1137 | * Since some buses perform a full \ |
1050 | * and destination) pointers of 2 bytes. \ | 1138 | * 32bit fetch even for 16bit data \ |
1051 | */ \ | 1139 | * we can't use SMC_inw() here. \ |
1052 | __ptr -= 2; \ | 1140 | * Back both source (on-chip) and \ |
1141 | * destination pointers by 2 bytes. \ | ||
1142 | * This is possible since the call to \ | ||
1143 | * SMC_GET_PKT_HDR() already advanced \ | ||
1144 | * the source pointer by 4 bytes, and \ | ||
1145 | * the skb_reserve(skb, 2) advanced \ | ||
1146 | * the destination pointer by 2 bytes. \ | ||
1147 | */ \ | ||
1148 | __ptr -= 2; \ | ||
1149 | __len += 2; \ | ||
1150 | SMC_SET_PTR(2|PTR_READ|PTR_RCV|PTR_AUTOINC); \ | ||
1151 | } \ | ||
1152 | if (SMC_CAN_USE_DATACS && lp->datacs) \ | ||
1153 | __ioaddr = lp->datacs; \ | ||
1053 | __len += 2; \ | 1154 | __len += 2; \ |
1054 | SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \ | 1155 | SMC_insl(__ioaddr, DATA_REG, __ptr, __len>>2); \ |
1055 | } \ | 1156 | } else if (SMC_CAN_USE_16BIT) \ |
1056 | __len += 2; \ | 1157 | SMC_insw(ioaddr, DATA_REG, p, (l) >> 1); \ |
1057 | insl( lp->datacs, __ptr, __len >> 2); \ | 1158 | else if (SMC_CAN_USE_8BIT) \ |
1058 | } else { \ | 1159 | SMC_insb(ioaddr, DATA_REG, p, l); \ |
1059 | _SMC_PULL_DATA(p, l); \ | 1160 | } while (0) |
1060 | } | ||
1061 | #else | ||
1062 | #define SMC_PUSH_DATA(p, l) _SMC_PUSH_DATA(p, l) | ||
1063 | #define SMC_PULL_DATA(p, l) _SMC_PULL_DATA(p, l) | ||
1064 | #endif | ||
1065 | |||
1066 | #if !defined (SMC_INTERRUPT_PREAMBLE) | ||
1067 | # define SMC_INTERRUPT_PREAMBLE | ||
1068 | #endif | ||
1069 | 1161 | ||
1070 | #endif /* _SMC91X_H_ */ | 1162 | #endif /* _SMC91X_H_ */ |
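
The smc91x.h rework above folds the old per-capability #if blocks into macros that branch on the SMC_CAN_USE_* constants, and it keeps two access tricks in one place: composing a 16-bit register access from two 8-bit ones when only an 8-bit bus is available, and writing a 16-bit register as an aligned 32-bit store of (value << 16) when SMC_MUST_ALIGN_WRITE is set, which zeroes the preceding 16 bits as the comment notes. The stand-alone sketch below models both tricks against a fake little-endian register window; the sim_* names and the io[] array are illustrative only, not the driver's macros.

/* Sketch only: a fake 16-byte register window, a 16-bit access built
 * from two 8-bit accesses (as in the !SMC_CAN_USE_16BIT fallback), and
 * the aligned 32-bit write trick used when SMC_MUST_ALIGN_WRITE is set. */
#include <stdint.h>
#include <stdio.h>

static uint8_t io[16];          /* stands in for the chip's registers */

static void sim_outb(uint8_t v, unsigned reg) { io[reg] = v; }
static uint8_t sim_inb(unsigned reg) { return io[reg]; }

/* 16-bit access composed from two 8-bit accesses, low byte first. */
static void sim_outw(uint16_t v, unsigned reg)
{
        sim_outb(v & 0xff, reg);
        sim_outb(v >> 8, reg + 1);
}

static uint16_t sim_inw(unsigned reg)
{
        return sim_inb(reg) | (sim_inb(reg + 1) << 8);
}

/* Write a 16-bit register sitting at offset 2 within a 32-bit word:
 * store (v << 16) as one aligned 32-bit access, so the preceding
 * 16 bits are written as zero and the target register receives v. */
static void sim_outw_aligned(uint16_t v, unsigned reg)
{
        uint32_t word = (uint32_t)v << 16;
        unsigned base = reg & ~3u;

        io[base + 0] = word & 0xff;
        io[base + 1] = (word >> 8) & 0xff;
        io[base + 2] = (word >> 16) & 0xff;
        io[base + 3] = word >> 24;
}

int main(void)
{
        sim_outw(0x1234, 6);
        printf("reg6 = 0x%04x\n", (unsigned)sim_inw(6));   /* 0x1234 */
        sim_outw_aligned(0xbeef, 6);
        printf("reg6 = 0x%04x\n", (unsigned)sim_inw(6));   /* 0xbeef */
        return 0;
}
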
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index ff79e68b347c..7b82ff090d42 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c | |||
@@ -3639,7 +3639,7 @@ iscsi_tcp_init(void) | |||
3639 | 3639 | ||
3640 | taskcache = kmem_cache_create("iscsi_taskcache", | 3640 | taskcache = kmem_cache_create("iscsi_taskcache", |
3641 | sizeof(struct iscsi_data_task), 0, | 3641 | sizeof(struct iscsi_data_task), 0, |
3642 | SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL); | 3642 | SLAB_HWCACHE_ALIGN, NULL, NULL); |
3643 | if (!taskcache) | 3643 | if (!taskcache) |
3644 | return -ENOMEM; | 3644 | return -ENOMEM; |
3645 | 3645 | ||
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index a8b05ce5de52..7405d0df95db 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c | |||
@@ -1139,32 +1139,6 @@ sg_fasync(int fd, struct file *filp, int mode) | |||
1139 | return (retval < 0) ? retval : 0; | 1139 | return (retval < 0) ? retval : 0; |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | /* When startFinish==1 increments page counts for pages other than the | ||
1143 | first of scatter gather elements obtained from alloc_pages(). | ||
1144 | When startFinish==0 decrements ... */ | ||
1145 | static void | ||
1146 | sg_rb_correct4mmap(Sg_scatter_hold * rsv_schp, int startFinish) | ||
1147 | { | ||
1148 | struct scatterlist *sg = rsv_schp->buffer; | ||
1149 | struct page *page; | ||
1150 | int k, m; | ||
1151 | |||
1152 | SCSI_LOG_TIMEOUT(3, printk("sg_rb_correct4mmap: startFinish=%d, scatg=%d\n", | ||
1153 | startFinish, rsv_schp->k_use_sg)); | ||
1154 | /* N.B. correction _not_ applied to base page of each allocation */ | ||
1155 | for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) { | ||
1156 | for (m = PAGE_SIZE; m < sg->length; m += PAGE_SIZE) { | ||
1157 | page = sg->page; | ||
1158 | if (startFinish) | ||
1159 | get_page(page); | ||
1160 | else { | ||
1161 | if (page_count(page) > 0) | ||
1162 | __put_page(page); | ||
1163 | } | ||
1164 | } | ||
1165 | } | ||
1166 | } | ||
1167 | |||
1168 | static struct page * | 1142 | static struct page * |
1169 | sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type) | 1143 | sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type) |
1170 | { | 1144 | { |
@@ -1236,10 +1210,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1236 | sa += len; | 1210 | sa += len; |
1237 | } | 1211 | } |
1238 | 1212 | ||
1239 | if (0 == sfp->mmap_called) { | 1213 | sfp->mmap_called = 1; |
1240 | sg_rb_correct4mmap(rsv_schp, 1); /* do only once per fd lifetime */ | ||
1241 | sfp->mmap_called = 1; | ||
1242 | } | ||
1243 | vma->vm_flags |= VM_RESERVED; | 1214 | vma->vm_flags |= VM_RESERVED; |
1244 | vma->vm_private_data = sfp; | 1215 | vma->vm_private_data = sfp; |
1245 | vma->vm_ops = &sg_mmap_vm_ops; | 1216 | vma->vm_ops = &sg_mmap_vm_ops; |
@@ -2388,8 +2359,6 @@ __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) | |||
2388 | SCSI_LOG_TIMEOUT(6, | 2359 | SCSI_LOG_TIMEOUT(6, |
2389 | printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", | 2360 | printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", |
2390 | (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); | 2361 | (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); |
2391 | if (sfp->mmap_called) | ||
2392 | sg_rb_correct4mmap(&sfp->reserve, 0); /* undo correction */ | ||
2393 | sg_remove_scat(&sfp->reserve); | 2362 | sg_remove_scat(&sfp->reserve); |
2394 | } | 2363 | } |
2395 | sfp->parentdp = NULL; | 2364 | sfp->parentdp = NULL; |
@@ -2471,9 +2440,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp) | |||
2471 | return resp; | 2440 | return resp; |
2472 | 2441 | ||
2473 | if (lowDma) | 2442 | if (lowDma) |
2474 | page_mask = GFP_ATOMIC | GFP_DMA | __GFP_NOWARN; | 2443 | page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; |
2475 | else | 2444 | else |
2476 | page_mask = GFP_ATOMIC | __GFP_NOWARN; | 2445 | page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; |
2477 | 2446 | ||
2478 | for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; | 2447 | for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; |
2479 | order++, a_size <<= 1) ; | 2448 | order++, a_size <<= 1) ; |
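
The sg.c hunks above remove sg_rb_correct4mmap() and its callers, and compensate by adding __GFP_COMP to the reserve-buffer allocation: a high-order block allocated as a compound page keeps its reference counting on the head page, so the mm layer can take and drop references on the mapping's pages without the driver's manual per-page correction. A kernel-style sketch of an allocation helper in that spirit follows; the helper name is made up for illustration and the fragment only builds inside a kernel tree.

/* Hypothetical helper mirroring the patched sg_page_malloc() flags:
 * allocate 2^order contiguous pages as one compound page so later
 * get_page()/put_page() calls from an mmap()ed mapping are safe. */
#include <linux/gfp.h>

static unsigned long sg_alloc_reserve(unsigned int order, int low_dma)
{
        gfp_t mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;

        if (low_dma)
                mask |= GFP_DMA;

        return __get_free_pages(mask, order);   /* 0 on allocation failure */
}
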
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 89e5413cc2a3..c66ef96c71b4 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig | |||
@@ -866,7 +866,7 @@ config SERIAL_M32R_PLDSIO | |||
866 | 866 | ||
867 | config SERIAL_TXX9 | 867 | config SERIAL_TXX9 |
868 | bool "TMPTX39XX/49XX SIO support" | 868 | bool "TMPTX39XX/49XX SIO support" |
869 | depends HAS_TXX9_SERIAL && BROKEN | 869 | depends HAS_TXX9_SERIAL |
870 | select SERIAL_CORE | 870 | select SERIAL_CORE |
871 | default y | 871 | default y |
872 | 872 | ||
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c index ee98a867bc6d..141173efd463 100644 --- a/drivers/serial/serial_txx9.c +++ b/drivers/serial/serial_txx9.c | |||
@@ -33,6 +33,10 @@ | |||
33 | * 1.02 Cleanup. (import 8250.c changes) | 33 | * 1.02 Cleanup. (import 8250.c changes) |
34 | * 1.03 Fix low-latency mode. (import 8250.c changes) | 34 | * 1.03 Fix low-latency mode. (import 8250.c changes) |
35 | * 1.04 Remove usage of deprecated functions, cleanup. | 35 | * 1.04 Remove usage of deprecated functions, cleanup. |
36 | * 1.05 More strict check in verify_port. Cleanup. | ||
37 | * 1.06 Do not insert a char that caused a previous overrun. | ||
38 | * Fix some spin_locks. | ||
39 | * Do not call uart_add_one_port for absent ports. | ||
36 | */ | 40 | */ |
37 | #include <linux/config.h> | 41 | #include <linux/config.h> |
38 | 42 | ||
@@ -57,7 +61,7 @@ | |||
57 | #include <asm/io.h> | 61 | #include <asm/io.h> |
58 | #include <asm/irq.h> | 62 | #include <asm/irq.h> |
59 | 63 | ||
60 | static char *serial_version = "1.04"; | 64 | static char *serial_version = "1.06"; |
61 | static char *serial_name = "TX39/49 Serial driver"; | 65 | static char *serial_name = "TX39/49 Serial driver"; |
62 | 66 | ||
63 | #define PASS_LIMIT 256 | 67 | #define PASS_LIMIT 256 |
@@ -94,6 +98,8 @@ static char *serial_name = "TX39/49 Serial driver"; | |||
94 | #define UART_NR 4 | 98 | #define UART_NR 4 |
95 | #endif | 99 | #endif |
96 | 100 | ||
101 | #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) | ||
102 | |||
97 | struct uart_txx9_port { | 103 | struct uart_txx9_port { |
98 | struct uart_port port; | 104 | struct uart_port port; |
99 | 105 | ||
@@ -210,7 +216,7 @@ static inline unsigned int sio_in(struct uart_txx9_port *up, int offset) | |||
210 | { | 216 | { |
211 | switch (up->port.iotype) { | 217 | switch (up->port.iotype) { |
212 | default: | 218 | default: |
213 | return *(volatile u32 *)(up->port.membase + offset); | 219 | return __raw_readl(up->port.membase + offset); |
214 | case UPIO_PORT: | 220 | case UPIO_PORT: |
215 | return inl(up->port.iobase + offset); | 221 | return inl(up->port.iobase + offset); |
216 | } | 222 | } |
@@ -221,7 +227,7 @@ sio_out(struct uart_txx9_port *up, int offset, int value) | |||
221 | { | 227 | { |
222 | switch (up->port.iotype) { | 228 | switch (up->port.iotype) { |
223 | default: | 229 | default: |
224 | *(volatile u32 *)(up->port.membase + offset) = value; | 230 | __raw_writel(value, up->port.membase + offset); |
225 | break; | 231 | break; |
226 | case UPIO_PORT: | 232 | case UPIO_PORT: |
227 | outl(value, up->port.iobase + offset); | 233 | outl(value, up->port.iobase + offset); |
@@ -259,34 +265,19 @@ sio_quot_set(struct uart_txx9_port *up, int quot) | |||
259 | static void serial_txx9_stop_tx(struct uart_port *port) | 265 | static void serial_txx9_stop_tx(struct uart_port *port) |
260 | { | 266 | { |
261 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 267 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
262 | unsigned long flags; | ||
263 | |||
264 | spin_lock_irqsave(&up->port.lock, flags); | ||
265 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_TIE); | 268 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_TIE); |
266 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
267 | } | 269 | } |
268 | 270 | ||
269 | static void serial_txx9_start_tx(struct uart_port *port) | 271 | static void serial_txx9_start_tx(struct uart_port *port) |
270 | { | 272 | { |
271 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 273 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
272 | unsigned long flags; | ||
273 | |||
274 | spin_lock_irqsave(&up->port.lock, flags); | ||
275 | sio_set(up, TXX9_SIDICR, TXX9_SIDICR_TIE); | 274 | sio_set(up, TXX9_SIDICR, TXX9_SIDICR_TIE); |
276 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
277 | } | 275 | } |
278 | 276 | ||
279 | static void serial_txx9_stop_rx(struct uart_port *port) | 277 | static void serial_txx9_stop_rx(struct uart_port *port) |
280 | { | 278 | { |
281 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 279 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
282 | unsigned long flags; | ||
283 | |||
284 | spin_lock_irqsave(&up->port.lock, flags); | ||
285 | up->port.read_status_mask &= ~TXX9_SIDISR_RDIS; | 280 | up->port.read_status_mask &= ~TXX9_SIDISR_RDIS; |
286 | #if 0 | ||
287 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_RIE); | ||
288 | #endif | ||
289 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
290 | } | 281 | } |
291 | 282 | ||
292 | static void serial_txx9_enable_ms(struct uart_port *port) | 283 | static void serial_txx9_enable_ms(struct uart_port *port) |
@@ -302,12 +293,16 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
302 | unsigned int disr = *status; | 293 | unsigned int disr = *status; |
303 | int max_count = 256; | 294 | int max_count = 256; |
304 | char flag; | 295 | char flag; |
296 | unsigned int next_ignore_status_mask; | ||
305 | 297 | ||
306 | do { | 298 | do { |
307 | ch = sio_in(up, TXX9_SIRFIFO); | 299 | ch = sio_in(up, TXX9_SIRFIFO); |
308 | flag = TTY_NORMAL; | 300 | flag = TTY_NORMAL; |
309 | up->port.icount.rx++; | 301 | up->port.icount.rx++; |
310 | 302 | ||
303 | /* mask out RFDN_MASK bit added by previous overrun */ | ||
304 | next_ignore_status_mask = | ||
305 | up->port.ignore_status_mask & ~TXX9_SIDISR_RFDN_MASK; | ||
311 | if (unlikely(disr & (TXX9_SIDISR_UBRK | TXX9_SIDISR_UPER | | 306 | if (unlikely(disr & (TXX9_SIDISR_UBRK | TXX9_SIDISR_UPER | |
312 | TXX9_SIDISR_UFER | TXX9_SIDISR_UOER))) { | 307 | TXX9_SIDISR_UFER | TXX9_SIDISR_UOER))) { |
313 | /* | 308 | /* |
@@ -328,8 +323,17 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
328 | up->port.icount.parity++; | 323 | up->port.icount.parity++; |
329 | else if (disr & TXX9_SIDISR_UFER) | 324 | else if (disr & TXX9_SIDISR_UFER) |
330 | up->port.icount.frame++; | 325 | up->port.icount.frame++; |
331 | if (disr & TXX9_SIDISR_UOER) | 326 | if (disr & TXX9_SIDISR_UOER) { |
332 | up->port.icount.overrun++; | 327 | up->port.icount.overrun++; |
328 | /* | ||
329 | * The receiver read buffer still holds | ||
330 | * the char which caused the overrun. | ||
331 | * Ignore next char by adding RFDN_MASK | ||
332 | * to ignore_status_mask temporarily. | ||
333 | */ | ||
334 | next_ignore_status_mask |= | ||
335 | TXX9_SIDISR_RFDN_MASK; | ||
336 | } | ||
333 | 337 | ||
334 | /* | 338 | /* |
335 | * Mask off conditions which should be ignored. | 339 |
@@ -349,6 +353,7 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
349 | uart_insert_char(&up->port, disr, TXX9_SIDISR_UOER, ch, flag); | 353 | uart_insert_char(&up->port, disr, TXX9_SIDISR_UOER, ch, flag); |
350 | 354 | ||
351 | ignore_char: | 355 | ignore_char: |
356 | up->port.ignore_status_mask = next_ignore_status_mask; | ||
352 | disr = sio_in(up, TXX9_SIDISR); | 357 | disr = sio_in(up, TXX9_SIDISR); |
353 | } while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0)); | 358 | } while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0)); |
354 | spin_unlock(&up->port.lock); | 359 | spin_unlock(&up->port.lock); |
@@ -450,14 +455,11 @@ static unsigned int serial_txx9_get_mctrl(struct uart_port *port) | |||
450 | static void serial_txx9_set_mctrl(struct uart_port *port, unsigned int mctrl) | 455 | static void serial_txx9_set_mctrl(struct uart_port *port, unsigned int mctrl) |
451 | { | 456 | { |
452 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 457 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
453 | unsigned long flags; | ||
454 | 458 | ||
455 | spin_lock_irqsave(&up->port.lock, flags); | ||
456 | if (mctrl & TIOCM_RTS) | 459 | if (mctrl & TIOCM_RTS) |
457 | sio_mask(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); | 460 | sio_mask(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); |
458 | else | 461 | else |
459 | sio_set(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); | 462 | sio_set(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); |
460 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
461 | } | 463 | } |
462 | 464 | ||
463 | static void serial_txx9_break_ctl(struct uart_port *port, int break_state) | 465 | static void serial_txx9_break_ctl(struct uart_port *port, int break_state) |
@@ -784,8 +786,14 @@ static void serial_txx9_config_port(struct uart_port *port, int uflags) | |||
784 | static int | 786 | static int |
785 | serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser) | 787 | serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser) |
786 | { | 788 | { |
787 | if (ser->irq < 0 || | 789 | unsigned long new_port = ser->port; |
788 | ser->baud_base < 9600 || ser->type != PORT_TXX9) | 790 | if (HIGH_BITS_OFFSET) |
791 | new_port += (unsigned long)ser->port_high << HIGH_BITS_OFFSET; | ||
792 | if (ser->type != port->type || | ||
793 | ser->irq != port->irq || | ||
794 | ser->io_type != port->iotype || | ||
795 | new_port != port->iobase || | ||
796 | (unsigned long)ser->iomem_base != port->mapbase) | ||
789 | return -EINVAL; | 797 | return -EINVAL; |
790 | return 0; | 798 | return 0; |
791 | } | 799 | } |
@@ -827,7 +835,8 @@ static void __init serial_txx9_register_ports(struct uart_driver *drv) | |||
827 | 835 | ||
828 | up->port.line = i; | 836 | up->port.line = i; |
829 | up->port.ops = &serial_txx9_pops; | 837 | up->port.ops = &serial_txx9_pops; |
830 | uart_add_one_port(drv, &up->port); | 838 | if (up->port.iobase || up->port.mapbase) |
839 | uart_add_one_port(drv, &up->port); | ||
831 | } | 840 | } |
832 | } | 841 | } |
833 | 842 | ||
@@ -927,11 +936,6 @@ static int serial_txx9_console_setup(struct console *co, char *options) | |||
927 | return -ENODEV; | 936 | return -ENODEV; |
928 | 937 | ||
929 | /* | 938 | /* |
930 | * Temporary fix. | ||
931 | */ | ||
932 | spin_lock_init(&port->lock); | ||
933 | |||
934 | /* | ||
935 | * Disable UART interrupts, set DTR and RTS high | 939 | * Disable UART interrupts, set DTR and RTS high |
936 | * and set speed. | 940 | * and set speed. |
937 | */ | 941 | */ |
@@ -1041,11 +1045,10 @@ static int __devinit serial_txx9_register_port(struct uart_port *port) | |||
1041 | mutex_lock(&serial_txx9_mutex); | 1045 | mutex_lock(&serial_txx9_mutex); |
1042 | for (i = 0; i < UART_NR; i++) { | 1046 | for (i = 0; i < UART_NR; i++) { |
1043 | uart = &serial_txx9_ports[i]; | 1047 | uart = &serial_txx9_ports[i]; |
1044 | if (uart->port.type == PORT_UNKNOWN) | 1048 | if (!(uart->port.iobase || uart->port.mapbase)) |
1045 | break; | 1049 | break; |
1046 | } | 1050 | } |
1047 | if (i < UART_NR) { | 1051 | if (i < UART_NR) { |
1048 | uart_remove_one_port(&serial_txx9_reg, &uart->port); | ||
1049 | uart->port.iobase = port->iobase; | 1052 | uart->port.iobase = port->iobase; |
1050 | uart->port.membase = port->membase; | 1053 | uart->port.membase = port->membase; |
1051 | uart->port.irq = port->irq; | 1054 | uart->port.irq = port->irq; |
@@ -1080,9 +1083,8 @@ static void __devexit serial_txx9_unregister_port(int line) | |||
1080 | uart->port.type = PORT_UNKNOWN; | 1083 | uart->port.type = PORT_UNKNOWN; |
1081 | uart->port.iobase = 0; | 1084 | uart->port.iobase = 0; |
1082 | uart->port.mapbase = 0; | 1085 | uart->port.mapbase = 0; |
1083 | uart->port.membase = 0; | 1086 | uart->port.membase = NULL; |
1084 | uart->port.dev = NULL; | 1087 | uart->port.dev = NULL; |
1085 | uart_add_one_port(&serial_txx9_reg, &uart->port); | ||
1086 | mutex_unlock(&serial_txx9_mutex); | 1088 | mutex_unlock(&serial_txx9_mutex); |
1087 | } | 1089 | } |
1088 | 1090 | ||
@@ -1198,8 +1200,11 @@ static void __exit serial_txx9_exit(void) | |||
1198 | #ifdef ENABLE_SERIAL_TXX9_PCI | 1200 | #ifdef ENABLE_SERIAL_TXX9_PCI |
1199 | pci_unregister_driver(&serial_txx9_pci_driver); | 1201 | pci_unregister_driver(&serial_txx9_pci_driver); |
1200 | #endif | 1202 | #endif |
1201 | for (i = 0; i < UART_NR; i++) | 1203 | for (i = 0; i < UART_NR; i++) { |
1202 | uart_remove_one_port(&serial_txx9_reg, &serial_txx9_ports[i].port); | 1204 | struct uart_txx9_port *up = &serial_txx9_ports[i]; |
1205 | if (up->port.iobase || up->port.mapbase) | ||
1206 | uart_remove_one_port(&serial_txx9_reg, &up->port); | ||
1207 | } | ||
1203 | 1208 | ||
1204 | uart_unregister_driver(&serial_txx9_reg); | 1209 | uart_unregister_driver(&serial_txx9_reg); |
1205 | } | 1210 | } |
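
Within the serial_txx9 changes, the stricter verify_port() reconstructs the full base address from the 32-bit ser->port plus ser->port_high shifted by HIGH_BITS_OFFSET, which is zero on 32-bit builds and 32 where long is 64 bits. The small stand-alone program below demonstrates the same reconstruction with a trimmed-down structure; the field layout mimics serial_struct but is only a sketch.

#include <stdio.h>

#define HIGH_BITS_OFFSET ((sizeof(long) - sizeof(int)) * 8)

struct fake_serial_struct {
        unsigned int port;      /* low 32 bits of the I/O base */
        unsigned int port_high; /* remaining high bits, if long > int */
};

/* Rebuild the full port address the way the patched verify_port() does. */
static unsigned long rebuild_port(const struct fake_serial_struct *ser)
{
        unsigned long p = ser->port;

        if (HIGH_BITS_OFFSET)
                p += (unsigned long)ser->port_high << HIGH_BITS_OFFSET;
        return p;
}

int main(void)
{
        unsigned long base = 0x12345678UL;
        struct fake_serial_struct ser = {
                .port = (unsigned int)base,
                .port_high = (unsigned int)(HIGH_BITS_OFFSET ?
                                            base >> HIGH_BITS_OFFSET : 0),
        };

        printf("rebuilt base: 0x%lx\n", rebuild_port(&ser));
        return 0;
}
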
diff --git a/drivers/serial/vr41xx_siu.c b/drivers/serial/vr41xx_siu.c index d61494d185cd..bd6294132c18 100644 --- a/drivers/serial/vr41xx_siu.c +++ b/drivers/serial/vr41xx_siu.c | |||
@@ -919,7 +919,7 @@ static struct uart_driver siu_uart_driver = { | |||
919 | .cons = SERIAL_VR41XX_CONSOLE, | 919 | .cons = SERIAL_VR41XX_CONSOLE, |
920 | }; | 920 | }; |
921 | 921 | ||
922 | static int siu_probe(struct platform_device *dev) | 922 | static int __devinit siu_probe(struct platform_device *dev) |
923 | { | 923 | { |
924 | struct uart_port *port; | 924 | struct uart_port *port; |
925 | int num, i, retval; | 925 | int num, i, retval; |
@@ -953,7 +953,7 @@ static int siu_probe(struct platform_device *dev) | |||
953 | return 0; | 953 | return 0; |
954 | } | 954 | } |
955 | 955 | ||
956 | static int siu_remove(struct platform_device *dev) | 956 | static int __devexit siu_remove(struct platform_device *dev) |
957 | { | 957 | { |
958 | struct uart_port *port; | 958 | struct uart_port *port; |
959 | int i; | 959 | int i; |
@@ -1006,21 +1006,28 @@ static struct platform_device *siu_platform_device; | |||
1006 | 1006 | ||
1007 | static struct platform_driver siu_device_driver = { | 1007 | static struct platform_driver siu_device_driver = { |
1008 | .probe = siu_probe, | 1008 | .probe = siu_probe, |
1009 | .remove = siu_remove, | 1009 | .remove = __devexit_p(siu_remove), |
1010 | .suspend = siu_suspend, | 1010 | .suspend = siu_suspend, |
1011 | .resume = siu_resume, | 1011 | .resume = siu_resume, |
1012 | .driver = { | 1012 | .driver = { |
1013 | .name = "SIU", | 1013 | .name = "SIU", |
1014 | .owner = THIS_MODULE, | ||
1014 | }, | 1015 | }, |
1015 | }; | 1016 | }; |
1016 | 1017 | ||
1017 | static int __devinit vr41xx_siu_init(void) | 1018 | static int __init vr41xx_siu_init(void) |
1018 | { | 1019 | { |
1019 | int retval; | 1020 | int retval; |
1020 | 1021 | ||
1021 | siu_platform_device = platform_device_register_simple("SIU", -1, NULL, 0); | 1022 | siu_platform_device = platform_device_alloc("SIU", -1); |
1022 | if (IS_ERR(siu_platform_device)) | 1023 | if (!siu_platform_device) |
1023 | return PTR_ERR(siu_platform_device); | 1024 | return -ENOMEM; |
1025 | |||
1026 | retval = platform_device_add(siu_platform_device); | ||
1027 | if (retval < 0) { | ||
1028 | platform_device_put(siu_platform_device); | ||
1029 | return retval; | ||
1030 | } | ||
1024 | 1031 | ||
1025 | retval = platform_driver_register(&siu_device_driver); | 1032 | retval = platform_driver_register(&siu_device_driver); |
1026 | if (retval < 0) | 1033 | if (retval < 0) |
@@ -1029,10 +1036,9 @@ static int __devinit vr41xx_siu_init(void) | |||
1029 | return retval; | 1036 | return retval; |
1030 | } | 1037 | } |
1031 | 1038 | ||
1032 | static void __devexit vr41xx_siu_exit(void) | 1039 | static void __exit vr41xx_siu_exit(void) |
1033 | { | 1040 | { |
1034 | platform_driver_unregister(&siu_device_driver); | 1041 | platform_driver_unregister(&siu_device_driver); |
1035 | |||
1036 | platform_device_unregister(siu_platform_device); | 1042 | platform_device_unregister(siu_platform_device); |
1037 | } | 1043 | } |
1038 | 1044 | ||
diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c index ea75b3d0612b..67140a5804f5 100644 --- a/drivers/sn/ioc4.c +++ b/drivers/sn/ioc4.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <linux/ioc4.h> | 31 | #include <linux/ioc4.h> |
32 | #include <linux/mmtimer.h> | 32 | #include <linux/mmtimer.h> |
33 | #include <linux/rtc.h> | 33 | #include <linux/rtc.h> |
34 | #include <linux/rwsem.h> | 34 | #include <linux/mutex.h> |
35 | #include <asm/sn/addrs.h> | 35 | #include <asm/sn/addrs.h> |
36 | #include <asm/sn/clksupport.h> | 36 | #include <asm/sn/clksupport.h> |
37 | #include <asm/sn/shub_mmr.h> | 37 | #include <asm/sn/shub_mmr.h> |
@@ -54,11 +54,10 @@ | |||
54 | * Submodule management * | 54 | * Submodule management * |
55 | ************************/ | 55 | ************************/ |
56 | 56 | ||
57 | static LIST_HEAD(ioc4_devices); | 57 | static DEFINE_MUTEX(ioc4_mutex); |
58 | static DECLARE_RWSEM(ioc4_devices_rwsem); | ||
59 | 58 | ||
59 | static LIST_HEAD(ioc4_devices); | ||
60 | static LIST_HEAD(ioc4_submodules); | 60 | static LIST_HEAD(ioc4_submodules); |
61 | static DECLARE_RWSEM(ioc4_submodules_rwsem); | ||
62 | 61 | ||
63 | /* Register an IOC4 submodule */ | 62 | /* Register an IOC4 submodule */ |
64 | int | 63 | int |
@@ -66,15 +65,13 @@ ioc4_register_submodule(struct ioc4_submodule *is) | |||
66 | { | 65 | { |
67 | struct ioc4_driver_data *idd; | 66 | struct ioc4_driver_data *idd; |
68 | 67 | ||
69 | down_write(&ioc4_submodules_rwsem); | 68 | mutex_lock(&ioc4_mutex); |
70 | list_add(&is->is_list, &ioc4_submodules); | 69 | list_add(&is->is_list, &ioc4_submodules); |
71 | up_write(&ioc4_submodules_rwsem); | ||
72 | 70 | ||
73 | /* Initialize submodule for each IOC4 */ | 71 | /* Initialize submodule for each IOC4 */ |
74 | if (!is->is_probe) | 72 | if (!is->is_probe) |
75 | return 0; | 73 | goto out; |
76 | 74 | ||
77 | down_read(&ioc4_devices_rwsem); | ||
78 | list_for_each_entry(idd, &ioc4_devices, idd_list) { | 75 | list_for_each_entry(idd, &ioc4_devices, idd_list) { |
79 | if (is->is_probe(idd)) { | 76 | if (is->is_probe(idd)) { |
80 | printk(KERN_WARNING | 77 | printk(KERN_WARNING |
@@ -84,8 +81,8 @@ ioc4_register_submodule(struct ioc4_submodule *is) | |||
84 | pci_name(idd->idd_pdev)); | 81 | pci_name(idd->idd_pdev)); |
85 | } | 82 | } |
86 | } | 83 | } |
87 | up_read(&ioc4_devices_rwsem); | 84 | out: |
88 | 85 | mutex_unlock(&ioc4_mutex); | |
89 | return 0; | 86 | return 0; |
90 | } | 87 | } |
91 | 88 | ||
@@ -95,15 +92,13 @@ ioc4_unregister_submodule(struct ioc4_submodule *is) | |||
95 | { | 92 | { |
96 | struct ioc4_driver_data *idd; | 93 | struct ioc4_driver_data *idd; |
97 | 94 | ||
98 | down_write(&ioc4_submodules_rwsem); | 95 | mutex_lock(&ioc4_mutex); |
99 | list_del(&is->is_list); | 96 | list_del(&is->is_list); |
100 | up_write(&ioc4_submodules_rwsem); | ||
101 | 97 | ||
102 | /* Remove submodule for each IOC4 */ | 98 | /* Remove submodule for each IOC4 */ |
103 | if (!is->is_remove) | 99 | if (!is->is_remove) |
104 | return; | 100 | goto out; |
105 | 101 | ||
106 | down_read(&ioc4_devices_rwsem); | ||
107 | list_for_each_entry(idd, &ioc4_devices, idd_list) { | 102 | list_for_each_entry(idd, &ioc4_devices, idd_list) { |
108 | if (is->is_remove(idd)) { | 103 | if (is->is_remove(idd)) { |
109 | printk(KERN_WARNING | 104 | printk(KERN_WARNING |
@@ -113,7 +108,8 @@ ioc4_unregister_submodule(struct ioc4_submodule *is) | |||
113 | pci_name(idd->idd_pdev)); | 108 | pci_name(idd->idd_pdev)); |
114 | } | 109 | } |
115 | } | 110 | } |
116 | up_read(&ioc4_devices_rwsem); | 111 | out: |
112 | mutex_unlock(&ioc4_mutex); | ||
117 | } | 113 | } |
118 | 114 | ||
119 | /********************* | 115 | /********************* |
@@ -312,12 +308,11 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) | |||
312 | /* Track PCI-device specific data */ | 308 | /* Track PCI-device specific data */ |
313 | idd->idd_serial_data = NULL; | 309 | idd->idd_serial_data = NULL; |
314 | pci_set_drvdata(idd->idd_pdev, idd); | 310 | pci_set_drvdata(idd->idd_pdev, idd); |
315 | down_write(&ioc4_devices_rwsem); | 311 | |
312 | mutex_lock(&ioc4_mutex); | ||
316 | list_add(&idd->idd_list, &ioc4_devices); | 313 | list_add(&idd->idd_list, &ioc4_devices); |
317 | up_write(&ioc4_devices_rwsem); | ||
318 | 314 | ||
319 | /* Add this IOC4 to all submodules */ | 315 | /* Add this IOC4 to all submodules */ |
320 | down_read(&ioc4_submodules_rwsem); | ||
321 | list_for_each_entry(is, &ioc4_submodules, is_list) { | 316 | list_for_each_entry(is, &ioc4_submodules, is_list) { |
322 | if (is->is_probe && is->is_probe(idd)) { | 317 | if (is->is_probe && is->is_probe(idd)) { |
323 | printk(KERN_WARNING | 318 | printk(KERN_WARNING |
@@ -327,7 +322,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) | |||
327 | pci_name(idd->idd_pdev)); | 322 | pci_name(idd->idd_pdev)); |
328 | } | 323 | } |
329 | } | 324 | } |
330 | up_read(&ioc4_submodules_rwsem); | 325 | mutex_unlock(&ioc4_mutex); |
331 | 326 | ||
332 | return 0; | 327 | return 0; |
333 | 328 | ||
@@ -351,7 +346,7 @@ ioc4_remove(struct pci_dev *pdev) | |||
351 | idd = pci_get_drvdata(pdev); | 346 | idd = pci_get_drvdata(pdev); |
352 | 347 | ||
353 | /* Remove this IOC4 from all submodules */ | 348 | /* Remove this IOC4 from all submodules */ |
354 | down_read(&ioc4_submodules_rwsem); | 349 | mutex_lock(&ioc4_mutex); |
355 | list_for_each_entry(is, &ioc4_submodules, is_list) { | 350 | list_for_each_entry(is, &ioc4_submodules, is_list) { |
356 | if (is->is_remove && is->is_remove(idd)) { | 351 | if (is->is_remove && is->is_remove(idd)) { |
357 | printk(KERN_WARNING | 352 | printk(KERN_WARNING |
@@ -361,7 +356,7 @@ ioc4_remove(struct pci_dev *pdev) | |||
361 | pci_name(idd->idd_pdev)); | 356 | pci_name(idd->idd_pdev)); |
362 | } | 357 | } |
363 | } | 358 | } |
364 | up_read(&ioc4_submodules_rwsem); | 359 | mutex_unlock(&ioc4_mutex); |
365 | 360 | ||
366 | /* Release resources */ | 361 | /* Release resources */ |
367 | iounmap(idd->idd_misc_regs); | 362 | iounmap(idd->idd_misc_regs); |
@@ -377,9 +372,9 @@ ioc4_remove(struct pci_dev *pdev) | |||
377 | pci_disable_device(pdev); | 372 | pci_disable_device(pdev); |
378 | 373 | ||
379 | /* Remove and free driver data */ | 374 | /* Remove and free driver data */ |
380 | down_write(&ioc4_devices_rwsem); | 375 | mutex_lock(&ioc4_mutex); |
381 | list_del(&idd->idd_list); | 376 | list_del(&idd->idd_list); |
382 | up_write(&ioc4_devices_rwsem); | 377 | mutex_unlock(&ioc4_mutex); |
383 | kfree(idd); | 378 | kfree(idd); |
384 | } | 379 | } |
385 | 380 | ||
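
The ioc4.c conversion collapses the two reader/writer semaphores into one mutex that covers both the submodule and device lists, and reshapes the early returns as goto out so the single unlock site is never skipped. A minimal userspace sketch of that lock-then-goto-out shape follows, with a pthread mutex standing in for the kernel mutex and the list handling reduced to an array; all names here are invented for the illustration.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ioc4_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Register a submodule: every early exit funnels through "out" so the
 * mutex taken at the top is released exactly once on all paths. */
static int register_submodule(int (*probe)(int), const int *devs, int ndev)
{
        int i;

        pthread_mutex_lock(&ioc4_mutex);

        if (!probe)             /* no per-device init: bail out early */
                goto out;

        for (i = 0; i < ndev; i++)
                if (probe(devs[i]))
                        fprintf(stderr, "probe failed for device %d\n",
                                devs[i]);
out:
        pthread_mutex_unlock(&ioc4_mutex);
        return 0;
}

static int dummy_probe(int dev) { printf("probed %d\n", dev); return 0; }

int main(void)
{
        int devs[] = { 0, 1, 2 };

        register_submodule(dummy_probe, devs, 3);
        register_submodule(NULL, devs, 3);      /* exercises the goto path */
        return 0;
}
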
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c index b058273527bb..76448d6ae896 100644 --- a/drivers/video/acornfb.c +++ b/drivers/video/acornfb.c | |||
@@ -1269,7 +1269,7 @@ free_unused_pages(unsigned int virtual_start, unsigned int virtual_end) | |||
1269 | */ | 1269 | */ |
1270 | page = virt_to_page(virtual_start); | 1270 | page = virt_to_page(virtual_start); |
1271 | ClearPageReserved(page); | 1271 | ClearPageReserved(page); |
1272 | set_page_count(page, 1); | 1272 | init_page_count(page); |
1273 | free_page(virtual_start); | 1273 | free_page(virtual_start); |
1274 | 1274 | ||
1275 | virtual_start += PAGE_SIZE; | 1275 | virtual_start += PAGE_SIZE; |
diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index d8467c03b49f..788297e9d59e 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c | |||
@@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor) | |||
1508 | int size = ((cursor->image.width + 7) >> 3) * | 1508 | int size = ((cursor->image.width + 7) >> 3) * |
1509 | cursor->image.height; | 1509 | cursor->image.height; |
1510 | int i; | 1510 | int i; |
1511 | u8 *data = kmalloc(64 * 8, GFP_KERNEL); | 1511 | u8 *data = kmalloc(64 * 8, GFP_ATOMIC); |
1512 | 1512 | ||
1513 | if (data == NULL) | 1513 | if (data == NULL) |
1514 | return -ENOMEM; | 1514 | return -ENOMEM; |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 3ad8455f8577..651a9e14d9a9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
614 | 614 | ||
615 | sb = dir->i_sb; | 615 | sb = dir->i_sb; |
616 | v9ses = v9fs_inode2v9ses(dir); | 616 | v9ses = v9fs_inode2v9ses(dir); |
617 | dentry->d_op = &v9fs_dentry_operations; | ||
617 | dirfid = v9fs_fid_lookup(dentry->d_parent); | 618 | dirfid = v9fs_fid_lookup(dentry->d_parent); |
618 | 619 | ||
619 | if (!dirfid) { | 620 | if (!dirfid) { |
@@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
681 | goto FreeFcall; | 682 | goto FreeFcall; |
682 | 683 | ||
683 | fid->qid = fcall->params.rstat.stat.qid; | 684 | fid->qid = fcall->params.rstat.stat.qid; |
684 | |||
685 | dentry->d_op = &v9fs_dentry_operations; | ||
686 | v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); | 685 | v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); |
687 | 686 | ||
688 | d_add(dentry, inode); | 687 | d_add(dentry, inode); |
diff --git a/fs/buffer.c b/fs/buffer.c index a9b399402007..1d3683d496f8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3051,68 +3051,6 @@ asmlinkage long sys_bdflush(int func, long data) | |||
3051 | } | 3051 | } |
3052 | 3052 | ||
3053 | /* | 3053 | /* |
3054 | * Migration function for pages with buffers. This function can only be used | ||
3055 | * if the underlying filesystem guarantees that no other references to "page" | ||
3056 | * exist. | ||
3057 | */ | ||
3058 | #ifdef CONFIG_MIGRATION | ||
3059 | int buffer_migrate_page(struct page *newpage, struct page *page) | ||
3060 | { | ||
3061 | struct address_space *mapping = page->mapping; | ||
3062 | struct buffer_head *bh, *head; | ||
3063 | int rc; | ||
3064 | |||
3065 | if (!mapping) | ||
3066 | return -EAGAIN; | ||
3067 | |||
3068 | if (!page_has_buffers(page)) | ||
3069 | return migrate_page(newpage, page); | ||
3070 | |||
3071 | head = page_buffers(page); | ||
3072 | |||
3073 | rc = migrate_page_remove_references(newpage, page, 3); | ||
3074 | if (rc) | ||
3075 | return rc; | ||
3076 | |||
3077 | bh = head; | ||
3078 | do { | ||
3079 | get_bh(bh); | ||
3080 | lock_buffer(bh); | ||
3081 | bh = bh->b_this_page; | ||
3082 | |||
3083 | } while (bh != head); | ||
3084 | |||
3085 | ClearPagePrivate(page); | ||
3086 | set_page_private(newpage, page_private(page)); | ||
3087 | set_page_private(page, 0); | ||
3088 | put_page(page); | ||
3089 | get_page(newpage); | ||
3090 | |||
3091 | bh = head; | ||
3092 | do { | ||
3093 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
3094 | bh = bh->b_this_page; | ||
3095 | |||
3096 | } while (bh != head); | ||
3097 | |||
3098 | SetPagePrivate(newpage); | ||
3099 | |||
3100 | migrate_page_copy(newpage, page); | ||
3101 | |||
3102 | bh = head; | ||
3103 | do { | ||
3104 | unlock_buffer(bh); | ||
3105 | put_bh(bh); | ||
3106 | bh = bh->b_this_page; | ||
3107 | |||
3108 | } while (bh != head); | ||
3109 | |||
3110 | return 0; | ||
3111 | } | ||
3112 | EXPORT_SYMBOL(buffer_migrate_page); | ||
3113 | #endif | ||
3114 | |||
3115 | /* | ||
3116 | * Buffer-head allocation | 3054 | * Buffer-head allocation |
3117 | */ | 3055 | */ |
3118 | static kmem_cache_t *bh_cachep; | 3056 | static kmem_cache_t *bh_cachep; |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b35195289945..25fa8bba8cb5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -56,48 +56,10 @@ static void huge_pagevec_release(struct pagevec *pvec) | |||
56 | pagevec_reinit(pvec); | 56 | pagevec_reinit(pvec); |
57 | } | 57 | } |
58 | 58 | ||
59 | /* | ||
60 | * huge_pages_needed tries to determine the number of new huge pages that | ||
61 | * will be required to fully populate this VMA. This will be equal to | ||
62 | * the size of the VMA in huge pages minus the number of huge pages | ||
63 | * (covered by this VMA) that are found in the page cache. | ||
64 | * | ||
65 | * Result is in bytes to be compatible with is_hugepage_mem_enough() | ||
66 | */ | ||
67 | static unsigned long | ||
68 | huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) | ||
69 | { | ||
70 | int i; | ||
71 | struct pagevec pvec; | ||
72 | unsigned long start = vma->vm_start; | ||
73 | unsigned long end = vma->vm_end; | ||
74 | unsigned long hugepages = (end - start) >> HPAGE_SHIFT; | ||
75 | pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); | ||
76 | pgoff_t endpg = next + hugepages; | ||
77 | |||
78 | pagevec_init(&pvec, 0); | ||
79 | while (next < endpg) { | ||
80 | if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) | ||
81 | break; | ||
82 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
83 | struct page *page = pvec.pages[i]; | ||
84 | if (page->index > next) | ||
85 | next = page->index; | ||
86 | if (page->index >= endpg) | ||
87 | break; | ||
88 | next++; | ||
89 | hugepages--; | ||
90 | } | ||
91 | huge_pagevec_release(&pvec); | ||
92 | } | ||
93 | return hugepages << HPAGE_SHIFT; | ||
94 | } | ||
95 | |||
96 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 59 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
97 | { | 60 | { |
98 | struct inode *inode = file->f_dentry->d_inode; | 61 | struct inode *inode = file->f_dentry->d_inode; |
99 | struct address_space *mapping = inode->i_mapping; | 62 | struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); |
100 | unsigned long bytes; | ||
101 | loff_t len, vma_len; | 63 | loff_t len, vma_len; |
102 | int ret; | 64 | int ret; |
103 | 65 | ||
@@ -113,10 +75,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
113 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) | 75 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) |
114 | return -EINVAL; | 76 | return -EINVAL; |
115 | 77 | ||
116 | bytes = huge_pages_needed(mapping, vma); | ||
117 | if (!is_hugepage_mem_enough(bytes)) | ||
118 | return -ENOMEM; | ||
119 | |||
120 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); | 78 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); |
121 | 79 | ||
122 | mutex_lock(&inode->i_mutex); | 80 | mutex_lock(&inode->i_mutex); |
@@ -129,6 +87,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
129 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) | 87 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) |
130 | goto out; | 88 | goto out; |
131 | 89 | ||
90 | if (vma->vm_flags & VM_MAYSHARE) | ||
91 | if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) | ||
92 | goto out; | ||
93 | |||
132 | ret = 0; | 94 | ret = 0; |
133 | hugetlb_prefault_arch_hook(vma->vm_mm); | 95 | hugetlb_prefault_arch_hook(vma->vm_mm); |
134 | if (inode->i_size < len) | 96 | if (inode->i_size < len) |
@@ -227,13 +189,18 @@ static void truncate_huge_page(struct page *page) | |||
227 | put_page(page); | 189 | put_page(page); |
228 | } | 190 | } |
229 | 191 | ||
230 | static void truncate_hugepages(struct address_space *mapping, loff_t lstart) | 192 | static void truncate_hugepages(struct inode *inode, loff_t lstart) |
231 | { | 193 | { |
194 | struct address_space *mapping = &inode->i_data; | ||
232 | const pgoff_t start = lstart >> HPAGE_SHIFT; | 195 | const pgoff_t start = lstart >> HPAGE_SHIFT; |
233 | struct pagevec pvec; | 196 | struct pagevec pvec; |
234 | pgoff_t next; | 197 | pgoff_t next; |
235 | int i; | 198 | int i; |
236 | 199 | ||
200 | hugetlb_truncate_reservation(HUGETLBFS_I(inode), | ||
201 | lstart >> HPAGE_SHIFT); | ||
202 | if (!mapping->nrpages) | ||
203 | return; | ||
237 | pagevec_init(&pvec, 0); | 204 | pagevec_init(&pvec, 0); |
238 | next = start; | 205 | next = start; |
239 | while (1) { | 206 | while (1) { |
@@ -262,8 +229,7 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart) | |||
262 | 229 | ||
263 | static void hugetlbfs_delete_inode(struct inode *inode) | 230 | static void hugetlbfs_delete_inode(struct inode *inode) |
264 | { | 231 | { |
265 | if (inode->i_data.nrpages) | 232 | truncate_hugepages(inode, 0); |
266 | truncate_hugepages(&inode->i_data, 0); | ||
267 | clear_inode(inode); | 233 | clear_inode(inode); |
268 | } | 234 | } |
269 | 235 | ||
@@ -296,8 +262,7 @@ static void hugetlbfs_forget_inode(struct inode *inode) | |||
296 | inode->i_state |= I_FREEING; | 262 | inode->i_state |= I_FREEING; |
297 | inodes_stat.nr_inodes--; | 263 | inodes_stat.nr_inodes--; |
298 | spin_unlock(&inode_lock); | 264 | spin_unlock(&inode_lock); |
299 | if (inode->i_data.nrpages) | 265 | truncate_hugepages(inode, 0); |
300 | truncate_hugepages(&inode->i_data, 0); | ||
301 | clear_inode(inode); | 266 | clear_inode(inode); |
302 | destroy_inode(inode); | 267 | destroy_inode(inode); |
303 | } | 268 | } |
@@ -356,7 +321,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | |||
356 | if (!prio_tree_empty(&mapping->i_mmap)) | 321 | if (!prio_tree_empty(&mapping->i_mmap)) |
357 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); | 322 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); |
358 | spin_unlock(&mapping->i_mmap_lock); | 323 | spin_unlock(&mapping->i_mmap_lock); |
359 | truncate_hugepages(mapping, offset); | 324 | truncate_hugepages(inode, offset); |
360 | return 0; | 325 | return 0; |
361 | } | 326 | } |
362 | 327 | ||
@@ -573,6 +538,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) | |||
573 | hugetlbfs_inc_free_inodes(sbinfo); | 538 | hugetlbfs_inc_free_inodes(sbinfo); |
574 | return NULL; | 539 | return NULL; |
575 | } | 540 | } |
541 | p->prereserved_hpages = 0; | ||
576 | return &p->vfs_inode; | 542 | return &p->vfs_inode; |
577 | } | 543 | } |
578 | 544 | ||
@@ -771,21 +737,6 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
771 | 737 | ||
772 | static struct vfsmount *hugetlbfs_vfsmount; | 738 | static struct vfsmount *hugetlbfs_vfsmount; |
773 | 739 | ||
774 | /* | ||
775 | * Return the next identifier for a shm file | ||
776 | */ | ||
777 | static unsigned long hugetlbfs_counter(void) | ||
778 | { | ||
779 | static DEFINE_SPINLOCK(lock); | ||
780 | static unsigned long counter; | ||
781 | unsigned long ret; | ||
782 | |||
783 | spin_lock(&lock); | ||
784 | ret = ++counter; | ||
785 | spin_unlock(&lock); | ||
786 | return ret; | ||
787 | } | ||
788 | |||
789 | static int can_do_hugetlb_shm(void) | 740 | static int can_do_hugetlb_shm(void) |
790 | { | 741 | { |
791 | return likely(capable(CAP_IPC_LOCK) || | 742 | return likely(capable(CAP_IPC_LOCK) || |
@@ -801,18 +752,16 @@ struct file *hugetlb_zero_setup(size_t size) | |||
801 | struct dentry *dentry, *root; | 752 | struct dentry *dentry, *root; |
802 | struct qstr quick_string; | 753 | struct qstr quick_string; |
803 | char buf[16]; | 754 | char buf[16]; |
755 | static atomic_t counter; | ||
804 | 756 | ||
805 | if (!can_do_hugetlb_shm()) | 757 | if (!can_do_hugetlb_shm()) |
806 | return ERR_PTR(-EPERM); | 758 | return ERR_PTR(-EPERM); |
807 | 759 | ||
808 | if (!is_hugepage_mem_enough(size)) | ||
809 | return ERR_PTR(-ENOMEM); | ||
810 | |||
811 | if (!user_shm_lock(size, current->user)) | 760 | if (!user_shm_lock(size, current->user)) |
812 | return ERR_PTR(-ENOMEM); | 761 | return ERR_PTR(-ENOMEM); |
813 | 762 | ||
814 | root = hugetlbfs_vfsmount->mnt_root; | 763 | root = hugetlbfs_vfsmount->mnt_root; |
815 | snprintf(buf, 16, "%lu", hugetlbfs_counter()); | 764 | snprintf(buf, 16, "%u", atomic_inc_return(&counter)); |
816 | quick_string.name = buf; | 765 | quick_string.name = buf; |
817 | quick_string.len = strlen(quick_string.name); | 766 | quick_string.len = strlen(quick_string.name); |
818 | quick_string.hash = 0; | 767 | quick_string.hash = 0; |
@@ -831,6 +780,11 @@ struct file *hugetlb_zero_setup(size_t size) | |||
831 | if (!inode) | 780 | if (!inode) |
832 | goto out_file; | 781 | goto out_file; |
833 | 782 | ||
783 | error = -ENOMEM; | ||
784 | if (hugetlb_extend_reservation(HUGETLBFS_I(inode), | ||
785 | size >> HPAGE_SHIFT) != 0) | ||
786 | goto out_inode; | ||
787 | |||
834 | d_instantiate(dentry, inode); | 788 | d_instantiate(dentry, inode); |
835 | inode->i_size = size; | 789 | inode->i_size = size; |
836 | inode->i_nlink = 0; | 790 | inode->i_nlink = 0; |
@@ -841,6 +795,8 @@ struct file *hugetlb_zero_setup(size_t size) | |||
841 | file->f_mode = FMODE_WRITE | FMODE_READ; | 795 | file->f_mode = FMODE_WRITE | FMODE_READ; |
842 | return file; | 796 | return file; |
843 | 797 | ||
798 | out_inode: | ||
799 | iput(inode); | ||
844 | out_file: | 800 | out_file: |
845 | put_filp(file); | 801 | put_filp(file); |
846 | out_dentry: | 802 | out_dentry: |
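
The hugetlbfs changes above replace the spinlock-protected hugetlbfs_counter() with a single atomic_inc_return() on a static counter, and swap the up-front is_hugepage_mem_enough() check for an explicit hugepage reservation taken when the zero-setup file is created. A minimal userspace sketch of the counter change, with GCC __atomic builtins and a pthread spinlock standing in for the kernel's atomic_t and spinlock_t (all names below are illustrative):

  /* counter_sketch.c - old locked counter vs. new atomic counter */
  #include <pthread.h>
  #include <stdio.h>

  static pthread_spinlock_t lock;
  static unsigned long counter_locked;

  /* old scheme: plain increment guarded by a spinlock */
  static unsigned long next_id_locked(void)
  {
      unsigned long ret;
      pthread_spin_lock(&lock);
      ret = ++counter_locked;
      pthread_spin_unlock(&lock);
      return ret;
  }

  static unsigned int counter_atomic;

  /* new scheme: one atomic read-modify-write, no lock at all */
  static unsigned int next_id_atomic(void)
  {
      return __atomic_add_fetch(&counter_atomic, 1, __ATOMIC_SEQ_CST);
  }

  int main(void)
  {
      char buf[16];
      pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
      snprintf(buf, sizeof(buf), "%lu", next_id_locked());
      printf("locked name: %s\n", buf);
      snprintf(buf, sizeof(buf), "%u", next_id_atomic());
      printf("atomic name: %s\n", buf);
      return 0;
  }

Both forms produce strictly increasing names for the shm files; the atomic form simply avoids taking a global lock on every creation.
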
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8dd3aafec499..09e1c57a86a0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(void) | |||
959 | ocfs2_lock_cache = kmem_cache_create("ocfs2_lock", | 959 | ocfs2_lock_cache = kmem_cache_create("ocfs2_lock", |
960 | sizeof(struct ocfs2_journal_lock), | 960 | sizeof(struct ocfs2_journal_lock), |
961 | 0, | 961 | 0, |
962 | SLAB_NO_REAP|SLAB_HWCACHE_ALIGN, | 962 | SLAB_HWCACHE_ALIGN, |
963 | NULL, NULL); | 963 | NULL, NULL); |
964 | if (!ocfs2_lock_cache) | 964 | if (!ocfs2_lock_cache) |
965 | return -ENOMEM; | 965 | return -ENOMEM; |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 3f810acd0bfa..b1ca234068f6 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -87,8 +87,7 @@ static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
87 | xpages = 1UL << order; | 87 | xpages = 1UL << order; |
88 | npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; | 88 | npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; |
89 | 89 | ||
90 | for (loop = 0; loop < npages; loop++) | 90 | split_page(pages, order); |
91 | set_page_count(pages + loop, 1); | ||
92 | 91 | ||
93 | /* trim off any pages we don't actually require */ | 92 | /* trim off any pages we don't actually require */ |
94 | for (loop = npages; loop < xpages; loop++) | 93 | for (loop = npages; loop < xpages; loop++) |
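
The ramfs-nommu hunk drops the hand-rolled set_page_count() loop in favour of split_page(), which turns one order-N allocation into 2^N independently refcounted order-0 pages so the unwanted tail pages can be freed one by one. A rough userspace model of that bookkeeping (struct fake_page and the sizes below are made up for illustration):

  #include <stdio.h>
  #include <stdlib.h>

  struct fake_page { int refcount; };

  /* model of split_page(): give every constituent page of a 2^order
   * allocation its own reference so it can be released individually */
  static void split_pages(struct fake_page *pages, unsigned int order)
  {
      for (unsigned long i = 0; i < (1UL << order); i++)
          pages[i].refcount = 1;
  }

  int main(void)
  {
      unsigned int order = 3;       /* 8 "pages" were allocated ... */
      unsigned long npages = 5;     /* ... but only 5 are needed */
      struct fake_page *pages = calloc(1UL << order, sizeof(*pages));
      if (!pages)
          return 1;

      split_pages(pages, order);

      /* trim off any pages we don't actually require */
      for (unsigned long i = npages; i < (1UL << order); i++)
          pages[i].refcount = 0;    /* stands in for __free_page() */

      printf("kept %lu of %lu pages\n", npages, 1UL << order);
      free(pages);
      return 0;
  }
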
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index bfb4f2917bb6..8cdfa4151659 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/blkdev.h> | 29 | #include <linux/blkdev.h> |
30 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
31 | #include <linux/kthread.h> | 31 | #include <linux/kthread.h> |
32 | #include <linux/migrate.h> | ||
32 | #include "xfs_linux.h" | 33 | #include "xfs_linux.h" |
33 | 34 | ||
34 | STATIC kmem_zone_t *xfs_buf_zone; | 35 | STATIC kmem_zone_t *xfs_buf_zone; |
diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 55059abf9c95..20f523954218 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h | |||
@@ -103,6 +103,12 @@ __acpi_release_global_lock (unsigned int *lock) | |||
103 | :"=r"(n_hi), "=r"(n_lo) \ | 103 | :"=r"(n_hi), "=r"(n_lo) \ |
104 | :"0"(n_hi), "1"(n_lo)) | 104 | :"0"(n_hi), "1"(n_lo)) |
105 | 105 | ||
106 | #ifdef CONFIG_X86_IO_APIC | ||
107 | extern void check_acpi_pci(void); | ||
108 | #else | ||
109 | static inline void check_acpi_pci(void) { } | ||
110 | #endif | ||
111 | |||
106 | #ifdef CONFIG_ACPI | 112 | #ifdef CONFIG_ACPI |
107 | extern int acpi_lapic; | 113 | extern int acpi_lapic; |
108 | extern int acpi_ioapic; | 114 | extern int acpi_ioapic; |
@@ -128,8 +134,6 @@ extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); | |||
128 | extern int skip_ioapic_setup; | 134 | extern int skip_ioapic_setup; |
129 | extern int acpi_skip_timer_override; | 135 | extern int acpi_skip_timer_override; |
130 | 136 | ||
131 | extern void check_acpi_pci(void); | ||
132 | |||
133 | static inline void disable_ioapic_setup(void) | 137 | static inline void disable_ioapic_setup(void) |
134 | { | 138 | { |
135 | skip_ioapic_setup = 1; | 139 | skip_ioapic_setup = 1; |
@@ -142,8 +146,6 @@ static inline int ioapic_setup_disabled(void) | |||
142 | 146 | ||
143 | #else | 147 | #else |
144 | static inline void disable_ioapic_setup(void) { } | 148 | static inline void disable_ioapic_setup(void) { } |
145 | static inline void check_acpi_pci(void) { } | ||
146 | |||
147 | #endif | 149 | #endif |
148 | 150 | ||
149 | static inline void acpi_noirq_set(void) { acpi_noirq = 1; } | 151 | static inline void acpi_noirq_set(void) { acpi_noirq = 1; } |
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 088a945bf26b..ee056c41a9fb 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h | |||
@@ -219,13 +219,12 @@ extern unsigned long pg0[]; | |||
219 | * The following only work if pte_present() is true. | 219 | * The following only work if pte_present() is true. |
220 | * Undefined behaviour if not.. | 220 | * Undefined behaviour if not.. |
221 | */ | 221 | */ |
222 | #define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT) | ||
223 | static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } | 222 | static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } |
224 | static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } | 223 | static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } |
225 | static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } | 224 | static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } |
226 | static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } | 225 | static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } |
227 | static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } | 226 | static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } |
228 | static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; } | 227 | static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } |
229 | 228 | ||
230 | /* | 229 | /* |
231 | * The following only works if pte_present() is not true. | 230 | * The following only works if pte_present() is not true. |
@@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return | |||
242 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } | 241 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } |
243 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } | 242 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } |
244 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } | 243 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } |
245 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; } | 244 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } |
246 | 245 | ||
247 | #ifdef CONFIG_X86_PAE | 246 | #ifdef CONFIG_X86_PAE |
248 | # include <asm/pgtable-3level.h> | 247 | # include <asm/pgtable-3level.h> |
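
With __LARGE_PTE gone, pte_huge() on i386 now keys off _PAGE_PSE alone instead of requiring _PAGE_PSE and _PAGE_PRESENT together, and pte_mkhuge() sets only _PAGE_PSE; the x86_64 header further down gets the same treatment. A small standalone sketch of the behavioural difference, with the PTE reduced to a plain unsigned long (the bit values are the usual i386 ones):

  #include <assert.h>

  #define _PAGE_PRESENT 0x001UL
  #define _PAGE_PSE     0x080UL

  /* old test: huge only if both PSE and PRESENT are set */
  static int pte_huge_old(unsigned long pte_low)
  {
      return (pte_low & (_PAGE_PSE | _PAGE_PRESENT)) ==
             (_PAGE_PSE | _PAGE_PRESENT);
  }

  /* new test: the PSE bit alone marks a huge mapping */
  static int pte_huge_new(unsigned long pte_low)
  {
      return pte_low & _PAGE_PSE;
  }

  int main(void)
  {
      /* a huge PTE whose present bit happens to be clear is still
       * recognised by the new predicate, but not by the old one */
      unsigned long pte_low = _PAGE_PSE;
      assert(!pte_huge_old(pte_low));
      assert(pte_huge_new(pte_low) != 0);
      return 0;
  }
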
diff --git a/include/asm-ia64/intel_intrin.h b/include/asm-ia64/intel_intrin.h index a7122d850177..d069b6acddce 100644 --- a/include/asm-ia64/intel_intrin.h +++ b/include/asm-ia64/intel_intrin.h | |||
@@ -5,113 +5,10 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com> | 6 | * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com> |
7 | * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> | 7 | * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> |
8 | * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com> | ||
8 | * | 9 | * |
9 | */ | 10 | */ |
10 | #include <asm/types.h> | 11 | #include <ia64intrin.h> |
11 | |||
12 | void __lfetch(int lfhint, void *y); | ||
13 | void __lfetch_excl(int lfhint, void *y); | ||
14 | void __lfetch_fault(int lfhint, void *y); | ||
15 | void __lfetch_fault_excl(int lfhint, void *y); | ||
16 | |||
17 | /* In the following, whichFloatReg should be an integer from 0-127 */ | ||
18 | void __ldfs(const int whichFloatReg, void *src); | ||
19 | void __ldfd(const int whichFloatReg, void *src); | ||
20 | void __ldfe(const int whichFloatReg, void *src); | ||
21 | void __ldf8(const int whichFloatReg, void *src); | ||
22 | void __ldf_fill(const int whichFloatReg, void *src); | ||
23 | void __stfs(void *dst, const int whichFloatReg); | ||
24 | void __stfd(void *dst, const int whichFloatReg); | ||
25 | void __stfe(void *dst, const int whichFloatReg); | ||
26 | void __stf8(void *dst, const int whichFloatReg); | ||
27 | void __stf_spill(void *dst, const int whichFloatReg); | ||
28 | |||
29 | void __st1_rel(void *dst, const __s8 value); | ||
30 | void __st2_rel(void *dst, const __s16 value); | ||
31 | void __st4_rel(void *dst, const __s32 value); | ||
32 | void __st8_rel(void *dst, const __s64 value); | ||
33 | __u8 __ld1_acq(void *src); | ||
34 | __u16 __ld2_acq(void *src); | ||
35 | __u32 __ld4_acq(void *src); | ||
36 | __u64 __ld8_acq(void *src); | ||
37 | |||
38 | __u64 __fetchadd4_acq(__u32 *addend, const int increment); | ||
39 | __u64 __fetchadd4_rel(__u32 *addend, const int increment); | ||
40 | __u64 __fetchadd8_acq(__u64 *addend, const int increment); | ||
41 | __u64 __fetchadd8_rel(__u64 *addend, const int increment); | ||
42 | |||
43 | __u64 __getf_exp(double d); | ||
44 | |||
45 | /* OS Related Itanium(R) Intrinsics */ | ||
46 | |||
47 | /* The names to use for whichReg and whichIndReg below come from | ||
48 | the include file asm/ia64regs.h */ | ||
49 | |||
50 | __u64 __getIndReg(const int whichIndReg, __s64 index); | ||
51 | __u64 __getReg(const int whichReg); | ||
52 | |||
53 | void __setIndReg(const int whichIndReg, __s64 index, __u64 value); | ||
54 | void __setReg(const int whichReg, __u64 value); | ||
55 | |||
56 | void __mf(void); | ||
57 | void __mfa(void); | ||
58 | void __synci(void); | ||
59 | void __itcd(__s64 pa); | ||
60 | void __itci(__s64 pa); | ||
61 | void __itrd(__s64 whichTransReg, __s64 pa); | ||
62 | void __itri(__s64 whichTransReg, __s64 pa); | ||
63 | void __ptce(__s64 va); | ||
64 | void __ptcl(__s64 va, __s64 pagesz); | ||
65 | void __ptcg(__s64 va, __s64 pagesz); | ||
66 | void __ptcga(__s64 va, __s64 pagesz); | ||
67 | void __ptri(__s64 va, __s64 pagesz); | ||
68 | void __ptrd(__s64 va, __s64 pagesz); | ||
69 | void __invala (void); | ||
70 | void __invala_gr(const int whichGeneralReg /* 0-127 */ ); | ||
71 | void __invala_fr(const int whichFloatReg /* 0-127 */ ); | ||
72 | void __nop(const int); | ||
73 | void __fc(__u64 *addr); | ||
74 | void __sum(int mask); | ||
75 | void __rum(int mask); | ||
76 | void __ssm(int mask); | ||
77 | void __rsm(int mask); | ||
78 | __u64 __thash(__s64); | ||
79 | __u64 __ttag(__s64); | ||
80 | __s64 __tpa(__s64); | ||
81 | |||
82 | /* Intrinsics for implementing get/put_user macros */ | ||
83 | void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val); | ||
84 | void __ld_user(const char *tableName, __u64 addr, char size, char relocType); | ||
85 | |||
86 | /* This intrinsic does not generate code, it creates a barrier across which | ||
87 | * the compiler will not schedule data access instructions. | ||
88 | */ | ||
89 | void __memory_barrier(void); | ||
90 | |||
91 | void __isrlz(void); | ||
92 | void __dsrlz(void); | ||
93 | |||
94 | __u64 _m64_mux1(__u64 a, const int n); | ||
95 | __u64 __thash(__u64); | ||
96 | |||
97 | /* Lock and Atomic Operation Related Intrinsics */ | ||
98 | __u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value); | ||
99 | __u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value); | ||
100 | __s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value); | ||
101 | __s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value); | ||
102 | |||
103 | __u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp); | ||
104 | __u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp); | ||
105 | __u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp); | ||
106 | __u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp); | ||
107 | __u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp); | ||
108 | __u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp); | ||
109 | __u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp); | ||
110 | __u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp); | ||
111 | |||
112 | __s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len); | ||
113 | __s64 _m64_shrp(__s64 a, __s64 b, const int count); | ||
114 | __s64 _m64_popcnt(__s64 a); | ||
115 | 12 | ||
116 | #define ia64_barrier() __memory_barrier() | 13 | #define ia64_barrier() __memory_barrier() |
117 | 14 | ||
@@ -122,15 +19,16 @@ __s64 _m64_popcnt(__s64 a); | |||
122 | #define ia64_getreg __getReg | 19 | #define ia64_getreg __getReg |
123 | #define ia64_setreg __setReg | 20 | #define ia64_setreg __setReg |
124 | 21 | ||
125 | #define ia64_hint(x) | 22 | #define ia64_hint __hint |
23 | #define ia64_hint_pause __hint_pause | ||
126 | 24 | ||
127 | #define ia64_mux1_brcst 0 | 25 | #define ia64_mux1_brcst _m64_mux1_brcst |
128 | #define ia64_mux1_mix 8 | 26 | #define ia64_mux1_mix _m64_mux1_mix |
129 | #define ia64_mux1_shuf 9 | 27 | #define ia64_mux1_shuf _m64_mux1_shuf |
130 | #define ia64_mux1_alt 10 | 28 | #define ia64_mux1_alt _m64_mux1_alt |
131 | #define ia64_mux1_rev 11 | 29 | #define ia64_mux1_rev _m64_mux1_rev |
132 | 30 | ||
133 | #define ia64_mux1 _m64_mux1 | 31 | #define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v))) |
134 | #define ia64_popcnt _m64_popcnt | 32 | #define ia64_popcnt _m64_popcnt |
135 | #define ia64_getf_exp __getf_exp | 33 | #define ia64_getf_exp __getf_exp |
136 | #define ia64_shrp _m64_shrp | 34 | #define ia64_shrp _m64_shrp |
@@ -158,7 +56,7 @@ __s64 _m64_popcnt(__s64 a); | |||
158 | #define ia64_stf8 __stf8 | 56 | #define ia64_stf8 __stf8 |
159 | #define ia64_stf_spill __stf_spill | 57 | #define ia64_stf_spill __stf_spill |
160 | 58 | ||
161 | #define ia64_mf __mf | 59 | #define ia64_mf __mf |
162 | #define ia64_mfa __mfa | 60 | #define ia64_mfa __mfa |
163 | 61 | ||
164 | #define ia64_fetchadd4_acq __fetchadd4_acq | 62 | #define ia64_fetchadd4_acq __fetchadd4_acq |
@@ -234,10 +132,10 @@ __s64 _m64_popcnt(__s64 a); | |||
234 | 132 | ||
235 | /* Values for lfhint in __lfetch and __lfetch_fault */ | 133 | /* Values for lfhint in __lfetch and __lfetch_fault */ |
236 | 134 | ||
237 | #define ia64_lfhint_none 0 | 135 | #define ia64_lfhint_none __lfhint_none |
238 | #define ia64_lfhint_nt1 1 | 136 | #define ia64_lfhint_nt1 __lfhint_nt1 |
239 | #define ia64_lfhint_nt2 2 | 137 | #define ia64_lfhint_nt2 __lfhint_nt2 |
240 | #define ia64_lfhint_nta 3 | 138 | #define ia64_lfhint_nta __lfhint_nta |
241 | 139 | ||
242 | #define ia64_lfetch __lfetch | 140 | #define ia64_lfetch __lfetch |
243 | #define ia64_lfetch_excl __lfetch_excl | 141 | #define ia64_lfetch_excl __lfetch_excl |
@@ -254,4 +152,6 @@ do { \ | |||
254 | } \ | 152 | } \ |
255 | } while (0) | 153 | } while (0) |
256 | 154 | ||
155 | #define __builtin_trap() __break(0); | ||
156 | |||
257 | #endif /* _ASM_IA64_INTEL_INTRIN_H */ | 157 | #endif /* _ASM_IA64_INTEL_INTRIN_H */ |
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index ca5ea994d688..c3e4ed8a3e17 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h | |||
@@ -20,6 +20,7 @@ struct scatterlist; | |||
20 | struct page; | 20 | struct page; |
21 | struct mm_struct; | 21 | struct mm_struct; |
22 | struct pci_bus; | 22 | struct pci_bus; |
23 | struct task_struct; | ||
23 | 24 | ||
24 | typedef void ia64_mv_setup_t (char **); | 25 | typedef void ia64_mv_setup_t (char **); |
25 | typedef void ia64_mv_cpu_init_t (void); | 26 | typedef void ia64_mv_cpu_init_t (void); |
@@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val, | |||
34 | u8 size); | 35 | u8 size); |
35 | typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, | 36 | typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, |
36 | u8 size); | 37 | u8 size); |
38 | typedef void ia64_mv_migrate_t(struct task_struct * task); | ||
37 | 39 | ||
38 | /* DMA-mapping interface: */ | 40 | /* DMA-mapping interface: */ |
39 | typedef void ia64_mv_dma_init (void); | 41 | typedef void ia64_mv_dma_init (void); |
@@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm) | |||
85 | { | 87 | { |
86 | } | 88 | } |
87 | 89 | ||
90 | static inline void | ||
91 | machvec_noop_task (struct task_struct *task) | ||
92 | { | ||
93 | } | ||
94 | |||
88 | extern void machvec_setup (char **); | 95 | extern void machvec_setup (char **); |
89 | extern void machvec_timer_interrupt (int, void *, struct pt_regs *); | 96 | extern void machvec_timer_interrupt (int, void *, struct pt_regs *); |
90 | extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); | 97 | extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); |
@@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); | |||
146 | # define platform_readw_relaxed ia64_mv.readw_relaxed | 153 | # define platform_readw_relaxed ia64_mv.readw_relaxed |
147 | # define platform_readl_relaxed ia64_mv.readl_relaxed | 154 | # define platform_readl_relaxed ia64_mv.readl_relaxed |
148 | # define platform_readq_relaxed ia64_mv.readq_relaxed | 155 | # define platform_readq_relaxed ia64_mv.readq_relaxed |
156 | # define platform_migrate ia64_mv.migrate | ||
149 | # endif | 157 | # endif |
150 | 158 | ||
151 | /* __attribute__((__aligned__(16))) is required to make size of the | 159 | /* __attribute__((__aligned__(16))) is required to make size of the |
@@ -194,6 +202,7 @@ struct ia64_machine_vector { | |||
194 | ia64_mv_readw_relaxed_t *readw_relaxed; | 202 | ia64_mv_readw_relaxed_t *readw_relaxed; |
195 | ia64_mv_readl_relaxed_t *readl_relaxed; | 203 | ia64_mv_readl_relaxed_t *readl_relaxed; |
196 | ia64_mv_readq_relaxed_t *readq_relaxed; | 204 | ia64_mv_readq_relaxed_t *readq_relaxed; |
205 | ia64_mv_migrate_t *migrate; | ||
197 | } __attribute__((__aligned__(16))); /* align attrib? see above comment */ | 206 | } __attribute__((__aligned__(16))); /* align attrib? see above comment */ |
198 | 207 | ||
199 | #define MACHVEC_INIT(name) \ | 208 | #define MACHVEC_INIT(name) \ |
@@ -238,6 +247,7 @@ struct ia64_machine_vector { | |||
238 | platform_readw_relaxed, \ | 247 | platform_readw_relaxed, \ |
239 | platform_readl_relaxed, \ | 248 | platform_readl_relaxed, \ |
240 | platform_readq_relaxed, \ | 249 | platform_readq_relaxed, \ |
250 | platform_migrate, \ | ||
241 | } | 251 | } |
242 | 252 | ||
243 | extern struct ia64_machine_vector ia64_mv; | 253 | extern struct ia64_machine_vector ia64_mv; |
@@ -386,5 +396,8 @@ extern ia64_mv_dma_supported swiotlb_dma_supported; | |||
386 | #ifndef platform_readq_relaxed | 396 | #ifndef platform_readq_relaxed |
387 | # define platform_readq_relaxed __ia64_readq_relaxed | 397 | # define platform_readq_relaxed __ia64_readq_relaxed |
388 | #endif | 398 | #endif |
399 | #ifndef platform_migrate | ||
400 | # define platform_migrate machvec_noop_task | ||
401 | #endif | ||
389 | 402 | ||
390 | #endif /* _ASM_IA64_MACHVEC_H */ | 403 | #endif /* _ASM_IA64_MACHVEC_H */ |
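
machvec.h gains an optional per-platform migrate hook: the machine vector carries a function pointer, and any platform that does not define platform_migrate falls back to the new machvec_noop_task(). A standalone sketch of that default-to-noop dispatch pattern (the structures and names here are illustrative, not the kernel definitions):

  #include <stdio.h>

  struct task { int id; };

  typedef void migrate_fn(struct task *t);

  /* default used when a platform supplies nothing */
  static void noop_migrate(struct task *t) { (void)t; }

  /* a platform such as sn2 can plug in its own handler instead */
  static void sn2_style_migrate(struct task *t)
  {
      printf("platform-specific migration work for task %d\n", t->id);
  }

  struct machine_vector { migrate_fn *migrate; };

  int main(void)
  {
      struct task t = { .id = 42 };
      struct machine_vector generic = { .migrate = noop_migrate };
      struct machine_vector sn2     = { .migrate = sn2_style_migrate };

      generic.migrate(&t);  /* ordinary platforms: does nothing */
      sn2.migrate(&t);      /* overriding platform: real work */
      return 0;
  }
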
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index 03d00faf03b5..da1d43755afe 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License | 5 | * under the terms of version 2 of the GNU General Public License |
@@ -66,6 +66,7 @@ extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device; | |||
66 | extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; | 66 | extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; |
67 | extern ia64_mv_dma_mapping_error sn_dma_mapping_error; | 67 | extern ia64_mv_dma_mapping_error sn_dma_mapping_error; |
68 | extern ia64_mv_dma_supported sn_dma_supported; | 68 | extern ia64_mv_dma_supported sn_dma_supported; |
69 | extern ia64_mv_migrate_t sn_migrate; | ||
69 | 70 | ||
70 | /* | 71 | /* |
71 | * This stuff has dual use! | 72 | * This stuff has dual use! |
@@ -115,6 +116,7 @@ extern ia64_mv_dma_supported sn_dma_supported; | |||
115 | #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device | 116 | #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device |
116 | #define platform_dma_mapping_error sn_dma_mapping_error | 117 | #define platform_dma_mapping_error sn_dma_mapping_error |
117 | #define platform_dma_supported sn_dma_supported | 118 | #define platform_dma_supported sn_dma_supported |
119 | #define platform_migrate sn_migrate | ||
118 | 120 | ||
119 | #include <asm/sn/io.h> | 121 | #include <asm/sn/io.h> |
120 | 122 | ||
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index c7d9c9ed38ba..bfbbb8da79c7 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h | |||
@@ -131,6 +131,8 @@ struct ia64_mca_cpu { | |||
131 | /* Array of physical addresses of each CPU's MCA area. */ | 131 | /* Array of physical addresses of each CPU's MCA area. */ |
132 | extern unsigned long __per_cpu_mca[NR_CPUS]; | 132 | extern unsigned long __per_cpu_mca[NR_CPUS]; |
133 | 133 | ||
134 | extern int cpe_vector; | ||
135 | extern int ia64_cpe_irq; | ||
134 | extern void ia64_mca_init(void); | 136 | extern void ia64_mca_init(void); |
135 | extern void ia64_mca_cpu_init(void *); | 137 | extern void ia64_mca_cpu_init(void *); |
136 | extern void ia64_os_mca_dispatch(void); | 138 | extern void ia64_os_mca_dispatch(void); |
diff --git a/include/asm-ia64/mutex.h b/include/asm-ia64/mutex.h index 458c1f7fbc18..5a3224f6af38 100644 --- a/include/asm-ia64/mutex.h +++ b/include/asm-ia64/mutex.h | |||
@@ -1,9 +1,92 @@ | |||
1 | /* | 1 | /* |
2 | * Pull in the generic implementation for the mutex fastpath. | 2 | * ia64 implementation of the mutex fastpath. |
3 | * | 3 | * |
4 | * TODO: implement optimized primitives instead, or leave the generic | 4 | * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com> |
5 | * implementation in place, or pick the atomic_xchg() based generic | 5 | * |
6 | * implementation. (see asm-generic/mutex-xchg.h for details) | 6 | */ |
7 | |||
8 | #ifndef _ASM_MUTEX_H | ||
9 | #define _ASM_MUTEX_H | ||
10 | |||
11 | /** | ||
12 | * __mutex_fastpath_lock - try to take the lock by moving the count | ||
13 | * from 1 to a 0 value | ||
14 | * @count: pointer of type atomic_t | ||
15 | * @fail_fn: function to call if the original value was not 1 | ||
16 | * | ||
17 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
18 | * it wasn't 1 originally. This function MUST leave the value lower than | ||
19 | * 1 even when the "1" assertion wasn't true. | ||
20 | */ | ||
21 | static inline void | ||
22 | __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
23 | { | ||
24 | if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) | ||
25 | fail_fn(count); | ||
26 | } | ||
27 | |||
28 | /** | ||
29 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | ||
30 | * from 1 to a 0 value | ||
31 | * @count: pointer of type atomic_t | ||
32 | * @fail_fn: function to call if the original value was not 1 | ||
33 | * | ||
34 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
35 | * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, | ||
36 | * or anything the slow path function returns. | ||
37 | */ | ||
38 | static inline int | ||
39 | __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
40 | { | ||
41 | if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) | ||
42 | return fail_fn(count); | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | /** | ||
47 | * __mutex_fastpath_unlock - try to promote the count from 0 to 1 | ||
48 | * @count: pointer of type atomic_t | ||
49 | * @fail_fn: function to call if the original value was not 0 | ||
50 | * | ||
51 | * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. | ||
52 | * In the failure case, this function is allowed to either set the value to | ||
53 | * 1, or to set it to a value lower than 1. | ||
54 | * | ||
55 | * If the implementation sets it to a value of lower than 1, then the | ||
56 | * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs | ||
57 | * to return 0 otherwise. | ||
58 | */ | ||
59 | static inline void | ||
60 | __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
61 | { | ||
62 | int ret = ia64_fetchadd4_rel(count, 1); | ||
63 | if (unlikely(ret < 0)) | ||
64 | fail_fn(count); | ||
65 | } | ||
66 | |||
67 | #define __mutex_slowpath_needs_to_unlock() 1 | ||
68 | |||
69 | /** | ||
70 | * __mutex_fastpath_trylock - try to acquire the mutex, without waiting | ||
71 | * | ||
72 | * @count: pointer of type atomic_t | ||
73 | * @fail_fn: fallback function | ||
74 | * | ||
75 | * Change the count from 1 to a value lower than 1, and return 0 (failure) | ||
76 | * if it wasn't 1 originally, or return 1 (success) otherwise. This function | ||
77 | * MUST leave the value lower than 1 even when the "1" assertion wasn't true. | ||
78 | * Additionally, if the value was < 0 originally, this function must not leave | ||
79 | * it to 0 on failure. | ||
80 | * | ||
81 | * If the architecture has no effective trylock variant, it should call the | ||
82 | * <fail_fn> spinlock-based trylock variant unconditionally. | ||
7 | */ | 83 | */ |
84 | static inline int | ||
85 | __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
86 | { | ||
87 | if (likely(cmpxchg_acq(count, 1, 0) == 1)) | ||
88 | return 1; | ||
89 | return 0; | ||
90 | } | ||
8 | 91 | ||
9 | #include <asm-generic/mutex-dec.h> | 92 | #endif |
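
The new ia64 mutex.h implements the generic mutex fastpath with fetchadd: the count is 1 when unlocked, 0 when locked, and negative when there are waiters, so an uncontended lock is a single 1 -> 0 transition and anything else falls through to the slowpath. A userspace sketch of the same protocol, with GCC __atomic builtins standing in for ia64_fetchadd4_acq/_rel and the slowpaths reduced to stubs:

  #include <stdio.h>

  static int count = 1;   /* 1 = unlocked, 0 = locked, <0 = waiters */

  static void slowpath_lock(int *c)   { (void)c; printf("contended: block\n"); }
  static void slowpath_unlock(int *c) { (void)c; printf("waiters: wake one\n"); }

  static void mutex_lock_sketch(void)
  {
      /* fetchadd returns the old value; 1 means we got it uncontended */
      if (__atomic_fetch_add(&count, -1, __ATOMIC_ACQUIRE) != 1)
          slowpath_lock(&count);
  }

  static void mutex_unlock_sketch(void)
  {
      /* an old value below zero means someone queued up behind us */
      if (__atomic_fetch_add(&count, 1, __ATOMIC_RELEASE) < 0)
          slowpath_unlock(&count);
  }

  int main(void)
  {
      mutex_lock_sketch();    /* fastpath: count 1 -> 0 */
      mutex_unlock_sketch();  /* fastpath: count 0 -> 1 */
      return 0;
  }
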
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 5e6362a786b7..3ab27333dae4 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h | |||
@@ -57,6 +57,8 @@ | |||
57 | 57 | ||
58 | # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 58 | # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
59 | # define ARCH_HAS_HUGEPAGE_ONLY_RANGE | 59 | # define ARCH_HAS_HUGEPAGE_ONLY_RANGE |
60 | # define ARCH_HAS_PREPARE_HUGEPAGE_RANGE | ||
61 | # define ARCH_HAS_HUGETLB_FREE_PGD_RANGE | ||
60 | #endif /* CONFIG_HUGETLB_PAGE */ | 62 | #endif /* CONFIG_HUGETLB_PAGE */ |
61 | 63 | ||
62 | #ifdef __ASSEMBLY__ | 64 | #ifdef __ASSEMBLY__ |
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index e2560c58384b..c0f8144f2349 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h | |||
@@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr) | |||
314 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) | 314 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) |
315 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) | 315 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) |
316 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) | 316 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) |
317 | #define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P)) | 317 | #define pte_mkhuge(pte) (__pte(pte_val(pte))) |
318 | 318 | ||
319 | /* | 319 | /* |
320 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a | 320 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a |
@@ -505,9 +505,6 @@ extern struct page *zero_page_memmap_ptr; | |||
505 | #define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) | 505 | #define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) |
506 | #define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) | 506 | #define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) |
507 | #define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) | 507 | #define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) |
508 | struct mmu_gather; | ||
509 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, | ||
510 | unsigned long end, unsigned long floor, unsigned long ceiling); | ||
511 | #endif | 508 | #endif |
512 | 509 | ||
513 | /* | 510 | /* |
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 23c8e1be1911..128fefd8056f 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h | |||
@@ -50,7 +50,8 @@ | |||
50 | #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ | 50 | #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ |
51 | #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ | 51 | #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ |
52 | #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ | 52 | #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ |
53 | /* bit 5 is currently unused */ | 53 | #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration |
54 | sync at ctx sw */ | ||
54 | #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ | 55 | #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ |
55 | #define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ | 56 | #define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ |
56 | 57 | ||
diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h index 608168d713d3..5e328ed5d01d 100644 --- a/include/asm-ia64/signal.h +++ b/include/asm-ia64/signal.h | |||
@@ -158,8 +158,6 @@ struct k_sigaction { | |||
158 | 158 | ||
159 | #define ptrace_signal_deliver(regs, cookie) do { } while (0) | 159 | #define ptrace_signal_deliver(regs, cookie) do { } while (0) |
160 | 160 | ||
161 | void set_sigdelayed(pid_t pid, int signo, int code, void __user *addr); | ||
162 | |||
163 | #endif /* __KERNEL__ */ | 161 | #endif /* __KERNEL__ */ |
164 | 162 | ||
165 | # endif /* !__ASSEMBLY__ */ | 163 | # endif /* !__ASSEMBLY__ */ |
diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 2c32e4b77b54..1d9efe541662 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h | |||
@@ -283,5 +283,13 @@ | |||
283 | #define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) | 283 | #define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) |
284 | #define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) | 284 | #define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) |
285 | 285 | ||
286 | /* | ||
287 | * Coretalk address breakdown | ||
288 | */ | ||
289 | #define CTALK_NASID_SHFT 40 | ||
290 | #define CTALK_NASID_MASK (0x3FFFULL << CTALK_NASID_SHFT) | ||
291 | #define CTALK_CID_SHFT 38 | ||
292 | #define CTALK_CID_MASK (0x3ULL << CTALK_CID_SHFT) | ||
293 | #define CTALK_NODE_OFFSET 0x3FFFFFFFFF | ||
286 | 294 | ||
287 | #endif /* _ASM_IA64_SN_ADDRS_H */ | 295 | #endif /* _ASM_IA64_SN_ADDRS_H */ |
diff --git a/include/asm-ia64/sn/rw_mmr.h b/include/asm-ia64/sn/rw_mmr.h index f40fd1a5510d..2d78f4c5a45e 100644 --- a/include/asm-ia64/sn/rw_mmr.h +++ b/include/asm-ia64/sn/rw_mmr.h | |||
@@ -3,15 +3,14 @@ | |||
3 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
4 | * for more details. | 4 | * for more details. |
5 | * | 5 | * |
6 | * Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. | 6 | * Copyright (C) 2002-2006 Silicon Graphics, Inc. All Rights Reserved. |
7 | */ | 7 | */ |
8 | #ifndef _ASM_IA64_SN_RW_MMR_H | 8 | #ifndef _ASM_IA64_SN_RW_MMR_H |
9 | #define _ASM_IA64_SN_RW_MMR_H | 9 | #define _ASM_IA64_SN_RW_MMR_H |
10 | 10 | ||
11 | 11 | ||
12 | /* | 12 | /* |
13 | * This file contains macros used to access MMR registers via | 13 | * This file declares functions that access MMRs via uncached physical addresses. |
14 | * uncached physical addresses. | ||
15 | * pio_phys_read_mmr - read an MMR | 14 | * pio_phys_read_mmr - read an MMR |
16 | * pio_phys_write_mmr - write an MMR | 15 | * pio_phys_write_mmr - write an MMR |
17 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 | 16 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 |
@@ -22,53 +21,8 @@ | |||
22 | */ | 21 | */ |
23 | 22 | ||
24 | 23 | ||
25 | extern inline long | 24 | extern long pio_phys_read_mmr(volatile long *mmr); |
26 | pio_phys_read_mmr(volatile long *mmr) | 25 | extern void pio_phys_write_mmr(volatile long *mmr, long val); |
27 | { | 26 | extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); |
28 | long val; | ||
29 | asm volatile | ||
30 | ("mov r2=psr;;" | ||
31 | "rsm psr.i | psr.dt;;" | ||
32 | "srlz.i;;" | ||
33 | "ld8.acq %0=[%1];;" | ||
34 | "mov psr.l=r2;;" | ||
35 | "srlz.i;;" | ||
36 | : "=r"(val) | ||
37 | : "r"(mmr) | ||
38 | : "r2"); | ||
39 | return val; | ||
40 | } | ||
41 | |||
42 | |||
43 | |||
44 | extern inline void | ||
45 | pio_phys_write_mmr(volatile long *mmr, long val) | ||
46 | { | ||
47 | asm volatile | ||
48 | ("mov r2=psr;;" | ||
49 | "rsm psr.i | psr.dt;;" | ||
50 | "srlz.i;;" | ||
51 | "st8.rel [%0]=%1;;" | ||
52 | "mov psr.l=r2;;" | ||
53 | "srlz.i;;" | ||
54 | :: "r"(mmr), "r"(val) | ||
55 | : "r2", "memory"); | ||
56 | } | ||
57 | |||
58 | extern inline void | ||
59 | pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2) | ||
60 | { | ||
61 | asm volatile | ||
62 | ("mov r2=psr;;" | ||
63 | "rsm psr.i | psr.dt | psr.ic;;" | ||
64 | "cmp.ne p9,p0=%2,r0;" | ||
65 | "srlz.i;;" | ||
66 | "st8.rel [%0]=%1;" | ||
67 | "(p9) st8.rel [%2]=%3;;" | ||
68 | "mov psr.l=r2;;" | ||
69 | "srlz.i;;" | ||
70 | :: "r"(mmr1), "r"(val1), "r"(mmr2), "r"(val2) | ||
71 | : "p9", "r2", "memory"); | ||
72 | } | ||
73 | 27 | ||
74 | #endif /* _ASM_IA64_SN_RW_MMR_H */ | 28 | #endif /* _ASM_IA64_SN_RW_MMR_H */ |
diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h index d4c990712eac..893468e1b41b 100644 --- a/include/asm-ia64/sn/tioce.h +++ b/include/asm-ia64/sn/tioce.h | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | /* CE ASIC part & mfgr information */ | 12 | /* CE ASIC part & mfgr information */ |
13 | #define TIOCE_PART_NUM 0xCE00 | 13 | #define TIOCE_PART_NUM 0xCE00 |
14 | #define TIOCE_MFGR_NUM 0x36 | 14 | #define TIOCE_SRC_ID 0x01 |
15 | #define TIOCE_REV_A 0x1 | 15 | #define TIOCE_REV_A 0x1 |
16 | 16 | ||
17 | /* CE Virtual PPB Vendor/Device IDs */ | 17 | /* CE Virtual PPB Vendor/Device IDs */ |
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | /* CE Host Bridge Vendor/Device IDs */ | 21 | /* CE Host Bridge Vendor/Device IDs */ |
22 | #define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 | 22 | #define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 |
23 | #define CE_HOST_BRIDGE_DEVICE_ID 0x4003 | 23 | #define CE_HOST_BRIDGE_DEVICE_ID 0x4001 |
24 | 24 | ||
25 | 25 | ||
26 | #define TIOCE_NUM_M40_ATES 4096 | 26 | #define TIOCE_NUM_M40_ATES 4096 |
@@ -463,6 +463,25 @@ typedef volatile struct tioce { | |||
463 | u64 ce_end_of_struct; /* 0x044400 */ | 463 | u64 ce_end_of_struct; /* 0x044400 */ |
464 | } tioce_t; | 464 | } tioce_t; |
465 | 465 | ||
466 | /* ce_lsiX_gb_cfg1 register bit masks & shifts */ | ||
467 | #define CE_LSI_GB_CFG1_RXL0S_THS_SHFT 0 | ||
468 | #define CE_LSI_GB_CFG1_RXL0S_THS_MASK (0xffULL << 0) | ||
469 | #define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT 8 | ||
470 | #define CE_LSI_GB_CFG1_RXL0S_SMP_MASK (0xfULL << 8) | ||
471 | #define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT 12 | ||
472 | #define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK (0x7ULL << 12) | ||
473 | #define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT 15 | ||
474 | #define CE_LSI_GB_CFG1_RXL0S_FLT_MASK (0x1ULL << 15) | ||
475 | #define CE_LSI_GB_CFG1_LPBK_SEL_SHFT 16 | ||
476 | #define CE_LSI_GB_CFG1_LPBK_SEL_MASK (0x3ULL << 16) | ||
477 | #define CE_LSI_GB_CFG1_LPBK_EN_SHFT 18 | ||
478 | #define CE_LSI_GB_CFG1_LPBK_EN_MASK (0x1ULL << 18) | ||
479 | #define CE_LSI_GB_CFG1_RVRS_LB_SHFT 19 | ||
480 | #define CE_LSI_GB_CFG1_RVRS_LB_MASK (0x1ULL << 19) | ||
481 | #define CE_LSI_GB_CFG1_RVRS_CLK_SHFT 20 | ||
482 | #define CE_LSI_GB_CFG1_RVRS_CLK_MASK (0x3ULL << 20) | ||
483 | #define CE_LSI_GB_CFG1_SLF_TS_SHFT 24 | ||
484 | #define CE_LSI_GB_CFG1_SLF_TS_MASK (0xfULL << 24) | ||
466 | 485 | ||
467 | /* ce_adm_int_mask/ce_adm_int_status register bit defines */ | 486 | /* ce_adm_int_mask/ce_adm_int_status register bit defines */ |
468 | #define CE_ADM_INT_CE_ERROR_SHFT 0 | 487 | #define CE_ADM_INT_CE_ERROR_SHFT 0 |
@@ -592,6 +611,11 @@ typedef volatile struct tioce { | |||
592 | #define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) | 611 | #define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) |
593 | #define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) | 612 | #define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) |
594 | #define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) | 613 | #define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) |
614 | #define CE_URE_WRT_MRG_TIMER_SHFT 12 | ||
615 | #define CE_URE_WRT_MRG_TIMER_MASK (0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT) | ||
616 | #define CE_URE_WRT_MRG_TIMER(x) (((u64)(x) << \ | ||
617 | CE_URE_WRT_MRG_TIMER_SHFT) & \ | ||
618 | CE_URE_WRT_MRG_TIMER_MASK) | ||
595 | #define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) | 619 | #define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) |
596 | #define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) | 620 | #define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) |
597 | #define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) | 621 | #define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) |
@@ -653,8 +677,12 @@ typedef volatile struct tioce { | |||
653 | #define CE_URE_SI (0x1ULL << 0) | 677 | #define CE_URE_SI (0x1ULL << 0) |
654 | #define CE_URE_ELAL_SHFT 4 | 678 | #define CE_URE_ELAL_SHFT 4 |
655 | #define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) | 679 | #define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) |
680 | #define CE_URE_ELAL_SET(n) (((u64)(n) << CE_URE_ELAL_SHFT) & \ | ||
681 | CE_URE_ELAL_MASK) | ||
656 | #define CE_URE_ELAL1_SHFT 8 | 682 | #define CE_URE_ELAL1_SHFT 8 |
657 | #define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) | 683 | #define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) |
684 | #define CE_URE_ELAL1_SET(n) (((u64)(n) << CE_URE_ELAL1_SHFT) & \ | ||
685 | CE_URE_ELAL1_MASK) | ||
658 | #define CE_URE_SCC (0x1ULL << 12) | 686 | #define CE_URE_SCC (0x1ULL << 12) |
659 | #define CE_URE_PN1_SHFT 16 | 687 | #define CE_URE_PN1_SHFT 16 |
660 | #define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) | 688 | #define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) |
@@ -675,8 +703,12 @@ typedef volatile struct tioce { | |||
675 | #define CE_URE_HPC (0x1ULL << 6) | 703 | #define CE_URE_HPC (0x1ULL << 6) |
676 | #define CE_URE_SPLV_SHFT 7 | 704 | #define CE_URE_SPLV_SHFT 7 |
677 | #define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) | 705 | #define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) |
706 | #define CE_URE_SPLV_SET(n) (((u64)(n) << CE_URE_SPLV_SHFT) & \ | ||
707 | CE_URE_SPLV_MASK) | ||
678 | #define CE_URE_SPLS_SHFT 15 | 708 | #define CE_URE_SPLS_SHFT 15 |
679 | #define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) | 709 | #define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) |
710 | #define CE_URE_SPLS_SET(n) (((u64)(n) << CE_URE_SPLS_SHFT) & \ | ||
711 | CE_URE_SPLS_MASK) | ||
680 | #define CE_URE_PSN1_SHFT 19 | 712 | #define CE_URE_PSN1_SHFT 19 |
681 | #define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) | 713 | #define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) |
682 | #define CE_URE_PSN2_SHFT 32 | 714 | #define CE_URE_PSN2_SHFT 32 |
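
The CE_URE_*_SET() and CE_URE_WRT_MRG_TIMER() helpers added to tioce.h all follow one pattern: shift the value into its register field and clip it with the field mask so it cannot spill into neighbouring bits. A minimal standalone illustration (the field position and width are arbitrary, not a real TIO CE register):

  #include <assert.h>
  #include <stdint.h>

  #define FIELD_SHFT   7
  #define FIELD_MASK   (0xFFULL << FIELD_SHFT)
  #define FIELD_SET(n) (((uint64_t)(n) << FIELD_SHFT) & FIELD_MASK)

  int main(void)
  {
      uint64_t reg = 0;

      /* place a value into its bit positions within the register */
      reg |= FIELD_SET(0x3A);
      assert(((reg & FIELD_MASK) >> FIELD_SHFT) == 0x3A);

      /* an out-of-range value is truncated by the mask instead of
       * corrupting the adjacent fields */
      assert(FIELD_SET(0x1FF) == FIELD_SET(0xFF));
      return 0;
  }
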
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index df7f5f4f3cde..aa3b8ace9030 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h | |||
@@ -1227,28 +1227,6 @@ xpc_map_bte_errors(bte_result_t error) | |||
1227 | 1227 | ||
1228 | 1228 | ||
1229 | 1229 | ||
1230 | static inline void * | ||
1231 | xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
1232 | { | ||
1233 | /* see if kmalloc will give us cachline aligned memory by default */ | ||
1234 | *base = kmalloc(size, flags); | ||
1235 | if (*base == NULL) { | ||
1236 | return NULL; | ||
1237 | } | ||
1238 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
1239 | return *base; | ||
1240 | } | ||
1241 | kfree(*base); | ||
1242 | |||
1243 | /* nope, we'll have to do it ourselves */ | ||
1244 | *base = kmalloc(size + L1_CACHE_BYTES, flags); | ||
1245 | if (*base == NULL) { | ||
1246 | return NULL; | ||
1247 | } | ||
1248 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
1249 | } | ||
1250 | |||
1251 | |||
1252 | /* | 1230 | /* |
1253 | * Check to see if there is any channel activity to/from the specified | 1231 | * Check to see if there is any channel activity to/from the specified |
1254 | * partition. | 1232 | * partition. |
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 062538715623..cd4233d66f15 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h | |||
@@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task_struct *task); | |||
244 | __ia64_save_fpu((prev)->thread.fph); \ | 244 | __ia64_save_fpu((prev)->thread.fph); \ |
245 | } \ | 245 | } \ |
246 | __switch_to(prev, next, last); \ | 246 | __switch_to(prev, next, last); \ |
247 | /* "next" in old context is "current" in new context */ \ | ||
248 | if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ | ||
249 | (task_cpu(current) != \ | ||
250 | task_thread_info(current)->last_cpu))) { \ | ||
251 | platform_migrate(current); \ | ||
252 | task_thread_info(current)->last_cpu = task_cpu(current); \ | ||
253 | } \ | ||
247 | } while (0) | 254 | } while (0) |
248 | #else | 255 | #else |
249 | # define switch_to(prev,next,last) __switch_to(prev, next, last) | 256 | # define switch_to(prev,next,last) __switch_to(prev, next, last) |
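
The switch_to() addition only fires when the incoming task carries the new IA64_THREAD_MIGRATION flag and has actually moved to a different CPU since it last ran, in which case it calls platform_migrate() and records the new CPU in last_cpu. The same check written as a plain function, with userspace stand-ins for the task/thread_info fields:

  #include <stdio.h>

  #define THREAD_MIGRATION_FLAG (1u << 5)

  struct thread {
      unsigned int flags;
      int cpu;        /* CPU the task runs on now */
      int last_cpu;   /* CPU it last ran on */
  };

  static void platform_migrate_stub(struct thread *t)
  {
      printf("migration sync before running on cpu %d\n", t->cpu);
  }

  static void post_switch_check(struct thread *cur)
  {
      /* "next" in the old context is "current" in the new context */
      if ((cur->flags & THREAD_MIGRATION_FLAG) && cur->cpu != cur->last_cpu) {
          platform_migrate_stub(cur);
          cur->last_cpu = cur->cpu;
      }
  }

  int main(void)
  {
      struct thread t = { .flags = THREAD_MIGRATION_FLAG, .cpu = 3, .last_cpu = 1 };
      post_switch_check(&t);  /* flag set and CPU changed: hook runs */
      post_switch_check(&t);  /* last_cpu now matches: nothing to do */
      return 0;
  }
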
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 1d6518fe1f02..56394a2c7055 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h | |||
@@ -26,16 +26,10 @@ struct thread_info { | |||
26 | struct exec_domain *exec_domain;/* execution domain */ | 26 | struct exec_domain *exec_domain;/* execution domain */ |
27 | __u32 flags; /* thread_info flags (see TIF_*) */ | 27 | __u32 flags; /* thread_info flags (see TIF_*) */ |
28 | __u32 cpu; /* current CPU */ | 28 | __u32 cpu; /* current CPU */ |
29 | __u32 last_cpu; /* Last CPU thread ran on */ | ||
29 | mm_segment_t addr_limit; /* user-level address space limit */ | 30 | mm_segment_t addr_limit; /* user-level address space limit */ |
30 | int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ | 31 | int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ |
31 | struct restart_block restart_block; | 32 | struct restart_block restart_block; |
32 | struct { | ||
33 | int signo; | ||
34 | int code; | ||
35 | void __user *addr; | ||
36 | unsigned long start_time; | ||
37 | pid_t pid; | ||
38 | } sigdelayed; /* Saved information for TIF_SIGDELAYED */ | ||
39 | }; | 33 | }; |
40 | 34 | ||
41 | #define THREAD_SIZE KERNEL_STACK_SIZE | 35 | #define THREAD_SIZE KERNEL_STACK_SIZE |
@@ -89,7 +83,6 @@ struct thread_info { | |||
89 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | 83 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ |
90 | #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ | 84 | #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ |
91 | #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ | 85 | #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ |
92 | #define TIF_SIGDELAYED 5 /* signal delayed from MCA/INIT/NMI/PMI context */ | ||
93 | #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ | 86 | #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ |
94 | #define TIF_MEMDIE 17 | 87 | #define TIF_MEMDIE 17 |
95 | #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ | 88 | #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ |
@@ -101,13 +94,12 @@ struct thread_info { | |||
101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 94 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
102 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | 95 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) |
103 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | 96 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) |
104 | #define _TIF_SIGDELAYED (1 << TIF_SIGDELAYED) | ||
105 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) | 97 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) |
106 | #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) | 98 | #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) |
107 | #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) | 99 | #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) |
108 | 100 | ||
109 | /* "work to do on user-return" bits */ | 101 | /* "work to do on user-return" bits */ |
110 | #define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED) | 102 | #define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) |
111 | /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ | 103 | /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ |
112 | #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) | 104 | #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) |
113 | 105 | ||
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index e38931379a72..185ee15963a1 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h | |||
@@ -468,11 +468,6 @@ extern pgd_t swapper_pg_dir[]; | |||
468 | 468 | ||
469 | extern void paging_init(void); | 469 | extern void paging_init(void); |
470 | 470 | ||
471 | #ifdef CONFIG_HUGETLB_PAGE | ||
472 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | ||
473 | free_pgd_range(tlb, addr, end, floor, ceiling) | ||
474 | #endif | ||
475 | |||
476 | /* | 471 | /* |
477 | * This gets called at the end of handling a page fault, when | 472 | * This gets called at the end of handling a page fault, when |
478 | * the kernel has put a new PTE into the page table for the process. | 473 | * the kernel has put a new PTE into the page table for the process. |
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 3417dd71ab43..e28aaf28e4a8 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h | |||
@@ -158,11 +158,4 @@ static inline void pte_free(struct page *pte) | |||
158 | 158 | ||
159 | #define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte) | 159 | #define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte) |
160 | 160 | ||
161 | /* | ||
162 | * This establishes kernel virtual mappings (e.g., as a result of a | ||
163 | * vmalloc call). Since s390-esame uses a separate kernel page table, | ||
164 | * there is nothing to do here... :) | ||
165 | */ | ||
166 | #define set_pgdir(addr,entry) do { } while(0) | ||
167 | |||
168 | #endif /* _S390_PGALLOC_H */ | 161 | #endif /* _S390_PGALLOC_H */ |
diff --git a/include/asm-sh64/pgalloc.h b/include/asm-sh64/pgalloc.h index 678251ac1db8..b29dd468817e 100644 --- a/include/asm-sh64/pgalloc.h +++ b/include/asm-sh64/pgalloc.h | |||
@@ -167,22 +167,6 @@ static __inline__ void pmd_free(pmd_t *pmd) | |||
167 | 167 | ||
168 | extern int do_check_pgt_cache(int, int); | 168 | extern int do_check_pgt_cache(int, int); |
169 | 169 | ||
170 | static inline void set_pgdir(unsigned long address, pgd_t entry) | ||
171 | { | ||
172 | struct task_struct * p; | ||
173 | pgd_t *pgd; | ||
174 | |||
175 | read_lock(&tasklist_lock); | ||
176 | for_each_process(p) { | ||
177 | if (!p->mm) | ||
178 | continue; | ||
179 | *pgd_offset(p->mm,address) = entry; | ||
180 | } | ||
181 | read_unlock(&tasklist_lock); | ||
182 | for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) | ||
183 | pgd[address >> PGDIR_SHIFT] = entry; | ||
184 | } | ||
185 | |||
186 | #define pmd_populate_kernel(mm, pmd, pte) \ | 170 | #define pmd_populate_kernel(mm, pmd, pte) \ |
187 | set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte))) | 171 | set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte))) |
188 | 172 | ||
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 715fd94cf577..a617d364d08d 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h | |||
@@ -273,7 +273,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } | |||
273 | static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } | 273 | static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } |
274 | static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } | 274 | static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } |
275 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } | 275 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } |
276 | static inline int pte_huge(pte_t pte) { return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; } | 276 | static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } |
277 | 277 | ||
278 | static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 278 | static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
279 | static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 279 | static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
@@ -285,7 +285,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _ | |||
285 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } | 285 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } |
286 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } | 286 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } |
287 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } | 287 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } |
288 | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; } | 288 | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } |
289 | 289 | ||
290 | struct vm_area_struct; | 290 | struct vm_area_struct; |
291 | 291 | ||
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 68d82ad6b17c..d6f1019625af 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -20,10 +20,7 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long) | |||
20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); | 20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); |
21 | int hugetlb_report_meminfo(char *); | 21 | int hugetlb_report_meminfo(char *); |
22 | int hugetlb_report_node_meminfo(int, char *); | 22 | int hugetlb_report_node_meminfo(int, char *); |
23 | int is_hugepage_mem_enough(size_t); | ||
24 | unsigned long hugetlb_total_pages(void); | 23 | unsigned long hugetlb_total_pages(void); |
25 | struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); | ||
26 | void free_huge_page(struct page *); | ||
27 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 24 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
28 | unsigned long address, int write_access); | 25 | unsigned long address, int write_access); |
29 | 26 | ||
@@ -39,18 +36,35 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | |||
39 | int write); | 36 | int write); |
40 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 37 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
41 | pmd_t *pmd, int write); | 38 | pmd_t *pmd, int write); |
42 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len); | ||
43 | int pmd_huge(pmd_t pmd); | 39 | int pmd_huge(pmd_t pmd); |
40 | void hugetlb_change_protection(struct vm_area_struct *vma, | ||
41 | unsigned long address, unsigned long end, pgprot_t newprot); | ||
44 | 42 | ||
45 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE | 43 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE |
46 | #define is_hugepage_only_range(mm, addr, len) 0 | 44 | #define is_hugepage_only_range(mm, addr, len) 0 |
47 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 45 | #endif |
48 | do { } while (0) | 46 | |
47 | #ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE | ||
48 | #define hugetlb_free_pgd_range free_pgd_range | ||
49 | #else | ||
50 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, | ||
51 | unsigned long end, unsigned long floor, | ||
52 | unsigned long ceiling); | ||
49 | #endif | 53 | #endif |
50 | 54 | ||
51 | #ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE | 55 | #ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE |
52 | #define prepare_hugepage_range(addr, len) \ | 56 | /* |
53 | is_aligned_hugepage_range(addr, len) | 57 | * If the arch doesn't supply something else, assume that hugepage |
58 | * size aligned regions are ok without further preparation. | ||
59 | */ | ||
60 | static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) | ||
61 | { | ||
62 | if (len & ~HPAGE_MASK) | ||
63 | return -EINVAL; | ||
64 | if (addr & ~HPAGE_MASK) | ||
65 | return -EINVAL; | ||
66 | return 0; | ||
67 | } | ||
54 | #else | 68 | #else |
55 | int prepare_hugepage_range(unsigned long addr, unsigned long len); | 69 | int prepare_hugepage_range(unsigned long addr, unsigned long len); |
56 | #endif | 70 | #endif |
@@ -87,20 +101,17 @@ static inline unsigned long hugetlb_total_pages(void) | |||
87 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) | 101 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) |
88 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) | 102 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) |
89 | #define unmap_hugepage_range(vma, start, end) BUG() | 103 | #define unmap_hugepage_range(vma, start, end) BUG() |
90 | #define is_hugepage_mem_enough(size) 0 | ||
91 | #define hugetlb_report_meminfo(buf) 0 | 104 | #define hugetlb_report_meminfo(buf) 0 |
92 | #define hugetlb_report_node_meminfo(n, buf) 0 | 105 | #define hugetlb_report_node_meminfo(n, buf) 0 |
93 | #define follow_huge_pmd(mm, addr, pmd, write) NULL | 106 | #define follow_huge_pmd(mm, addr, pmd, write) NULL |
94 | #define is_aligned_hugepage_range(addr, len) 0 | ||
95 | #define prepare_hugepage_range(addr, len) (-EINVAL) | 107 | #define prepare_hugepage_range(addr, len) (-EINVAL) |
96 | #define pmd_huge(x) 0 | 108 | #define pmd_huge(x) 0 |
97 | #define is_hugepage_only_range(mm, addr, len) 0 | 109 | #define is_hugepage_only_range(mm, addr, len) 0 |
98 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 110 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) |
99 | do { } while (0) | ||
100 | #define alloc_huge_page(vma, addr) ({ NULL; }) | ||
101 | #define free_huge_page(p) ({ (void)(p); BUG(); }) | ||
102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) | 111 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) |
103 | 112 | ||
113 | #define hugetlb_change_protection(vma, address, end, newprot) | ||
114 | |||
104 | #ifndef HPAGE_MASK | 115 | #ifndef HPAGE_MASK |
105 | #define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ | 116 | #define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ |
106 | #define HPAGE_SIZE PAGE_SIZE | 117 | #define HPAGE_SIZE PAGE_SIZE |
@@ -128,6 +139,8 @@ struct hugetlbfs_sb_info { | |||
128 | 139 | ||
129 | struct hugetlbfs_inode_info { | 140 | struct hugetlbfs_inode_info { |
130 | struct shared_policy policy; | 141 | struct shared_policy policy; |
142 | /* Protected by the (global) hugetlb_lock */ | ||
143 | unsigned long prereserved_hpages; | ||
131 | struct inode vfs_inode; | 144 | struct inode vfs_inode; |
132 | }; | 145 | }; |
133 | 146 | ||
@@ -144,6 +157,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) | |||
144 | extern struct file_operations hugetlbfs_file_operations; | 157 | extern struct file_operations hugetlbfs_file_operations; |
145 | extern struct vm_operations_struct hugetlb_vm_ops; | 158 | extern struct vm_operations_struct hugetlb_vm_ops; |
146 | struct file *hugetlb_zero_setup(size_t); | 159 | struct file *hugetlb_zero_setup(size_t); |
160 | int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, | ||
161 | unsigned long atleast_hpages); | ||
162 | void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, | ||
163 | unsigned long atmost_hpages); | ||
147 | int hugetlb_get_quota(struct address_space *mapping); | 164 | int hugetlb_get_quota(struct address_space *mapping); |
148 | void hugetlb_put_quota(struct address_space *mapping); | 165 | void hugetlb_put_quota(struct address_space *mapping); |
149 | 166 | ||
diff --git a/include/linux/migrate.h b/include/linux/migrate.h new file mode 100644 index 000000000000..7d09962c3c0b --- /dev/null +++ b/include/linux/migrate.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #ifndef _LINUX_MIGRATE_H | ||
2 | #define _LINUX_MIGRATE_H | ||
3 | |||
4 | #include <linux/config.h> | ||
5 | #include <linux/mm.h> | ||
6 | |||
7 | #ifdef CONFIG_MIGRATION | ||
8 | extern int isolate_lru_page(struct page *p, struct list_head *pagelist); | ||
9 | extern int putback_lru_pages(struct list_head *l); | ||
10 | extern int migrate_page(struct page *, struct page *); | ||
11 | extern void migrate_page_copy(struct page *, struct page *); | ||
12 | extern int migrate_page_remove_references(struct page *, struct page *, int); | ||
13 | extern int migrate_pages(struct list_head *l, struct list_head *t, | ||
14 | struct list_head *moved, struct list_head *failed); | ||
15 | int migrate_pages_to(struct list_head *pagelist, | ||
16 | struct vm_area_struct *vma, int dest); | ||
17 | extern int fail_migrate_page(struct page *, struct page *); | ||
18 | |||
19 | extern int migrate_prep(void); | ||
20 | |||
21 | #else | ||
22 | |||
23 | static inline int isolate_lru_page(struct page *p, struct list_head *list) | ||
24 | { return -ENOSYS; } | ||
25 | static inline int putback_lru_pages(struct list_head *l) { return 0; } | ||
26 | static inline int migrate_pages(struct list_head *l, struct list_head *t, | ||
27 | struct list_head *moved, struct list_head *failed) { return -ENOSYS; } | ||
28 | |||
29 | static inline int migrate_prep(void) { return -ENOSYS; } | ||
30 | |||
31 | /* Possible settings for the migrate_page() method in address_operations */ | ||
32 | #define migrate_page NULL | ||
33 | #define fail_migrate_page NULL | ||
34 | |||
35 | #endif /* CONFIG_MIGRATION */ | ||
36 | #endif /* _LINUX_MIGRATE_H */ | ||
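Editor's note: the new header above collects the page migration entry points that previously lived in swap.h (their removal appears further down in include/linux/swap.h). For orientation only, a caller such as do_migrate_pages() is expected to drive this API roughly as sketched below; this is an editor's schematic with the surrounding range walk omitted, not code from the patch, and it assumes CONFIG_MIGRATION=y.

    /* Schematic migration caller; 'page' and 'dest' come from the
     * surrounding walk, which is not shown here. */
    LIST_HEAD(pagelist);            /* pages isolated from the LRU */
    int ret;

    ret = migrate_prep();           /* e.g. drain per-CPU LRU pagevecs */
    if (ret)
        return ret;

    /* ... for each candidate page found while walking the range ... */
    isolate_lru_page(page, &pagelist);

    /* Move everything collected so far to node 'dest'. */
    ret = migrate_pages_to(&pagelist, NULL, dest);

    /* Anything that could not be migrated goes back onto the LRU. */
    if (!list_empty(&pagelist))
        putback_lru_pages(&pagelist);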
diff --git a/include/linux/mm.h b/include/linux/mm.h index 498ff8778fb6..6aa016f1d3ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -286,43 +286,34 @@ struct page { | |||
286 | * | 286 | * |
287 | * Also, many kernel routines increase the page count before a critical | 287 | * Also, many kernel routines increase the page count before a critical |
288 | * routine so they can be sure the page doesn't go away from under them. | 288 | * routine so they can be sure the page doesn't go away from under them. |
289 | * | ||
290 | * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we | ||
291 | * can use atomic_add_negative(-1, page->_count) to detect when the page | ||
292 | * becomes free and so that we can also use atomic_inc_and_test to atomically | ||
293 | * detect when we just tried to grab a ref on a page which some other CPU has | ||
294 | * already deemed to be freeable. | ||
295 | * | ||
296 | * NO code should make assumptions about this internal detail! Use the provided | ||
297 | * macros which retain the old rules: page_count(page) == 0 is a free page. | ||
298 | */ | 289 | */ |
299 | 290 | ||
300 | /* | 291 | /* |
301 | * Drop a ref, return true if the logical refcount fell to zero (the page has | 292 | * Drop a ref, return true if the logical refcount fell to zero (the page has |
302 | * no users) | 293 | * no users) |
303 | */ | 294 | */ |
304 | #define put_page_testzero(p) \ | 295 | static inline int put_page_testzero(struct page *page) |
305 | ({ \ | 296 | { |
306 | BUG_ON(atomic_read(&(p)->_count) == -1);\ | 297 | BUG_ON(atomic_read(&page->_count) == 0); |
307 | atomic_add_negative(-1, &(p)->_count); \ | 298 | return atomic_dec_and_test(&page->_count); |
308 | }) | 299 | } |
309 | 300 | ||
310 | /* | 301 | /* |
311 | * Grab a ref, return true if the page previously had a logical refcount of | 302 | * Try to grab a ref unless the page has a refcount of zero, return false if |
312 | * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page | 303 | * that is the case. |
313 | */ | 304 | */ |
314 | #define get_page_testone(p) atomic_inc_and_test(&(p)->_count) | 305 | static inline int get_page_unless_zero(struct page *page) |
315 | 306 | { | |
316 | #define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1) | 307 | return atomic_inc_not_zero(&page->_count); |
317 | #define __put_page(p) atomic_dec(&(p)->_count) | 308 | } |
318 | 309 | ||
319 | extern void FASTCALL(__page_cache_release(struct page *)); | 310 | extern void FASTCALL(__page_cache_release(struct page *)); |
320 | 311 | ||
321 | static inline int page_count(struct page *page) | 312 | static inline int page_count(struct page *page) |
322 | { | 313 | { |
323 | if (PageCompound(page)) | 314 | if (unlikely(PageCompound(page))) |
324 | page = (struct page *)page_private(page); | 315 | page = (struct page *)page_private(page); |
325 | return atomic_read(&page->_count) + 1; | 316 | return atomic_read(&page->_count); |
326 | } | 317 | } |
327 | 318 | ||
328 | static inline void get_page(struct page *page) | 319 | static inline void get_page(struct page *page) |
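Editor's note: the hunk above drops the old "free page has _count == -1" bias. A free page now has _count == 0, put_page_testzero() becomes a plain atomic_dec_and_test(), and the new get_page_unless_zero() only takes a reference when the page is not already free (atomic_inc_not_zero). The snippet below is an editor's user-space model of those two primitives using C11 atomics; it only illustrates the counting rules and is not kernel code.

    #include <stdatomic.h>
    #include <stdio.h>

    /* Model of page->_count: 0 means "free", >0 means "has users". */
    static atomic_int count = 1;

    /* Like put_page_testzero(): drop a ref, report whether it hit zero. */
    static int put_testzero(void)
    {
        return atomic_fetch_sub(&count, 1) == 1;
    }

    /* Like get_page_unless_zero(): take a ref only if the count is not 0. */
    static int get_unless_zero(void)
    {
        int old = atomic_load(&count);
        while (old != 0) {
            if (atomic_compare_exchange_weak(&count, &old, old + 1))
                return 1;   /* got a reference */
        }
        return 0;           /* page was already free */
    }

    int main(void)
    {
        printf("get: %d\n", get_unless_zero());  /* 1: count 1 -> 2 */
        printf("put: %d\n", put_testzero());     /* 0: count 2 -> 1 */
        printf("put: %d\n", put_testzero());     /* 1: last ref dropped */
        printf("get: %d\n", get_unless_zero());  /* 0: refuses, page is free */
        return 0;
    }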
@@ -332,8 +323,19 @@ static inline void get_page(struct page *page) | |||
332 | atomic_inc(&page->_count); | 323 | atomic_inc(&page->_count); |
333 | } | 324 | } |
334 | 325 | ||
326 | /* | ||
327 | * Setup the page count before being freed into the page allocator for | ||
328 | * the first time (boot or memory hotplug) | ||
329 | */ | ||
330 | static inline void init_page_count(struct page *page) | ||
331 | { | ||
332 | atomic_set(&page->_count, 1); | ||
333 | } | ||
334 | |||
335 | void put_page(struct page *page); | 335 | void put_page(struct page *page); |
336 | 336 | ||
337 | void split_page(struct page *page, unsigned int order); | ||
338 | |||
337 | /* | 339 | /* |
338 | * Multiple processes may "see" the same page. E.g. for untouched | 340 | * Multiple processes may "see" the same page. E.g. for untouched |
339 | * mappings of /dev/null, all processes see the same page full of | 341 | * mappings of /dev/null, all processes see the same page full of |
@@ -1046,7 +1048,7 @@ int in_gate_area_no_task(unsigned long addr); | |||
1046 | 1048 | ||
1047 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | 1049 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, |
1048 | void __user *, size_t *, loff_t *); | 1050 | void __user *, size_t *, loff_t *); |
1049 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 1051 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, |
1050 | unsigned long lru_pages); | 1052 | unsigned long lru_pages); |
1051 | void drop_pagecache(void); | 1053 | void drop_pagecache(void); |
1052 | void drop_slab(void); | 1054 | void drop_slab(void); |
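Editor's note: besides init_page_count() for pages handed to the allocator at boot or hotplug time, mm.h now declares split_page(), which turns one refcounted higher-order (non-compound) allocation into 2^order independently refcounted order-0 pages; the vm_insert_page() comment in mm/memory.c below now points at it instead of recommending manual set_page_count() calls. The fragment below is an editor's sketch of the typical in-kernel usage pattern, not code taken from this patch.

    /* Sketch: allocate an order-2 block, split it, then free the four
     * sub-pages individually instead of as one higher-order unit. */
    struct page *page = alloc_pages(GFP_KERNEL, 2);
    int i;

    if (!page)
        return -ENOMEM;

    split_page(page, 2);        /* each of the 4 pages now has count 1 */

    for (i = 0; i < 4; i++)
        __free_page(page + i);  /* may now be freed one at a time */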
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8ac854f7f190..3b6723dfaff3 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
@@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, struct page *page) | |||
32 | { | 32 | { |
33 | list_del(&page->lru); | 33 | list_del(&page->lru); |
34 | if (PageActive(page)) { | 34 | if (PageActive(page)) { |
35 | ClearPageActive(page); | 35 | __ClearPageActive(page); |
36 | zone->nr_active--; | 36 | zone->nr_active--; |
37 | } else { | 37 | } else { |
38 | zone->nr_inactive--; | 38 | zone->nr_inactive--; |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d52999c43336..9ea629c02a4b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -86,8 +86,9 @@ | |||
86 | * - The __xxx_page_state variants can be used safely when interrupts are | 86 | * - The __xxx_page_state variants can be used safely when interrupts are |
87 | * disabled. | 87 | * disabled. |
88 | * - The __xxx_page_state variants can be used if the field is only | 88 | * - The __xxx_page_state variants can be used if the field is only |
89 | * modified from process context, or only modified from interrupt context. | 89 | * modified from process context and protected from preemption, or only |
90 | * In this case, the field should be commented here. | 90 | * modified from interrupt context. In this case, the field should be |
91 | * commented here. | ||
91 | */ | 92 | */ |
92 | struct page_state { | 93 | struct page_state { |
93 | unsigned long nr_dirty; /* Dirty writeable pages */ | 94 | unsigned long nr_dirty; /* Dirty writeable pages */ |
@@ -239,22 +240,19 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta); | |||
239 | #define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) | 240 | #define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) |
240 | #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) | 241 | #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) |
241 | 242 | ||
242 | #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) | ||
243 | #define PageLRU(page) test_bit(PG_lru, &(page)->flags) | 243 | #define PageLRU(page) test_bit(PG_lru, &(page)->flags) |
244 | #define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) | 244 | #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) |
245 | #define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) | 245 | #define ClearPageLRU(page) clear_bit(PG_lru, &(page)->flags) |
246 | #define __ClearPageLRU(page) __clear_bit(PG_lru, &(page)->flags) | ||
246 | 247 | ||
247 | #define PageActive(page) test_bit(PG_active, &(page)->flags) | 248 | #define PageActive(page) test_bit(PG_active, &(page)->flags) |
248 | #define SetPageActive(page) set_bit(PG_active, &(page)->flags) | 249 | #define SetPageActive(page) set_bit(PG_active, &(page)->flags) |
249 | #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) | 250 | #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) |
250 | #define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags) | 251 | #define __ClearPageActive(page) __clear_bit(PG_active, &(page)->flags) |
251 | #define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags) | ||
252 | 252 | ||
253 | #define PageSlab(page) test_bit(PG_slab, &(page)->flags) | 253 | #define PageSlab(page) test_bit(PG_slab, &(page)->flags) |
254 | #define SetPageSlab(page) set_bit(PG_slab, &(page)->flags) | 254 | #define __SetPageSlab(page) __set_bit(PG_slab, &(page)->flags) |
255 | #define ClearPageSlab(page) clear_bit(PG_slab, &(page)->flags) | 255 | #define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags) |
256 | #define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags) | ||
257 | #define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags) | ||
258 | 256 | ||
259 | #ifdef CONFIG_HIGHMEM | 257 | #ifdef CONFIG_HIGHMEM |
260 | #define PageHighMem(page) is_highmem(page_zone(page)) | 258 | #define PageHighMem(page) is_highmem(page_zone(page)) |
@@ -329,8 +327,8 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta); | |||
329 | #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) | 327 | #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) |
330 | 328 | ||
331 | #define PageCompound(page) test_bit(PG_compound, &(page)->flags) | 329 | #define PageCompound(page) test_bit(PG_compound, &(page)->flags) |
332 | #define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) | 330 | #define __SetPageCompound(page) __set_bit(PG_compound, &(page)->flags) |
333 | #define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) | 331 | #define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags) |
334 | 332 | ||
335 | #ifdef CONFIG_SWAP | 333 | #ifdef CONFIG_SWAP |
336 | #define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) | 334 | #define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) |
diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0b2ba67ff13c..b739ac1f7ca0 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h | |||
@@ -11,8 +11,6 @@ | |||
11 | #ifndef _LINUX_RTC_H_ | 11 | #ifndef _LINUX_RTC_H_ |
12 | #define _LINUX_RTC_H_ | 12 | #define _LINUX_RTC_H_ |
13 | 13 | ||
14 | #include <linux/interrupt.h> | ||
15 | |||
16 | /* | 14 | /* |
17 | * The struct used to pass data via the following ioctl. Similar to the | 15 | * The struct used to pass data via the following ioctl. Similar to the |
18 | * struct tm in <time.h>, but it needs to be here so that the kernel | 16 | * struct tm in <time.h>, but it needs to be here so that the kernel |
@@ -95,6 +93,8 @@ struct rtc_pll_info { | |||
95 | 93 | ||
96 | #ifdef __KERNEL__ | 94 | #ifdef __KERNEL__ |
97 | 95 | ||
96 | #include <linux/interrupt.h> | ||
97 | |||
98 | typedef struct rtc_task { | 98 | typedef struct rtc_task { |
99 | void (*func)(void *private_data); | 99 | void (*func)(void *private_data); |
100 | void *private_data; | 100 | void *private_data; |
diff --git a/include/linux/slab.h b/include/linux/slab.h index 8cf52939d0ab..2b28c849d75a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t; | |||
38 | #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ | 38 | #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ |
39 | #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ | 39 | #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ |
40 | #define SLAB_POISON 0x00000800UL /* Poison objects */ | 40 | #define SLAB_POISON 0x00000800UL /* Poison objects */ |
41 | #define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ | ||
42 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ | 41 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ |
43 | #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ | 42 | #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ |
44 | #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ | 43 | #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ |
@@ -118,7 +117,7 @@ extern void *kzalloc(size_t, gfp_t); | |||
118 | */ | 117 | */ |
119 | static inline void *kcalloc(size_t n, size_t size, gfp_t flags) | 118 | static inline void *kcalloc(size_t n, size_t size, gfp_t flags) |
120 | { | 119 | { |
121 | if (n != 0 && size > INT_MAX / n) | 120 | if (n != 0 && size > ULONG_MAX / n) |
122 | return NULL; | 121 | return NULL; |
123 | return kzalloc(n * size, flags); | 122 | return kzalloc(n * size, flags); |
124 | } | 123 | } |
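Editor's note: the kcalloc() change widens the overflow guard from INT_MAX to ULONG_MAX, the correct bound for the n * size multiplication passed to kzalloc(). The test works because, for n != 0, size > ULONG_MAX / n is exactly the condition under which n * size would wrap around an unsigned long. A small stand-alone demonstration (editor's example):

    #include <stdio.h>
    #include <limits.h>

    /* Returns 1 if n * size would overflow an unsigned long; this is the
     * same test kcalloc() now applies before calling kzalloc(). */
    static int mul_overflows(unsigned long n, unsigned long size)
    {
        return n != 0 && size > ULONG_MAX / n;
    }

    int main(void)
    {
        printf("%d\n", mul_overflows(0, ULONG_MAX));     /* 0: n == 0      */
        printf("%d\n", mul_overflows(1024, 4096));       /* 0: 4 MB fits   */
        printf("%d\n", mul_overflows(ULONG_MAX / 2, 3)); /* 1: would wrap  */
        return 0;
    }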
diff --git a/include/linux/smp.h b/include/linux/smp.h index 44153fdf73fc..d699a16b0cb2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -52,23 +52,12 @@ extern void smp_cpus_done(unsigned int max_cpus); | |||
52 | /* | 52 | /* |
53 | * Call a function on all other processors | 53 | * Call a function on all other processors |
54 | */ | 54 | */ |
55 | extern int smp_call_function (void (*func) (void *info), void *info, | 55 | int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); |
56 | int retry, int wait); | ||
57 | 56 | ||
58 | /* | 57 | /* |
59 | * Call a function on all processors | 58 | * Call a function on all processors |
60 | */ | 59 | */ |
61 | static inline int on_each_cpu(void (*func) (void *info), void *info, | 60 | int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); |
62 | int retry, int wait) | ||
63 | { | ||
64 | int ret = 0; | ||
65 | |||
66 | preempt_disable(); | ||
67 | ret = smp_call_function(func, info, retry, wait); | ||
68 | func(info); | ||
69 | preempt_enable(); | ||
70 | return ret; | ||
71 | } | ||
72 | 61 | ||
73 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ | 62 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ |
74 | #define MSG_ALL 0x8001 | 63 | #define MSG_ALL 0x8001 |
@@ -94,7 +83,13 @@ void smp_prepare_boot_cpu(void); | |||
94 | #define raw_smp_processor_id() 0 | 83 | #define raw_smp_processor_id() 0 |
95 | #define hard_smp_processor_id() 0 | 84 | #define hard_smp_processor_id() 0 |
96 | #define smp_call_function(func,info,retry,wait) ({ 0; }) | 85 | #define smp_call_function(func,info,retry,wait) ({ 0; }) |
97 | #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) | 86 | #define on_each_cpu(func,info,retry,wait) \ |
87 | ({ \ | ||
88 | local_irq_disable(); \ | ||
89 | func(info); \ | ||
90 | local_irq_enable(); \ | ||
91 | 0; \ | ||
92 | }) | ||
98 | static inline void smp_send_reschedule(int cpu) { } | 93 | static inline void smp_send_reschedule(int cpu) { } |
99 | #define num_booting_cpus() 1 | 94 | #define num_booting_cpus() 1 |
100 | #define smp_prepare_boot_cpu() do {} while (0) | 95 | #define smp_prepare_boot_cpu() do {} while (0) |
diff --git a/include/linux/swap.h b/include/linux/swap.h index d572b19afb7d..12415dd94451 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -172,9 +172,24 @@ extern int rotate_reclaimable_page(struct page *page); | |||
172 | extern void swap_setup(void); | 172 | extern void swap_setup(void); |
173 | 173 | ||
174 | /* linux/mm/vmscan.c */ | 174 | /* linux/mm/vmscan.c */ |
175 | extern int try_to_free_pages(struct zone **, gfp_t); | 175 | extern unsigned long try_to_free_pages(struct zone **, gfp_t); |
176 | extern int shrink_all_memory(int); | 176 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
177 | extern int vm_swappiness; | 177 | extern int vm_swappiness; |
178 | extern int remove_mapping(struct address_space *mapping, struct page *page); | ||
179 | |||
180 | /* possible outcome of pageout() */ | ||
181 | typedef enum { | ||
182 | /* failed to write page out, page is locked */ | ||
183 | PAGE_KEEP, | ||
184 | /* move page to the active list, page is locked */ | ||
185 | PAGE_ACTIVATE, | ||
186 | /* page has been sent to the disk successfully, page is unlocked */ | ||
187 | PAGE_SUCCESS, | ||
188 | /* page is clean and locked */ | ||
189 | PAGE_CLEAN, | ||
190 | } pageout_t; | ||
191 | |||
192 | extern pageout_t pageout(struct page *page, struct address_space *mapping); | ||
178 | 193 | ||
179 | #ifdef CONFIG_NUMA | 194 | #ifdef CONFIG_NUMA |
180 | extern int zone_reclaim_mode; | 195 | extern int zone_reclaim_mode; |
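Editor's note: exporting pageout() together with the pageout_t enum makes its contract explicit to callers outside mm/vmscan.c. The dispatch a consumer performs on the result is sketched below; this is an editor's schematic only, the real caller in shrink_list() is not part of this hunk.

    /* Schematic consumer of pageout(); locking details omitted. */
    switch (pageout(page, mapping)) {
    case PAGE_KEEP:
        /* write failed, page is still locked: leave it on the
         * inactive list and retry on a later pass */
        break;
    case PAGE_ACTIVATE:
        /* not reclaimable right now: move it back to the active list */
        break;
    case PAGE_SUCCESS:
        /* write I/O submitted and the page is unlocked: it can be
         * reclaimed once writeback completes */
        break;
    case PAGE_CLEAN:
        /* clean and still locked: try to drop it from the page cache,
         * e.g. via remove_mapping(mapping, page) */
        break;
    }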
@@ -188,25 +203,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) | |||
188 | } | 203 | } |
189 | #endif | 204 | #endif |
190 | 205 | ||
191 | #ifdef CONFIG_MIGRATION | ||
192 | extern int isolate_lru_page(struct page *p); | ||
193 | extern int putback_lru_pages(struct list_head *l); | ||
194 | extern int migrate_page(struct page *, struct page *); | ||
195 | extern void migrate_page_copy(struct page *, struct page *); | ||
196 | extern int migrate_page_remove_references(struct page *, struct page *, int); | ||
197 | extern int migrate_pages(struct list_head *l, struct list_head *t, | ||
198 | struct list_head *moved, struct list_head *failed); | ||
199 | extern int fail_migrate_page(struct page *, struct page *); | ||
200 | #else | ||
201 | static inline int isolate_lru_page(struct page *p) { return -ENOSYS; } | ||
202 | static inline int putback_lru_pages(struct list_head *l) { return 0; } | ||
203 | static inline int migrate_pages(struct list_head *l, struct list_head *t, | ||
204 | struct list_head *moved, struct list_head *failed) { return -ENOSYS; } | ||
205 | /* Possible settings for the migrate_page() method in address_operations */ | ||
206 | #define migrate_page NULL | ||
207 | #define fail_migrate_page NULL | ||
208 | #endif | ||
209 | |||
210 | #ifdef CONFIG_MMU | 206 | #ifdef CONFIG_MMU |
211 | /* linux/mm/shmem.c */ | 207 | /* linux/mm/shmem.c */ |
212 | extern int shmem_unuse(swp_entry_t entry, struct page *page); | 208 | extern int shmem_unuse(swp_entry_t entry, struct page *page); |
diff --git a/kernel/fork.c b/kernel/fork.c index b373322ca497..9bd7b65ee418 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1534,6 +1534,12 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1534 | 1534 | ||
1535 | check_unshare_flags(&unshare_flags); | 1535 | check_unshare_flags(&unshare_flags); |
1536 | 1536 | ||
1537 | /* Return -EINVAL for all unsupported flags */ | ||
1538 | err = -EINVAL; | ||
1539 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
1540 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM)) | ||
1541 | goto bad_unshare_out; | ||
1542 | |||
1537 | if ((err = unshare_thread(unshare_flags))) | 1543 | if ((err = unshare_thread(unshare_flags))) |
1538 | goto bad_unshare_out; | 1544 | goto bad_unshare_out; |
1539 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1545 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
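Editor's note: sys_unshare() now fails fast with -EINVAL when any bit outside the supported CLONE_* set is passed, rather than silently ignoring unknown flags. The check is the usual "mask out what we understand and reject the remainder" pattern; the stand-alone illustration below is an editor's example of that pattern, not kernel code.

    #include <stdio.h>

    #define FLAG_A 0x1u
    #define FLAG_B 0x2u
    #define FLAG_C 0x4u
    #define SUPPORTED (FLAG_A | FLAG_B | FLAG_C)

    /* Same shape as the new sys_unshare() check: any bit not in the
     * supported mask invalidates the whole request. */
    static int validate_flags(unsigned int flags)
    {
        if (flags & ~SUPPORTED)
            return -22;     /* -EINVAL */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", validate_flags(FLAG_A | FLAG_C)); /* 0            */
        printf("%d\n", validate_flags(0x10));            /* -22 (EINVAL) */
        return 0;
    }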
diff --git a/kernel/sched.c b/kernel/sched.c index 4d46e90f59c3..6b6e0d70eb30 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -707,12 +707,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now) | |||
707 | DEF_TIMESLICE); | 707 | DEF_TIMESLICE); |
708 | } else { | 708 | } else { |
709 | /* | 709 | /* |
710 | * The lower the sleep avg a task has the more | ||
711 | * rapidly it will rise with sleep time. | ||
712 | */ | ||
713 | sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; | ||
714 | |||
715 | /* | ||
716 | * Tasks waking from uninterruptible sleep are | 710 | * Tasks waking from uninterruptible sleep are |
717 | * limited in their sleep_avg rise as they | 711 | * limited in their sleep_avg rise as they |
718 | * are likely to be waiting on I/O | 712 | * are likely to be waiting on I/O |
diff --git a/kernel/softirq.c b/kernel/softirq.c index ad3295cdded5..ec8fed42a86f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
19 | #include <linux/smp.h> | ||
19 | 20 | ||
20 | #include <asm/irq.h> | 21 | #include <asm/irq.h> |
21 | /* | 22 | /* |
@@ -495,3 +496,22 @@ __init int spawn_ksoftirqd(void) | |||
495 | register_cpu_notifier(&cpu_nfb); | 496 | register_cpu_notifier(&cpu_nfb); |
496 | return 0; | 497 | return 0; |
497 | } | 498 | } |
499 | |||
500 | #ifdef CONFIG_SMP | ||
501 | /* | ||
502 | * Call a function on all processors | ||
503 | */ | ||
504 | int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) | ||
505 | { | ||
506 | int ret = 0; | ||
507 | |||
508 | preempt_disable(); | ||
509 | ret = smp_call_function(func, info, retry, wait); | ||
510 | local_irq_disable(); | ||
511 | func(info); | ||
512 | local_irq_enable(); | ||
513 | preempt_enable(); | ||
514 | return ret; | ||
515 | } | ||
516 | EXPORT_SYMBOL(on_each_cpu); | ||
517 | #endif | ||
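Editor's note: on_each_cpu() is now an out-of-line function (above), and the local invocation of the callback happens with interrupts disabled, matching the IPI context in which it runs on all other CPUs. The callback therefore must not sleep or re-enable interrupts. A hedged usage sketch follows; the callback name is invented for illustration.

    /* Hypothetical per-CPU flush helper; runs on every online CPU,
     * either from IPI context or, locally, with IRQs disabled. */
    static void flush_local_state(void *info)
    {
        /* must be short and atomic: no sleeping, no blocking locks */
    }

    static void flush_all_cpus(void)
    {
        /* retry = 0, wait = 1: block until every CPU has run the callback */
        on_each_cpu(flush_local_state, NULL, 0, 1);
    }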
diff --git a/lib/string.c b/lib/string.c index 037a48acedbb..b3c28a3f6332 100644 --- a/lib/string.c +++ b/lib/string.c | |||
@@ -403,7 +403,6 @@ char *strpbrk(const char *cs, const char *ct) | |||
403 | } | 403 | } |
404 | return NULL; | 404 | return NULL; |
405 | } | 405 | } |
406 | EXPORT_SYMBOL(strpbrk); | ||
407 | #endif | 406 | #endif |
408 | 407 | ||
409 | #ifndef __HAVE_ARCH_STRSEP | 408 | #ifndef __HAVE_ARCH_STRSEP |
diff --git a/mm/Kconfig b/mm/Kconfig index a9cb80ae6409..bd80460360db 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -137,5 +137,11 @@ config SPLIT_PTLOCK_CPUS | |||
137 | # support for page migration | 137 | # support for page migration |
138 | # | 138 | # |
139 | config MIGRATION | 139 | config MIGRATION |
140 | bool "Page migration" | ||
140 | def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM | 141 | def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM |
141 | depends on SWAP | 142 | depends on SWAP |
143 | help | ||
144 | Allows the migration of the physical location of pages of processes | ||
145 | while the virtual addresses are not changed. This is useful for | ||
146 | example on NUMA systems to put pages nearer to the processors accessing | ||
147 | the page. | ||
diff --git a/mm/Makefile b/mm/Makefile index 9aa03fa1dcc3..f10c753dce6d 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -22,3 +22,5 @@ obj-$(CONFIG_SLOB) += slob.o | |||
22 | obj-$(CONFIG_SLAB) += slab.o | 22 | obj-$(CONFIG_SLAB) += slab.o |
23 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 23 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
24 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 24 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
25 | obj-$(CONFIG_MIGRATION) += migrate.o | ||
26 | |||
diff --git a/mm/filemap.c b/mm/filemap.c index 44da3d476994..e8f58f7dd7a5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
31 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
32 | #include "filemap.h" | 32 | #include "filemap.h" |
33 | #include "internal.h" | ||
34 | |||
33 | /* | 35 | /* |
34 | * FIXME: remove all knowledge of the buffer layer from the core VM | 36 | * FIXME: remove all knowledge of the buffer layer from the core VM |
35 | */ | 37 | */ |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 508707704d2c..ebad6bbb3501 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -13,24 +13,48 @@ | |||
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
15 | #include <linux/cpuset.h> | 15 | #include <linux/cpuset.h> |
16 | #include <linux/mutex.h> | ||
16 | 17 | ||
17 | #include <asm/page.h> | 18 | #include <asm/page.h> |
18 | #include <asm/pgtable.h> | 19 | #include <asm/pgtable.h> |
19 | 20 | ||
20 | #include <linux/hugetlb.h> | 21 | #include <linux/hugetlb.h> |
22 | #include "internal.h" | ||
21 | 23 | ||
22 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
23 | static unsigned long nr_huge_pages, free_huge_pages; | 25 | static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages; |
24 | unsigned long max_huge_pages; | 26 | unsigned long max_huge_pages; |
25 | static struct list_head hugepage_freelists[MAX_NUMNODES]; | 27 | static struct list_head hugepage_freelists[MAX_NUMNODES]; |
26 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; | 28 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; |
27 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; | 29 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; |
28 | |||
29 | /* | 30 | /* |
30 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | 31 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages |
31 | */ | 32 | */ |
32 | static DEFINE_SPINLOCK(hugetlb_lock); | 33 | static DEFINE_SPINLOCK(hugetlb_lock); |
33 | 34 | ||
35 | static void clear_huge_page(struct page *page, unsigned long addr) | ||
36 | { | ||
37 | int i; | ||
38 | |||
39 | might_sleep(); | ||
40 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { | ||
41 | cond_resched(); | ||
42 | clear_user_highpage(page + i, addr); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static void copy_huge_page(struct page *dst, struct page *src, | ||
47 | unsigned long addr) | ||
48 | { | ||
49 | int i; | ||
50 | |||
51 | might_sleep(); | ||
52 | for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { | ||
53 | cond_resched(); | ||
54 | copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE); | ||
55 | } | ||
56 | } | ||
57 | |||
34 | static void enqueue_huge_page(struct page *page) | 58 | static void enqueue_huge_page(struct page *page) |
35 | { | 59 | { |
36 | int nid = page_to_nid(page); | 60 | int nid = page_to_nid(page); |
@@ -64,57 +88,176 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, | |||
64 | return page; | 88 | return page; |
65 | } | 89 | } |
66 | 90 | ||
67 | static struct page *alloc_fresh_huge_page(void) | 91 | static void free_huge_page(struct page *page) |
92 | { | ||
93 | BUG_ON(page_count(page)); | ||
94 | |||
95 | INIT_LIST_HEAD(&page->lru); | ||
96 | |||
97 | spin_lock(&hugetlb_lock); | ||
98 | enqueue_huge_page(page); | ||
99 | spin_unlock(&hugetlb_lock); | ||
100 | } | ||
101 | |||
102 | static int alloc_fresh_huge_page(void) | ||
68 | { | 103 | { |
69 | static int nid = 0; | 104 | static int nid = 0; |
70 | struct page *page; | 105 | struct page *page; |
71 | page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, | 106 | page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, |
72 | HUGETLB_PAGE_ORDER); | 107 | HUGETLB_PAGE_ORDER); |
73 | nid = (nid + 1) % num_online_nodes(); | 108 | nid = next_node(nid, node_online_map); |
109 | if (nid == MAX_NUMNODES) | ||
110 | nid = first_node(node_online_map); | ||
74 | if (page) { | 111 | if (page) { |
112 | page[1].lru.next = (void *)free_huge_page; /* dtor */ | ||
75 | spin_lock(&hugetlb_lock); | 113 | spin_lock(&hugetlb_lock); |
76 | nr_huge_pages++; | 114 | nr_huge_pages++; |
77 | nr_huge_pages_node[page_to_nid(page)]++; | 115 | nr_huge_pages_node[page_to_nid(page)]++; |
78 | spin_unlock(&hugetlb_lock); | 116 | spin_unlock(&hugetlb_lock); |
117 | put_page(page); /* free it into the hugepage allocator */ | ||
118 | return 1; | ||
79 | } | 119 | } |
80 | return page; | 120 | return 0; |
81 | } | 121 | } |
82 | 122 | ||
83 | void free_huge_page(struct page *page) | 123 | static struct page *alloc_huge_page(struct vm_area_struct *vma, |
124 | unsigned long addr) | ||
84 | { | 125 | { |
85 | BUG_ON(page_count(page)); | 126 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
127 | struct page *page; | ||
128 | int use_reserve = 0; | ||
129 | unsigned long idx; | ||
86 | 130 | ||
87 | INIT_LIST_HEAD(&page->lru); | 131 | spin_lock(&hugetlb_lock); |
88 | page[1].lru.next = NULL; /* reset dtor */ | 132 | |
133 | if (vma->vm_flags & VM_MAYSHARE) { | ||
134 | |||
135 | /* idx = radix tree index, i.e. offset into file in | ||
136 | * HPAGE_SIZE units */ | ||
137 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
138 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
139 | |||
140 | /* The hugetlbfs specific inode info stores the number | ||
141 | * of "guaranteed available" (huge) pages. That is, | ||
142 | * the first 'prereserved_hpages' pages of the inode | ||
143 | * are either already instantiated, or have been | ||
144 | * pre-reserved (by hugetlb_reserve_for_inode()). Here | ||
145 | * we're in the process of instantiating the page, so | ||
146 | * we use this to determine whether to draw from the | ||
147 | * pre-reserved pool or the truly free pool. */ | ||
148 | if (idx < HUGETLBFS_I(inode)->prereserved_hpages) | ||
149 | use_reserve = 1; | ||
150 | } | ||
151 | |||
152 | if (!use_reserve) { | ||
153 | if (free_huge_pages <= reserved_huge_pages) | ||
154 | goto fail; | ||
155 | } else { | ||
156 | BUG_ON(reserved_huge_pages == 0); | ||
157 | reserved_huge_pages--; | ||
158 | } | ||
159 | |||
160 | page = dequeue_huge_page(vma, addr); | ||
161 | if (!page) | ||
162 | goto fail; | ||
163 | |||
164 | spin_unlock(&hugetlb_lock); | ||
165 | set_page_refcounted(page); | ||
166 | return page; | ||
167 | |||
168 | fail: | ||
169 | WARN_ON(use_reserve); /* reserved allocations shouldn't fail */ | ||
170 | spin_unlock(&hugetlb_lock); | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
174 | /* hugetlb_extend_reservation() | ||
175 | * | ||
176 | * Ensure that at least 'atleast' hugepages are, and will remain, | ||
177 | * available to instantiate the first 'atleast' pages of the given | ||
178 | * inode. If the inode doesn't already have this many pages reserved | ||
179 | * or instantiated, set aside some hugepages in the reserved pool to | ||
180 | * satisfy later faults (or fail now if there aren't enough, rather | ||
181 | * than getting the SIGBUS later). | ||
182 | */ | ||
183 | int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, | ||
184 | unsigned long atleast) | ||
185 | { | ||
186 | struct inode *inode = &info->vfs_inode; | ||
187 | unsigned long change_in_reserve = 0; | ||
188 | int ret = 0; | ||
89 | 189 | ||
90 | spin_lock(&hugetlb_lock); | 190 | spin_lock(&hugetlb_lock); |
91 | enqueue_huge_page(page); | 191 | read_lock_irq(&inode->i_mapping->tree_lock); |
192 | |||
193 | if (info->prereserved_hpages >= atleast) | ||
194 | goto out; | ||
195 | |||
196 | /* Because we always call this on shared mappings, none of the | ||
197 | * pages beyond info->prereserved_hpages can have been | ||
198 | * instantiated, so we need to reserve all of them now. */ | ||
199 | change_in_reserve = atleast - info->prereserved_hpages; | ||
200 | |||
201 | if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) { | ||
202 | ret = -ENOMEM; | ||
203 | goto out; | ||
204 | } | ||
205 | |||
206 | reserved_huge_pages += change_in_reserve; | ||
207 | info->prereserved_hpages = atleast; | ||
208 | |||
209 | out: | ||
210 | read_unlock_irq(&inode->i_mapping->tree_lock); | ||
92 | spin_unlock(&hugetlb_lock); | 211 | spin_unlock(&hugetlb_lock); |
212 | |||
213 | return ret; | ||
93 | } | 214 | } |
94 | 215 | ||
95 | struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) | 216 | /* hugetlb_truncate_reservation() |
217 | * | ||
218 | * This returns pages reserved for the given inode to the general free | ||
219 | * hugepage pool. If the inode has any pages prereserved, but not | ||
220 | * instantiated, beyond offset (atmost << HPAGE_SIZE), then release | ||
221 | * them. | ||
222 | */ | ||
223 | void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, | ||
224 | unsigned long atmost) | ||
96 | { | 225 | { |
226 | struct inode *inode = &info->vfs_inode; | ||
227 | struct address_space *mapping = inode->i_mapping; | ||
228 | unsigned long idx; | ||
229 | unsigned long change_in_reserve = 0; | ||
97 | struct page *page; | 230 | struct page *page; |
98 | int i; | ||
99 | 231 | ||
100 | spin_lock(&hugetlb_lock); | 232 | spin_lock(&hugetlb_lock); |
101 | page = dequeue_huge_page(vma, addr); | 233 | read_lock_irq(&inode->i_mapping->tree_lock); |
102 | if (!page) { | 234 | |
103 | spin_unlock(&hugetlb_lock); | 235 | if (info->prereserved_hpages <= atmost) |
104 | return NULL; | 236 | goto out; |
237 | |||
238 | /* Count pages which were reserved, but not instantiated, and | ||
239 | * which we can now release. */ | ||
240 | for (idx = atmost; idx < info->prereserved_hpages; idx++) { | ||
241 | page = radix_tree_lookup(&mapping->page_tree, idx); | ||
242 | if (!page) | ||
243 | /* Pages which are already instantiated can't | ||
244 | * be unreserved (and in fact have already | ||
245 | * been removed from the reserved pool) */ | ||
246 | change_in_reserve++; | ||
105 | } | 247 | } |
248 | |||
249 | BUG_ON(reserved_huge_pages < change_in_reserve); | ||
250 | reserved_huge_pages -= change_in_reserve; | ||
251 | info->prereserved_hpages = atmost; | ||
252 | |||
253 | out: | ||
254 | read_unlock_irq(&inode->i_mapping->tree_lock); | ||
106 | spin_unlock(&hugetlb_lock); | 255 | spin_unlock(&hugetlb_lock); |
107 | set_page_count(page, 1); | ||
108 | page[1].lru.next = (void *)free_huge_page; /* set dtor */ | ||
109 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) | ||
110 | clear_user_highpage(&page[i], addr); | ||
111 | return page; | ||
112 | } | 256 | } |
113 | 257 | ||
114 | static int __init hugetlb_init(void) | 258 | static int __init hugetlb_init(void) |
115 | { | 259 | { |
116 | unsigned long i; | 260 | unsigned long i; |
117 | struct page *page; | ||
118 | 261 | ||
119 | if (HPAGE_SHIFT == 0) | 262 | if (HPAGE_SHIFT == 0) |
120 | return 0; | 263 | return 0; |
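Editor's note: the new reserved_huge_pages counter lets hugetlbfs guarantee pages for a shared mapping up front. Reserved pages still sit on the free lists, so a non-reserved allocation may only proceed while free_huge_pages > reserved_huge_pages, whereas a reserved fault consumes one unit of reservation instead. The toy model below is an editor's simplification in user-space C (no locking, no per-node lists) that captures just this accounting invariant.

    #include <stdio.h>

    static unsigned long free_pages = 4, reserved_pages;

    /* hugetlb_extend_reservation(): fail unless the extra reservation
     * still fits inside the pages that are actually free. */
    static int reserve(unsigned long extra)
    {
        if (reserved_pages + extra > free_pages)
            return -1;                  /* -ENOMEM in the kernel */
        reserved_pages += extra;
        return 0;
    }

    /* alloc_huge_page(): reserved faults draw down the reservation,
     * everyone else must leave the reserved pages untouched. */
    static int alloc_hpage(int use_reserve)
    {
        if (!use_reserve && free_pages <= reserved_pages)
            return -1;                  /* would eat someone's reservation */
        if (use_reserve)
            reserved_pages--;
        free_pages--;
        return 0;
    }

    int main(void)
    {
        printf("reserve 3  -> %d\n", reserve(3));       /* 0                 */
        printf("anon alloc -> %d\n", alloc_hpage(0));   /* 0, one spare left */
        printf("anon alloc -> %d\n", alloc_hpage(0));   /* -1, all reserved  */
        printf("rsvd alloc -> %d\n", alloc_hpage(1));   /* 0                 */
        return 0;
    }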
@@ -123,12 +266,8 @@ static int __init hugetlb_init(void) | |||
123 | INIT_LIST_HEAD(&hugepage_freelists[i]); | 266 | INIT_LIST_HEAD(&hugepage_freelists[i]); |
124 | 267 | ||
125 | for (i = 0; i < max_huge_pages; ++i) { | 268 | for (i = 0; i < max_huge_pages; ++i) { |
126 | page = alloc_fresh_huge_page(); | 269 | if (!alloc_fresh_huge_page()) |
127 | if (!page) | ||
128 | break; | 270 | break; |
129 | spin_lock(&hugetlb_lock); | ||
130 | enqueue_huge_page(page); | ||
131 | spin_unlock(&hugetlb_lock); | ||
132 | } | 271 | } |
133 | max_huge_pages = free_huge_pages = nr_huge_pages = i; | 272 | max_huge_pages = free_huge_pages = nr_huge_pages = i; |
134 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); | 273 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); |
@@ -154,9 +293,9 @@ static void update_and_free_page(struct page *page) | |||
154 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | | 293 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | |
155 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | | 294 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | |
156 | 1 << PG_private | 1<< PG_writeback); | 295 | 1 << PG_private | 1<< PG_writeback); |
157 | set_page_count(&page[i], 0); | ||
158 | } | 296 | } |
159 | set_page_count(page, 1); | 297 | page[1].lru.next = NULL; |
298 | set_page_refcounted(page); | ||
160 | __free_pages(page, HUGETLB_PAGE_ORDER); | 299 | __free_pages(page, HUGETLB_PAGE_ORDER); |
161 | } | 300 | } |
162 | 301 | ||
@@ -188,12 +327,8 @@ static inline void try_to_free_low(unsigned long count) | |||
188 | static unsigned long set_max_huge_pages(unsigned long count) | 327 | static unsigned long set_max_huge_pages(unsigned long count) |
189 | { | 328 | { |
190 | while (count > nr_huge_pages) { | 329 | while (count > nr_huge_pages) { |
191 | struct page *page = alloc_fresh_huge_page(); | 330 | if (!alloc_fresh_huge_page()) |
192 | if (!page) | ||
193 | return nr_huge_pages; | 331 | return nr_huge_pages; |
194 | spin_lock(&hugetlb_lock); | ||
195 | enqueue_huge_page(page); | ||
196 | spin_unlock(&hugetlb_lock); | ||
197 | } | 332 | } |
198 | if (count >= nr_huge_pages) | 333 | if (count >= nr_huge_pages) |
199 | return nr_huge_pages; | 334 | return nr_huge_pages; |
@@ -225,9 +360,11 @@ int hugetlb_report_meminfo(char *buf) | |||
225 | return sprintf(buf, | 360 | return sprintf(buf, |
226 | "HugePages_Total: %5lu\n" | 361 | "HugePages_Total: %5lu\n" |
227 | "HugePages_Free: %5lu\n" | 362 | "HugePages_Free: %5lu\n" |
363 | "HugePages_Rsvd: %5lu\n" | ||
228 | "Hugepagesize: %5lu kB\n", | 364 | "Hugepagesize: %5lu kB\n", |
229 | nr_huge_pages, | 365 | nr_huge_pages, |
230 | free_huge_pages, | 366 | free_huge_pages, |
367 | reserved_huge_pages, | ||
231 | HPAGE_SIZE/1024); | 368 | HPAGE_SIZE/1024); |
232 | } | 369 | } |
233 | 370 | ||
@@ -240,11 +377,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf) | |||
240 | nid, free_huge_pages_node[nid]); | 377 | nid, free_huge_pages_node[nid]); |
241 | } | 378 | } |
242 | 379 | ||
243 | int is_hugepage_mem_enough(size_t size) | ||
244 | { | ||
245 | return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; | ||
246 | } | ||
247 | |||
248 | /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ | 380 | /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ |
249 | unsigned long hugetlb_total_pages(void) | 381 | unsigned long hugetlb_total_pages(void) |
250 | { | 382 | { |
@@ -374,7 +506,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
374 | unsigned long address, pte_t *ptep, pte_t pte) | 506 | unsigned long address, pte_t *ptep, pte_t pte) |
375 | { | 507 | { |
376 | struct page *old_page, *new_page; | 508 | struct page *old_page, *new_page; |
377 | int i, avoidcopy; | 509 | int avoidcopy; |
378 | 510 | ||
379 | old_page = pte_page(pte); | 511 | old_page = pte_page(pte); |
380 | 512 | ||
@@ -395,9 +527,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
395 | } | 527 | } |
396 | 528 | ||
397 | spin_unlock(&mm->page_table_lock); | 529 | spin_unlock(&mm->page_table_lock); |
398 | for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) | 530 | copy_huge_page(new_page, old_page, address); |
399 | copy_user_highpage(new_page + i, old_page + i, | ||
400 | address + i*PAGE_SIZE); | ||
401 | spin_lock(&mm->page_table_lock); | 531 | spin_lock(&mm->page_table_lock); |
402 | 532 | ||
403 | ptep = huge_pte_offset(mm, address & HPAGE_MASK); | 533 | ptep = huge_pte_offset(mm, address & HPAGE_MASK); |
@@ -442,6 +572,7 @@ retry: | |||
442 | ret = VM_FAULT_OOM; | 572 | ret = VM_FAULT_OOM; |
443 | goto out; | 573 | goto out; |
444 | } | 574 | } |
575 | clear_huge_page(page, address); | ||
445 | 576 | ||
446 | if (vma->vm_flags & VM_SHARED) { | 577 | if (vma->vm_flags & VM_SHARED) { |
447 | int err; | 578 | int err; |
@@ -496,14 +627,24 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
496 | pte_t *ptep; | 627 | pte_t *ptep; |
497 | pte_t entry; | 628 | pte_t entry; |
498 | int ret; | 629 | int ret; |
630 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); | ||
499 | 631 | ||
500 | ptep = huge_pte_alloc(mm, address); | 632 | ptep = huge_pte_alloc(mm, address); |
501 | if (!ptep) | 633 | if (!ptep) |
502 | return VM_FAULT_OOM; | 634 | return VM_FAULT_OOM; |
503 | 635 | ||
636 | /* | ||
637 | * Serialize hugepage allocation and instantiation, so that we don't | ||
638 | * get spurious allocation failures if two CPUs race to instantiate | ||
639 | * the same page in the page cache. | ||
640 | */ | ||
641 | mutex_lock(&hugetlb_instantiation_mutex); | ||
504 | entry = *ptep; | 642 | entry = *ptep; |
505 | if (pte_none(entry)) | 643 | if (pte_none(entry)) { |
506 | return hugetlb_no_page(mm, vma, address, ptep, write_access); | 644 | ret = hugetlb_no_page(mm, vma, address, ptep, write_access); |
645 | mutex_unlock(&hugetlb_instantiation_mutex); | ||
646 | return ret; | ||
647 | } | ||
507 | 648 | ||
508 | ret = VM_FAULT_MINOR; | 649 | ret = VM_FAULT_MINOR; |
509 | 650 | ||
@@ -513,6 +654,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
513 | if (write_access && !pte_write(entry)) | 654 | if (write_access && !pte_write(entry)) |
514 | ret = hugetlb_cow(mm, vma, address, ptep, entry); | 655 | ret = hugetlb_cow(mm, vma, address, ptep, entry); |
515 | spin_unlock(&mm->page_table_lock); | 656 | spin_unlock(&mm->page_table_lock); |
657 | mutex_unlock(&hugetlb_instantiation_mutex); | ||
516 | 658 | ||
517 | return ret; | 659 | return ret; |
518 | } | 660 | } |
@@ -521,10 +663,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
521 | struct page **pages, struct vm_area_struct **vmas, | 663 | struct page **pages, struct vm_area_struct **vmas, |
522 | unsigned long *position, int *length, int i) | 664 | unsigned long *position, int *length, int i) |
523 | { | 665 | { |
524 | unsigned long vpfn, vaddr = *position; | 666 | unsigned long pfn_offset; |
667 | unsigned long vaddr = *position; | ||
525 | int remainder = *length; | 668 | int remainder = *length; |
526 | 669 | ||
527 | vpfn = vaddr/PAGE_SIZE; | ||
528 | spin_lock(&mm->page_table_lock); | 670 | spin_lock(&mm->page_table_lock); |
529 | while (vaddr < vma->vm_end && remainder) { | 671 | while (vaddr < vma->vm_end && remainder) { |
530 | pte_t *pte; | 672 | pte_t *pte; |
@@ -552,19 +694,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
552 | break; | 694 | break; |
553 | } | 695 | } |
554 | 696 | ||
555 | if (pages) { | 697 | pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; |
556 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | 698 | page = pte_page(*pte); |
557 | get_page(page); | 699 | same_page: |
558 | pages[i] = page; | 700 | get_page(page); |
559 | } | 701 | if (pages) |
702 | pages[i] = page + pfn_offset; | ||
560 | 703 | ||
561 | if (vmas) | 704 | if (vmas) |
562 | vmas[i] = vma; | 705 | vmas[i] = vma; |
563 | 706 | ||
564 | vaddr += PAGE_SIZE; | 707 | vaddr += PAGE_SIZE; |
565 | ++vpfn; | 708 | ++pfn_offset; |
566 | --remainder; | 709 | --remainder; |
567 | ++i; | 710 | ++i; |
711 | if (vaddr < vma->vm_end && remainder && | ||
712 | pfn_offset < HPAGE_SIZE/PAGE_SIZE) { | ||
713 | /* | ||
714 | * We use pfn_offset to avoid touching the pageframes | ||
715 | * of this compound page. | ||
716 | */ | ||
717 | goto same_page; | ||
718 | } | ||
568 | } | 719 | } |
569 | spin_unlock(&mm->page_table_lock); | 720 | spin_unlock(&mm->page_table_lock); |
570 | *length = remainder; | 721 | *length = remainder; |
@@ -572,3 +723,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
572 | 723 | ||
573 | return i; | 724 | return i; |
574 | } | 725 | } |
726 | |||
727 | void hugetlb_change_protection(struct vm_area_struct *vma, | ||
728 | unsigned long address, unsigned long end, pgprot_t newprot) | ||
729 | { | ||
730 | struct mm_struct *mm = vma->vm_mm; | ||
731 | unsigned long start = address; | ||
732 | pte_t *ptep; | ||
733 | pte_t pte; | ||
734 | |||
735 | BUG_ON(address >= end); | ||
736 | flush_cache_range(vma, address, end); | ||
737 | |||
738 | spin_lock(&mm->page_table_lock); | ||
739 | for (; address < end; address += HPAGE_SIZE) { | ||
740 | ptep = huge_pte_offset(mm, address); | ||
741 | if (!ptep) | ||
742 | continue; | ||
743 | if (!pte_none(*ptep)) { | ||
744 | pte = huge_ptep_get_and_clear(mm, address, ptep); | ||
745 | pte = pte_mkhuge(pte_modify(pte, newprot)); | ||
746 | set_huge_pte_at(mm, address, ptep, pte); | ||
747 | lazy_mmu_prot_update(pte); | ||
748 | } | ||
749 | } | ||
750 | spin_unlock(&mm->page_table_lock); | ||
751 | |||
752 | flush_tlb_range(vma, start, end); | ||
753 | } | ||
754 | |||
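Editor's note: hugetlb_change_protection() above walks the range in HPAGE_SIZE steps, rewriting each present huge PTE with the new protection and flushing the TLB once at the end. It exists so that the mprotect() path can handle hugetlb VMAs; the expected dispatch in the caller is sketched below as an editor's assumption, since mm/mprotect.c is not part of this hunk.

    /* Assumed shape of the caller in the mprotect path: hugetlb VMAs take
     * the dedicated walker, everything else keeps the 4K-granular path. */
    if (is_vm_hugetlb_page(vma))
        hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
    else
        change_protection(vma, start, end, vma->vm_page_prot);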
diff --git a/mm/internal.h b/mm/internal.h index 17256bb2f4ef..d20e3cc4aef0 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -8,23 +8,33 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | #ifndef __MM_INTERNAL_H | ||
12 | #define __MM_INTERNAL_H | ||
11 | 13 | ||
12 | static inline void set_page_refs(struct page *page, int order) | 14 | #include <linux/mm.h> |
15 | |||
16 | static inline void set_page_count(struct page *page, int v) | ||
17 | { | ||
18 | atomic_set(&page->_count, v); | ||
19 | } | ||
20 | |||
21 | /* | ||
22 | * Turn a non-refcounted page (->_count == 0) into refcounted with | ||
23 | * a count of one. | ||
24 | */ | ||
25 | static inline void set_page_refcounted(struct page *page) | ||
13 | { | 26 | { |
14 | #ifdef CONFIG_MMU | 27 | BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); |
28 | BUG_ON(atomic_read(&page->_count)); | ||
15 | set_page_count(page, 1); | 29 | set_page_count(page, 1); |
16 | #else | 30 | } |
17 | int i; | ||
18 | 31 | ||
19 | /* | 32 | static inline void __put_page(struct page *page) |
20 | * We need to reference all the pages for this order, otherwise if | 33 | { |
21 | * anyone accesses one of the pages with (get/put) it will be freed. | 34 | atomic_dec(&page->_count); |
22 | * - eg: access_process_vm() | ||
23 | */ | ||
24 | for (i = 0; i < (1 << order); i++) | ||
25 | set_page_count(page + i, 1); | ||
26 | #endif /* CONFIG_MMU */ | ||
27 | } | 35 | } |
28 | 36 | ||
29 | extern void fastcall __init __free_pages_bootmem(struct page *page, | 37 | extern void fastcall __init __free_pages_bootmem(struct page *page, |
30 | unsigned int order); | 38 | unsigned int order); |
39 | |||
40 | #endif | ||
diff --git a/mm/memory.c b/mm/memory.c index 85e80a57db29..80c3fb370f91 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, | |||
277 | anon_vma_unlink(vma); | 277 | anon_vma_unlink(vma); |
278 | unlink_file_vma(vma); | 278 | unlink_file_vma(vma); |
279 | 279 | ||
280 | if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) { | 280 | if (is_vm_hugetlb_page(vma)) { |
281 | hugetlb_free_pgd_range(tlb, addr, vma->vm_end, | 281 | hugetlb_free_pgd_range(tlb, addr, vma->vm_end, |
282 | floor, next? next->vm_start: ceiling); | 282 | floor, next? next->vm_start: ceiling); |
283 | } else { | 283 | } else { |
@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, | |||
285 | * Optimization: gather nearby vmas into one call down | 285 | * Optimization: gather nearby vmas into one call down |
286 | */ | 286 | */ |
287 | while (next && next->vm_start <= vma->vm_end + PMD_SIZE | 287 | while (next && next->vm_start <= vma->vm_end + PMD_SIZE |
288 | && !is_hugepage_only_range(vma->vm_mm, next->vm_start, | 288 | && !is_vm_hugetlb_page(next)) { |
289 | HPAGE_SIZE)) { | ||
290 | vma = next; | 289 | vma = next; |
291 | next = vma->vm_next; | 290 | next = vma->vm_next; |
292 | anon_vma_unlink(vma); | 291 | anon_vma_unlink(vma); |
@@ -388,7 +387,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_ | |||
388 | { | 387 | { |
389 | unsigned long pfn = pte_pfn(pte); | 388 | unsigned long pfn = pte_pfn(pte); |
390 | 389 | ||
391 | if (vma->vm_flags & VM_PFNMAP) { | 390 | if (unlikely(vma->vm_flags & VM_PFNMAP)) { |
392 | unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; | 391 | unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; |
393 | if (pfn == vma->vm_pgoff + off) | 392 | if (pfn == vma->vm_pgoff + off) |
394 | return NULL; | 393 | return NULL; |
@@ -396,18 +395,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_ | |||
396 | return NULL; | 395 | return NULL; |
397 | } | 396 | } |
398 | 397 | ||
399 | /* | 398 | #ifdef CONFIG_DEBUG_VM |
400 | * Add some anal sanity checks for now. Eventually, | ||
401 | * we should just do "return pfn_to_page(pfn)", but | ||
402 | * in the meantime we check that we get a valid pfn, | ||
403 | * and that the resulting page looks ok. | ||
404 | * | ||
405 | * Remove this test eventually! | ||
406 | */ | ||
407 | if (unlikely(!pfn_valid(pfn))) { | 399 | if (unlikely(!pfn_valid(pfn))) { |
408 | print_bad_pte(vma, pte, addr); | 400 | print_bad_pte(vma, pte, addr); |
409 | return NULL; | 401 | return NULL; |
410 | } | 402 | } |
403 | #endif | ||
411 | 404 | ||
412 | /* | 405 | /* |
413 | * NOTE! We still have PageReserved() pages in the page | 406 | * NOTE! We still have PageReserved() pages in the page |
@@ -1221,9 +1214,7 @@ out: | |||
1221 | * The page has to be a nice clean _individual_ kernel allocation. | 1214 | * The page has to be a nice clean _individual_ kernel allocation. |
1222 | * If you allocate a compound page, you need to have marked it as | 1215 | * If you allocate a compound page, you need to have marked it as |
1223 | * such (__GFP_COMP), or manually just split the page up yourself | 1216 | * such (__GFP_COMP), or manually just split the page up yourself |
1224 | * (which is mainly an issue of doing "set_page_count(page, 1)" for | 1217 | * (see split_page()). |
1225 | * each sub-page, and then freeing them one by one when you free | ||
1226 | * them rather than freeing it as a compound page). | ||
1227 | * | 1218 | * |
1228 | * NOTE! Traditionally this was done with "remap_pfn_range()" which | 1219 | * NOTE! Traditionally this was done with "remap_pfn_range()" which |
1229 | * took an arbitrary page protection parameter. This doesn't allow | 1220 | * took an arbitrary page protection parameter. This doesn't allow |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b21869a39f0b..e93cc740c22b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/swap.h> | 86 | #include <linux/swap.h> |
87 | #include <linux/seq_file.h> | 87 | #include <linux/seq_file.h> |
88 | #include <linux/proc_fs.h> | 88 | #include <linux/proc_fs.h> |
89 | #include <linux/migrate.h> | ||
89 | 90 | ||
90 | #include <asm/tlbflush.h> | 91 | #include <asm/tlbflush.h> |
91 | #include <asm/uaccess.h> | 92 | #include <asm/uaccess.h> |
@@ -95,11 +96,8 @@ | |||
95 | #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ | 96 | #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ |
96 | #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ | 97 | #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ |
97 | 98 | ||
98 | /* The number of pages to migrate per call to migrate_pages() */ | 99 | static struct kmem_cache *policy_cache; |
99 | #define MIGRATE_CHUNK_SIZE 256 | 100 | static struct kmem_cache *sn_cache; |
100 | |||
101 | static kmem_cache_t *policy_cache; | ||
102 | static kmem_cache_t *sn_cache; | ||
103 | 101 | ||
104 | #define PDprintk(fmt...) | 102 | #define PDprintk(fmt...) |
105 | 103 | ||
@@ -331,17 +329,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
331 | struct vm_area_struct *first, *vma, *prev; | 329 | struct vm_area_struct *first, *vma, *prev; |
332 | 330 | ||
333 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | 331 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { |
334 | /* Must have swap device for migration */ | ||
335 | if (nr_swap_pages <= 0) | ||
336 | return ERR_PTR(-ENODEV); | ||
337 | 332 | ||
338 | /* | 333 | err = migrate_prep(); |
339 | * Clear the LRU lists so pages can be isolated. | 334 | if (err) |
340 | * Note that pages may be moved off the LRU after we have | 335 | return ERR_PTR(err); |
341 | * drained them. Those pages will fail to migrate like other | ||
342 | * pages that may be busy. | ||
343 | */ | ||
344 | lru_add_drain_all(); | ||
345 | } | 336 | } |
346 | 337 | ||
347 | first = find_vma(mm, start); | 338 | first = find_vma(mm, start); |
@@ -550,92 +541,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
550 | return err; | 541 | return err; |
551 | } | 542 | } |
552 | 543 | ||
544 | #ifdef CONFIG_MIGRATION | ||
553 | /* | 545 | /* |
554 | * page migration | 546 | * page migration |
555 | */ | 547 | */ |
556 | |||
557 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | 548 | static void migrate_page_add(struct page *page, struct list_head *pagelist, |
558 | unsigned long flags) | 549 | unsigned long flags) |
559 | { | 550 | { |
560 | /* | 551 | /* |
561 | * Avoid migrating a page that is shared with others. | 552 | * Avoid migrating a page that is shared with others. |
562 | */ | 553 | */ |
563 | if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { | 554 | if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) |
564 | if (isolate_lru_page(page)) | 555 | isolate_lru_page(page, pagelist); |
565 | list_add_tail(&page->lru, pagelist); | ||
566 | } | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Migrate the list 'pagelist' of pages to a certain destination. | ||
571 | * | ||
572 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
573 | * Return the number of pages not migrated or error code | ||
574 | */ | ||
575 | static int migrate_pages_to(struct list_head *pagelist, | ||
576 | struct vm_area_struct *vma, int dest) | ||
577 | { | ||
578 | LIST_HEAD(newlist); | ||
579 | LIST_HEAD(moved); | ||
580 | LIST_HEAD(failed); | ||
581 | int err = 0; | ||
582 | unsigned long offset = 0; | ||
583 | int nr_pages; | ||
584 | struct page *page; | ||
585 | struct list_head *p; | ||
586 | |||
587 | redo: | ||
588 | nr_pages = 0; | ||
589 | list_for_each(p, pagelist) { | ||
590 | if (vma) { | ||
591 | /* | ||
592 | * The address passed to alloc_page_vma is used to | ||
593 | * generate the proper interleave behavior. We fake | ||
594 | * the address here by an increasing offset in order | ||
595 | * to get the proper distribution of pages. | ||
596 | * | ||
597 | * No decision has been made as to which page | ||
598 | * a certain old page is moved to so we cannot | ||
599 | * specify the correct address. | ||
600 | */ | ||
601 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
602 | offset + vma->vm_start); | ||
603 | offset += PAGE_SIZE; | ||
604 | } | ||
605 | else | ||
606 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
607 | |||
608 | if (!page) { | ||
609 | err = -ENOMEM; | ||
610 | goto out; | ||
611 | } | ||
612 | list_add_tail(&page->lru, &newlist); | ||
613 | nr_pages++; | ||
614 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
615 | break; | ||
616 | } | ||
617 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | ||
618 | |||
619 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | ||
620 | |||
621 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
622 | goto redo; | ||
623 | out: | ||
624 | /* Return leftover allocated pages */ | ||
625 | while (!list_empty(&newlist)) { | ||
626 | page = list_entry(newlist.next, struct page, lru); | ||
627 | list_del(&page->lru); | ||
628 | __free_page(page); | ||
629 | } | ||
630 | list_splice(&failed, pagelist); | ||
631 | if (err < 0) | ||
632 | return err; | ||
633 | |||
634 | /* Calculate number of leftover pages */ | ||
635 | nr_pages = 0; | ||
636 | list_for_each(p, pagelist) | ||
637 | nr_pages++; | ||
638 | return nr_pages; | ||
639 | } | 556 | } |
640 | 557 | ||
641 | /* | 558 | /* |
@@ -742,8 +659,23 @@ int do_migrate_pages(struct mm_struct *mm, | |||
742 | if (err < 0) | 659 | if (err < 0) |
743 | return err; | 660 | return err; |
744 | return busy; | 661 | return busy; |
662 | |||
745 | } | 663 | } |
746 | 664 | ||
665 | #else | ||
666 | |||
667 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | ||
668 | unsigned long flags) | ||
669 | { | ||
670 | } | ||
671 | |||
672 | int do_migrate_pages(struct mm_struct *mm, | ||
673 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | ||
674 | { | ||
675 | return -ENOSYS; | ||
676 | } | ||
677 | #endif | ||
678 | |||
747 | long do_mbind(unsigned long start, unsigned long len, | 679 | long do_mbind(unsigned long start, unsigned long len, |
748 | unsigned long mode, nodemask_t *nmask, unsigned long flags) | 680 | unsigned long mode, nodemask_t *nmask, unsigned long flags) |
749 | { | 681 | { |
@@ -808,6 +740,7 @@ long do_mbind(unsigned long start, unsigned long len, | |||
808 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) | 740 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) |
809 | err = -EIO; | 741 | err = -EIO; |
810 | } | 742 | } |
743 | |||
811 | if (!list_empty(&pagelist)) | 744 | if (!list_empty(&pagelist)) |
812 | putback_lru_pages(&pagelist); | 745 | putback_lru_pages(&pagelist); |
813 | 746 | ||
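
The mempolicy.c hunks above replace the open-coded swap check and lru_add_drain_all() call with migrate_prep(), and hand isolation straight to the new isolate_lru_page(page, pagelist). The following is a condensed, hedged sketch of the ordering the reworked check_range()/do_mbind() path relies on; it is not the kernel's code, the function name is invented, and CONFIG_MIGRATION=y is assumed.

#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/swap.h>

/* Illustrative only (hypothetical name): the call order the patch depends on. */
static int sketch_move_misplaced(struct vm_area_struct *vma,
                                 struct list_head *pagelist,
                                 unsigned long flags)
{
        int nr_failed;

        if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
                int err = migrate_prep();       /* swap check + LRU drain */

                if (err)
                        return err;
        }

        /*
         * ... page table walk: each misplaced page is queued with
         * migrate_page_add(page, pagelist, flags), which now hands the
         * page straight to isolate_lru_page(page, pagelist) ...
         */

        nr_failed = migrate_pages_to(pagelist, vma, -1 /* dest unused with vma */);

        /* anything still on the list could not be moved */
        if (!list_empty(pagelist))
                putback_lru_pages(pagelist);

        return nr_failed;
}

In the tree itself the final putback stays in do_mbind(), as the last mempolicy.c hunk above shows.
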
diff --git a/mm/mempool.c b/mm/mempool.c index 1a99b80480d3..f71893ed3543 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -278,14 +278,14 @@ EXPORT_SYMBOL(mempool_free); | |||
278 | */ | 278 | */ |
279 | void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) | 279 | void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) |
280 | { | 280 | { |
281 | kmem_cache_t *mem = (kmem_cache_t *) pool_data; | 281 | struct kmem_cache *mem = pool_data; |
282 | return kmem_cache_alloc(mem, gfp_mask); | 282 | return kmem_cache_alloc(mem, gfp_mask); |
283 | } | 283 | } |
284 | EXPORT_SYMBOL(mempool_alloc_slab); | 284 | EXPORT_SYMBOL(mempool_alloc_slab); |
285 | 285 | ||
286 | void mempool_free_slab(void *element, void *pool_data) | 286 | void mempool_free_slab(void *element, void *pool_data) |
287 | { | 287 | { |
288 | kmem_cache_t *mem = (kmem_cache_t *) pool_data; | 288 | struct kmem_cache *mem = pool_data; |
289 | kmem_cache_free(mem, element); | 289 | kmem_cache_free(mem, element); |
290 | } | 290 | } |
291 | EXPORT_SYMBOL(mempool_free_slab); | 291 | EXPORT_SYMBOL(mempool_free_slab); |
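
The mempool.c hunk is only a type cleanup (kmem_cache_t becomes struct kmem_cache and the cast disappears), but it is worth showing how the two slab callbacks are meant to be paired with a cache. This is a hedged sketch, not code from the patch: the object, cache and pool names are invented, and the 2.6.16-era kmem_cache_create()/mempool_create() signatures are assumed.

#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct io_unit {                        /* hypothetical object type */
        char buf[256];
};

static struct kmem_cache *io_cache;     /* hypothetical names */
static mempool_t *io_pool;

static int __init io_pool_init(void)
{
        io_cache = kmem_cache_create("io_unit", sizeof(struct io_unit),
                                     0, 0, NULL, NULL);
        if (!io_cache)
                return -ENOMEM;

        /* pool_data is the kmem_cache itself; mempool_alloc_slab() and
         * mempool_free_slab() now use it directly, with no cast */
        io_pool = mempool_create(16, mempool_alloc_slab,
                                 mempool_free_slab, io_cache);
        if (!io_pool) {
                kmem_cache_destroy(io_cache);
                return -ENOMEM;
        }
        return 0;
}
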
diff --git a/mm/migrate.c b/mm/migrate.c new file mode 100644 index 000000000000..09f6e4aa87fc --- /dev/null +++ b/mm/migrate.c | |||
@@ -0,0 +1,655 @@ | |||
1 | /* | ||
2 | * Memory Migration functionality - linux/mm/migration.c | ||
3 | * | ||
4 | * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter | ||
5 | * | ||
6 | * Page migration was first developed in the context of the memory hotplug | ||
7 | * project. The main authors of the migration code are: | ||
8 | * | ||
9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | ||
10 | * Hirokazu Takahashi <taka@valinux.co.jp> | ||
11 | * Dave Hansen <haveblue@us.ibm.com> | ||
12 | * Christoph Lameter <clameter@sgi.com> | ||
13 | */ | ||
14 | |||
15 | #include <linux/migrate.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/swap.h> | ||
18 | #include <linux/pagemap.h> | ||
19 | #include <linux/buffer_head.h> /* for try_to_release_page(), | ||
20 | buffer_heads_over_limit */ | ||
21 | #include <linux/mm_inline.h> | ||
22 | #include <linux/pagevec.h> | ||
23 | #include <linux/rmap.h> | ||
24 | #include <linux/topology.h> | ||
25 | #include <linux/cpu.h> | ||
26 | #include <linux/cpuset.h> | ||
27 | #include <linux/swapops.h> | ||
28 | |||
29 | #include "internal.h" | ||
30 | |||
31 | #include "internal.h" | ||
32 | |||
33 | /* The maximum number of pages to take off the LRU for migration */ | ||
34 | #define MIGRATE_CHUNK_SIZE 256 | ||
35 | |||
36 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | ||
37 | |||
38 | /* | ||
39 | * Isolate one page from the LRU lists. If successful put it onto | ||
40 | * the indicated list with elevated page count. | ||
41 | * | ||
42 | * Result: | ||
43 | * -EBUSY: page not on LRU list | ||
44 | * 0: page removed from LRU list and added to the specified list. | ||
45 | */ | ||
46 | int isolate_lru_page(struct page *page, struct list_head *pagelist) | ||
47 | { | ||
48 | int ret = -EBUSY; | ||
49 | |||
50 | if (PageLRU(page)) { | ||
51 | struct zone *zone = page_zone(page); | ||
52 | |||
53 | spin_lock_irq(&zone->lru_lock); | ||
54 | if (PageLRU(page)) { | ||
55 | ret = 0; | ||
56 | get_page(page); | ||
57 | ClearPageLRU(page); | ||
58 | if (PageActive(page)) | ||
59 | del_page_from_active_list(zone, page); | ||
60 | else | ||
61 | del_page_from_inactive_list(zone, page); | ||
62 | list_add_tail(&page->lru, pagelist); | ||
63 | } | ||
64 | spin_unlock_irq(&zone->lru_lock); | ||
65 | } | ||
66 | return ret; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * migrate_prep() needs to be called before we start compiling the list of | ||
71 | * pages to be migrated using isolate_lru_page(); it drains the per-cpu LRU | ||
72 | * pagevecs so that those pages can actually be isolated. | ||
73 | */ | ||
74 | int migrate_prep(void) | ||
75 | { | ||
76 | /* Must have swap device for migration */ | ||
77 | if (nr_swap_pages <= 0) | ||
78 | return -ENODEV; | ||
79 | |||
80 | /* | ||
81 | * Clear the LRU lists so pages can be isolated. | ||
82 | * Note that pages may be moved off the LRU after we have | ||
83 | * drained them. Those pages will fail to migrate like other | ||
84 | * pages that may be busy. | ||
85 | */ | ||
86 | lru_add_drain_all(); | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static inline void move_to_lru(struct page *page) | ||
92 | { | ||
93 | list_del(&page->lru); | ||
94 | if (PageActive(page)) { | ||
95 | /* | ||
96 | * lru_cache_add_active checks that | ||
97 | * the PG_active bit is off. | ||
98 | */ | ||
99 | ClearPageActive(page); | ||
100 | lru_cache_add_active(page); | ||
101 | } else { | ||
102 | lru_cache_add(page); | ||
103 | } | ||
104 | put_page(page); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Add isolated pages on the list back to the LRU. | ||
109 | * | ||
110 | * returns the number of pages put back. | ||
111 | */ | ||
112 | int putback_lru_pages(struct list_head *l) | ||
113 | { | ||
114 | struct page *page; | ||
115 | struct page *page2; | ||
116 | int count = 0; | ||
117 | |||
118 | list_for_each_entry_safe(page, page2, l, lru) { | ||
119 | move_to_lru(page); | ||
120 | count++; | ||
121 | } | ||
122 | return count; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Non-migratable page | ||
127 | */ | ||
128 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
129 | { | ||
130 | return -EIO; | ||
131 | } | ||
132 | EXPORT_SYMBOL(fail_migrate_page); | ||
133 | |||
134 | /* | ||
135 | * swapout a single page | ||
136 | * page is locked upon entry, unlocked on exit | ||
137 | */ | ||
138 | static int swap_page(struct page *page) | ||
139 | { | ||
140 | struct address_space *mapping = page_mapping(page); | ||
141 | |||
142 | if (page_mapped(page) && mapping) | ||
143 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | ||
144 | goto unlock_retry; | ||
145 | |||
146 | if (PageDirty(page)) { | ||
147 | /* Page is dirty, try to write it out here */ | ||
148 | switch(pageout(page, mapping)) { | ||
149 | case PAGE_KEEP: | ||
150 | case PAGE_ACTIVATE: | ||
151 | goto unlock_retry; | ||
152 | |||
153 | case PAGE_SUCCESS: | ||
154 | goto retry; | ||
155 | |||
156 | case PAGE_CLEAN: | ||
157 | ; /* try to free the page below */ | ||
158 | } | ||
159 | } | ||
160 | |||
161 | if (PagePrivate(page)) { | ||
162 | if (!try_to_release_page(page, GFP_KERNEL) || | ||
163 | (!mapping && page_count(page) == 1)) | ||
164 | goto unlock_retry; | ||
165 | } | ||
166 | |||
167 | if (remove_mapping(mapping, page)) { | ||
168 | /* Success */ | ||
169 | unlock_page(page); | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | unlock_retry: | ||
174 | unlock_page(page); | ||
175 | |||
176 | retry: | ||
177 | return -EAGAIN; | ||
178 | } | ||
179 | EXPORT_SYMBOL(swap_page); | ||
180 | |||
181 | /* | ||
182 | * Remove references for a page and establish the new page with the correct | ||
183 | * basic settings to be able to stop accesses to the page. | ||
184 | */ | ||
185 | int migrate_page_remove_references(struct page *newpage, | ||
186 | struct page *page, int nr_refs) | ||
187 | { | ||
188 | struct address_space *mapping = page_mapping(page); | ||
189 | struct page **radix_pointer; | ||
190 | |||
191 | /* | ||
192 | * Avoid doing any of the following work if the page count | ||
193 | * indicates that the page is in use or truncate has removed | ||
194 | * the page. | ||
195 | */ | ||
196 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
197 | return -EAGAIN; | ||
198 | |||
199 | /* | ||
200 | * Establish swap ptes for anonymous pages or destroy pte | ||
201 | * maps for files. | ||
202 | * | ||
203 | * In order to reestablish file backed mappings the fault handlers | ||
204 | * will take the radix tree_lock which may then be used to stop | ||
205 | * processes from accessing this page until the new page is ready. | ||
206 | * | ||
207 | * A process accessing via a swap pte (an anonymous page) will take a | ||
208 | * page_lock on the old page which will block the process until the | ||
209 | * migration attempt is complete. At that time the PageSwapCache bit | ||
210 | * will be examined. If the page was migrated then the PageSwapCache | ||
211 | * bit will be clear and the operation to retrieve the page will be | ||
212 | * retried which will find the new page in the radix tree. Then a new | ||
213 | * direct mapping may be generated based on the radix tree contents. | ||
214 | * | ||
215 | * If the page was not migrated then the PageSwapCache bit | ||
216 | * is still set and the operation may continue. | ||
217 | */ | ||
218 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
219 | /* A vma has VM_LOCKED set -> permanent failure */ | ||
220 | return -EPERM; | ||
221 | |||
222 | /* | ||
223 | * Give up if we were unable to remove all mappings. | ||
224 | */ | ||
225 | if (page_mapcount(page)) | ||
226 | return -EAGAIN; | ||
227 | |||
228 | write_lock_irq(&mapping->tree_lock); | ||
229 | |||
230 | radix_pointer = (struct page **)radix_tree_lookup_slot( | ||
231 | &mapping->page_tree, | ||
232 | page_index(page)); | ||
233 | |||
234 | if (!page_mapping(page) || page_count(page) != nr_refs || | ||
235 | *radix_pointer != page) { | ||
236 | write_unlock_irq(&mapping->tree_lock); | ||
237 | return 1; | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * Now we know that no one else is looking at the page. | ||
242 | * | ||
243 | * Certain minimal information about a page must be available | ||
244 | * in order for other subsystems to properly handle the page if they | ||
245 | * find it through the radix tree update before we are finished | ||
246 | * copying the page. | ||
247 | */ | ||
248 | get_page(newpage); | ||
249 | newpage->index = page->index; | ||
250 | newpage->mapping = page->mapping; | ||
251 | if (PageSwapCache(page)) { | ||
252 | SetPageSwapCache(newpage); | ||
253 | set_page_private(newpage, page_private(page)); | ||
254 | } | ||
255 | |||
256 | *radix_pointer = newpage; | ||
257 | __put_page(page); | ||
258 | write_unlock_irq(&mapping->tree_lock); | ||
259 | |||
260 | return 0; | ||
261 | } | ||
262 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
263 | |||
264 | /* | ||
265 | * Copy the page to its new location | ||
266 | */ | ||
267 | void migrate_page_copy(struct page *newpage, struct page *page) | ||
268 | { | ||
269 | copy_highpage(newpage, page); | ||
270 | |||
271 | if (PageError(page)) | ||
272 | SetPageError(newpage); | ||
273 | if (PageReferenced(page)) | ||
274 | SetPageReferenced(newpage); | ||
275 | if (PageUptodate(page)) | ||
276 | SetPageUptodate(newpage); | ||
277 | if (PageActive(page)) | ||
278 | SetPageActive(newpage); | ||
279 | if (PageChecked(page)) | ||
280 | SetPageChecked(newpage); | ||
281 | if (PageMappedToDisk(page)) | ||
282 | SetPageMappedToDisk(newpage); | ||
283 | |||
284 | if (PageDirty(page)) { | ||
285 | clear_page_dirty_for_io(page); | ||
286 | set_page_dirty(newpage); | ||
287 | } | ||
288 | |||
289 | ClearPageSwapCache(page); | ||
290 | ClearPageActive(page); | ||
291 | ClearPagePrivate(page); | ||
292 | set_page_private(page, 0); | ||
293 | page->mapping = NULL; | ||
294 | |||
295 | /* | ||
296 | * If any waiters have accumulated on the new page then | ||
297 | * wake them up. | ||
298 | */ | ||
299 | if (PageWriteback(newpage)) | ||
300 | end_page_writeback(newpage); | ||
301 | } | ||
302 | EXPORT_SYMBOL(migrate_page_copy); | ||
303 | |||
304 | /* | ||
305 | * Common logic to directly migrate a single page suitable for | ||
306 | * pages that do not use PagePrivate. | ||
307 | * | ||
308 | * Pages are locked upon entry and exit. | ||
309 | */ | ||
310 | int migrate_page(struct page *newpage, struct page *page) | ||
311 | { | ||
312 | int rc; | ||
313 | |||
314 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | ||
315 | |||
316 | rc = migrate_page_remove_references(newpage, page, 2); | ||
317 | |||
318 | if (rc) | ||
319 | return rc; | ||
320 | |||
321 | migrate_page_copy(newpage, page); | ||
322 | |||
323 | /* | ||
324 | * Remove auxiliary swap entries and replace | ||
325 | * them with real ptes. | ||
326 | * | ||
327 | * Note that a real pte entry will allow processes that are not | ||
328 | * waiting on the page lock to use the new page via the page tables | ||
329 | * before the new page is unlocked. | ||
330 | */ | ||
331 | remove_from_swap(newpage); | ||
332 | return 0; | ||
333 | } | ||
334 | EXPORT_SYMBOL(migrate_page); | ||
335 | |||
336 | /* | ||
337 | * migrate_pages | ||
338 | * | ||
339 | * Two lists are passed to this function. The first list | ||
340 | * contains the pages isolated from the LRU to be migrated. | ||
341 | * The second list contains new pages that the pages isolated | ||
342 | * can be moved to. If the second list is NULL then all | ||
343 | * pages are swapped out. | ||
344 | * | ||
345 | * The function returns after 10 attempts or if no pages | ||
346 | * are movable anymore because the 'to' list has become empty | ||
347 | * or no retryable pages exist. | ||
348 | * | ||
349 | * Return: Number of pages not migrated when "to" ran empty. | ||
350 | */ | ||
351 | int migrate_pages(struct list_head *from, struct list_head *to, | ||
352 | struct list_head *moved, struct list_head *failed) | ||
353 | { | ||
354 | int retry; | ||
355 | int nr_failed = 0; | ||
356 | int pass = 0; | ||
357 | struct page *page; | ||
358 | struct page *page2; | ||
359 | int swapwrite = current->flags & PF_SWAPWRITE; | ||
360 | int rc; | ||
361 | |||
362 | if (!swapwrite) | ||
363 | current->flags |= PF_SWAPWRITE; | ||
364 | |||
365 | redo: | ||
366 | retry = 0; | ||
367 | |||
368 | list_for_each_entry_safe(page, page2, from, lru) { | ||
369 | struct page *newpage = NULL; | ||
370 | struct address_space *mapping; | ||
371 | |||
372 | cond_resched(); | ||
373 | |||
374 | rc = 0; | ||
375 | if (page_count(page) == 1) | ||
376 | /* page was freed from under us. So we are done. */ | ||
377 | goto next; | ||
378 | |||
379 | if (to && list_empty(to)) | ||
380 | break; | ||
381 | |||
382 | /* | ||
383 | * Skip locked pages during the first two passes to give the | ||
384 | * functions holding the lock time to release the page. Later we | ||
385 | * use lock_page() to have a higher chance of acquiring the | ||
386 | * lock. | ||
387 | */ | ||
388 | rc = -EAGAIN; | ||
389 | if (pass > 2) | ||
390 | lock_page(page); | ||
391 | else | ||
392 | if (TestSetPageLocked(page)) | ||
393 | goto next; | ||
394 | |||
395 | /* | ||
396 | * Only wait on writeback if we have already done a pass where | ||
397 | * we may have triggered writeouts for lots of pages. | ||
398 | */ | ||
399 | if (pass > 0) { | ||
400 | wait_on_page_writeback(page); | ||
401 | } else { | ||
402 | if (PageWriteback(page)) | ||
403 | goto unlock_page; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Anonymous pages must have swap cache references otherwise | ||
408 | * the information contained in the page maps cannot be | ||
409 | * preserved. | ||
410 | */ | ||
411 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
412 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
413 | rc = -ENOMEM; | ||
414 | goto unlock_page; | ||
415 | } | ||
416 | } | ||
417 | |||
418 | if (!to) { | ||
419 | rc = swap_page(page); | ||
420 | goto next; | ||
421 | } | ||
422 | |||
423 | newpage = lru_to_page(to); | ||
424 | lock_page(newpage); | ||
425 | |||
426 | /* | ||
427 | * Pages are properly locked and writeback is complete. | ||
428 | * Try to migrate the page. | ||
429 | */ | ||
430 | mapping = page_mapping(page); | ||
431 | if (!mapping) | ||
432 | goto unlock_both; | ||
433 | |||
434 | if (mapping->a_ops->migratepage) { | ||
435 | /* | ||
436 | * Most pages have a mapping and most filesystems | ||
437 | * should provide a migration function. Anonymous | ||
438 | * pages are part of swap space which also has its | ||
439 | * own migration function. This is the most common | ||
440 | * path for page migration. | ||
441 | */ | ||
442 | rc = mapping->a_ops->migratepage(newpage, page); | ||
443 | goto unlock_both; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Default handling if a filesystem does not provide | ||
448 | * a migration function. We can only migrate clean | ||
449 | * pages so try to write out any dirty pages first. | ||
450 | */ | ||
451 | if (PageDirty(page)) { | ||
452 | switch (pageout(page, mapping)) { | ||
453 | case PAGE_KEEP: | ||
454 | case PAGE_ACTIVATE: | ||
455 | goto unlock_both; | ||
456 | |||
457 | case PAGE_SUCCESS: | ||
458 | unlock_page(newpage); | ||
459 | goto next; | ||
460 | |||
461 | case PAGE_CLEAN: | ||
462 | ; /* try to migrate the page below */ | ||
463 | } | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * Buffers are managed in a filesystem specific way. | ||
468 | * We must have no buffers or drop them. | ||
469 | */ | ||
470 | if (!page_has_buffers(page) || | ||
471 | try_to_release_page(page, GFP_KERNEL)) { | ||
472 | rc = migrate_page(newpage, page); | ||
473 | goto unlock_both; | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * On early passes with mapped pages simply | ||
478 | * retry. There may be a lock held for some | ||
479 | * buffers that may go away. Later | ||
480 | * swap them out. | ||
481 | */ | ||
482 | if (pass > 4) { | ||
483 | /* | ||
484 | * Persistently unable to drop buffers..... As a | ||
485 | * measure of last resort we fall back to | ||
486 | * swap_page(). | ||
487 | */ | ||
488 | unlock_page(newpage); | ||
489 | newpage = NULL; | ||
490 | rc = swap_page(page); | ||
491 | goto next; | ||
492 | } | ||
493 | |||
494 | unlock_both: | ||
495 | unlock_page(newpage); | ||
496 | |||
497 | unlock_page: | ||
498 | unlock_page(page); | ||
499 | |||
500 | next: | ||
501 | if (rc == -EAGAIN) { | ||
502 | retry++; | ||
503 | } else if (rc) { | ||
504 | /* Permanent failure */ | ||
505 | list_move(&page->lru, failed); | ||
506 | nr_failed++; | ||
507 | } else { | ||
508 | if (newpage) { | ||
509 | /* Successful migration. Return page to LRU */ | ||
510 | move_to_lru(newpage); | ||
511 | } | ||
512 | list_move(&page->lru, moved); | ||
513 | } | ||
514 | } | ||
515 | if (retry && pass++ < 10) | ||
516 | goto redo; | ||
517 | |||
518 | if (!swapwrite) | ||
519 | current->flags &= ~PF_SWAPWRITE; | ||
520 | |||
521 | return nr_failed + retry; | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * Migration function for pages with buffers. This function can only be used | ||
526 | * if the underlying filesystem guarantees that no other references to "page" | ||
527 | * exist. | ||
528 | */ | ||
529 | int buffer_migrate_page(struct page *newpage, struct page *page) | ||
530 | { | ||
531 | struct address_space *mapping = page->mapping; | ||
532 | struct buffer_head *bh, *head; | ||
533 | int rc; | ||
534 | |||
535 | if (!mapping) | ||
536 | return -EAGAIN; | ||
537 | |||
538 | if (!page_has_buffers(page)) | ||
539 | return migrate_page(newpage, page); | ||
540 | |||
541 | head = page_buffers(page); | ||
542 | |||
543 | rc = migrate_page_remove_references(newpage, page, 3); | ||
544 | |||
545 | if (rc) | ||
546 | return rc; | ||
547 | |||
548 | bh = head; | ||
549 | do { | ||
550 | get_bh(bh); | ||
551 | lock_buffer(bh); | ||
552 | bh = bh->b_this_page; | ||
553 | |||
554 | } while (bh != head); | ||
555 | |||
556 | ClearPagePrivate(page); | ||
557 | set_page_private(newpage, page_private(page)); | ||
558 | set_page_private(page, 0); | ||
559 | put_page(page); | ||
560 | get_page(newpage); | ||
561 | |||
562 | bh = head; | ||
563 | do { | ||
564 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
565 | bh = bh->b_this_page; | ||
566 | |||
567 | } while (bh != head); | ||
568 | |||
569 | SetPagePrivate(newpage); | ||
570 | |||
571 | migrate_page_copy(newpage, page); | ||
572 | |||
573 | bh = head; | ||
574 | do { | ||
575 | unlock_buffer(bh); | ||
576 | put_bh(bh); | ||
577 | bh = bh->b_this_page; | ||
578 | |||
579 | } while (bh != head); | ||
580 | |||
581 | return 0; | ||
582 | } | ||
583 | EXPORT_SYMBOL(buffer_migrate_page); | ||
584 | |||
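
migrate_page() and buffer_migrate_page() above are generic helpers intended to be plugged into an address_space_operations table; migrate_pages() reaches them through mapping->a_ops->migratepage(newpage, page) as shown earlier in this file. A hedged sketch of that wiring follows; the filesystem is hypothetical and only the .migratepage hookup is the point.

#include <linux/fs.h>
#include <linux/migrate.h>

/* hypothetical block-device backed filesystem that uses buffer heads,
 * so the buffer-aware helper is the appropriate choice */
static struct address_space_operations sketchfs_aops = {
        /* .readpage, .writepage, ... omitted */
        .migratepage    = buffer_migrate_page,
};

A filesystem whose pages never carry buffers could point this at migrate_page() instead; a mapping without a migratepage method falls back to the pageout()-based default path in migrate_pages().
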
585 | /* | ||
586 | * Migrate the list 'pagelist' of pages to a certain destination. | ||
587 | * | ||
588 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
589 | * Return the number of pages not migrated or error code | ||
590 | */ | ||
591 | int migrate_pages_to(struct list_head *pagelist, | ||
592 | struct vm_area_struct *vma, int dest) | ||
593 | { | ||
594 | LIST_HEAD(newlist); | ||
595 | LIST_HEAD(moved); | ||
596 | LIST_HEAD(failed); | ||
597 | int err = 0; | ||
598 | unsigned long offset = 0; | ||
599 | int nr_pages; | ||
600 | struct page *page; | ||
601 | struct list_head *p; | ||
602 | |||
603 | redo: | ||
604 | nr_pages = 0; | ||
605 | list_for_each(p, pagelist) { | ||
606 | if (vma) { | ||
607 | /* | ||
608 | * The address passed to alloc_page_vma is used to | ||
609 | * generate the proper interleave behavior. We fake | ||
610 | * the address here by an increasing offset in order | ||
611 | * to get the proper distribution of pages. | ||
612 | * | ||
613 | * No decision has been made as to which page | ||
614 | * a certain old page is moved to so we cannot | ||
615 | * specify the correct address. | ||
616 | */ | ||
617 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
618 | offset + vma->vm_start); | ||
619 | offset += PAGE_SIZE; | ||
620 | } | ||
621 | else | ||
622 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
623 | |||
624 | if (!page) { | ||
625 | err = -ENOMEM; | ||
626 | goto out; | ||
627 | } | ||
628 | list_add_tail(&page->lru, &newlist); | ||
629 | nr_pages++; | ||
630 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
631 | break; | ||
632 | } | ||
633 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | ||
634 | |||
635 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | ||
636 | |||
637 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
638 | goto redo; | ||
639 | out: | ||
640 | /* Return leftover allocated pages */ | ||
641 | while (!list_empty(&newlist)) { | ||
642 | page = list_entry(newlist.next, struct page, lru); | ||
643 | list_del(&page->lru); | ||
644 | __free_page(page); | ||
645 | } | ||
646 | list_splice(&failed, pagelist); | ||
647 | if (err < 0) | ||
648 | return err; | ||
649 | |||
650 | /* Calculate number of leftover pages */ | ||
651 | nr_pages = 0; | ||
652 | list_for_each(p, pagelist) | ||
653 | nr_pages++; | ||
654 | return nr_pages; | ||
655 | } | ||
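
As the migrate_pages() comment notes, passing a NULL 'to' list turns the call into a swap-out pass instead of a node-to-node move, a mode nothing in this patch exercises directly. Below is a hedged sketch of such a caller; the function name is invented and the putback of the 'moved' list simply mirrors what migrate_pages_to() above does.

#include <linux/list.h>
#include <linux/migrate.h>
#include <linux/swap.h>

/* Illustrative only: push a list of already-isolated pages out to swap. */
static int sketch_swap_out_list(struct list_head *isolated)
{
        LIST_HEAD(moved);               /* pages migrate_pages() dealt with */
        LIST_HEAD(failed);              /* permanent failures */
        int nr_left;

        nr_left = migrate_pages(isolated, NULL, &moved, &failed);

        /* return touched pages to the LRU; 'isolated' keeps only the
         * pages that were still busy after the retry passes */
        putback_lru_pages(&moved);
        putback_lru_pages(&failed);

        return nr_left;
}
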
@@ -612,7 +612,7 @@ again: remove_next = 1 + (end > next->vm_end); | |||
612 | * If the vma has a ->close operation then the driver probably needs to release | 612 | * If the vma has a ->close operation then the driver probably needs to release |
613 | * per-vma resources, so we don't attempt to merge those. | 613 | * per-vma resources, so we don't attempt to merge those. |
614 | */ | 614 | */ |
615 | #define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | 615 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) |
616 | 616 | ||
617 | static inline int is_mergeable_vma(struct vm_area_struct *vma, | 617 | static inline int is_mergeable_vma(struct vm_area_struct *vma, |
618 | struct file *file, unsigned long vm_flags) | 618 | struct file *file, unsigned long vm_flags) |
@@ -845,14 +845,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags, | |||
845 | const unsigned long stack_flags | 845 | const unsigned long stack_flags |
846 | = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); | 846 | = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); |
847 | 847 | ||
848 | #ifdef CONFIG_HUGETLB | ||
849 | if (flags & VM_HUGETLB) { | ||
850 | if (!(flags & VM_DONTCOPY)) | ||
851 | mm->shared_vm += pages; | ||
852 | return; | ||
853 | } | ||
854 | #endif /* CONFIG_HUGETLB */ | ||
855 | |||
856 | if (file) { | 848 | if (file) { |
857 | mm->shared_vm += pages; | 849 | mm->shared_vm += pages; |
858 | if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) | 850 | if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) |
diff --git a/mm/mprotect.c b/mm/mprotect.c index 653b8571c1ed..4c14d4289b61 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
124 | * a MAP_NORESERVE private mapping to writable will now reserve. | 124 | * a MAP_NORESERVE private mapping to writable will now reserve. |
125 | */ | 125 | */ |
126 | if (newflags & VM_WRITE) { | 126 | if (newflags & VM_WRITE) { |
127 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { | 127 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { |
128 | charged = nrpages; | 128 | charged = nrpages; |
129 | if (security_vm_enough_memory(charged)) | 129 | if (security_vm_enough_memory(charged)) |
130 | return -ENOMEM; | 130 | return -ENOMEM; |
@@ -166,7 +166,10 @@ success: | |||
166 | */ | 166 | */ |
167 | vma->vm_flags = newflags; | 167 | vma->vm_flags = newflags; |
168 | vma->vm_page_prot = newprot; | 168 | vma->vm_page_prot = newprot; |
169 | change_protection(vma, start, end, newprot); | 169 | if (is_vm_hugetlb_page(vma)) |
170 | hugetlb_change_protection(vma, start, end, newprot); | ||
171 | else | ||
172 | change_protection(vma, start, end, newprot); | ||
170 | vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); | 173 | vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); |
171 | vm_stat_account(mm, newflags, vma->vm_file, nrpages); | 174 | vm_stat_account(mm, newflags, vma->vm_file, nrpages); |
172 | return 0; | 175 | return 0; |
@@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) | |||
240 | 243 | ||
241 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ | 244 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ |
242 | 245 | ||
243 | if (is_vm_hugetlb_page(vma)) { | ||
244 | error = -EACCES; | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); | 246 | newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); |
249 | 247 | ||
250 | /* newflags >> 4 shift VM_MAY% in place of VM_% */ | 248 | /* newflags >> 4 shift VM_MAY% in place of VM_% */ |
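
With the mprotect.c hunks above, a protection change on a huge page VMA is no longer rejected with -EACCES; it is routed to hugetlb_change_protection() instead. A hedged user-space illustration follows; the hugetlbfs mount point, the file name, and the 2MB huge page size are all assumptions, not part of the patch.

#include <stdio.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)  /* assumed huge page size */

int main(void)
{
        int fd = open("/mnt/huge/demo", O_CREAT | O_RDWR, 0600);
        void *p;

        if (fd < 0) {
                perror("open");
                return 1;
        }

        p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* before this patch: fails with EACCES; afterwards the change applies */
        if (mprotect(p, HPAGE_SIZE, PROT_READ))
                perror("mprotect");
        else
                printf("mprotect on a hugetlb mapping succeeded\n");

        munmap(p, HPAGE_SIZE);
        close(fd);
        return 0;
}
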
diff --git a/mm/nommu.c b/mm/nommu.c index 4951f4786f28..db45efac17cc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -159,7 +159,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | |||
159 | /* | 159 | /* |
160 | * kmalloc doesn't like __GFP_HIGHMEM for some reason | 160 | * kmalloc doesn't like __GFP_HIGHMEM for some reason |
161 | */ | 161 | */ |
162 | return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM); | 162 | return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); |
163 | } | 163 | } |
164 | 164 | ||
165 | struct page * vmalloc_to_page(void *addr) | 165 | struct page * vmalloc_to_page(void *addr) |
@@ -623,7 +623,7 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) | |||
623 | * - note that this may not return a page-aligned address if the object | 623 | * - note that this may not return a page-aligned address if the object |
624 | * we're allocating is smaller than a page | 624 | * we're allocating is smaller than a page |
625 | */ | 625 | */ |
626 | base = kmalloc(len, GFP_KERNEL); | 626 | base = kmalloc(len, GFP_KERNEL|__GFP_COMP); |
627 | if (!base) | 627 | if (!base) |
628 | goto enomem; | 628 | goto enomem; |
629 | 629 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 234bd4895d14..b7f14a4799a5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -55,7 +55,6 @@ unsigned long totalhigh_pages __read_mostly; | |||
55 | long nr_swap_pages; | 55 | long nr_swap_pages; |
56 | int percpu_pagelist_fraction; | 56 | int percpu_pagelist_fraction; |
57 | 57 | ||
58 | static void fastcall free_hot_cold_page(struct page *page, int cold); | ||
59 | static void __free_pages_ok(struct page *page, unsigned int order); | 58 | static void __free_pages_ok(struct page *page, unsigned int order); |
60 | 59 | ||
61 | /* | 60 | /* |
@@ -190,7 +189,7 @@ static void prep_compound_page(struct page *page, unsigned long order) | |||
190 | for (i = 0; i < nr_pages; i++) { | 189 | for (i = 0; i < nr_pages; i++) { |
191 | struct page *p = page + i; | 190 | struct page *p = page + i; |
192 | 191 | ||
193 | SetPageCompound(p); | 192 | __SetPageCompound(p); |
194 | set_page_private(p, (unsigned long)page); | 193 | set_page_private(p, (unsigned long)page); |
195 | } | 194 | } |
196 | } | 195 | } |
@@ -209,10 +208,24 @@ static void destroy_compound_page(struct page *page, unsigned long order) | |||
209 | if (unlikely(!PageCompound(p) | | 208 | if (unlikely(!PageCompound(p) | |
210 | (page_private(p) != (unsigned long)page))) | 209 | (page_private(p) != (unsigned long)page))) |
211 | bad_page(page); | 210 | bad_page(page); |
212 | ClearPageCompound(p); | 211 | __ClearPageCompound(p); |
213 | } | 212 | } |
214 | } | 213 | } |
215 | 214 | ||
215 | static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | ||
216 | { | ||
217 | int i; | ||
218 | |||
219 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | ||
220 | /* | ||
221 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO | ||
222 | * and __GFP_HIGHMEM from hard or soft interrupt context. | ||
223 | */ | ||
224 | BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); | ||
225 | for (i = 0; i < (1 << order); i++) | ||
226 | clear_highpage(page + i); | ||
227 | } | ||
228 | |||
216 | /* | 229 | /* |
217 | * function for dealing with page's order in buddy system. | 230 | * function for dealing with page's order in buddy system. |
218 | * zone->lock is already acquired when we use these. | 231 | * zone->lock is already acquired when we use these. |
@@ -423,11 +436,6 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
423 | mutex_debug_check_no_locks_freed(page_address(page), | 436 | mutex_debug_check_no_locks_freed(page_address(page), |
424 | PAGE_SIZE<<order); | 437 | PAGE_SIZE<<order); |
425 | 438 | ||
426 | #ifndef CONFIG_MMU | ||
427 | for (i = 1 ; i < (1 << order) ; ++i) | ||
428 | __put_page(page + i); | ||
429 | #endif | ||
430 | |||
431 | for (i = 0 ; i < (1 << order) ; ++i) | 439 | for (i = 0 ; i < (1 << order) ; ++i) |
432 | reserved += free_pages_check(page + i); | 440 | reserved += free_pages_check(page + i); |
433 | if (reserved) | 441 | if (reserved) |
@@ -448,28 +456,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order) | |||
448 | if (order == 0) { | 456 | if (order == 0) { |
449 | __ClearPageReserved(page); | 457 | __ClearPageReserved(page); |
450 | set_page_count(page, 0); | 458 | set_page_count(page, 0); |
451 | 459 | set_page_refcounted(page); | |
452 | free_hot_cold_page(page, 0); | 460 | __free_page(page); |
453 | } else { | 461 | } else { |
454 | LIST_HEAD(list); | ||
455 | int loop; | 462 | int loop; |
456 | 463 | ||
464 | prefetchw(page); | ||
457 | for (loop = 0; loop < BITS_PER_LONG; loop++) { | 465 | for (loop = 0; loop < BITS_PER_LONG; loop++) { |
458 | struct page *p = &page[loop]; | 466 | struct page *p = &page[loop]; |
459 | 467 | ||
460 | if (loop + 16 < BITS_PER_LONG) | 468 | if (loop + 1 < BITS_PER_LONG) |
461 | prefetchw(p + 16); | 469 | prefetchw(p + 1); |
462 | __ClearPageReserved(p); | 470 | __ClearPageReserved(p); |
463 | set_page_count(p, 0); | 471 | set_page_count(p, 0); |
464 | } | 472 | } |
465 | 473 | ||
466 | arch_free_page(page, order); | 474 | set_page_refcounted(page); |
467 | 475 | __free_pages(page, order); | |
468 | mod_page_state(pgfree, 1 << order); | ||
469 | |||
470 | list_add(&page->lru, &list); | ||
471 | kernel_map_pages(page, 1 << order, 0); | ||
472 | free_pages_bulk(page_zone(page), 1, &list, order); | ||
473 | } | 476 | } |
474 | } | 477 | } |
475 | 478 | ||
@@ -507,7 +510,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
507 | /* | 510 | /* |
508 | * This page is about to be returned from the page allocator | 511 | * This page is about to be returned from the page allocator |
509 | */ | 512 | */ |
510 | static int prep_new_page(struct page *page, int order) | 513 | static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) |
511 | { | 514 | { |
512 | if (unlikely(page_mapcount(page) | | 515 | if (unlikely(page_mapcount(page) | |
513 | (page->mapping != NULL) | | 516 | (page->mapping != NULL) | |
@@ -536,8 +539,15 @@ static int prep_new_page(struct page *page, int order) | |||
536 | 1 << PG_referenced | 1 << PG_arch_1 | | 539 | 1 << PG_referenced | 1 << PG_arch_1 | |
537 | 1 << PG_checked | 1 << PG_mappedtodisk); | 540 | 1 << PG_checked | 1 << PG_mappedtodisk); |
538 | set_page_private(page, 0); | 541 | set_page_private(page, 0); |
539 | set_page_refs(page, order); | 542 | set_page_refcounted(page); |
540 | kernel_map_pages(page, 1 << order, 1); | 543 | kernel_map_pages(page, 1 << order, 1); |
544 | |||
545 | if (gfp_flags & __GFP_ZERO) | ||
546 | prep_zero_page(page, order, gfp_flags); | ||
547 | |||
548 | if (order && (gfp_flags & __GFP_COMP)) | ||
549 | prep_compound_page(page, order); | ||
550 | |||
541 | return 0; | 551 | return 0; |
542 | } | 552 | } |
543 | 553 | ||
@@ -593,13 +603,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
593 | /* | 603 | /* |
594 | * Called from the slab reaper to drain pagesets on a particular node that | 604 | * Called from the slab reaper to drain pagesets on a particular node that |
595 | * belong to the currently executing processor. | 605 | * belong to the currently executing processor. |
606 | * Note that this function must be called with the thread pinned to | ||
607 | * a single processor. | ||
596 | */ | 608 | */ |
597 | void drain_node_pages(int nodeid) | 609 | void drain_node_pages(int nodeid) |
598 | { | 610 | { |
599 | int i, z; | 611 | int i, z; |
600 | unsigned long flags; | 612 | unsigned long flags; |
601 | 613 | ||
602 | local_irq_save(flags); | ||
603 | for (z = 0; z < MAX_NR_ZONES; z++) { | 614 | for (z = 0; z < MAX_NR_ZONES; z++) { |
604 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | 615 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; |
605 | struct per_cpu_pageset *pset; | 616 | struct per_cpu_pageset *pset; |
@@ -609,11 +620,14 @@ void drain_node_pages(int nodeid) | |||
609 | struct per_cpu_pages *pcp; | 620 | struct per_cpu_pages *pcp; |
610 | 621 | ||
611 | pcp = &pset->pcp[i]; | 622 | pcp = &pset->pcp[i]; |
612 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | 623 | if (pcp->count) { |
613 | pcp->count = 0; | 624 | local_irq_save(flags); |
625 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | ||
626 | pcp->count = 0; | ||
627 | local_irq_restore(flags); | ||
628 | } | ||
614 | } | 629 | } |
615 | } | 630 | } |
616 | local_irq_restore(flags); | ||
617 | } | 631 | } |
618 | #endif | 632 | #endif |
619 | 633 | ||
@@ -743,13 +757,22 @@ void fastcall free_cold_page(struct page *page) | |||
743 | free_hot_cold_page(page, 1); | 757 | free_hot_cold_page(page, 1); |
744 | } | 758 | } |
745 | 759 | ||
746 | static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | 760 | /* |
761 | * split_page takes a non-compound higher-order page, and splits it into | ||
762 | * n (1<<order) sub-pages: page[0..n-1] | ||
763 | * Each sub-page must be freed individually. | ||
764 | * | ||
765 | * Note: this is probably too low level an operation for use in drivers. | ||
766 | * Please consult with lkml before using this in your driver. | ||
767 | */ | ||
768 | void split_page(struct page *page, unsigned int order) | ||
747 | { | 769 | { |
748 | int i; | 770 | int i; |
749 | 771 | ||
750 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | 772 | BUG_ON(PageCompound(page)); |
751 | for(i = 0; i < (1 << order); i++) | 773 | BUG_ON(!page_count(page)); |
752 | clear_highpage(page + i); | 774 | for (i = 1; i < (1 << order); i++) |
775 | set_page_refcounted(page + i); | ||
753 | } | 776 | } |
754 | 777 | ||
755 | /* | 778 | /* |
@@ -795,14 +818,8 @@ again: | |||
795 | put_cpu(); | 818 | put_cpu(); |
796 | 819 | ||
797 | BUG_ON(bad_range(zone, page)); | 820 | BUG_ON(bad_range(zone, page)); |
798 | if (prep_new_page(page, order)) | 821 | if (prep_new_page(page, order, gfp_flags)) |
799 | goto again; | 822 | goto again; |
800 | |||
801 | if (gfp_flags & __GFP_ZERO) | ||
802 | prep_zero_page(page, order, gfp_flags); | ||
803 | |||
804 | if (order && (gfp_flags & __GFP_COMP)) | ||
805 | prep_compound_page(page, order); | ||
806 | return page; | 823 | return page; |
807 | 824 | ||
808 | failed: | 825 | failed: |
@@ -1214,24 +1231,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0; | |||
1214 | 1231 | ||
1215 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) | 1232 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
1216 | { | 1233 | { |
1217 | int cpu = 0; | 1234 | unsigned cpu; |
1218 | 1235 | ||
1219 | memset(ret, 0, nr * sizeof(unsigned long)); | 1236 | memset(ret, 0, nr * sizeof(unsigned long)); |
1220 | cpus_and(*cpumask, *cpumask, cpu_online_map); | 1237 | cpus_and(*cpumask, *cpumask, cpu_online_map); |
1221 | 1238 | ||
1222 | cpu = first_cpu(*cpumask); | 1239 | for_each_cpu_mask(cpu, *cpumask) { |
1223 | while (cpu < NR_CPUS) { | 1240 | unsigned long *in; |
1224 | unsigned long *in, *out, off; | 1241 | unsigned long *out; |
1225 | 1242 | unsigned off; | |
1226 | if (!cpu_isset(cpu, *cpumask)) | 1243 | unsigned next_cpu; |
1227 | continue; | ||
1228 | 1244 | ||
1229 | in = (unsigned long *)&per_cpu(page_states, cpu); | 1245 | in = (unsigned long *)&per_cpu(page_states, cpu); |
1230 | 1246 | ||
1231 | cpu = next_cpu(cpu, *cpumask); | 1247 | next_cpu = next_cpu(cpu, *cpumask); |
1232 | 1248 | if (likely(next_cpu < NR_CPUS)) | |
1233 | if (likely(cpu < NR_CPUS)) | 1249 | prefetch(&per_cpu(page_states, next_cpu)); |
1234 | prefetch(&per_cpu(page_states, cpu)); | ||
1235 | 1250 | ||
1236 | out = (unsigned long *)ret; | 1251 | out = (unsigned long *)ret; |
1237 | for (off = 0; off < nr; off++) | 1252 | for (off = 0; off < nr; off++) |
@@ -1764,7 +1779,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
1764 | continue; | 1779 | continue; |
1765 | page = pfn_to_page(pfn); | 1780 | page = pfn_to_page(pfn); |
1766 | set_page_links(page, zone, nid, pfn); | 1781 | set_page_links(page, zone, nid, pfn); |
1767 | set_page_count(page, 1); | 1782 | init_page_count(page); |
1768 | reset_page_mapcount(page); | 1783 | reset_page_mapcount(page); |
1769 | SetPageReserved(page); | 1784 | SetPageReserved(page); |
1770 | INIT_LIST_HEAD(&page->lru); | 1785 | INIT_LIST_HEAD(&page->lru); |
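
Of the page_alloc.c changes above, split_page() is the one genuinely new interface: it takes a non-compound higher-order allocation and gives each sub-page its own reference count so the pieces can be freed one by one, which is exactly what the in-tree comment warns is probably too low level for most drivers. A hedged sketch of the intended pairing follows; the caller name is invented, and the matching declaration is assumed to live in linux/mm.h as added elsewhere in this patch.

#include <linux/gfp.h>
#include <linux/mm.h>

/* Illustrative only: grab 2^order contiguous pages, then let each
 * piece be freed on its own rather than as one high-order block. */
static struct page *sketch_alloc_split(unsigned int order)
{
        struct page *page = alloc_pages(GFP_KERNEL, order);

        if (!page)
                return NULL;

        split_page(page, order);        /* page[0..(1<<order)-1] now independent */
        return page;
}

/* later, any piece can go back individually: __free_page(page + i); */
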
diff --git a/mm/readahead.c b/mm/readahead.c index 8d6eeaaa6296..301b36c4a0ce 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -52,13 +52,24 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra) | |||
52 | return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 52 | return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
53 | } | 53 | } |
54 | 54 | ||
55 | static inline void reset_ahead_window(struct file_ra_state *ra) | ||
56 | { | ||
57 | /* | ||
58 | * ... but preserve ahead_start + ahead_size value, | ||
59 | * see 'recheck:' label in page_cache_readahead(). | ||
60 | * Note: We never use ->ahead_size as rvalue without | ||
61 | * checking ->ahead_start != 0 first. | ||
62 | */ | ||
63 | ra->ahead_size += ra->ahead_start; | ||
64 | ra->ahead_start = 0; | ||
65 | } | ||
66 | |||
55 | static inline void ra_off(struct file_ra_state *ra) | 67 | static inline void ra_off(struct file_ra_state *ra) |
56 | { | 68 | { |
57 | ra->start = 0; | 69 | ra->start = 0; |
58 | ra->flags = 0; | 70 | ra->flags = 0; |
59 | ra->size = 0; | 71 | ra->size = 0; |
60 | ra->ahead_start = 0; | 72 | reset_ahead_window(ra); |
61 | ra->ahead_size = 0; | ||
62 | return; | 73 | return; |
63 | } | 74 | } |
64 | 75 | ||
@@ -72,10 +83,10 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) | |||
72 | { | 83 | { |
73 | unsigned long newsize = roundup_pow_of_two(size); | 84 | unsigned long newsize = roundup_pow_of_two(size); |
74 | 85 | ||
75 | if (newsize <= max / 64) | 86 | if (newsize <= max / 32) |
76 | newsize = newsize * newsize; | 87 | newsize = newsize * 4; |
77 | else if (newsize <= max / 4) | 88 | else if (newsize <= max / 4) |
78 | newsize = max / 4; | 89 | newsize = newsize * 2; |
79 | else | 90 | else |
80 | newsize = max; | 91 | newsize = max; |
81 | return newsize; | 92 | return newsize; |
@@ -426,8 +437,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, | |||
426 | * congestion. The ahead window will anyway be closed | 437 | * congestion. The ahead window will anyway be closed |
427 | * in case we failed due to excessive page cache hits. | 438 | * in case we failed due to excessive page cache hits. |
428 | */ | 439 | */ |
429 | ra->ahead_start = 0; | 440 | reset_ahead_window(ra); |
430 | ra->ahead_size = 0; | ||
431 | } | 441 | } |
432 | 442 | ||
433 | return ret; | 443 | return ret; |
@@ -520,11 +530,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, | |||
520 | * If we get here we are doing sequential IO and this was not the first | 530 | * If we get here we are doing sequential IO and this was not the first |
521 | * occurrence (i.e. we have an existing window) | 531 | * occurrence (i.e. we have an existing window) |
522 | */ | 532 | */ |
523 | |||
524 | if (ra->ahead_start == 0) { /* no ahead window yet */ | 533 | if (ra->ahead_start == 0) { /* no ahead window yet */ |
525 | if (!make_ahead_window(mapping, filp, ra, 0)) | 534 | if (!make_ahead_window(mapping, filp, ra, 0)) |
526 | goto out; | 535 | goto recheck; |
527 | } | 536 | } |
537 | |||
528 | /* | 538 | /* |
529 | * Already have an ahead window, check if we crossed into it. | 539 | * Already have an ahead window, check if we crossed into it. |
530 | * If so, shift windows and issue a new ahead window. | 540 | * If so, shift windows and issue a new ahead window. |
@@ -536,6 +546,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, | |||
536 | ra->start = ra->ahead_start; | 546 | ra->start = ra->ahead_start; |
537 | ra->size = ra->ahead_size; | 547 | ra->size = ra->ahead_size; |
538 | make_ahead_window(mapping, filp, ra, 0); | 548 | make_ahead_window(mapping, filp, ra, 0); |
549 | recheck: | ||
550 | /* prev_page shouldn't overrun the ahead window */ | ||
551 | ra->prev_page = min(ra->prev_page, | ||
552 | ra->ahead_start + ra->ahead_size - 1); | ||
539 | } | 553 | } |
540 | 554 | ||
541 | out: | 555 | out: |
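
The new get_init_ra_size() ramps the initial readahead window up far more gently than the newsize*newsize / max/4 ladder it replaces. The user-space program below re-implements both versions purely to show concrete numbers; roundup_pow_of_two() is re-coded by hand, and a default maximum window of 128KB with 4KB pages (32 pages) is an assumption.

#include <stdio.h>

/* stand-in for the kernel's roundup_pow_of_two() */
static unsigned long pow2_roundup(unsigned long x)
{
        unsigned long r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

static unsigned long old_init_ra(unsigned long size, unsigned long max)
{
        unsigned long newsize = pow2_roundup(size);

        if (newsize <= max / 64)
                newsize = newsize * newsize;
        else if (newsize <= max / 4)
                newsize = max / 4;
        else
                newsize = max;
        return newsize;
}

static unsigned long new_init_ra(unsigned long size, unsigned long max)
{
        unsigned long newsize = pow2_roundup(size);

        if (newsize <= max / 32)
                newsize = newsize * 4;
        else if (newsize <= max / 4)
                newsize = newsize * 2;
        else
                newsize = max;
        return newsize;
}

int main(void)
{
        unsigned long max = 32;         /* 128KB window / 4KB pages, assumed */
        unsigned long req;

        for (req = 1; req <= max; req <<= 1)
                printf("first read of %2lu pages: initial window %2lu -> %2lu pages\n",
                       req, old_init_ra(req, max), new_init_ra(req, max));
        return 0;
}
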
@@ -56,13 +56,11 @@ | |||
56 | 56 | ||
57 | #include <asm/tlbflush.h> | 57 | #include <asm/tlbflush.h> |
58 | 58 | ||
59 | //#define RMAP_DEBUG /* can be enabled only for debugging */ | 59 | struct kmem_cache *anon_vma_cachep; |
60 | |||
61 | kmem_cache_t *anon_vma_cachep; | ||
62 | 60 | ||
63 | static inline void validate_anon_vma(struct vm_area_struct *find_vma) | 61 | static inline void validate_anon_vma(struct vm_area_struct *find_vma) |
64 | { | 62 | { |
65 | #ifdef RMAP_DEBUG | 63 | #ifdef CONFIG_DEBUG_VM |
66 | struct anon_vma *anon_vma = find_vma->anon_vma; | 64 | struct anon_vma *anon_vma = find_vma->anon_vma; |
67 | struct vm_area_struct *vma; | 65 | struct vm_area_struct *vma; |
68 | unsigned int mapcount = 0; | 66 | unsigned int mapcount = 0; |
@@ -166,7 +164,8 @@ void anon_vma_unlink(struct vm_area_struct *vma) | |||
166 | anon_vma_free(anon_vma); | 164 | anon_vma_free(anon_vma); |
167 | } | 165 | } |
168 | 166 | ||
169 | static void anon_vma_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) | 167 | static void anon_vma_ctor(void *data, struct kmem_cache *cachep, |
168 | unsigned long flags) | ||
170 | { | 169 | { |
171 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | 170 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == |
172 | SLAB_CTOR_CONSTRUCTOR) { | 171 | SLAB_CTOR_CONSTRUCTOR) { |
@@ -550,13 +549,14 @@ void page_add_file_rmap(struct page *page) | |||
550 | void page_remove_rmap(struct page *page) | 549 | void page_remove_rmap(struct page *page) |
551 | { | 550 | { |
552 | if (atomic_add_negative(-1, &page->_mapcount)) { | 551 | if (atomic_add_negative(-1, &page->_mapcount)) { |
553 | if (page_mapcount(page) < 0) { | 552 | #ifdef CONFIG_DEBUG_VM |
553 | if (unlikely(page_mapcount(page) < 0)) { | ||
554 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); | 554 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); |
555 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); | 555 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); |
556 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); | 556 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); |
557 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); | 557 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); |
558 | } | 558 | } |
559 | 559 | #endif | |
560 | BUG_ON(page_mapcount(page) < 0); | 560 | BUG_ON(page_mapcount(page) < 0); |
561 | /* | 561 | /* |
562 | * It would be tidy to reset the PageAnon mapping here, | 562 | * It would be tidy to reset the PageAnon mapping here, |
diff --git a/mm/shmem.c b/mm/shmem.c index 7c455fbaff7b..37eaf42ed2c6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -875,7 +875,7 @@ redirty: | |||
875 | } | 875 | } |
876 | 876 | ||
877 | #ifdef CONFIG_NUMA | 877 | #ifdef CONFIG_NUMA |
878 | static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) | 878 | static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) |
879 | { | 879 | { |
880 | char *nodelist = strchr(value, ':'); | 880 | char *nodelist = strchr(value, ':'); |
881 | int err = 1; | 881 | int err = 1; |
@@ -2119,7 +2119,7 @@ failed: | |||
2119 | return err; | 2119 | return err; |
2120 | } | 2120 | } |
2121 | 2121 | ||
2122 | static kmem_cache_t *shmem_inode_cachep; | 2122 | static struct kmem_cache *shmem_inode_cachep; |
2123 | 2123 | ||
2124 | static struct inode *shmem_alloc_inode(struct super_block *sb) | 2124 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
2125 | { | 2125 | { |
@@ -2139,7 +2139,8 @@ static void shmem_destroy_inode(struct inode *inode) | |||
2139 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | 2139 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); |
2140 | } | 2140 | } |
2141 | 2141 | ||
2142 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | 2142 | static void init_once(void *foo, struct kmem_cache *cachep, |
2143 | unsigned long flags) | ||
2143 | { | 2144 | { |
2144 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; | 2145 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; |
2145 | 2146 | ||
@@ -50,7 +50,7 @@ | |||
50 | * The head array is strictly LIFO and should improve the cache hit rates. | 50 | * The head array is strictly LIFO and should improve the cache hit rates. |
51 | * On SMP, it additionally reduces the spinlock operations. | 51 | * On SMP, it additionally reduces the spinlock operations. |
52 | * | 52 | * |
53 | * The c_cpuarray may not be read with enabled local interrupts - | 53 | * The c_cpuarray may not be read with enabled local interrupts - |
54 | * it's changed with a smp_call_function(). | 54 | * it's changed with a smp_call_function(). |
55 | * | 55 | * |
56 | * SMP synchronization: | 56 | * SMP synchronization: |
@@ -170,12 +170,12 @@ | |||
170 | #if DEBUG | 170 | #if DEBUG |
171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ | 171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ |
172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ | 172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ |
173 | SLAB_NO_REAP | SLAB_CACHE_DMA | \ | 173 | SLAB_CACHE_DMA | \ |
174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ | 174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ |
175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
176 | SLAB_DESTROY_BY_RCU) | 176 | SLAB_DESTROY_BY_RCU) |
177 | #else | 177 | #else |
178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ | 178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ | 179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ |
180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
181 | SLAB_DESTROY_BY_RCU) | 181 | SLAB_DESTROY_BY_RCU) |
@@ -266,16 +266,17 @@ struct array_cache { | |||
266 | unsigned int batchcount; | 266 | unsigned int batchcount; |
267 | unsigned int touched; | 267 | unsigned int touched; |
268 | spinlock_t lock; | 268 | spinlock_t lock; |
269 | void *entry[0]; /* | 269 | void *entry[0]; /* |
270 | * Must have this definition in here for the proper | 270 | * Must have this definition in here for the proper |
271 | * alignment of array_cache. Also simplifies accessing | 271 | * alignment of array_cache. Also simplifies accessing |
272 | * the entries. | 272 | * the entries. |
273 | * [0] is for gcc 2.95. It should really be []. | 273 | * [0] is for gcc 2.95. It should really be []. |
274 | */ | 274 | */ |
275 | }; | 275 | }; |
276 | 276 | ||
277 | /* bootstrap: The caches do not work without cpuarrays anymore, | 277 | /* |
278 | * but the cpuarrays are allocated from the generic caches... | 278 | * bootstrap: The caches do not work without cpuarrays anymore, but the |
279 | * cpuarrays are allocated from the generic caches... | ||
279 | */ | 280 | */ |
280 | #define BOOT_CPUCACHE_ENTRIES 1 | 281 | #define BOOT_CPUCACHE_ENTRIES 1 |
281 | struct arraycache_init { | 282 | struct arraycache_init { |
@@ -291,13 +292,13 @@ struct kmem_list3 { | |||
291 | struct list_head slabs_full; | 292 | struct list_head slabs_full; |
292 | struct list_head slabs_free; | 293 | struct list_head slabs_free; |
293 | unsigned long free_objects; | 294 | unsigned long free_objects; |
294 | unsigned long next_reap; | ||
295 | int free_touched; | ||
296 | unsigned int free_limit; | 295 | unsigned int free_limit; |
297 | unsigned int colour_next; /* Per-node cache coloring */ | 296 | unsigned int colour_next; /* Per-node cache coloring */ |
298 | spinlock_t list_lock; | 297 | spinlock_t list_lock; |
299 | struct array_cache *shared; /* shared per node */ | 298 | struct array_cache *shared; /* shared per node */ |
300 | struct array_cache **alien; /* on other nodes */ | 299 | struct array_cache **alien; /* on other nodes */ |
300 | unsigned long next_reap; /* updated without locking */ | ||
301 | int free_touched; /* updated without locking */ | ||
301 | }; | 302 | }; |
302 | 303 | ||
303 | /* | 304 | /* |
@@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | |||
310 | #define SIZE_L3 (1 + MAX_NUMNODES) | 311 | #define SIZE_L3 (1 + MAX_NUMNODES) |
311 | 312 | ||
312 | /* | 313 | /* |
313 | * This function must be completely optimized away if | 314 | * This function must be completely optimized away if a constant is passed to |
314 | * a constant is passed to it. Mostly the same as | 315 | * it. Mostly the same as what is in linux/slab.h except it returns an index. |
315 | * what is in linux/slab.h except it returns an | ||
316 | * index. | ||
317 | */ | 316 | */ |
318 | static __always_inline int index_of(const size_t size) | 317 | static __always_inline int index_of(const size_t size) |
319 | { | 318 | { |
@@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
351 | parent->free_touched = 0; | 350 | parent->free_touched = 0; |
352 | } | 351 | } |
353 | 352 | ||
354 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 353 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
355 | do { \ | 354 | do { \ |
356 | INIT_LIST_HEAD(listp); \ | 355 | INIT_LIST_HEAD(listp); \ |
357 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ | 356 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ |
358 | } while (0) | 357 | } while (0) |
359 | 358 | ||
360 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ | 359 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ |
361 | do { \ | 360 | do { \ |
362 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ | 361 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ |
363 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ | 362 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ |
364 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 363 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
@@ -373,28 +372,30 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
373 | struct kmem_cache { | 372 | struct kmem_cache { |
374 | /* 1) per-cpu data, touched during every alloc/free */ | 373 | /* 1) per-cpu data, touched during every alloc/free */ |
375 | struct array_cache *array[NR_CPUS]; | 374 | struct array_cache *array[NR_CPUS]; |
375 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
376 | unsigned int batchcount; | 376 | unsigned int batchcount; |
377 | unsigned int limit; | 377 | unsigned int limit; |
378 | unsigned int shared; | 378 | unsigned int shared; |
379 | |||
379 | unsigned int buffer_size; | 380 | unsigned int buffer_size; |
380 | /* 2) touched by every alloc & free from the backend */ | 381 | /* 3) touched by every alloc & free from the backend */ |
381 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 382 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
382 | unsigned int flags; /* constant flags */ | ||
383 | unsigned int num; /* # of objs per slab */ | ||
384 | spinlock_t spinlock; | ||
385 | 383 | ||
386 | /* 3) cache_grow/shrink */ | 384 | unsigned int flags; /* constant flags */ |
385 | unsigned int num; /* # of objs per slab */ | ||
386 | |||
387 | /* 4) cache_grow/shrink */ | ||
387 | /* order of pgs per slab (2^n) */ | 388 | /* order of pgs per slab (2^n) */ |
388 | unsigned int gfporder; | 389 | unsigned int gfporder; |
389 | 390 | ||
390 | /* force GFP flags, e.g. GFP_DMA */ | 391 | /* force GFP flags, e.g. GFP_DMA */ |
391 | gfp_t gfpflags; | 392 | gfp_t gfpflags; |
392 | 393 | ||
393 | size_t colour; /* cache colouring range */ | 394 | size_t colour; /* cache colouring range */ |
394 | unsigned int colour_off; /* colour offset */ | 395 | unsigned int colour_off; /* colour offset */ |
395 | struct kmem_cache *slabp_cache; | 396 | struct kmem_cache *slabp_cache; |
396 | unsigned int slab_size; | 397 | unsigned int slab_size; |
397 | unsigned int dflags; /* dynamic flags */ | 398 | unsigned int dflags; /* dynamic flags */ |
398 | 399 | ||
399 | /* constructor func */ | 400 | /* constructor func */ |
400 | void (*ctor) (void *, struct kmem_cache *, unsigned long); | 401 | void (*ctor) (void *, struct kmem_cache *, unsigned long); |
@@ -402,11 +403,11 @@ struct kmem_cache { | |||
402 | /* de-constructor func */ | 403 | /* de-constructor func */ |
403 | void (*dtor) (void *, struct kmem_cache *, unsigned long); | 404 | void (*dtor) (void *, struct kmem_cache *, unsigned long); |
404 | 405 | ||
405 | /* 4) cache creation/removal */ | 406 | /* 5) cache creation/removal */ |
406 | const char *name; | 407 | const char *name; |
407 | struct list_head next; | 408 | struct list_head next; |
408 | 409 | ||
409 | /* 5) statistics */ | 410 | /* 6) statistics */ |
410 | #if STATS | 411 | #if STATS |
411 | unsigned long num_active; | 412 | unsigned long num_active; |
412 | unsigned long num_allocations; | 413 | unsigned long num_allocations; |
@@ -438,8 +439,9 @@ struct kmem_cache { | |||
438 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 439 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
439 | 440 | ||
440 | #define BATCHREFILL_LIMIT 16 | 441 | #define BATCHREFILL_LIMIT 16 |
441 | /* Optimization question: fewer reaps means less | 442 | /* |
442 | * probability for unnessary cpucache drain/refill cycles. | 443 | * Optimization question: fewer reaps means less probability for unnessary |
444 | * cpucache drain/refill cycles. | ||
443 | * | 445 | * |
444 | * OTOH the cpuarrays can contain lots of objects, | 446 | * OTOH the cpuarrays can contain lots of objects, |
445 | * which could lock up otherwise freeable slabs. | 447 | * which could lock up otherwise freeable slabs. |
@@ -453,17 +455,19 @@ struct kmem_cache { | |||
453 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) | 455 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) |
454 | #define STATS_INC_GROWN(x) ((x)->grown++) | 456 | #define STATS_INC_GROWN(x) ((x)->grown++) |
455 | #define STATS_INC_REAPED(x) ((x)->reaped++) | 457 | #define STATS_INC_REAPED(x) ((x)->reaped++) |
456 | #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ | 458 | #define STATS_SET_HIGH(x) \ |
457 | (x)->high_mark = (x)->num_active; \ | 459 | do { \ |
458 | } while (0) | 460 | if ((x)->num_active > (x)->high_mark) \ |
461 | (x)->high_mark = (x)->num_active; \ | ||
462 | } while (0) | ||
459 | #define STATS_INC_ERR(x) ((x)->errors++) | 463 | #define STATS_INC_ERR(x) ((x)->errors++) |
460 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) | 464 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) |
461 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) | 465 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) |
462 | #define STATS_SET_FREEABLE(x, i) \ | 466 | #define STATS_SET_FREEABLE(x, i) \ |
463 | do { if ((x)->max_freeable < i) \ | 467 | do { \ |
464 | (x)->max_freeable = i; \ | 468 | if ((x)->max_freeable < i) \ |
465 | } while (0) | 469 | (x)->max_freeable = i; \ |
466 | 470 | } while (0) | |
467 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) | 471 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) |
468 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) | 472 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) |
469 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) | 473 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) |
@@ -478,9 +482,7 @@ struct kmem_cache { | |||
478 | #define STATS_INC_ERR(x) do { } while (0) | 482 | #define STATS_INC_ERR(x) do { } while (0) |
479 | #define STATS_INC_NODEALLOCS(x) do { } while (0) | 483 | #define STATS_INC_NODEALLOCS(x) do { } while (0) |
480 | #define STATS_INC_NODEFREES(x) do { } while (0) | 484 | #define STATS_INC_NODEFREES(x) do { } while (0) |
481 | #define STATS_SET_FREEABLE(x, i) \ | 485 | #define STATS_SET_FREEABLE(x, i) do { } while (0) |
482 | do { } while (0) | ||
483 | |||
484 | #define STATS_INC_ALLOCHIT(x) do { } while (0) | 486 | #define STATS_INC_ALLOCHIT(x) do { } while (0) |
485 | #define STATS_INC_ALLOCMISS(x) do { } while (0) | 487 | #define STATS_INC_ALLOCMISS(x) do { } while (0) |
486 | #define STATS_INC_FREEHIT(x) do { } while (0) | 488 | #define STATS_INC_FREEHIT(x) do { } while (0) |
@@ -488,7 +490,8 @@ struct kmem_cache { | |||
488 | #endif | 490 | #endif |
489 | 491 | ||
490 | #if DEBUG | 492 | #if DEBUG |
491 | /* Magic nums for obj red zoning. | 493 | /* |
494 | * Magic nums for obj red zoning. | ||
492 | * Placed in the first word before and the first word after an obj. | 495 | * Placed in the first word before and the first word after an obj. |
493 | */ | 496 | */ |
494 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ | 497 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ |
@@ -499,7 +502,8 @@ struct kmem_cache { | |||
499 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ | 502 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ |
500 | #define POISON_END 0xa5 /* end-byte of poisoning */ | 503 | #define POISON_END 0xa5 /* end-byte of poisoning */ |
501 | 504 | ||
502 | /* memory layout of objects: | 505 | /* |
506 | * memory layout of objects: | ||
503 | * 0 : objp | 507 | * 0 : objp |
504 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that | 508 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that |
505 | * the end of an object is aligned with the end of the real | 509 | * the end of an object is aligned with the end of the real |
@@ -508,7 +512,8 @@ struct kmem_cache { | |||
508 | * redzone word. | 512 | * redzone word. |
509 | * cachep->obj_offset: The real object. | 513 | * cachep->obj_offset: The real object. |
510 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] | 514 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] |
511 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] | 515 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address |
516 | * [BYTES_PER_WORD long] | ||
512 | */ | 517 | */ |
513 | static int obj_offset(struct kmem_cache *cachep) | 518 | static int obj_offset(struct kmem_cache *cachep) |
514 | { | 519 | { |
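Annotation: the layout comment above becomes concrete with numbers. A sketch assuming BYTES_PER_WORD is 8 and a 40-byte object with both SLAB_RED_ZONE and SLAB_STORE_USER enabled; the sizes are invented and the real code additionally folds in alignment padding, so this is illustrative only:

#include <stdio.h>

int main(void)
{
        const int word = 8;            /* assumed BYTES_PER_WORD */
        const int obj_size = 40;       /* user-visible object size */
        const int obj_offset = word;   /* one leading redzone word, no extra padding assumed */
        const int buffer_size = obj_offset + obj_size + 2 * word; /* trailing redzone + caller word */

        printf("redzone1 : bytes %d..%d\n", obj_offset - word, obj_offset - 1);
        printf("object   : bytes %d..%d\n", obj_offset, obj_offset + obj_size - 1);
        printf("redzone2 : bytes %d..%d\n", buffer_size - 2 * word, buffer_size - word - 1);
        printf("caller   : bytes %d..%d\n", buffer_size - word, buffer_size - 1);
        return 0;
}

With these numbers buffer_size is 64: redzone1 occupies bytes 0..7, the object 8..47, the second redzone 48..55 (buffer_size - 2*BYTES_PER_WORD) and the last-caller word 56..63 (buffer_size - BYTES_PER_WORD), matching the formulas in the comment.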
@@ -552,8 +557,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
552 | #endif | 557 | #endif |
553 | 558 | ||
554 | /* | 559 | /* |
555 | * Maximum size of an obj (in 2^order pages) | 560 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp |
556 | * and absolute limit for the gfp order. | 561 | * order. |
557 | */ | 562 | */ |
558 | #if defined(CONFIG_LARGE_ALLOCS) | 563 | #if defined(CONFIG_LARGE_ALLOCS) |
559 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ | 564 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ |
@@ -573,9 +578,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
573 | #define BREAK_GFP_ORDER_LO 0 | 578 | #define BREAK_GFP_ORDER_LO 0 |
574 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; | 579 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; |
575 | 580 | ||
576 | /* Functions for storing/retrieving the cachep and or slab from the | 581 | /* |
577 | * global 'mem_map'. These are used to find the slab an obj belongs to. | 582 | * Functions for storing/retrieving the cachep and or slab from the page |
578 | * With kfree(), these are used to find the cache which an obj belongs to. | 583 | * allocator. These are used to find the slab an obj belongs to. With kfree(), |
584 | * these are used to find the cache which an obj belongs to. | ||
579 | */ | 585 | */ |
580 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | 586 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) |
581 | { | 587 | { |
@@ -584,6 +590,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | |||
584 | 590 | ||
585 | static inline struct kmem_cache *page_get_cache(struct page *page) | 591 | static inline struct kmem_cache *page_get_cache(struct page *page) |
586 | { | 592 | { |
593 | if (unlikely(PageCompound(page))) | ||
594 | page = (struct page *)page_private(page); | ||
587 | return (struct kmem_cache *)page->lru.next; | 595 | return (struct kmem_cache *)page->lru.next; |
588 | } | 596 | } |
589 | 597 | ||
@@ -594,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) | |||
594 | 602 | ||
595 | static inline struct slab *page_get_slab(struct page *page) | 603 | static inline struct slab *page_get_slab(struct page *page) |
596 | { | 604 | { |
605 | if (unlikely(PageCompound(page))) | ||
606 | page = (struct page *)page_private(page); | ||
597 | return (struct slab *)page->lru.prev; | 607 | return (struct slab *)page->lru.prev; |
598 | } | 608 | } |
599 | 609 | ||
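Annotation: both hunks above add the same guard — when a page belongs to a compound (higher-order) allocation, the cache/slab back-pointers live on the head page, so the lookup first redirects through page_private(). A simplified userspace model of that redirection; the struct and field names (fake_page, resolve_head, ...) are made up and only mimic the shape of the 2.6-era struct page usage:

#include <stdbool.h>

struct fake_page {
        bool compound_tail;        /* models PageCompound() on a tail page */
        struct fake_page *head;    /* models page_private() pointing at the head */
        void *cache;               /* models page->lru.next back-pointer */
        void *slab;                /* models page->lru.prev back-pointer */
};

/* Resolve to the head page before reading any back-pointer. */
static struct fake_page *resolve_head(struct fake_page *page)
{
        if (page->compound_tail)
                page = page->head;
        return page;
}

static void *fake_page_get_cache(struct fake_page *page)
{
        return resolve_head(page)->cache;
}

static void *fake_page_get_slab(struct fake_page *page)
{
        return resolve_head(page)->slab;
}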
@@ -609,7 +619,21 @@ static inline struct slab *virt_to_slab(const void *obj) | |||
609 | return page_get_slab(page); | 619 | return page_get_slab(page); |
610 | } | 620 | } |
611 | 621 | ||
612 | /* These are the default caches for kmalloc. Custom caches can have other sizes. */ | 622 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, |
623 | unsigned int idx) | ||
624 | { | ||
625 | return slab->s_mem + cache->buffer_size * idx; | ||
626 | } | ||
627 | |||
628 | static inline unsigned int obj_to_index(struct kmem_cache *cache, | ||
629 | struct slab *slab, void *obj) | ||
630 | { | ||
631 | return (unsigned)(obj - slab->s_mem) / cache->buffer_size; | ||
632 | } | ||
633 | |||
634 | /* | ||
635 | * These are the default caches for kmalloc. Custom caches can have other sizes. | ||
636 | */ | ||
613 | struct cache_sizes malloc_sizes[] = { | 637 | struct cache_sizes malloc_sizes[] = { |
614 | #define CACHE(x) { .cs_size = (x) }, | 638 | #define CACHE(x) { .cs_size = (x) }, |
615 | #include <linux/kmalloc_sizes.h> | 639 | #include <linux/kmalloc_sizes.h> |
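Annotation: the new index_to_obj()/obj_to_index() helpers in the hunk above replace open-coded pointer arithmetic used in several places later in the patch — an object's address is s_mem plus index times buffer_size, and the inverse divides the offset back down. A standalone round-trip check of that arithmetic (plain userspace C, the struct name and sizes are illustrative):

#include <assert.h>
#include <stdlib.h>

struct fake_slab {
        char *s_mem;               /* start of the object area */
        size_t buffer_size;        /* per-object stride, including any debug words */
};

static void *idx_to_obj(struct fake_slab *s, unsigned int idx)
{
        return s->s_mem + s->buffer_size * idx;
}

static unsigned int obj_to_idx(struct fake_slab *s, void *obj)
{
        return (unsigned int)(((char *)obj - s->s_mem) / s->buffer_size);
}

int main(void)
{
        struct fake_slab s = { malloc(16 * 64), 64 };
        unsigned int i;

        for (i = 0; i < 16; i++)
                assert(obj_to_idx(&s, idx_to_obj(&s, i)) == i);
        free(s.s_mem);
        return 0;
}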
@@ -642,8 +666,6 @@ static struct kmem_cache cache_cache = { | |||
642 | .limit = BOOT_CPUCACHE_ENTRIES, | 666 | .limit = BOOT_CPUCACHE_ENTRIES, |
643 | .shared = 1, | 667 | .shared = 1, |
644 | .buffer_size = sizeof(struct kmem_cache), | 668 | .buffer_size = sizeof(struct kmem_cache), |
645 | .flags = SLAB_NO_REAP, | ||
646 | .spinlock = SPIN_LOCK_UNLOCKED, | ||
647 | .name = "kmem_cache", | 669 | .name = "kmem_cache", |
648 | #if DEBUG | 670 | #if DEBUG |
649 | .obj_size = sizeof(struct kmem_cache), | 671 | .obj_size = sizeof(struct kmem_cache), |
@@ -655,8 +677,8 @@ static DEFINE_MUTEX(cache_chain_mutex); | |||
655 | static struct list_head cache_chain; | 677 | static struct list_head cache_chain; |
656 | 678 | ||
657 | /* | 679 | /* |
658 | * vm_enough_memory() looks at this to determine how many | 680 | * vm_enough_memory() looks at this to determine how many slab-allocated pages |
659 | * slab-allocated pages are possibly freeable under pressure | 681 | * are possibly freeable under pressure |
660 | * | 682 | * |
661 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab | 683 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab |
662 | */ | 684 | */ |
@@ -675,7 +697,8 @@ static enum { | |||
675 | 697 | ||
676 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 698 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
677 | 699 | ||
678 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); | 700 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
701 | int node); | ||
679 | static void enable_cpucache(struct kmem_cache *cachep); | 702 | static void enable_cpucache(struct kmem_cache *cachep); |
680 | static void cache_reap(void *unused); | 703 | static void cache_reap(void *unused); |
681 | static int __node_shrink(struct kmem_cache *cachep, int node); | 704 | static int __node_shrink(struct kmem_cache *cachep, int node); |
@@ -685,7 +708,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
685 | return cachep->array[smp_processor_id()]; | 708 | return cachep->array[smp_processor_id()]; |
686 | } | 709 | } |
687 | 710 | ||
688 | static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) | 711 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
712 | gfp_t gfpflags) | ||
689 | { | 713 | { |
690 | struct cache_sizes *csizep = malloc_sizes; | 714 | struct cache_sizes *csizep = malloc_sizes; |
691 | 715 | ||
@@ -720,8 +744,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align) | |||
720 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 744 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); |
721 | } | 745 | } |
722 | 746 | ||
723 | /* Calculate the number of objects and left-over bytes for a given | 747 | /* |
724 | buffer size. */ | 748 | * Calculate the number of objects and left-over bytes for a given buffer size. |
749 | */ | ||
725 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 750 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, |
726 | size_t align, int flags, size_t *left_over, | 751 | size_t align, int flags, size_t *left_over, |
727 | unsigned int *num) | 752 | unsigned int *num) |
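Annotation: cache_estimate() answers the question: for a slab of (PAGE_SIZE << gfporder) bytes, how many objects fit once the on-slab management data (struct slab plus one kmem_bufctl_t index per object, aligned) is accounted for, and how many bytes are left over. A simplified sketch of that calculation, assuming on-slab management and invented sizes for the descriptor and index type; the helper name and loop form are illustrative, not the kernel's exact code:

#include <stdio.h>
#include <stddef.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((size_t)(a) - 1))

static void estimate(size_t slab_size, size_t buffer_size, size_t align,
                     size_t desc_size, size_t bufctl_size,
                     unsigned int *num, size_t *left_over)
{
        unsigned int n = 0;

        /* Grow n while the descriptor, n indices and n objects still fit. */
        while (ALIGN_UP(desc_size + (n + 1) * bufctl_size, align) +
               (n + 1) * buffer_size <= slab_size)
                n++;

        *num = n;
        *left_over = slab_size -
                     ALIGN_UP(desc_size + n * bufctl_size, align) -
                     n * buffer_size;
}

int main(void)
{
        unsigned int num;
        size_t left;

        /* e.g. one 4096-byte page, 256-byte objects, 32-byte descriptor, 4-byte indices */
        estimate(4096, 256, 64, 32, 4, &num, &left);
        printf("%u objects, %zu bytes left over\n", num, left);
        return 0;
}

With the example numbers this yields 15 objects per page with 128 bytes left over for colouring.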
@@ -782,7 +807,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
782 | 807 | ||
783 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) | 808 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) |
784 | 809 | ||
785 | static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) | 810 | static void __slab_error(const char *function, struct kmem_cache *cachep, |
811 | char *msg) | ||
786 | { | 812 | { |
787 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 813 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
788 | function, cachep->name, msg); | 814 | function, cachep->name, msg); |
@@ -804,7 +830,7 @@ static void init_reap_node(int cpu) | |||
804 | 830 | ||
805 | node = next_node(cpu_to_node(cpu), node_online_map); | 831 | node = next_node(cpu_to_node(cpu), node_online_map); |
806 | if (node == MAX_NUMNODES) | 832 | if (node == MAX_NUMNODES) |
807 | node = 0; | 833 | node = first_node(node_online_map); |
808 | 834 | ||
809 | __get_cpu_var(reap_node) = node; | 835 | __get_cpu_var(reap_node) = node; |
810 | } | 836 | } |
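Annotation: the one-line change above fixes the wrap-around — when next_node() runs past the end of node_online_map, the walk now restarts at the first online node instead of hard-coding node 0, which need not be online. A tiny sketch of the same round-robin over a sparse ordered set; the array simply stands in for node_online_map:

#include <stdio.h>

/* Online node ids in ascending order; node 0 is deliberately absent. */
static const int online[] = { 1, 3, 4, 7 };
#define NONLINE (sizeof(online) / sizeof(online[0]))

/* Return the next online node after 'node', wrapping to the first one. */
static int next_online_node(int node)
{
        size_t i;

        for (i = 0; i < NONLINE; i++)
                if (online[i] > node)
                        return online[i];
        return online[0];       /* wrapped: first_node(), not node 0 */
}

int main(void)
{
        int n = 7;

        printf("after node %d comes node %d\n", n, next_online_node(n));
        return 0;
}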
@@ -906,10 +932,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) | |||
906 | 932 | ||
907 | if (!ac_ptr) | 933 | if (!ac_ptr) |
908 | return; | 934 | return; |
909 | |||
910 | for_each_node(i) | 935 | for_each_node(i) |
911 | kfree(ac_ptr[i]); | 936 | kfree(ac_ptr[i]); |
912 | |||
913 | kfree(ac_ptr); | 937 | kfree(ac_ptr); |
914 | } | 938 | } |
915 | 939 | ||
@@ -943,7 +967,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | |||
943 | } | 967 | } |
944 | } | 968 | } |
945 | 969 | ||
946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 970 | static void drain_alien_cache(struct kmem_cache *cachep, |
971 | struct array_cache **alien) | ||
947 | { | 972 | { |
948 | int i = 0; | 973 | int i = 0; |
949 | struct array_cache *ac; | 974 | struct array_cache *ac; |
@@ -986,20 +1011,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
986 | switch (action) { | 1011 | switch (action) { |
987 | case CPU_UP_PREPARE: | 1012 | case CPU_UP_PREPARE: |
988 | mutex_lock(&cache_chain_mutex); | 1013 | mutex_lock(&cache_chain_mutex); |
989 | /* we need to do this right in the beginning since | 1014 | /* |
1015 | * We need to do this right in the beginning since | ||
990 | * alloc_arraycache's are going to use this list. | 1016 | * alloc_arraycache's are going to use this list. |
991 | * kmalloc_node allows us to add the slab to the right | 1017 | * kmalloc_node allows us to add the slab to the right |
992 | * kmem_list3 and not this cpu's kmem_list3 | 1018 | * kmem_list3 and not this cpu's kmem_list3 |
993 | */ | 1019 | */ |
994 | 1020 | ||
995 | list_for_each_entry(cachep, &cache_chain, next) { | 1021 | list_for_each_entry(cachep, &cache_chain, next) { |
996 | /* setup the size64 kmemlist for cpu before we can | 1022 | /* |
1023 | * Set up the size64 kmemlist for cpu before we can | ||
997 | * begin anything. Make sure some other cpu on this | 1024 | * begin anything. Make sure some other cpu on this |
998 | * node has not already allocated this | 1025 | * node has not already allocated this |
999 | */ | 1026 | */ |
1000 | if (!cachep->nodelists[node]) { | 1027 | if (!cachep->nodelists[node]) { |
1001 | if (!(l3 = kmalloc_node(memsize, | 1028 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); |
1002 | GFP_KERNEL, node))) | 1029 | if (!l3) |
1003 | goto bad; | 1030 | goto bad; |
1004 | kmem_list3_init(l3); | 1031 | kmem_list3_init(l3); |
1005 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1032 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
@@ -1015,13 +1042,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1015 | 1042 | ||
1016 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 1043 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
1017 | cachep->nodelists[node]->free_limit = | 1044 | cachep->nodelists[node]->free_limit = |
1018 | (1 + nr_cpus_node(node)) * | 1045 | (1 + nr_cpus_node(node)) * |
1019 | cachep->batchcount + cachep->num; | 1046 | cachep->batchcount + cachep->num; |
1020 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 1047 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
1021 | } | 1048 | } |
1022 | 1049 | ||
1023 | /* Now we can go ahead with allocating the shared array's | 1050 | /* |
1024 | & array cache's */ | 1051 | * Now we can go ahead with allocating the shared arrays and |
1052 | * array caches | ||
1053 | */ | ||
1025 | list_for_each_entry(cachep, &cache_chain, next) { | 1054 | list_for_each_entry(cachep, &cache_chain, next) { |
1026 | struct array_cache *nc; | 1055 | struct array_cache *nc; |
1027 | struct array_cache *shared; | 1056 | struct array_cache *shared; |
@@ -1041,7 +1070,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1041 | if (!alien) | 1070 | if (!alien) |
1042 | goto bad; | 1071 | goto bad; |
1043 | cachep->array[cpu] = nc; | 1072 | cachep->array[cpu] = nc; |
1044 | |||
1045 | l3 = cachep->nodelists[node]; | 1073 | l3 = cachep->nodelists[node]; |
1046 | BUG_ON(!l3); | 1074 | BUG_ON(!l3); |
1047 | 1075 | ||
@@ -1061,7 +1089,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1061 | } | 1089 | } |
1062 | #endif | 1090 | #endif |
1063 | spin_unlock_irq(&l3->list_lock); | 1091 | spin_unlock_irq(&l3->list_lock); |
1064 | |||
1065 | kfree(shared); | 1092 | kfree(shared); |
1066 | free_alien_cache(alien); | 1093 | free_alien_cache(alien); |
1067 | } | 1094 | } |
@@ -1083,7 +1110,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1083 | /* fall thru */ | 1110 | /* fall thru */ |
1084 | case CPU_UP_CANCELED: | 1111 | case CPU_UP_CANCELED: |
1085 | mutex_lock(&cache_chain_mutex); | 1112 | mutex_lock(&cache_chain_mutex); |
1086 | |||
1087 | list_for_each_entry(cachep, &cache_chain, next) { | 1113 | list_for_each_entry(cachep, &cache_chain, next) { |
1088 | struct array_cache *nc; | 1114 | struct array_cache *nc; |
1089 | struct array_cache *shared; | 1115 | struct array_cache *shared; |
@@ -1150,7 +1176,7 @@ free_array_cache: | |||
1150 | #endif | 1176 | #endif |
1151 | } | 1177 | } |
1152 | return NOTIFY_OK; | 1178 | return NOTIFY_OK; |
1153 | bad: | 1179 | bad: |
1154 | mutex_unlock(&cache_chain_mutex); | 1180 | mutex_unlock(&cache_chain_mutex); |
1155 | return NOTIFY_BAD; | 1181 | return NOTIFY_BAD; |
1156 | } | 1182 | } |
@@ -1160,7 +1186,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; | |||
1160 | /* | 1186 | /* |
1161 | * swap the static kmem_list3 with kmalloced memory | 1187 | * swap the static kmem_list3 with kmalloced memory |
1162 | */ | 1188 | */ |
1163 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) | 1189 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
1190 | int nodeid) | ||
1164 | { | 1191 | { |
1165 | struct kmem_list3 *ptr; | 1192 | struct kmem_list3 *ptr; |
1166 | 1193 | ||
@@ -1175,8 +1202,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no | |||
1175 | local_irq_enable(); | 1202 | local_irq_enable(); |
1176 | } | 1203 | } |
1177 | 1204 | ||
1178 | /* Initialisation. | 1205 | /* |
1179 | * Called after the gfp() functions have been enabled, and before smp_init(). | 1206 | * Initialisation. Called after the page allocator have been initialised and |
1207 | * before smp_init(). | ||
1180 | */ | 1208 | */ |
1181 | void __init kmem_cache_init(void) | 1209 | void __init kmem_cache_init(void) |
1182 | { | 1210 | { |
@@ -1201,9 +1229,9 @@ void __init kmem_cache_init(void) | |||
1201 | 1229 | ||
1202 | /* Bootstrap is tricky, because several objects are allocated | 1230 | /* Bootstrap is tricky, because several objects are allocated |
1203 | * from caches that do not exist yet: | 1231 | * from caches that do not exist yet: |
1204 | * 1) initialize the cache_cache cache: it contains the struct kmem_cache | 1232 | * 1) initialize the cache_cache cache: it contains the struct |
1205 | * structures of all caches, except cache_cache itself: cache_cache | 1233 | * kmem_cache structures of all caches, except cache_cache itself: |
1206 | * is statically allocated. | 1234 | * cache_cache is statically allocated. |
1207 | * Initially an __init data area is used for the head array and the | 1235 | * Initially an __init data area is used for the head array and the |
1208 | * kmem_list3 structures, it's replaced with a kmalloc allocated | 1236 | * kmem_list3 structures, it's replaced with a kmalloc allocated |
1209 | * array at the end of the bootstrap. | 1237 | * array at the end of the bootstrap. |
@@ -1226,7 +1254,8 @@ void __init kmem_cache_init(void) | |||
1226 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; | 1254 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; |
1227 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; | 1255 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; |
1228 | 1256 | ||
1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1257 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, |
1258 | cache_line_size()); | ||
1230 | 1259 | ||
1231 | for (order = 0; order < MAX_ORDER; order++) { | 1260 | for (order = 0; order < MAX_ORDER; order++) { |
1232 | cache_estimate(order, cache_cache.buffer_size, | 1261 | cache_estimate(order, cache_cache.buffer_size, |
@@ -1245,24 +1274,26 @@ void __init kmem_cache_init(void) | |||
1245 | sizes = malloc_sizes; | 1274 | sizes = malloc_sizes; |
1246 | names = cache_names; | 1275 | names = cache_names; |
1247 | 1276 | ||
1248 | /* Initialize the caches that provide memory for the array cache | 1277 | /* |
1249 | * and the kmem_list3 structures first. | 1278 | * Initialize the caches that provide memory for the array cache and the |
1250 | * Without this, further allocations will bug | 1279 | * kmem_list3 structures first. Without this, further allocations will |
1280 | * bug. | ||
1251 | */ | 1281 | */ |
1252 | 1282 | ||
1253 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1283 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
1254 | sizes[INDEX_AC].cs_size, | 1284 | sizes[INDEX_AC].cs_size, |
1255 | ARCH_KMALLOC_MINALIGN, | 1285 | ARCH_KMALLOC_MINALIGN, |
1256 | (ARCH_KMALLOC_FLAGS | | 1286 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1257 | SLAB_PANIC), NULL, NULL); | 1287 | NULL, NULL); |
1258 | 1288 | ||
1259 | if (INDEX_AC != INDEX_L3) | 1289 | if (INDEX_AC != INDEX_L3) { |
1260 | sizes[INDEX_L3].cs_cachep = | 1290 | sizes[INDEX_L3].cs_cachep = |
1261 | kmem_cache_create(names[INDEX_L3].name, | 1291 | kmem_cache_create(names[INDEX_L3].name, |
1262 | sizes[INDEX_L3].cs_size, | 1292 | sizes[INDEX_L3].cs_size, |
1263 | ARCH_KMALLOC_MINALIGN, | 1293 | ARCH_KMALLOC_MINALIGN, |
1264 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, | 1294 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1265 | NULL); | 1295 | NULL, NULL); |
1296 | } | ||
1266 | 1297 | ||
1267 | while (sizes->cs_size != ULONG_MAX) { | 1298 | while (sizes->cs_size != ULONG_MAX) { |
1268 | /* | 1299 | /* |
@@ -1272,13 +1303,13 @@ void __init kmem_cache_init(void) | |||
1272 | * Note for systems short on memory removing the alignment will | 1303 | * Note for systems short on memory removing the alignment will |
1273 | * allow tighter packing of the smaller caches. | 1304 | * allow tighter packing of the smaller caches. |
1274 | */ | 1305 | */ |
1275 | if (!sizes->cs_cachep) | 1306 | if (!sizes->cs_cachep) { |
1276 | sizes->cs_cachep = kmem_cache_create(names->name, | 1307 | sizes->cs_cachep = kmem_cache_create(names->name, |
1277 | sizes->cs_size, | 1308 | sizes->cs_size, |
1278 | ARCH_KMALLOC_MINALIGN, | 1309 | ARCH_KMALLOC_MINALIGN, |
1279 | (ARCH_KMALLOC_FLAGS | 1310 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1280 | | SLAB_PANIC), | 1311 | NULL, NULL); |
1281 | NULL, NULL); | 1312 | } |
1282 | 1313 | ||
1283 | /* Inc off-slab bufctl limit until the ceiling is hit. */ | 1314 | /* Inc off-slab bufctl limit until the ceiling is hit. */ |
1284 | if (!(OFF_SLAB(sizes->cs_cachep))) { | 1315 | if (!(OFF_SLAB(sizes->cs_cachep))) { |
@@ -1287,13 +1318,11 @@ void __init kmem_cache_init(void) | |||
1287 | } | 1318 | } |
1288 | 1319 | ||
1289 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1320 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
1290 | sizes->cs_size, | 1321 | sizes->cs_size, |
1291 | ARCH_KMALLOC_MINALIGN, | 1322 | ARCH_KMALLOC_MINALIGN, |
1292 | (ARCH_KMALLOC_FLAGS | | 1323 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| |
1293 | SLAB_CACHE_DMA | | 1324 | SLAB_PANIC, |
1294 | SLAB_PANIC), NULL, | 1325 | NULL, NULL); |
1295 | NULL); | ||
1296 | |||
1297 | sizes++; | 1326 | sizes++; |
1298 | names++; | 1327 | names++; |
1299 | } | 1328 | } |
@@ -1345,20 +1374,22 @@ void __init kmem_cache_init(void) | |||
1345 | struct kmem_cache *cachep; | 1374 | struct kmem_cache *cachep; |
1346 | mutex_lock(&cache_chain_mutex); | 1375 | mutex_lock(&cache_chain_mutex); |
1347 | list_for_each_entry(cachep, &cache_chain, next) | 1376 | list_for_each_entry(cachep, &cache_chain, next) |
1348 | enable_cpucache(cachep); | 1377 | enable_cpucache(cachep); |
1349 | mutex_unlock(&cache_chain_mutex); | 1378 | mutex_unlock(&cache_chain_mutex); |
1350 | } | 1379 | } |
1351 | 1380 | ||
1352 | /* Done! */ | 1381 | /* Done! */ |
1353 | g_cpucache_up = FULL; | 1382 | g_cpucache_up = FULL; |
1354 | 1383 | ||
1355 | /* Register a cpu startup notifier callback | 1384 | /* |
1356 | * that initializes cpu_cache_get for all new cpus | 1385 | * Register a cpu startup notifier callback that initializes |
1386 | * cpu_cache_get for all new cpus | ||
1357 | */ | 1387 | */ |
1358 | register_cpu_notifier(&cpucache_notifier); | 1388 | register_cpu_notifier(&cpucache_notifier); |
1359 | 1389 | ||
1360 | /* The reap timers are started later, with a module init call: | 1390 | /* |
1361 | * That part of the kernel is not yet operational. | 1391 | * The reap timers are started later, with a module init call: That part |
1392 | * of the kernel is not yet operational. | ||
1362 | */ | 1393 | */ |
1363 | } | 1394 | } |
1364 | 1395 | ||
@@ -1366,16 +1397,13 @@ static int __init cpucache_init(void) | |||
1366 | { | 1397 | { |
1367 | int cpu; | 1398 | int cpu; |
1368 | 1399 | ||
1369 | /* | 1400 | /* |
1370 | * Register the timers that return unneeded | 1401 | * Register the timers that return unneeded pages to the page allocator |
1371 | * pages to gfp. | ||
1372 | */ | 1402 | */ |
1373 | for_each_online_cpu(cpu) | 1403 | for_each_online_cpu(cpu) |
1374 | start_cpu_timer(cpu); | 1404 | start_cpu_timer(cpu); |
1375 | |||
1376 | return 0; | 1405 | return 0; |
1377 | } | 1406 | } |
1378 | |||
1379 | __initcall(cpucache_init); | 1407 | __initcall(cpucache_init); |
1380 | 1408 | ||
1381 | /* | 1409 | /* |
@@ -1402,7 +1430,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1402 | atomic_add(i, &slab_reclaim_pages); | 1430 | atomic_add(i, &slab_reclaim_pages); |
1403 | add_page_state(nr_slab, i); | 1431 | add_page_state(nr_slab, i); |
1404 | while (i--) { | 1432 | while (i--) { |
1405 | SetPageSlab(page); | 1433 | __SetPageSlab(page); |
1406 | page++; | 1434 | page++; |
1407 | } | 1435 | } |
1408 | return addr; | 1436 | return addr; |
@@ -1418,8 +1446,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1418 | const unsigned long nr_freed = i; | 1446 | const unsigned long nr_freed = i; |
1419 | 1447 | ||
1420 | while (i--) { | 1448 | while (i--) { |
1421 | if (!TestClearPageSlab(page)) | 1449 | BUG_ON(!PageSlab(page)); |
1422 | BUG(); | 1450 | __ClearPageSlab(page); |
1423 | page++; | 1451 | page++; |
1424 | } | 1452 | } |
1425 | sub_page_state(nr_slab, nr_freed); | 1453 | sub_page_state(nr_slab, nr_freed); |
@@ -1489,9 +1517,8 @@ static void dump_line(char *data, int offset, int limit) | |||
1489 | { | 1517 | { |
1490 | int i; | 1518 | int i; |
1491 | printk(KERN_ERR "%03x:", offset); | 1519 | printk(KERN_ERR "%03x:", offset); |
1492 | for (i = 0; i < limit; i++) { | 1520 | for (i = 0; i < limit; i++) |
1493 | printk(" %02x", (unsigned char)data[offset + i]); | 1521 | printk(" %02x", (unsigned char)data[offset + i]); |
1494 | } | ||
1495 | printk("\n"); | 1522 | printk("\n"); |
1496 | } | 1523 | } |
1497 | #endif | 1524 | #endif |
@@ -1505,15 +1532,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) | |||
1505 | 1532 | ||
1506 | if (cachep->flags & SLAB_RED_ZONE) { | 1533 | if (cachep->flags & SLAB_RED_ZONE) { |
1507 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", | 1534 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", |
1508 | *dbg_redzone1(cachep, objp), | 1535 | *dbg_redzone1(cachep, objp), |
1509 | *dbg_redzone2(cachep, objp)); | 1536 | *dbg_redzone2(cachep, objp)); |
1510 | } | 1537 | } |
1511 | 1538 | ||
1512 | if (cachep->flags & SLAB_STORE_USER) { | 1539 | if (cachep->flags & SLAB_STORE_USER) { |
1513 | printk(KERN_ERR "Last user: [<%p>]", | 1540 | printk(KERN_ERR "Last user: [<%p>]", |
1514 | *dbg_userword(cachep, objp)); | 1541 | *dbg_userword(cachep, objp)); |
1515 | print_symbol("(%s)", | 1542 | print_symbol("(%s)", |
1516 | (unsigned long)*dbg_userword(cachep, objp)); | 1543 | (unsigned long)*dbg_userword(cachep, objp)); |
1517 | printk("\n"); | 1544 | printk("\n"); |
1518 | } | 1545 | } |
1519 | realobj = (char *)objp + obj_offset(cachep); | 1546 | realobj = (char *)objp + obj_offset(cachep); |
@@ -1546,8 +1573,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1546 | /* Print header */ | 1573 | /* Print header */ |
1547 | if (lines == 0) { | 1574 | if (lines == 0) { |
1548 | printk(KERN_ERR | 1575 | printk(KERN_ERR |
1549 | "Slab corruption: start=%p, len=%d\n", | 1576 | "Slab corruption: start=%p, len=%d\n", |
1550 | realobj, size); | 1577 | realobj, size); |
1551 | print_objinfo(cachep, objp, 0); | 1578 | print_objinfo(cachep, objp, 0); |
1552 | } | 1579 | } |
1553 | /* Hexdump the affected line */ | 1580 | /* Hexdump the affected line */ |
@@ -1568,18 +1595,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1568 | * exist: | 1595 | * exist: |
1569 | */ | 1596 | */ |
1570 | struct slab *slabp = virt_to_slab(objp); | 1597 | struct slab *slabp = virt_to_slab(objp); |
1571 | int objnr; | 1598 | unsigned int objnr; |
1572 | 1599 | ||
1573 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 1600 | objnr = obj_to_index(cachep, slabp, objp); |
1574 | if (objnr) { | 1601 | if (objnr) { |
1575 | objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; | 1602 | objp = index_to_obj(cachep, slabp, objnr - 1); |
1576 | realobj = (char *)objp + obj_offset(cachep); | 1603 | realobj = (char *)objp + obj_offset(cachep); |
1577 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1604 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
1578 | realobj, size); | 1605 | realobj, size); |
1579 | print_objinfo(cachep, objp, 2); | 1606 | print_objinfo(cachep, objp, 2); |
1580 | } | 1607 | } |
1581 | if (objnr + 1 < cachep->num) { | 1608 | if (objnr + 1 < cachep->num) { |
1582 | objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; | 1609 | objp = index_to_obj(cachep, slabp, objnr + 1); |
1583 | realobj = (char *)objp + obj_offset(cachep); | 1610 | realobj = (char *)objp + obj_offset(cachep); |
1584 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1611 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
1585 | realobj, size); | 1612 | realobj, size); |
@@ -1591,22 +1618,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1591 | 1618 | ||
1592 | #if DEBUG | 1619 | #if DEBUG |
1593 | /** | 1620 | /** |
1594 | * slab_destroy_objs - call the registered destructor for each object in | 1621 | * slab_destroy_objs - destroy a slab and its objects |
1595 | * a slab that is to be destroyed. | 1622 | * @cachep: cache pointer being destroyed |
1623 | * @slabp: slab pointer being destroyed | ||
1624 | * | ||
1625 | * Call the registered destructor for each object in a slab that is being | ||
1626 | * destroyed. | ||
1596 | */ | 1627 | */ |
1597 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1628 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
1598 | { | 1629 | { |
1599 | int i; | 1630 | int i; |
1600 | for (i = 0; i < cachep->num; i++) { | 1631 | for (i = 0; i < cachep->num; i++) { |
1601 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1632 | void *objp = index_to_obj(cachep, slabp, i); |
1602 | 1633 | ||
1603 | if (cachep->flags & SLAB_POISON) { | 1634 | if (cachep->flags & SLAB_POISON) { |
1604 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1635 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1605 | if ((cachep->buffer_size % PAGE_SIZE) == 0 | 1636 | if (cachep->buffer_size % PAGE_SIZE == 0 && |
1606 | && OFF_SLAB(cachep)) | 1637 | OFF_SLAB(cachep)) |
1607 | kernel_map_pages(virt_to_page(objp), | 1638 | kernel_map_pages(virt_to_page(objp), |
1608 | cachep->buffer_size / PAGE_SIZE, | 1639 | cachep->buffer_size / PAGE_SIZE, 1); |
1609 | 1); | ||
1610 | else | 1640 | else |
1611 | check_poison_obj(cachep, objp); | 1641 | check_poison_obj(cachep, objp); |
1612 | #else | 1642 | #else |
@@ -1631,7 +1661,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
1631 | if (cachep->dtor) { | 1661 | if (cachep->dtor) { |
1632 | int i; | 1662 | int i; |
1633 | for (i = 0; i < cachep->num; i++) { | 1663 | for (i = 0; i < cachep->num; i++) { |
1634 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1664 | void *objp = index_to_obj(cachep, slabp, i); |
1635 | (cachep->dtor) (objp, cachep, 0); | 1665 | (cachep->dtor) (objp, cachep, 0); |
1636 | } | 1666 | } |
1637 | } | 1667 | } |
@@ -1639,9 +1669,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
1639 | #endif | 1669 | #endif |
1640 | 1670 | ||
1641 | /** | 1671 | /** |
1672 | * slab_destroy - destroy and release all objects in a slab | ||
1673 | * @cachep: cache pointer being destroyed | ||
1674 | * @slabp: slab pointer being destroyed | ||
1675 | * | ||
1642 | * Destroy all the objs in a slab, and release the mem back to the system. | 1676 | * Destroy all the objs in a slab, and release the mem back to the system. |
1643 | * Before calling the slab must have been unlinked from the cache. | 1677 | * Before calling the slab must have been unlinked from the cache. The |
1644 | * The cache-lock is not held/needed. | 1678 | * cache-lock is not held/needed. |
1645 | */ | 1679 | */ |
1646 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1680 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
1647 | { | 1681 | { |
@@ -1662,8 +1696,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | |||
1662 | } | 1696 | } |
1663 | } | 1697 | } |
1664 | 1698 | ||
1665 | /* For setting up all the kmem_list3s for cache whose buffer_size is same | 1699 | /* |
1666 | as size of kmem_list3. */ | 1700 | * For setting up all the kmem_list3s for cache whose buffer_size is same as |
1701 | * size of kmem_list3. | ||
1702 | */ | ||
1667 | static void set_up_list3s(struct kmem_cache *cachep, int index) | 1703 | static void set_up_list3s(struct kmem_cache *cachep, int index) |
1668 | { | 1704 | { |
1669 | int node; | 1705 | int node; |
@@ -1689,13 +1725,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) | |||
1689 | * high order pages for slabs. When the gfp() functions are more friendly | 1725 | * high order pages for slabs. When the gfp() functions are more friendly |
1690 | * towards high-order requests, this should be changed. | 1726 | * towards high-order requests, this should be changed. |
1691 | */ | 1727 | */ |
1692 | static inline size_t calculate_slab_order(struct kmem_cache *cachep, | 1728 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
1693 | size_t size, size_t align, unsigned long flags) | 1729 | size_t size, size_t align, unsigned long flags) |
1694 | { | 1730 | { |
1695 | size_t left_over = 0; | 1731 | size_t left_over = 0; |
1696 | int gfporder; | 1732 | int gfporder; |
1697 | 1733 | ||
1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { | 1734 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { |
1699 | unsigned int num; | 1735 | unsigned int num; |
1700 | size_t remainder; | 1736 | size_t remainder; |
1701 | 1737 | ||
@@ -1730,12 +1766,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1730 | /* | 1766 | /* |
1731 | * Acceptable internal fragmentation? | 1767 | * Acceptable internal fragmentation? |
1732 | */ | 1768 | */ |
1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | 1769 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) |
1734 | break; | 1770 | break; |
1735 | } | 1771 | } |
1736 | return left_over; | 1772 | return left_over; |
1737 | } | 1773 | } |
1738 | 1774 | ||
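Annotation: the "acceptable internal fragmentation" test in calculate_slab_order() above accepts a slab order as soon as the wasted tail is at most one eighth of the slab. Worked numbers, under the simplifying assumption of off-slab management (no descriptor overhead): with 4096-byte pages and 1500-byte objects, order 0 fits 2 objects and wastes 1096 bytes, and 1096*8 = 8768 > 4096, so the loop moves on; at order 1, 5 objects fit, 692 bytes are wasted, and 692*8 = 5536 <= 8192, so order 1 is chosen. A sketch of just that loop — the real function also honours slab_break_gfp_order and the off-slab bufctl limit, which are omitted here:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Pick the smallest order whose leftover is at most 1/8 of the slab
 * (off-slab management assumed, so no descriptor overhead). */
static int pick_order(unsigned long buffer_size, int max_order)
{
        int order;

        for (order = 0; order <= max_order; order++) {
                unsigned long slab = PAGE_SIZE << order;
                unsigned long num = slab / buffer_size;
                unsigned long left_over = slab - num * buffer_size;

                if (!num)
                        continue;       /* object does not fit at this order yet */
                if (left_over * 8 <= slab)
                        return order;
        }
        return max_order;
}

int main(void)
{
        printf("1500-byte objects -> order %d\n", pick_order(1500, 5));
        return 0;
}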
1775 | static void setup_cpu_cache(struct kmem_cache *cachep) | ||
1776 | { | ||
1777 | if (g_cpucache_up == FULL) { | ||
1778 | enable_cpucache(cachep); | ||
1779 | return; | ||
1780 | } | ||
1781 | if (g_cpucache_up == NONE) { | ||
1782 | /* | ||
1783 | * Note: the first kmem_cache_create must create the cache | ||
1784 | * that's used by kmalloc(24), otherwise the creation of | ||
1785 | * further caches will BUG(). | ||
1786 | */ | ||
1787 | cachep->array[smp_processor_id()] = &initarray_generic.cache; | ||
1788 | |||
1789 | /* | ||
1790 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is | ||
1791 | * the first cache, then we need to set up all its list3s, | ||
1792 | * otherwise the creation of further caches will BUG(). | ||
1793 | */ | ||
1794 | set_up_list3s(cachep, SIZE_AC); | ||
1795 | if (INDEX_AC == INDEX_L3) | ||
1796 | g_cpucache_up = PARTIAL_L3; | ||
1797 | else | ||
1798 | g_cpucache_up = PARTIAL_AC; | ||
1799 | } else { | ||
1800 | cachep->array[smp_processor_id()] = | ||
1801 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
1802 | |||
1803 | if (g_cpucache_up == PARTIAL_AC) { | ||
1804 | set_up_list3s(cachep, SIZE_L3); | ||
1805 | g_cpucache_up = PARTIAL_L3; | ||
1806 | } else { | ||
1807 | int node; | ||
1808 | for_each_online_node(node) { | ||
1809 | cachep->nodelists[node] = | ||
1810 | kmalloc_node(sizeof(struct kmem_list3), | ||
1811 | GFP_KERNEL, node); | ||
1812 | BUG_ON(!cachep->nodelists[node]); | ||
1813 | kmem_list3_init(cachep->nodelists[node]); | ||
1814 | } | ||
1815 | } | ||
1816 | } | ||
1817 | cachep->nodelists[numa_node_id()]->next_reap = | ||
1818 | jiffies + REAPTIMEOUT_LIST3 + | ||
1819 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
1820 | |||
1821 | cpu_cache_get(cachep)->avail = 0; | ||
1822 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
1823 | cpu_cache_get(cachep)->batchcount = 1; | ||
1824 | cpu_cache_get(cachep)->touched = 0; | ||
1825 | cachep->batchcount = 1; | ||
1826 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
1827 | } | ||
1828 | |||
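Annotation: setup_cpu_cache(), factored out of kmem_cache_create() further down in this patch, encodes the bootstrap progression: while g_cpucache_up is NONE the static init arrays and list3s are used, the state then advances through PARTIAL_AC and PARTIAL_L3 as the caches backing array caches and kmem_list3s come into existence, and only at FULL does enable_cpucache() run. A compressed summary of that state machine; the strings are paraphrases of the code above, not kernel text:

#include <stdio.h>

enum boot_state { NONE, PARTIAL_AC, PARTIAL_L3, FULL };

/* What setup_cpu_cache() falls back on at each bootstrap stage. */
static const char *cpu_cache_source(enum boot_state s)
{
        switch (s) {
        case NONE:       return "static initarray_generic + static list3s";
        case PARTIAL_AC: return "kmalloc'd array cache, static list3s";
        case PARTIAL_L3: return "kmalloc'd array cache + kmalloc_node'd list3s";
        case FULL:       return "enable_cpucache(): tuned per-cpu caches";
        }
        return "?";
}

int main(void)
{
        enum boot_state s;

        for (s = NONE; s <= FULL; s++)
                printf("%d: %s\n", s, cpu_cache_source(s));
        return 0;
}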
1739 | /** | 1829 | /** |
1740 | * kmem_cache_create - Create a cache. | 1830 | * kmem_cache_create - Create a cache. |
1741 | * @name: A string which is used in /proc/slabinfo to identify this cache. | 1831 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
@@ -1751,9 +1841,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1751 | * and the @dtor is run before the pages are handed back. | 1841 | * and the @dtor is run before the pages are handed back. |
1752 | * | 1842 | * |
1753 | * @name must be valid until the cache is destroyed. This implies that | 1843 | * @name must be valid until the cache is destroyed. This implies that |
1754 | * the module calling this has to destroy the cache before getting | 1844 | * the module calling this has to destroy the cache before getting unloaded. |
1755 | * unloaded. | 1845 | * |
1756 | * | ||
1757 | * The flags are | 1846 | * The flags are |
1758 | * | 1847 | * |
1759 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) | 1848 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
@@ -1762,16 +1851,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1762 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check | 1851 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
1763 | * for buffer overruns. | 1852 | * for buffer overruns. |
1764 | * | 1853 | * |
1765 | * %SLAB_NO_REAP - Don't automatically reap this cache when we're under | ||
1766 | * memory pressure. | ||
1767 | * | ||
1768 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware | 1854 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
1769 | * cacheline. This can be beneficial if you're counting cycles as closely | 1855 | * cacheline. This can be beneficial if you're counting cycles as closely |
1770 | * as davem. | 1856 | * as davem. |
1771 | */ | 1857 | */ |
1772 | struct kmem_cache * | 1858 | struct kmem_cache * |
1773 | kmem_cache_create (const char *name, size_t size, size_t align, | 1859 | kmem_cache_create (const char *name, size_t size, size_t align, |
1774 | unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), | 1860 | unsigned long flags, |
1861 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | ||
1775 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | 1862 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) |
1776 | { | 1863 | { |
1777 | size_t left_over, slab_size, ralign; | 1864 | size_t left_over, slab_size, ralign; |
@@ -1781,12 +1868,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1781 | /* | 1868 | /* |
1782 | * Sanity checks... these are all serious usage bugs. | 1869 | * Sanity checks... these are all serious usage bugs. |
1783 | */ | 1870 | */ |
1784 | if ((!name) || | 1871 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || |
1785 | in_interrupt() || | ||
1786 | (size < BYTES_PER_WORD) || | ||
1787 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { | 1872 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
1788 | printk(KERN_ERR "%s: Early error in slab %s\n", | 1873 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, |
1789 | __FUNCTION__, name); | 1874 | name); |
1790 | BUG(); | 1875 | BUG(); |
1791 | } | 1876 | } |
1792 | 1877 | ||
@@ -1840,8 +1925,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1840 | * above the next power of two: caches with object sizes just above a | 1925 | * above the next power of two: caches with object sizes just above a |
1841 | * power of two have a significant amount of internal fragmentation. | 1926 | * power of two have a significant amount of internal fragmentation. |
1842 | */ | 1927 | */ |
1843 | if ((size < 4096 | 1928 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) |
1844 | || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) | ||
1845 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | 1929 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; |
1846 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 1930 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
1847 | flags |= SLAB_POISON; | 1931 | flags |= SLAB_POISON; |
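Annotation: the debug-only test above turns on redzoning and user tracking for every object smaller than a page, and for larger ones only when the three extra debug words would not push the object into the next power-of-two size class — fls(size - 1) is the number of bits needed, so the two calls agree exactly when the padded and unpadded sizes round up to the same power of two. A small check using a portable stand-in for the kernel's fls(); the helper names and example sizes are illustrative:

#include <stdio.h>

/* Portable stand-in for fls(): index of the highest set bit, 1-based; 0 for 0. */
static int fls_compat(unsigned long x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

static int debug_worthwhile(unsigned long size, unsigned long word)
{
        return size < 4096 ||
               fls_compat(size - 1) == fls_compat(size - 1 + 3 * word);
}

int main(void)
{
        printf("size  100: %d\n", debug_worthwhile(100, 8));   /* 1: below a page */
        printf("size 4096: %d\n", debug_worthwhile(4096, 8));  /* 0: 24 extra bytes need the 8192 class */
        printf("size 5000: %d\n", debug_worthwhile(5000, 8));  /* 1: still fits the 8192 class */
        return 0;
}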
@@ -1853,13 +1937,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1853 | BUG_ON(dtor); | 1937 | BUG_ON(dtor); |
1854 | 1938 | ||
1855 | /* | 1939 | /* |
1856 | * Always checks flags, a caller might be expecting debug | 1940 | * Always checks flags, a caller might be expecting debug support which |
1857 | * support which isn't available. | 1941 | * isn't available. |
1858 | */ | 1942 | */ |
1859 | if (flags & ~CREATE_MASK) | 1943 | if (flags & ~CREATE_MASK) |
1860 | BUG(); | 1944 | BUG(); |
1861 | 1945 | ||
1862 | /* Check that size is in terms of words. This is needed to avoid | 1946 | /* |
1947 | * Check that size is in terms of words. This is needed to avoid | ||
1863 | * unaligned accesses for some archs when redzoning is used, and makes | 1948 | * unaligned accesses for some archs when redzoning is used, and makes |
1864 | * sure any on-slab bufctl's are also correctly aligned. | 1949 | * sure any on-slab bufctl's are also correctly aligned. |
1865 | */ | 1950 | */ |
@@ -1868,12 +1953,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1868 | size &= ~(BYTES_PER_WORD - 1); | 1953 | size &= ~(BYTES_PER_WORD - 1); |
1869 | } | 1954 | } |
1870 | 1955 | ||
1871 | /* calculate out the final buffer alignment: */ | 1956 | /* calculate the final buffer alignment: */ |
1957 | |||
1872 | /* 1) arch recommendation: can be overridden for debug */ | 1958 | /* 1) arch recommendation: can be overridden for debug */ |
1873 | if (flags & SLAB_HWCACHE_ALIGN) { | 1959 | if (flags & SLAB_HWCACHE_ALIGN) { |
1874 | /* Default alignment: as specified by the arch code. | 1960 | /* |
1875 | * Except if an object is really small, then squeeze multiple | 1961 | * Default alignment: as specified by the arch code. Except if |
1876 | * objects into one cacheline. | 1962 | * an object is really small, then squeeze multiple objects into |
1963 | * one cacheline. | ||
1877 | */ | 1964 | */ |
1878 | ralign = cache_line_size(); | 1965 | ralign = cache_line_size(); |
1879 | while (size <= ralign / 2) | 1966 | while (size <= ralign / 2) |
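Annotation: the SLAB_HWCACHE_ALIGN branch above halves the alignment while the object still fits in half of it, so several small objects can share one cache line instead of each being padded out to a full line. For instance with a 64-byte line, a 20-byte object ends up 32-byte aligned (two per line) and a 12-byte object 16-byte aligned (four per line). The loop in isolation, as a runnable sketch:

#include <stdio.h>

/* Shrink the alignment while the object still fits in half of it. */
static unsigned long squeeze_align(unsigned long size, unsigned long cache_line)
{
        unsigned long ralign = cache_line;

        while (size <= ralign / 2)
                ralign /= 2;
        return ralign;
}

int main(void)
{
        printf("20-byte obj, 64-byte line -> align %lu\n", squeeze_align(20, 64));
        printf("12-byte obj, 64-byte line -> align %lu\n", squeeze_align(12, 64));
        return 0;
}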
@@ -1893,7 +1980,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1893 | if (ralign > BYTES_PER_WORD) | 1980 | if (ralign > BYTES_PER_WORD) |
1894 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 1981 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
1895 | } | 1982 | } |
1896 | /* 4) Store it. Note that the debug code below can reduce | 1983 | /* |
1984 | * 4) Store it. Note that the debug code below can reduce | ||
1897 | * the alignment to BYTES_PER_WORD. | 1985 | * the alignment to BYTES_PER_WORD. |
1898 | */ | 1986 | */ |
1899 | align = ralign; | 1987 | align = ralign; |
@@ -1978,7 +2066,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1978 | cachep->gfpflags = 0; | 2066 | cachep->gfpflags = 0; |
1979 | if (flags & SLAB_CACHE_DMA) | 2067 | if (flags & SLAB_CACHE_DMA) |
1980 | cachep->gfpflags |= GFP_DMA; | 2068 | cachep->gfpflags |= GFP_DMA; |
1981 | spin_lock_init(&cachep->spinlock); | ||
1982 | cachep->buffer_size = size; | 2069 | cachep->buffer_size = size; |
1983 | 2070 | ||
1984 | if (flags & CFLGS_OFF_SLAB) | 2071 | if (flags & CFLGS_OFF_SLAB) |
@@ -1988,64 +2075,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1988 | cachep->name = name; | 2075 | cachep->name = name; |
1989 | 2076 | ||
1990 | 2077 | ||
1991 | if (g_cpucache_up == FULL) { | 2078 | setup_cpu_cache(cachep); |
1992 | enable_cpucache(cachep); | ||
1993 | } else { | ||
1994 | if (g_cpucache_up == NONE) { | ||
1995 | /* Note: the first kmem_cache_create must create | ||
1996 | * the cache that's used by kmalloc(24), otherwise | ||
1997 | * the creation of further caches will BUG(). | ||
1998 | */ | ||
1999 | cachep->array[smp_processor_id()] = | ||
2000 | &initarray_generic.cache; | ||
2001 | |||
2002 | /* If the cache that's used by | ||
2003 | * kmalloc(sizeof(kmem_list3)) is the first cache, | ||
2004 | * then we need to set up all its list3s, otherwise | ||
2005 | * the creation of further caches will BUG(). | ||
2006 | */ | ||
2007 | set_up_list3s(cachep, SIZE_AC); | ||
2008 | if (INDEX_AC == INDEX_L3) | ||
2009 | g_cpucache_up = PARTIAL_L3; | ||
2010 | else | ||
2011 | g_cpucache_up = PARTIAL_AC; | ||
2012 | } else { | ||
2013 | cachep->array[smp_processor_id()] = | ||
2014 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
2015 | |||
2016 | if (g_cpucache_up == PARTIAL_AC) { | ||
2017 | set_up_list3s(cachep, SIZE_L3); | ||
2018 | g_cpucache_up = PARTIAL_L3; | ||
2019 | } else { | ||
2020 | int node; | ||
2021 | for_each_online_node(node) { | ||
2022 | |||
2023 | cachep->nodelists[node] = | ||
2024 | kmalloc_node(sizeof | ||
2025 | (struct kmem_list3), | ||
2026 | GFP_KERNEL, node); | ||
2027 | BUG_ON(!cachep->nodelists[node]); | ||
2028 | kmem_list3_init(cachep-> | ||
2029 | nodelists[node]); | ||
2030 | } | ||
2031 | } | ||
2032 | } | ||
2033 | cachep->nodelists[numa_node_id()]->next_reap = | ||
2034 | jiffies + REAPTIMEOUT_LIST3 + | ||
2035 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
2036 | |||
2037 | BUG_ON(!cpu_cache_get(cachep)); | ||
2038 | cpu_cache_get(cachep)->avail = 0; | ||
2039 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
2040 | cpu_cache_get(cachep)->batchcount = 1; | ||
2041 | cpu_cache_get(cachep)->touched = 0; | ||
2042 | cachep->batchcount = 1; | ||
2043 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
2044 | } | ||
2045 | 2079 | ||
2046 | /* cache setup completed, link it into the list */ | 2080 | /* cache setup completed, link it into the list */ |
2047 | list_add(&cachep->next, &cache_chain); | 2081 | list_add(&cachep->next, &cache_chain); |
2048 | oops: | 2082 | oops: |
2049 | if (!cachep && (flags & SLAB_PANIC)) | 2083 | if (!cachep && (flags & SLAB_PANIC)) |
2050 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2084 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
2051 | name); | 2085 | name); |
@@ -2089,30 +2123,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | |||
2089 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2123 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
2090 | #endif | 2124 | #endif |
2091 | 2125 | ||
2092 | /* | 2126 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
2093 | * Waits for all CPUs to execute func(). | 2127 | struct array_cache *ac, |
2094 | */ | 2128 | int force, int node); |
2095 | static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) | ||
2096 | { | ||
2097 | check_irq_on(); | ||
2098 | preempt_disable(); | ||
2099 | |||
2100 | local_irq_disable(); | ||
2101 | func(arg); | ||
2102 | local_irq_enable(); | ||
2103 | |||
2104 | if (smp_call_function(func, arg, 1, 1)) | ||
2105 | BUG(); | ||
2106 | |||
2107 | preempt_enable(); | ||
2108 | } | ||
2109 | |||
2110 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | ||
2111 | int force, int node); | ||
2112 | 2129 | ||
2113 | static void do_drain(void *arg) | 2130 | static void do_drain(void *arg) |
2114 | { | 2131 | { |
2115 | struct kmem_cache *cachep = (struct kmem_cache *) arg; | 2132 | struct kmem_cache *cachep = arg; |
2116 | struct array_cache *ac; | 2133 | struct array_cache *ac; |
2117 | int node = numa_node_id(); | 2134 | int node = numa_node_id(); |
2118 | 2135 | ||
@@ -2129,14 +2146,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep) | |||
2129 | struct kmem_list3 *l3; | 2146 | struct kmem_list3 *l3; |
2130 | int node; | 2147 | int node; |
2131 | 2148 | ||
2132 | smp_call_function_all_cpus(do_drain, cachep); | 2149 | on_each_cpu(do_drain, cachep, 1, 1); |
2133 | check_irq_on(); | 2150 | check_irq_on(); |
2134 | for_each_online_node(node) { | 2151 | for_each_online_node(node) { |
2135 | l3 = cachep->nodelists[node]; | 2152 | l3 = cachep->nodelists[node]; |
2136 | if (l3) { | 2153 | if (l3) { |
2137 | spin_lock_irq(&l3->list_lock); | 2154 | drain_array(cachep, l3, l3->shared, 1, node); |
2138 | drain_array_locked(cachep, l3->shared, 1, node); | ||
2139 | spin_unlock_irq(&l3->list_lock); | ||
2140 | if (l3->alien) | 2155 | if (l3->alien) |
2141 | drain_alien_cache(cachep, l3->alien); | 2156 | drain_alien_cache(cachep, l3->alien); |
2142 | } | 2157 | } |
@@ -2260,16 +2275,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep) | |||
2260 | 2275 | ||
2261 | /* NUMA: free the list3 structures */ | 2276 | /* NUMA: free the list3 structures */ |
2262 | for_each_online_node(i) { | 2277 | for_each_online_node(i) { |
2263 | if ((l3 = cachep->nodelists[i])) { | 2278 | l3 = cachep->nodelists[i]; |
2279 | if (l3) { | ||
2264 | kfree(l3->shared); | 2280 | kfree(l3->shared); |
2265 | free_alien_cache(l3->alien); | 2281 | free_alien_cache(l3->alien); |
2266 | kfree(l3); | 2282 | kfree(l3); |
2267 | } | 2283 | } |
2268 | } | 2284 | } |
2269 | kmem_cache_free(&cache_cache, cachep); | 2285 | kmem_cache_free(&cache_cache, cachep); |
2270 | |||
2271 | unlock_cpu_hotplug(); | 2286 | unlock_cpu_hotplug(); |
2272 | |||
2273 | return 0; | 2287 | return 0; |
2274 | } | 2288 | } |
2275 | EXPORT_SYMBOL(kmem_cache_destroy); | 2289 | EXPORT_SYMBOL(kmem_cache_destroy); |
@@ -2292,7 +2306,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
2292 | slabp->inuse = 0; | 2306 | slabp->inuse = 0; |
2293 | slabp->colouroff = colour_off; | 2307 | slabp->colouroff = colour_off; |
2294 | slabp->s_mem = objp + colour_off; | 2308 | slabp->s_mem = objp + colour_off; |
2295 | |||
2296 | return slabp; | 2309 | return slabp; |
2297 | } | 2310 | } |
2298 | 2311 | ||
@@ -2307,7 +2320,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2307 | int i; | 2320 | int i; |
2308 | 2321 | ||
2309 | for (i = 0; i < cachep->num; i++) { | 2322 | for (i = 0; i < cachep->num; i++) { |
2310 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 2323 | void *objp = index_to_obj(cachep, slabp, i); |
2311 | #if DEBUG | 2324 | #if DEBUG |
2312 | /* need to poison the objs? */ | 2325 | /* need to poison the objs? */ |
2313 | if (cachep->flags & SLAB_POISON) | 2326 | if (cachep->flags & SLAB_POISON) |
@@ -2320,9 +2333,9 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2320 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2333 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
2321 | } | 2334 | } |
2322 | /* | 2335 | /* |
2323 | * Constructors are not allowed to allocate memory from | 2336 | * Constructors are not allowed to allocate memory from the same |
2324 | * the same cache which they are a constructor for. | 2337 | * cache which they are a constructor for. Otherwise, deadlock. |
2325 | * Otherwise, deadlock. They must also be threaded. | 2338 | * They must also be threaded. |
2326 | */ | 2339 | */ |
2327 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2340 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
2328 | cachep->ctor(objp + obj_offset(cachep), cachep, | 2341 | cachep->ctor(objp + obj_offset(cachep), cachep, |
@@ -2336,8 +2349,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2336 | slab_error(cachep, "constructor overwrote the" | 2349 | slab_error(cachep, "constructor overwrote the" |
2337 | " start of an object"); | 2350 | " start of an object"); |
2338 | } | 2351 | } |
2339 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) | 2352 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && |
2340 | && cachep->flags & SLAB_POISON) | 2353 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) |
2341 | kernel_map_pages(virt_to_page(objp), | 2354 | kernel_map_pages(virt_to_page(objp), |
2342 | cachep->buffer_size / PAGE_SIZE, 0); | 2355 | cachep->buffer_size / PAGE_SIZE, 0); |
2343 | #else | 2356 | #else |
@@ -2352,18 +2365,16 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2352 | 2365 | ||
2353 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2366 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
2354 | { | 2367 | { |
2355 | if (flags & SLAB_DMA) { | 2368 | if (flags & SLAB_DMA) |
2356 | if (!(cachep->gfpflags & GFP_DMA)) | 2369 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); |
2357 | BUG(); | 2370 | else |
2358 | } else { | 2371 | BUG_ON(cachep->gfpflags & GFP_DMA); |
2359 | if (cachep->gfpflags & GFP_DMA) | ||
2360 | BUG(); | ||
2361 | } | ||
2362 | } | 2372 | } |
2363 | 2373 | ||
2364 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) | 2374 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, |
2375 | int nodeid) | ||
2365 | { | 2376 | { |
2366 | void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); | 2377 | void *objp = index_to_obj(cachep, slabp, slabp->free); |
2367 | kmem_bufctl_t next; | 2378 | kmem_bufctl_t next; |
2368 | 2379 | ||
2369 | slabp->inuse++; | 2380 | slabp->inuse++; |
@@ -2377,10 +2388,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod | |||
2377 | return objp; | 2388 | return objp; |
2378 | } | 2389 | } |
2379 | 2390 | ||
2380 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, | 2391 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, |
2381 | int nodeid) | 2392 | void *objp, int nodeid) |
2382 | { | 2393 | { |
2383 | unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; | 2394 | unsigned int objnr = obj_to_index(cachep, slabp, objp); |
2384 | 2395 | ||
2385 | #if DEBUG | 2396 | #if DEBUG |
2386 | /* Verify that the slab belongs to the intended node */ | 2397 | /* Verify that the slab belongs to the intended node */ |
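/*
 * Illustrative sketch, not part of the patch above: these hunks replace the
 * open-coded pointer arithmetic ("slabp->s_mem + cachep->buffer_size * i"
 * and "(objp - slabp->s_mem) / cachep->buffer_size") with index_to_obj()
 * and obj_to_index() helpers.  Judging only from the expressions they
 * replace, the mapping is s_mem + index * buffer_size and its inverse.
 * The toy types below (toy_cache, toy_slab) are simplified stand-ins,
 * not the kernel structures.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct toy_cache { size_t buffer_size; };   /* stand-in for kmem_cache  */
struct toy_slab  { char *s_mem; };          /* stand-in for struct slab */

static void *index_to_obj(struct toy_cache *c, struct toy_slab *s,
			  unsigned int idx)
{
	return s->s_mem + (size_t)idx * c->buffer_size;
}

static unsigned int obj_to_index(struct toy_cache *c, struct toy_slab *s,
				 void *obj)
{
	return (unsigned int)(((char *)obj - s->s_mem) / c->buffer_size);
}

int main(void)
{
	static char mem[4 * 64];
	struct toy_cache cache = { .buffer_size = 64 };
	struct toy_slab slab = { .s_mem = mem };
	void *obj = index_to_obj(&cache, &slab, 3);

	assert(obj_to_index(&cache, &slab, obj) == 3);      /* round trip */
	printf("object 3 lives at offset %td\n", (char *)obj - mem);
	return 0;
}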
@@ -2388,7 +2399,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
2388 | 2399 | ||
2389 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { | 2400 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { |
2390 | printk(KERN_ERR "slab: double free detected in cache " | 2401 | printk(KERN_ERR "slab: double free detected in cache " |
2391 | "'%s', objp %p\n", cachep->name, objp); | 2402 | "'%s', objp %p\n", cachep->name, objp); |
2392 | BUG(); | 2403 | BUG(); |
2393 | } | 2404 | } |
2394 | #endif | 2405 | #endif |
@@ -2397,14 +2408,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
2397 | slabp->inuse--; | 2408 | slabp->inuse--; |
2398 | } | 2409 | } |
2399 | 2410 | ||
2400 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp) | 2411 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, |
2412 | void *objp) | ||
2401 | { | 2413 | { |
2402 | int i; | 2414 | int i; |
2403 | struct page *page; | 2415 | struct page *page; |
2404 | 2416 | ||
2405 | /* Nasty!!!!!! I hope this is OK. */ | 2417 | /* Nasty!!!!!! I hope this is OK. */ |
2406 | i = 1 << cachep->gfporder; | ||
2407 | page = virt_to_page(objp); | 2418 | page = virt_to_page(objp); |
2419 | |||
2420 | i = 1; | ||
2421 | if (likely(!PageCompound(page))) | ||
2422 | i <<= cachep->gfporder; | ||
2408 | do { | 2423 | do { |
2409 | page_set_cache(page, cachep); | 2424 | page_set_cache(page, cachep); |
2410 | page_set_slab(page, slabp); | 2425 | page_set_slab(page, slabp); |
@@ -2425,8 +2440,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2425 | unsigned long ctor_flags; | 2440 | unsigned long ctor_flags; |
2426 | struct kmem_list3 *l3; | 2441 | struct kmem_list3 *l3; |
2427 | 2442 | ||
2428 | /* Be lazy and only check for valid flags here, | 2443 | /* |
2429 | * keeping it out of the critical path in kmem_cache_alloc(). | 2444 | * Be lazy and only check for valid flags here, keeping it out of the |
2445 | * critical path in kmem_cache_alloc(). | ||
2430 | */ | 2446 | */ |
2431 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) | 2447 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) |
2432 | BUG(); | 2448 | BUG(); |
@@ -2467,14 +2483,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2467 | */ | 2483 | */ |
2468 | kmem_flagcheck(cachep, flags); | 2484 | kmem_flagcheck(cachep, flags); |
2469 | 2485 | ||
2470 | /* Get mem for the objs. | 2486 | /* |
2471 | * Attempt to allocate a physical page from 'nodeid', | 2487 | * Get mem for the objs. Attempt to allocate a physical page from |
2488 | * 'nodeid'. | ||
2472 | */ | 2489 | */ |
2473 | if (!(objp = kmem_getpages(cachep, flags, nodeid))) | 2490 | objp = kmem_getpages(cachep, flags, nodeid); |
2491 | if (!objp) | ||
2474 | goto failed; | 2492 | goto failed; |
2475 | 2493 | ||
2476 | /* Get slab management. */ | 2494 | /* Get slab management. */ |
2477 | if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags))) | 2495 | slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); |
2496 | if (!slabp) | ||
2478 | goto opps1; | 2497 | goto opps1; |
2479 | 2498 | ||
2480 | slabp->nodeid = nodeid; | 2499 | slabp->nodeid = nodeid; |
@@ -2493,9 +2512,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2493 | l3->free_objects += cachep->num; | 2512 | l3->free_objects += cachep->num; |
2494 | spin_unlock(&l3->list_lock); | 2513 | spin_unlock(&l3->list_lock); |
2495 | return 1; | 2514 | return 1; |
2496 | opps1: | 2515 | opps1: |
2497 | kmem_freepages(cachep, objp); | 2516 | kmem_freepages(cachep, objp); |
2498 | failed: | 2517 | failed: |
2499 | if (local_flags & __GFP_WAIT) | 2518 | if (local_flags & __GFP_WAIT) |
2500 | local_irq_disable(); | 2519 | local_irq_disable(); |
2501 | return 0; | 2520 | return 0; |
@@ -2538,8 +2557,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2538 | page = virt_to_page(objp); | 2557 | page = virt_to_page(objp); |
2539 | 2558 | ||
2540 | if (page_get_cache(page) != cachep) { | 2559 | if (page_get_cache(page) != cachep) { |
2541 | printk(KERN_ERR | 2560 | printk(KERN_ERR "mismatch in kmem_cache_free: expected " |
2542 | "mismatch in kmem_cache_free: expected cache %p, got %p\n", | 2561 | "cache %p, got %p\n", |
2543 | page_get_cache(page), cachep); | 2562 | page_get_cache(page), cachep); |
2544 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); | 2563 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); |
2545 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), | 2564 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), |
@@ -2549,13 +2568,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2549 | slabp = page_get_slab(page); | 2568 | slabp = page_get_slab(page); |
2550 | 2569 | ||
2551 | if (cachep->flags & SLAB_RED_ZONE) { | 2570 | if (cachep->flags & SLAB_RED_ZONE) { |
2552 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE | 2571 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || |
2553 | || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { | 2572 | *dbg_redzone2(cachep, objp) != RED_ACTIVE) { |
2554 | slab_error(cachep, | 2573 | slab_error(cachep, "double free, or memory outside" |
2555 | "double free, or memory outside" | 2574 | " object was overwritten"); |
2556 | " object was overwritten"); | 2575 | printk(KERN_ERR "%p: redzone 1:0x%lx, " |
2557 | printk(KERN_ERR | 2576 | "redzone 2:0x%lx.\n", |
2558 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
2559 | objp, *dbg_redzone1(cachep, objp), | 2577 | objp, *dbg_redzone1(cachep, objp), |
2560 | *dbg_redzone2(cachep, objp)); | 2578 | *dbg_redzone2(cachep, objp)); |
2561 | } | 2579 | } |
@@ -2565,15 +2583,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2565 | if (cachep->flags & SLAB_STORE_USER) | 2583 | if (cachep->flags & SLAB_STORE_USER) |
2566 | *dbg_userword(cachep, objp) = caller; | 2584 | *dbg_userword(cachep, objp) = caller; |
2567 | 2585 | ||
2568 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 2586 | objnr = obj_to_index(cachep, slabp, objp); |
2569 | 2587 | ||
2570 | BUG_ON(objnr >= cachep->num); | 2588 | BUG_ON(objnr >= cachep->num); |
2571 | BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); | 2589 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); |
2572 | 2590 | ||
2573 | if (cachep->flags & SLAB_DEBUG_INITIAL) { | 2591 | if (cachep->flags & SLAB_DEBUG_INITIAL) { |
2574 | /* Need to call the slab's constructor so the | 2592 | /* |
2575 | * caller can perform a verify of its state (debugging). | 2593 | * Need to call the slab's constructor so the caller can |
2576 | * Called without the cache-lock held. | 2594 | * perform a verify of its state (debugging). Called without |
2595 | * the cache-lock held. | ||
2577 | */ | 2596 | */ |
2578 | cachep->ctor(objp + obj_offset(cachep), | 2597 | cachep->ctor(objp + obj_offset(cachep), |
2579 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); | 2598 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); |
@@ -2586,7 +2605,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2586 | } | 2605 | } |
2587 | if (cachep->flags & SLAB_POISON) { | 2606 | if (cachep->flags & SLAB_POISON) { |
2588 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2607 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2589 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { | 2608 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
2590 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2609 | store_stackinfo(cachep, objp, (unsigned long)caller); |
2591 | kernel_map_pages(virt_to_page(objp), | 2610 | kernel_map_pages(virt_to_page(objp), |
2592 | cachep->buffer_size / PAGE_SIZE, 0); | 2611 | cachep->buffer_size / PAGE_SIZE, 0); |
@@ -2612,14 +2631,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
2612 | goto bad; | 2631 | goto bad; |
2613 | } | 2632 | } |
2614 | if (entries != cachep->num - slabp->inuse) { | 2633 | if (entries != cachep->num - slabp->inuse) { |
2615 | bad: | 2634 | bad: |
2616 | printk(KERN_ERR | 2635 | printk(KERN_ERR "slab: Internal list corruption detected in " |
2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2636 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2618 | cachep->name, cachep->num, slabp, slabp->inuse); | 2637 | cachep->name, cachep->num, slabp, slabp->inuse); |
2619 | for (i = 0; | 2638 | for (i = 0; |
2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2639 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
2621 | i++) { | 2640 | i++) { |
2622 | if ((i % 16) == 0) | 2641 | if (i % 16 == 0) |
2623 | printk("\n%03x:", i); | 2642 | printk("\n%03x:", i); |
2624 | printk(" %02x", ((unsigned char *)slabp)[i]); | 2643 | printk(" %02x", ((unsigned char *)slabp)[i]); |
2625 | } | 2644 | } |
@@ -2641,12 +2660,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2641 | 2660 | ||
2642 | check_irq_off(); | 2661 | check_irq_off(); |
2643 | ac = cpu_cache_get(cachep); | 2662 | ac = cpu_cache_get(cachep); |
2644 | retry: | 2663 | retry: |
2645 | batchcount = ac->batchcount; | 2664 | batchcount = ac->batchcount; |
2646 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2665 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
2647 | /* if there was little recent activity on this | 2666 | /* |
2648 | * cache, then perform only a partial refill. | 2667 | * If there was little recent activity on this cache, then |
2649 | * Otherwise we could generate refill bouncing. | 2668 | * perform only a partial refill. Otherwise we could generate |
2669 | * refill bouncing. | ||
2650 | */ | 2670 | */ |
2651 | batchcount = BATCHREFILL_LIMIT; | 2671 | batchcount = BATCHREFILL_LIMIT; |
2652 | } | 2672 | } |
@@ -2702,29 +2722,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2702 | list_add(&slabp->list, &l3->slabs_partial); | 2722 | list_add(&slabp->list, &l3->slabs_partial); |
2703 | } | 2723 | } |
2704 | 2724 | ||
2705 | must_grow: | 2725 | must_grow: |
2706 | l3->free_objects -= ac->avail; | 2726 | l3->free_objects -= ac->avail; |
2707 | alloc_done: | 2727 | alloc_done: |
2708 | spin_unlock(&l3->list_lock); | 2728 | spin_unlock(&l3->list_lock); |
2709 | 2729 | ||
2710 | if (unlikely(!ac->avail)) { | 2730 | if (unlikely(!ac->avail)) { |
2711 | int x; | 2731 | int x; |
2712 | x = cache_grow(cachep, flags, numa_node_id()); | 2732 | x = cache_grow(cachep, flags, numa_node_id()); |
2713 | 2733 | ||
2714 | // cache_grow can reenable interrupts, then ac could change. | 2734 | /* cache_grow can reenable interrupts, then ac could change. */ |
2715 | ac = cpu_cache_get(cachep); | 2735 | ac = cpu_cache_get(cachep); |
2716 | if (!x && ac->avail == 0) // no objects in sight? abort | 2736 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
2717 | return NULL; | 2737 | return NULL; |
2718 | 2738 | ||
2719 | if (!ac->avail) // objects refilled by interrupt? | 2739 | if (!ac->avail) /* objects refilled by interrupt? */ |
2720 | goto retry; | 2740 | goto retry; |
2721 | } | 2741 | } |
2722 | ac->touched = 1; | 2742 | ac->touched = 1; |
2723 | return ac->entry[--ac->avail]; | 2743 | return ac->entry[--ac->avail]; |
2724 | } | 2744 | } |
2725 | 2745 | ||
2726 | static inline void | 2746 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
2727 | cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | 2747 | gfp_t flags) |
2728 | { | 2748 | { |
2729 | might_sleep_if(flags & __GFP_WAIT); | 2749 | might_sleep_if(flags & __GFP_WAIT); |
2730 | #if DEBUG | 2750 | #if DEBUG |
@@ -2733,8 +2753,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | |||
2733 | } | 2753 | } |
2734 | 2754 | ||
2735 | #if DEBUG | 2755 | #if DEBUG |
2736 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, | 2756 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
2737 | void *objp, void *caller) | 2757 | gfp_t flags, void *objp, void *caller) |
2738 | { | 2758 | { |
2739 | if (!objp) | 2759 | if (!objp) |
2740 | return objp; | 2760 | return objp; |
@@ -2754,15 +2774,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags | |||
2754 | *dbg_userword(cachep, objp) = caller; | 2774 | *dbg_userword(cachep, objp) = caller; |
2755 | 2775 | ||
2756 | if (cachep->flags & SLAB_RED_ZONE) { | 2776 | if (cachep->flags & SLAB_RED_ZONE) { |
2757 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE | 2777 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || |
2758 | || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 2778 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
2759 | slab_error(cachep, | 2779 | slab_error(cachep, "double free, or memory outside" |
2760 | "double free, or memory outside" | 2780 | " object was overwritten"); |
2761 | " object was overwritten"); | ||
2762 | printk(KERN_ERR | 2781 | printk(KERN_ERR |
2763 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2782 | "%p: redzone 1:0x%lx, redzone 2:0x%lx\n", |
2764 | objp, *dbg_redzone1(cachep, objp), | 2783 | objp, *dbg_redzone1(cachep, objp), |
2765 | *dbg_redzone2(cachep, objp)); | 2784 | *dbg_redzone2(cachep, objp)); |
2766 | } | 2785 | } |
2767 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2786 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
2768 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2787 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
@@ -2809,8 +2828,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
2809 | return objp; | 2828 | return objp; |
2810 | } | 2829 | } |
2811 | 2830 | ||
2812 | static __always_inline void * | 2831 | static __always_inline void *__cache_alloc(struct kmem_cache *cachep, |
2813 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 2832 | gfp_t flags, void *caller) |
2814 | { | 2833 | { |
2815 | unsigned long save_flags; | 2834 | unsigned long save_flags; |
2816 | void *objp; | 2835 | void *objp; |
@@ -2830,7 +2849,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
2830 | /* | 2849 | /* |
2831 | * A interface to enable slab creation on nodeid | 2850 | * A interface to enable slab creation on nodeid |
2832 | */ | 2851 | */ |
2833 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 2852 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
2853 | int nodeid) | ||
2834 | { | 2854 | { |
2835 | struct list_head *entry; | 2855 | struct list_head *entry; |
2836 | struct slab *slabp; | 2856 | struct slab *slabp; |
@@ -2841,7 +2861,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2841 | l3 = cachep->nodelists[nodeid]; | 2861 | l3 = cachep->nodelists[nodeid]; |
2842 | BUG_ON(!l3); | 2862 | BUG_ON(!l3); |
2843 | 2863 | ||
2844 | retry: | 2864 | retry: |
2845 | check_irq_off(); | 2865 | check_irq_off(); |
2846 | spin_lock(&l3->list_lock); | 2866 | spin_lock(&l3->list_lock); |
2847 | entry = l3->slabs_partial.next; | 2867 | entry = l3->slabs_partial.next; |
@@ -2868,16 +2888,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2868 | /* move slabp to correct slabp list: */ | 2888 | /* move slabp to correct slabp list: */ |
2869 | list_del(&slabp->list); | 2889 | list_del(&slabp->list); |
2870 | 2890 | ||
2871 | if (slabp->free == BUFCTL_END) { | 2891 | if (slabp->free == BUFCTL_END) |
2872 | list_add(&slabp->list, &l3->slabs_full); | 2892 | list_add(&slabp->list, &l3->slabs_full); |
2873 | } else { | 2893 | else |
2874 | list_add(&slabp->list, &l3->slabs_partial); | 2894 | list_add(&slabp->list, &l3->slabs_partial); |
2875 | } | ||
2876 | 2895 | ||
2877 | spin_unlock(&l3->list_lock); | 2896 | spin_unlock(&l3->list_lock); |
2878 | goto done; | 2897 | goto done; |
2879 | 2898 | ||
2880 | must_grow: | 2899 | must_grow: |
2881 | spin_unlock(&l3->list_lock); | 2900 | spin_unlock(&l3->list_lock); |
2882 | x = cache_grow(cachep, flags, nodeid); | 2901 | x = cache_grow(cachep, flags, nodeid); |
2883 | 2902 | ||
@@ -2885,7 +2904,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2885 | return NULL; | 2904 | return NULL; |
2886 | 2905 | ||
2887 | goto retry; | 2906 | goto retry; |
2888 | done: | 2907 | done: |
2889 | return obj; | 2908 | return obj; |
2890 | } | 2909 | } |
2891 | #endif | 2910 | #endif |
@@ -2958,7 +2977,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
2958 | } | 2977 | } |
2959 | 2978 | ||
2960 | free_block(cachep, ac->entry, batchcount, node); | 2979 | free_block(cachep, ac->entry, batchcount, node); |
2961 | free_done: | 2980 | free_done: |
2962 | #if STATS | 2981 | #if STATS |
2963 | { | 2982 | { |
2964 | int i = 0; | 2983 | int i = 0; |
@@ -2979,16 +2998,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
2979 | #endif | 2998 | #endif |
2980 | spin_unlock(&l3->list_lock); | 2999 | spin_unlock(&l3->list_lock); |
2981 | ac->avail -= batchcount; | 3000 | ac->avail -= batchcount; |
2982 | memmove(ac->entry, &(ac->entry[batchcount]), | 3001 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); |
2983 | sizeof(void *) * ac->avail); | ||
2984 | } | 3002 | } |
2985 | 3003 | ||
2986 | /* | 3004 | /* |
2987 | * __cache_free | 3005 | * Release an obj back to its cache. If the obj has a constructed state, it must |
2988 | * Release an obj back to its cache. If the obj has a constructed | 3006 | * be in this state _before_ it is released. Called with disabled ints. |
2989 | * state, it must be in this state _before_ it is released. | ||
2990 | * | ||
2991 | * Called with disabled ints. | ||
2992 | */ | 3007 | */ |
2993 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3008 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
2994 | { | 3009 | { |
@@ -3007,9 +3022,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3007 | if (unlikely(slabp->nodeid != numa_node_id())) { | 3022 | if (unlikely(slabp->nodeid != numa_node_id())) { |
3008 | struct array_cache *alien = NULL; | 3023 | struct array_cache *alien = NULL; |
3009 | int nodeid = slabp->nodeid; | 3024 | int nodeid = slabp->nodeid; |
3010 | struct kmem_list3 *l3 = | 3025 | struct kmem_list3 *l3; |
3011 | cachep->nodelists[numa_node_id()]; | ||
3012 | 3026 | ||
3027 | l3 = cachep->nodelists[numa_node_id()]; | ||
3013 | STATS_INC_NODEFREES(cachep); | 3028 | STATS_INC_NODEFREES(cachep); |
3014 | if (l3->alien && l3->alien[nodeid]) { | 3029 | if (l3->alien && l3->alien[nodeid]) { |
3015 | alien = l3->alien[nodeid]; | 3030 | alien = l3->alien[nodeid]; |
@@ -3093,7 +3108,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) | |||
3093 | if (unlikely(page_get_cache(page) != cachep)) | 3108 | if (unlikely(page_get_cache(page) != cachep)) |
3094 | goto out; | 3109 | goto out; |
3095 | return 1; | 3110 | return 1; |
3096 | out: | 3111 | out: |
3097 | return 0; | 3112 | return 0; |
3098 | } | 3113 | } |
3099 | 3114 | ||
@@ -3119,7 +3134,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
3119 | local_irq_save(save_flags); | 3134 | local_irq_save(save_flags); |
3120 | 3135 | ||
3121 | if (nodeid == -1 || nodeid == numa_node_id() || | 3136 | if (nodeid == -1 || nodeid == numa_node_id() || |
3122 | !cachep->nodelists[nodeid]) | 3137 | !cachep->nodelists[nodeid]) |
3123 | ptr = ____cache_alloc(cachep, flags); | 3138 | ptr = ____cache_alloc(cachep, flags); |
3124 | else | 3139 | else |
3125 | ptr = __cache_alloc_node(cachep, flags, nodeid); | 3140 | ptr = __cache_alloc_node(cachep, flags, nodeid); |
@@ -3148,6 +3163,7 @@ EXPORT_SYMBOL(kmalloc_node); | |||
3148 | * kmalloc - allocate memory | 3163 | * kmalloc - allocate memory |
3149 | * @size: how many bytes of memory are required. | 3164 | * @size: how many bytes of memory are required. |
3150 | * @flags: the type of memory to allocate. | 3165 | * @flags: the type of memory to allocate. |
3166 | * @caller: function caller for debug tracking of the caller | ||
3151 | * | 3167 | * |
3152 | * kmalloc is the normal method of allocating memory | 3168 | * kmalloc is the normal method of allocating memory |
3153 | * in the kernel. | 3169 | * in the kernel. |
@@ -3236,7 +3252,7 @@ void *__alloc_percpu(size_t size) | |||
3236 | /* Catch derefs w/o wrappers */ | 3252 | /* Catch derefs w/o wrappers */ |
3237 | return (void *)(~(unsigned long)pdata); | 3253 | return (void *)(~(unsigned long)pdata); |
3238 | 3254 | ||
3239 | unwind_oom: | 3255 | unwind_oom: |
3240 | while (--i >= 0) { | 3256 | while (--i >= 0) { |
3241 | if (!cpu_possible(i)) | 3257 | if (!cpu_possible(i)) |
3242 | continue; | 3258 | continue; |
@@ -3339,18 +3355,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3339 | struct array_cache *nc = NULL, *new; | 3355 | struct array_cache *nc = NULL, *new; |
3340 | struct array_cache **new_alien = NULL; | 3356 | struct array_cache **new_alien = NULL; |
3341 | #ifdef CONFIG_NUMA | 3357 | #ifdef CONFIG_NUMA |
3342 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) | 3358 | new_alien = alloc_alien_cache(node, cachep->limit); |
3359 | if (!new_alien) | ||
3343 | goto fail; | 3360 | goto fail; |
3344 | #endif | 3361 | #endif |
3345 | if (!(new = alloc_arraycache(node, (cachep->shared * | 3362 | new = alloc_arraycache(node, cachep->shared*cachep->batchcount, |
3346 | cachep->batchcount), | 3363 | 0xbaadf00d); |
3347 | 0xbaadf00d))) | 3364 | if (!new) |
3348 | goto fail; | 3365 | goto fail; |
3349 | if ((l3 = cachep->nodelists[node])) { | 3366 | l3 = cachep->nodelists[node]; |
3350 | 3367 | if (l3) { | |
3351 | spin_lock_irq(&l3->list_lock); | 3368 | spin_lock_irq(&l3->list_lock); |
3352 | 3369 | ||
3353 | if ((nc = cachep->nodelists[node]->shared)) | 3370 | nc = cachep->nodelists[node]->shared; |
3371 | if (nc) | ||
3354 | free_block(cachep, nc->entry, nc->avail, node); | 3372 | free_block(cachep, nc->entry, nc->avail, node); |
3355 | 3373 | ||
3356 | l3->shared = new; | 3374 | l3->shared = new; |
@@ -3359,27 +3377,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3359 | new_alien = NULL; | 3377 | new_alien = NULL; |
3360 | } | 3378 | } |
3361 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3379 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3362 | cachep->batchcount + cachep->num; | 3380 | cachep->batchcount + cachep->num; |
3363 | spin_unlock_irq(&l3->list_lock); | 3381 | spin_unlock_irq(&l3->list_lock); |
3364 | kfree(nc); | 3382 | kfree(nc); |
3365 | free_alien_cache(new_alien); | 3383 | free_alien_cache(new_alien); |
3366 | continue; | 3384 | continue; |
3367 | } | 3385 | } |
3368 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), | 3386 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); |
3369 | GFP_KERNEL, node))) | 3387 | if (!l3) |
3370 | goto fail; | 3388 | goto fail; |
3371 | 3389 | ||
3372 | kmem_list3_init(l3); | 3390 | kmem_list3_init(l3); |
3373 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3391 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
3374 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3392 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
3375 | l3->shared = new; | 3393 | l3->shared = new; |
3376 | l3->alien = new_alien; | 3394 | l3->alien = new_alien; |
3377 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3395 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3378 | cachep->batchcount + cachep->num; | 3396 | cachep->batchcount + cachep->num; |
3379 | cachep->nodelists[node] = l3; | 3397 | cachep->nodelists[node] = l3; |
3380 | } | 3398 | } |
3381 | return err; | 3399 | return err; |
3382 | fail: | 3400 | fail: |
3383 | err = -ENOMEM; | 3401 | err = -ENOMEM; |
3384 | return err; | 3402 | return err; |
3385 | } | 3403 | } |
@@ -3391,7 +3409,7 @@ struct ccupdate_struct { | |||
3391 | 3409 | ||
3392 | static void do_ccupdate_local(void *info) | 3410 | static void do_ccupdate_local(void *info) |
3393 | { | 3411 | { |
3394 | struct ccupdate_struct *new = (struct ccupdate_struct *)info; | 3412 | struct ccupdate_struct *new = info; |
3395 | struct array_cache *old; | 3413 | struct array_cache *old; |
3396 | 3414 | ||
3397 | check_irq_off(); | 3415 | check_irq_off(); |
@@ -3401,16 +3419,17 @@ static void do_ccupdate_local(void *info) | |||
3401 | new->new[smp_processor_id()] = old; | 3419 | new->new[smp_processor_id()] = old; |
3402 | } | 3420 | } |
3403 | 3421 | ||
3404 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, | 3422 | /* Always called with the cache_chain_mutex held */ |
3405 | int shared) | 3423 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3424 | int batchcount, int shared) | ||
3406 | { | 3425 | { |
3407 | struct ccupdate_struct new; | 3426 | struct ccupdate_struct new; |
3408 | int i, err; | 3427 | int i, err; |
3409 | 3428 | ||
3410 | memset(&new.new, 0, sizeof(new.new)); | 3429 | memset(&new.new, 0, sizeof(new.new)); |
3411 | for_each_online_cpu(i) { | 3430 | for_each_online_cpu(i) { |
3412 | new.new[i] = | 3431 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, |
3413 | alloc_arraycache(cpu_to_node(i), limit, batchcount); | 3432 | batchcount); |
3414 | if (!new.new[i]) { | 3433 | if (!new.new[i]) { |
3415 | for (i--; i >= 0; i--) | 3434 | for (i--; i >= 0; i--) |
3416 | kfree(new.new[i]); | 3435 | kfree(new.new[i]); |
@@ -3419,14 +3438,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
3419 | } | 3438 | } |
3420 | new.cachep = cachep; | 3439 | new.cachep = cachep; |
3421 | 3440 | ||
3422 | smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); | 3441 | on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); |
3423 | 3442 | ||
3424 | check_irq_on(); | 3443 | check_irq_on(); |
3425 | spin_lock(&cachep->spinlock); | ||
3426 | cachep->batchcount = batchcount; | 3444 | cachep->batchcount = batchcount; |
3427 | cachep->limit = limit; | 3445 | cachep->limit = limit; |
3428 | cachep->shared = shared; | 3446 | cachep->shared = shared; |
3429 | spin_unlock(&cachep->spinlock); | ||
3430 | 3447 | ||
3431 | for_each_online_cpu(i) { | 3448 | for_each_online_cpu(i) { |
3432 | struct array_cache *ccold = new.new[i]; | 3449 | struct array_cache *ccold = new.new[i]; |
@@ -3447,15 +3464,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
3447 | return 0; | 3464 | return 0; |
3448 | } | 3465 | } |
3449 | 3466 | ||
3467 | /* Called with cache_chain_mutex held always */ | ||
3450 | static void enable_cpucache(struct kmem_cache *cachep) | 3468 | static void enable_cpucache(struct kmem_cache *cachep) |
3451 | { | 3469 | { |
3452 | int err; | 3470 | int err; |
3453 | int limit, shared; | 3471 | int limit, shared; |
3454 | 3472 | ||
3455 | /* The head array serves three purposes: | 3473 | /* |
3474 | * The head array serves three purposes: | ||
3456 | * - create a LIFO ordering, i.e. return objects that are cache-warm | 3475 | * - create a LIFO ordering, i.e. return objects that are cache-warm |
3457 | * - reduce the number of spinlock operations. | 3476 | * - reduce the number of spinlock operations. |
3458 | * - reduce the number of linked list operations on the slab and | 3477 | * - reduce the number of linked list operations on the slab and |
3459 | * bufctl chains: array operations are cheaper. | 3478 | * bufctl chains: array operations are cheaper. |
3460 | * The numbers are guessed, we should auto-tune as described by | 3479 | * The numbers are guessed, we should auto-tune as described by |
3461 | * Bonwick. | 3480 | * Bonwick. |
@@ -3471,7 +3490,8 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3471 | else | 3490 | else |
3472 | limit = 120; | 3491 | limit = 120; |
3473 | 3492 | ||
3474 | /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound | 3493 | /* |
3494 | * CPU bound tasks (e.g. network routing) can exhibit cpu bound | ||
3475 | * allocation behaviour: Most allocs on one cpu, most free operations | 3495 | * allocation behaviour: Most allocs on one cpu, most free operations |
3476 | * on another cpu. For these cases, an efficient object passing between | 3496 | * on another cpu. For these cases, an efficient object passing between |
3477 | * cpus is necessary. This is provided by a shared array. The array | 3497 | * cpus is necessary. This is provided by a shared array. The array |
@@ -3486,9 +3506,9 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3486 | #endif | 3506 | #endif |
3487 | 3507 | ||
3488 | #if DEBUG | 3508 | #if DEBUG |
3489 | /* With debugging enabled, large batchcount lead to excessively | 3509 | /* |
3490 | * long periods with disabled local interrupts. Limit the | 3510 | * With debugging enabled, large batchcount lead to excessively long |
3491 | * batchcount | 3511 | * periods with disabled local interrupts. Limit the batchcount |
3492 | */ | 3512 | */ |
3493 | if (limit > 32) | 3513 | if (limit > 32) |
3494 | limit = 32; | 3514 | limit = 32; |
@@ -3499,23 +3519,32 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3499 | cachep->name, -err); | 3519 | cachep->name, -err); |
3500 | } | 3520 | } |
3501 | 3521 | ||
3502 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | 3522 | /* |
3503 | int force, int node) | 3523 | * Drain an array if it contains any elements taking the l3 lock only if |
3524 | * necessary. Note that the l3 listlock also protects the array_cache | ||
3525 | * if drain_array() is used on the shared array. | ||
3526 | */ | ||
3527 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | ||
3528 | struct array_cache *ac, int force, int node) | ||
3504 | { | 3529 | { |
3505 | int tofree; | 3530 | int tofree; |
3506 | 3531 | ||
3507 | check_spinlock_acquired_node(cachep, node); | 3532 | if (!ac || !ac->avail) |
3533 | return; | ||
3508 | if (ac->touched && !force) { | 3534 | if (ac->touched && !force) { |
3509 | ac->touched = 0; | 3535 | ac->touched = 0; |
3510 | } else if (ac->avail) { | 3536 | } else { |
3511 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 3537 | spin_lock_irq(&l3->list_lock); |
3512 | if (tofree > ac->avail) { | 3538 | if (ac->avail) { |
3513 | tofree = (ac->avail + 1) / 2; | 3539 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
3540 | if (tofree > ac->avail) | ||
3541 | tofree = (ac->avail + 1) / 2; | ||
3542 | free_block(cachep, ac->entry, tofree, node); | ||
3543 | ac->avail -= tofree; | ||
3544 | memmove(ac->entry, &(ac->entry[tofree]), | ||
3545 | sizeof(void *) * ac->avail); | ||
3514 | } | 3546 | } |
3515 | free_block(cachep, ac->entry, tofree, node); | 3547 | spin_unlock_irq(&l3->list_lock); |
3516 | ac->avail -= tofree; | ||
3517 | memmove(ac->entry, &(ac->entry[tofree]), | ||
3518 | sizeof(void *) * ac->avail); | ||
3519 | } | 3548 | } |
3520 | } | 3549 | } |
3521 | 3550 | ||
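/*
 * Illustrative sketch, not part of the patch above: the new drain_array()
 * checks the array_cache fill level before taking the l3 list lock, and
 * only locks (and re-checks) when there may be objects to free.  A minimal
 * stand-alone model of that pattern follows; toy_array_cache,
 * toy_drain_array, list_lock and LIMIT are made-up stand-ins, and the
 * actual free_block() call is elided.
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define LIMIT 16                        /* stand-in for ac->limit */

struct toy_array_cache {
	int avail;
	int touched;
	void *entry[LIMIT];
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_drain_array(struct toy_array_cache *ac, int force)
{
	int tofree;

	if (!ac || !ac->avail)          /* cheap, lock-free early exit */
		return;
	if (ac->touched && !force) {    /* recently used: leave it alone */
		ac->touched = 0;
		return;
	}
	pthread_mutex_lock(&list_lock); /* lock only when work is likely */
	if (ac->avail) {                /* re-check under the lock */
		tofree = force ? ac->avail : (LIMIT + 4) / 5;
		if (tofree > ac->avail)
			tofree = (ac->avail + 1) / 2;
		/* free_block() would release the objects here */
		ac->avail -= tofree;
		memmove(ac->entry, &ac->entry[tofree],
			sizeof(void *) * ac->avail);
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct toy_array_cache ac = { .avail = 10 };

	toy_drain_array(&ac, 0);
	printf("avail after partial drain: %d\n", ac.avail);
	return 0;
}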
@@ -3528,13 +3557,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac | |||
3528 | * - clear the per-cpu caches for this CPU. | 3557 | * - clear the per-cpu caches for this CPU. |
3529 | * - return freeable pages to the main free memory pool. | 3558 | * - return freeable pages to the main free memory pool. |
3530 | * | 3559 | * |
3531 | * If we cannot acquire the cache chain mutex then just give up - we'll | 3560 | * If we cannot acquire the cache chain mutex then just give up - we'll try |
3532 | * try again on the next iteration. | 3561 | * again on the next iteration. |
3533 | */ | 3562 | */ |
3534 | static void cache_reap(void *unused) | 3563 | static void cache_reap(void *unused) |
3535 | { | 3564 | { |
3536 | struct list_head *walk; | 3565 | struct list_head *walk; |
3537 | struct kmem_list3 *l3; | 3566 | struct kmem_list3 *l3; |
3567 | int node = numa_node_id(); | ||
3538 | 3568 | ||
3539 | if (!mutex_trylock(&cache_chain_mutex)) { | 3569 | if (!mutex_trylock(&cache_chain_mutex)) { |
3540 | /* Give up. Setup the next iteration. */ | 3570 | /* Give up. Setup the next iteration. */ |
@@ -3550,65 +3580,72 @@ static void cache_reap(void *unused) | |||
3550 | struct slab *slabp; | 3580 | struct slab *slabp; |
3551 | 3581 | ||
3552 | searchp = list_entry(walk, struct kmem_cache, next); | 3582 | searchp = list_entry(walk, struct kmem_cache, next); |
3553 | |||
3554 | if (searchp->flags & SLAB_NO_REAP) | ||
3555 | goto next; | ||
3556 | |||
3557 | check_irq_on(); | 3583 | check_irq_on(); |
3558 | 3584 | ||
3559 | l3 = searchp->nodelists[numa_node_id()]; | 3585 | /* |
3586 | * We only take the l3 lock if absolutely necessary and we | ||
3587 | * have established with reasonable certainty that | ||
3588 | * we can do some work if the lock was obtained. | ||
3589 | */ | ||
3590 | l3 = searchp->nodelists[node]; | ||
3591 | |||
3560 | reap_alien(searchp, l3); | 3592 | reap_alien(searchp, l3); |
3561 | spin_lock_irq(&l3->list_lock); | ||
3562 | 3593 | ||
3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3594 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
3564 | numa_node_id()); | ||
3565 | 3595 | ||
3596 | /* | ||
3597 | * These are racy checks but it does not matter | ||
3598 | * if we skip one check or scan twice. | ||
3599 | */ | ||
3566 | if (time_after(l3->next_reap, jiffies)) | 3600 | if (time_after(l3->next_reap, jiffies)) |
3567 | goto next_unlock; | 3601 | goto next; |
3568 | 3602 | ||
3569 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 3603 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
3570 | 3604 | ||
3571 | if (l3->shared) | 3605 | drain_array(searchp, l3, l3->shared, 0, node); |
3572 | drain_array_locked(searchp, l3->shared, 0, | ||
3573 | numa_node_id()); | ||
3574 | 3606 | ||
3575 | if (l3->free_touched) { | 3607 | if (l3->free_touched) { |
3576 | l3->free_touched = 0; | 3608 | l3->free_touched = 0; |
3577 | goto next_unlock; | 3609 | goto next; |
3578 | } | 3610 | } |
3579 | 3611 | ||
3580 | tofree = | 3612 | tofree = (l3->free_limit + 5 * searchp->num - 1) / |
3581 | (l3->free_limit + 5 * searchp->num - | 3613 | (5 * searchp->num); |
3582 | 1) / (5 * searchp->num); | ||
3583 | do { | 3614 | do { |
3615 | /* | ||
3616 | * Do not lock if there are no free blocks. | ||
3617 | */ | ||
3618 | if (list_empty(&l3->slabs_free)) | ||
3619 | break; | ||
3620 | |||
3621 | spin_lock_irq(&l3->list_lock); | ||
3584 | p = l3->slabs_free.next; | 3622 | p = l3->slabs_free.next; |
3585 | if (p == &(l3->slabs_free)) | 3623 | if (p == &(l3->slabs_free)) { |
3624 | spin_unlock_irq(&l3->list_lock); | ||
3586 | break; | 3625 | break; |
3626 | } | ||
3587 | 3627 | ||
3588 | slabp = list_entry(p, struct slab, list); | 3628 | slabp = list_entry(p, struct slab, list); |
3589 | BUG_ON(slabp->inuse); | 3629 | BUG_ON(slabp->inuse); |
3590 | list_del(&slabp->list); | 3630 | list_del(&slabp->list); |
3591 | STATS_INC_REAPED(searchp); | 3631 | STATS_INC_REAPED(searchp); |
3592 | 3632 | ||
3593 | /* Safe to drop the lock. The slab is no longer | 3633 | /* |
3594 | * linked to the cache. | 3634 | * Safe to drop the lock. The slab is no longer linked |
3595 | * searchp cannot disappear, we hold | 3635 | * to the cache. searchp cannot disappear, we hold |
3596 | * cache_chain_lock | 3636 | * cache_chain_lock |
3597 | */ | 3637 | */ |
3598 | l3->free_objects -= searchp->num; | 3638 | l3->free_objects -= searchp->num; |
3599 | spin_unlock_irq(&l3->list_lock); | 3639 | spin_unlock_irq(&l3->list_lock); |
3600 | slab_destroy(searchp, slabp); | 3640 | slab_destroy(searchp, slabp); |
3601 | spin_lock_irq(&l3->list_lock); | ||
3602 | } while (--tofree > 0); | 3641 | } while (--tofree > 0); |
3603 | next_unlock: | 3642 | next: |
3604 | spin_unlock_irq(&l3->list_lock); | ||
3605 | next: | ||
3606 | cond_resched(); | 3643 | cond_resched(); |
3607 | } | 3644 | } |
3608 | check_irq_on(); | 3645 | check_irq_on(); |
3609 | mutex_unlock(&cache_chain_mutex); | 3646 | mutex_unlock(&cache_chain_mutex); |
3610 | next_reap_node(); | 3647 | next_reap_node(); |
3611 | /* Setup the next iteration */ | 3648 | /* Set up the next iteration */ |
3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3649 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
3613 | } | 3650 | } |
3614 | 3651 | ||
@@ -3658,8 +3695,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
3658 | { | 3695 | { |
3659 | struct kmem_cache *cachep = p; | 3696 | struct kmem_cache *cachep = p; |
3660 | ++*pos; | 3697 | ++*pos; |
3661 | return cachep->next.next == &cache_chain ? NULL | 3698 | return cachep->next.next == &cache_chain ? |
3662 | : list_entry(cachep->next.next, struct kmem_cache, next); | 3699 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); |
3663 | } | 3700 | } |
3664 | 3701 | ||
3665 | static void s_stop(struct seq_file *m, void *p) | 3702 | static void s_stop(struct seq_file *m, void *p) |
@@ -3681,7 +3718,6 @@ static int s_show(struct seq_file *m, void *p) | |||
3681 | int node; | 3718 | int node; |
3682 | struct kmem_list3 *l3; | 3719 | struct kmem_list3 *l3; |
3683 | 3720 | ||
3684 | spin_lock(&cachep->spinlock); | ||
3685 | active_objs = 0; | 3721 | active_objs = 0; |
3686 | num_slabs = 0; | 3722 | num_slabs = 0; |
3687 | for_each_online_node(node) { | 3723 | for_each_online_node(node) { |
@@ -3748,7 +3784,9 @@ static int s_show(struct seq_file *m, void *p) | |||
3748 | unsigned long node_frees = cachep->node_frees; | 3784 | unsigned long node_frees = cachep->node_frees; |
3749 | 3785 | ||
3750 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 3786 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
3751 | %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); | 3787 | %4lu %4lu %4lu %4lu", allocs, high, grown, |
3788 | reaped, errors, max_freeable, node_allocs, | ||
3789 | node_frees); | ||
3752 | } | 3790 | } |
3753 | /* cpu stats */ | 3791 | /* cpu stats */ |
3754 | { | 3792 | { |
@@ -3762,7 +3800,6 @@ static int s_show(struct seq_file *m, void *p) | |||
3762 | } | 3800 | } |
3763 | #endif | 3801 | #endif |
3764 | seq_putc(m, '\n'); | 3802 | seq_putc(m, '\n'); |
3765 | spin_unlock(&cachep->spinlock); | ||
3766 | return 0; | 3803 | return 0; |
3767 | } | 3804 | } |
3768 | 3805 | ||
@@ -3820,13 +3857,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
3820 | mutex_lock(&cache_chain_mutex); | 3857 | mutex_lock(&cache_chain_mutex); |
3821 | res = -EINVAL; | 3858 | res = -EINVAL; |
3822 | list_for_each(p, &cache_chain) { | 3859 | list_for_each(p, &cache_chain) { |
3823 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, | 3860 | struct kmem_cache *cachep; |
3824 | next); | ||
3825 | 3861 | ||
3862 | cachep = list_entry(p, struct kmem_cache, next); | ||
3826 | if (!strcmp(cachep->name, kbuf)) { | 3863 | if (!strcmp(cachep->name, kbuf)) { |
3827 | if (limit < 1 || | 3864 | if (limit < 1 || batchcount < 1 || |
3828 | batchcount < 1 || | 3865 | batchcount > limit || shared < 0) { |
3829 | batchcount > limit || shared < 0) { | ||
3830 | res = 0; | 3866 | res = 0; |
3831 | } else { | 3867 | } else { |
3832 | res = do_tune_cpucache(cachep, limit, | 3868 | res = do_tune_cpucache(cachep, limit, |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -209,19 +209,18 @@ int lru_add_drain_all(void) | |||
209 | */ | 209 | */ |
210 | void fastcall __page_cache_release(struct page *page) | 210 | void fastcall __page_cache_release(struct page *page) |
211 | { | 211 | { |
212 | unsigned long flags; | 212 | if (PageLRU(page)) { |
213 | struct zone *zone = page_zone(page); | 213 | unsigned long flags; |
214 | struct zone *zone = page_zone(page); | ||
214 | 215 | ||
215 | spin_lock_irqsave(&zone->lru_lock, flags); | 216 | spin_lock_irqsave(&zone->lru_lock, flags); |
216 | if (TestClearPageLRU(page)) | 217 | BUG_ON(!PageLRU(page)); |
218 | __ClearPageLRU(page); | ||
217 | del_page_from_lru(zone, page); | 219 | del_page_from_lru(zone, page); |
218 | if (page_count(page) != 0) | 220 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
219 | page = NULL; | 221 | } |
220 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 222 | free_hot_page(page); |
221 | if (page) | ||
222 | free_hot_page(page); | ||
223 | } | 223 | } |
224 | |||
225 | EXPORT_SYMBOL(__page_cache_release); | 224 | EXPORT_SYMBOL(__page_cache_release); |
226 | 225 | ||
227 | /* | 226 | /* |
@@ -245,7 +244,6 @@ void release_pages(struct page **pages, int nr, int cold) | |||
245 | pagevec_init(&pages_to_free, cold); | 244 | pagevec_init(&pages_to_free, cold); |
246 | for (i = 0; i < nr; i++) { | 245 | for (i = 0; i < nr; i++) { |
247 | struct page *page = pages[i]; | 246 | struct page *page = pages[i]; |
248 | struct zone *pagezone; | ||
249 | 247 | ||
250 | if (unlikely(PageCompound(page))) { | 248 | if (unlikely(PageCompound(page))) { |
251 | if (zone) { | 249 | if (zone) { |
@@ -259,23 +257,27 @@ void release_pages(struct page **pages, int nr, int cold) | |||
259 | if (!put_page_testzero(page)) | 257 | if (!put_page_testzero(page)) |
260 | continue; | 258 | continue; |
261 | 259 | ||
262 | pagezone = page_zone(page); | 260 | if (PageLRU(page)) { |
263 | if (pagezone != zone) { | 261 | struct zone *pagezone = page_zone(page); |
264 | if (zone) | 262 | if (pagezone != zone) { |
265 | spin_unlock_irq(&zone->lru_lock); | 263 | if (zone) |
266 | zone = pagezone; | 264 | spin_unlock_irq(&zone->lru_lock); |
267 | spin_lock_irq(&zone->lru_lock); | 265 | zone = pagezone; |
268 | } | 266 | spin_lock_irq(&zone->lru_lock); |
269 | if (TestClearPageLRU(page)) | 267 | } |
268 | BUG_ON(!PageLRU(page)); | ||
269 | __ClearPageLRU(page); | ||
270 | del_page_from_lru(zone, page); | 270 | del_page_from_lru(zone, page); |
271 | if (page_count(page) == 0) { | 271 | } |
272 | if (!pagevec_add(&pages_to_free, page)) { | 272 | |
273 | if (!pagevec_add(&pages_to_free, page)) { | ||
274 | if (zone) { | ||
273 | spin_unlock_irq(&zone->lru_lock); | 275 | spin_unlock_irq(&zone->lru_lock); |
274 | __pagevec_free(&pages_to_free); | 276 | zone = NULL; |
275 | pagevec_reinit(&pages_to_free); | ||
276 | zone = NULL; /* No lock is held */ | ||
277 | } | 277 | } |
278 | } | 278 | __pagevec_free(&pages_to_free); |
279 | pagevec_reinit(&pages_to_free); | ||
280 | } | ||
279 | } | 281 | } |
280 | if (zone) | 282 | if (zone) |
281 | spin_unlock_irq(&zone->lru_lock); | 283 | spin_unlock_irq(&zone->lru_lock); |
@@ -343,8 +345,8 @@ void __pagevec_lru_add(struct pagevec *pvec) | |||
343 | zone = pagezone; | 345 | zone = pagezone; |
344 | spin_lock_irq(&zone->lru_lock); | 346 | spin_lock_irq(&zone->lru_lock); |
345 | } | 347 | } |
346 | if (TestSetPageLRU(page)) | 348 | BUG_ON(PageLRU(page)); |
347 | BUG(); | 349 | SetPageLRU(page); |
348 | add_page_to_inactive_list(zone, page); | 350 | add_page_to_inactive_list(zone, page); |
349 | } | 351 | } |
350 | if (zone) | 352 | if (zone) |
@@ -370,10 +372,10 @@ void __pagevec_lru_add_active(struct pagevec *pvec) | |||
370 | zone = pagezone; | 372 | zone = pagezone; |
371 | spin_lock_irq(&zone->lru_lock); | 373 | spin_lock_irq(&zone->lru_lock); |
372 | } | 374 | } |
373 | if (TestSetPageLRU(page)) | 375 | BUG_ON(PageLRU(page)); |
374 | BUG(); | 376 | SetPageLRU(page); |
375 | if (TestSetPageActive(page)) | 377 | BUG_ON(PageActive(page)); |
376 | BUG(); | 378 | SetPageActive(page); |
377 | add_page_to_active_list(zone, page); | 379 | add_page_to_active_list(zone, page); |
378 | } | 380 | } |
379 | if (zone) | 381 | if (zone) |
diff --git a/mm/swap_state.c b/mm/swap_state.c index db8a3d3e1636..d7af296833fc 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/pagevec.h> | 17 | #include <linux/pagevec.h> |
18 | #include <linux/migrate.h> | ||
18 | 19 | ||
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
20 | 21 | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f9cf0d073b8..365ed6ff182d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -116,7 +116,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) | |||
116 | last_in_cluster = offset + SWAPFILE_CLUSTER; | 116 | last_in_cluster = offset + SWAPFILE_CLUSTER; |
117 | else if (offset == last_in_cluster) { | 117 | else if (offset == last_in_cluster) { |
118 | spin_lock(&swap_lock); | 118 | spin_lock(&swap_lock); |
119 | si->cluster_next = offset-SWAPFILE_CLUSTER-1; | 119 | si->cluster_next = offset-SWAPFILE_CLUSTER+1; |
120 | goto cluster; | 120 | goto cluster; |
121 | } | 121 | } |
122 | if (unlikely(--latency_ration < 0)) { | 122 | if (unlikely(--latency_ration < 0)) { |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 4fe7e3aa02e2..fd572bbdc9f5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -33,39 +33,21 @@ | |||
33 | #include <linux/cpuset.h> | 33 | #include <linux/cpuset.h> |
34 | #include <linux/notifier.h> | 34 | #include <linux/notifier.h> |
35 | #include <linux/rwsem.h> | 35 | #include <linux/rwsem.h> |
36 | #include <linux/delay.h> | ||
36 | 37 | ||
37 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
38 | #include <asm/div64.h> | 39 | #include <asm/div64.h> |
39 | 40 | ||
40 | #include <linux/swapops.h> | 41 | #include <linux/swapops.h> |
41 | 42 | ||
42 | /* possible outcome of pageout() */ | 43 | #include "internal.h" |
43 | typedef enum { | ||
44 | /* failed to write page out, page is locked */ | ||
45 | PAGE_KEEP, | ||
46 | /* move page to the active list, page is locked */ | ||
47 | PAGE_ACTIVATE, | ||
48 | /* page has been sent to the disk successfully, page is unlocked */ | ||
49 | PAGE_SUCCESS, | ||
50 | /* page is clean and locked */ | ||
51 | PAGE_CLEAN, | ||
52 | } pageout_t; | ||
53 | 44 | ||
54 | struct scan_control { | 45 | struct scan_control { |
55 | /* Ask refill_inactive_zone, or shrink_cache to scan this many pages */ | ||
56 | unsigned long nr_to_scan; | ||
57 | |||
58 | /* Incremented by the number of inactive pages that were scanned */ | 46 | /* Incremented by the number of inactive pages that were scanned */ |
59 | unsigned long nr_scanned; | 47 | unsigned long nr_scanned; |
60 | 48 | ||
61 | /* Incremented by the number of pages reclaimed */ | ||
62 | unsigned long nr_reclaimed; | ||
63 | |||
64 | unsigned long nr_mapped; /* From page_state */ | 49 | unsigned long nr_mapped; /* From page_state */ |
65 | 50 | ||
66 | /* Ask shrink_caches, or shrink_zone to scan at this priority */ | ||
67 | unsigned int priority; | ||
68 | |||
69 | /* This context's GFP mask */ | 51 | /* This context's GFP mask */ |
70 | gfp_t gfp_mask; | 52 | gfp_t gfp_mask; |
71 | 53 | ||
@@ -183,10 +165,11 @@ EXPORT_SYMBOL(remove_shrinker); | |||
183 | * | 165 | * |
184 | * Returns the number of slab objects which we shrunk. | 166 | * Returns the number of slab objects which we shrunk. |
185 | */ | 167 | */ |
186 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) | 168 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, |
169 | unsigned long lru_pages) | ||
187 | { | 170 | { |
188 | struct shrinker *shrinker; | 171 | struct shrinker *shrinker; |
189 | int ret = 0; | 172 | unsigned long ret = 0; |
190 | 173 | ||
191 | if (scanned == 0) | 174 | if (scanned == 0) |
192 | scanned = SWAP_CLUSTER_MAX; | 175 | scanned = SWAP_CLUSTER_MAX; |
@@ -306,9 +289,10 @@ static void handle_write_error(struct address_space *mapping, | |||
306 | } | 289 | } |
307 | 290 | ||
308 | /* | 291 | /* |
309 | * pageout is called by shrink_list() for each dirty page. Calls ->writepage(). | 292 | * pageout is called by shrink_page_list() for each dirty page. |
293 | * Calls ->writepage(). | ||
310 | */ | 294 | */ |
311 | static pageout_t pageout(struct page *page, struct address_space *mapping) | 295 | pageout_t pageout(struct page *page, struct address_space *mapping) |
312 | { | 296 | { |
313 | /* | 297 | /* |
314 | * If the page is dirty, only perform writeback if that write | 298 | * If the page is dirty, only perform writeback if that write |
@@ -376,7 +360,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) | |||
376 | return PAGE_CLEAN; | 360 | return PAGE_CLEAN; |
377 | } | 361 | } |
378 | 362 | ||
379 | static int remove_mapping(struct address_space *mapping, struct page *page) | 363 | int remove_mapping(struct address_space *mapping, struct page *page) |
380 | { | 364 | { |
381 | if (!mapping) | 365 | if (!mapping) |
382 | return 0; /* truncate got there first */ | 366 | return 0; /* truncate got there first */ |
@@ -414,14 +398,15 @@ cannot_free: | |||
414 | } | 398 | } |
415 | 399 | ||
416 | /* | 400 | /* |
417 | * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed | 401 | * shrink_page_list() returns the number of reclaimed pages |
418 | */ | 402 | */ |
419 | static int shrink_list(struct list_head *page_list, struct scan_control *sc) | 403 | static unsigned long shrink_page_list(struct list_head *page_list, |
404 | struct scan_control *sc) | ||
420 | { | 405 | { |
421 | LIST_HEAD(ret_pages); | 406 | LIST_HEAD(ret_pages); |
422 | struct pagevec freed_pvec; | 407 | struct pagevec freed_pvec; |
423 | int pgactivate = 0; | 408 | int pgactivate = 0; |
424 | int reclaimed = 0; | 409 | unsigned long nr_reclaimed = 0; |
425 | 410 | ||
426 | cond_resched(); | 411 | cond_resched(); |
427 | 412 | ||
@@ -464,12 +449,9 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
464 | * Anonymous process memory has backing store? | 449 | * Anonymous process memory has backing store? |
465 | * Try to allocate it some swap space here. | 450 | * Try to allocate it some swap space here. |
466 | */ | 451 | */ |
467 | if (PageAnon(page) && !PageSwapCache(page)) { | 452 | if (PageAnon(page) && !PageSwapCache(page)) |
468 | if (!sc->may_swap) | ||
469 | goto keep_locked; | ||
470 | if (!add_to_swap(page, GFP_ATOMIC)) | 453 | if (!add_to_swap(page, GFP_ATOMIC)) |
471 | goto activate_locked; | 454 | goto activate_locked; |
472 | } | ||
473 | #endif /* CONFIG_SWAP */ | 455 | #endif /* CONFIG_SWAP */ |
474 | 456 | ||
475 | mapping = page_mapping(page); | 457 | mapping = page_mapping(page); |
@@ -481,12 +463,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
481 | * processes. Try to unmap it here. | 463 | * processes. Try to unmap it here. |
482 | */ | 464 | */ |
483 | if (page_mapped(page) && mapping) { | 465 | if (page_mapped(page) && mapping) { |
484 | /* | ||
485 | * No unmapping if we do not swap | ||
486 | */ | ||
487 | if (!sc->may_swap) | ||
488 | goto keep_locked; | ||
489 | |||
490 | switch (try_to_unmap(page, 0)) { | 466 | switch (try_to_unmap(page, 0)) { |
491 | case SWAP_FAIL: | 467 | case SWAP_FAIL: |
492 | goto activate_locked; | 468 | goto activate_locked; |
@@ -561,7 +537,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
561 | 537 | ||
562 | free_it: | 538 | free_it: |
563 | unlock_page(page); | 539 | unlock_page(page); |
564 | reclaimed++; | 540 | nr_reclaimed++; |
565 | if (!pagevec_add(&freed_pvec, page)) | 541 | if (!pagevec_add(&freed_pvec, page)) |
566 | __pagevec_release_nonlru(&freed_pvec); | 542 | __pagevec_release_nonlru(&freed_pvec); |
567 | continue; | 543 | continue; |
@@ -579,483 +555,8 @@ keep: | |||
579 | if (pagevec_count(&freed_pvec)) | 555 | if (pagevec_count(&freed_pvec)) |
580 | __pagevec_release_nonlru(&freed_pvec); | 556 | __pagevec_release_nonlru(&freed_pvec); |
581 | mod_page_state(pgactivate, pgactivate); | 557 | mod_page_state(pgactivate, pgactivate); |
582 | sc->nr_reclaimed += reclaimed; | 558 | return nr_reclaimed; |
583 | return reclaimed; | ||
584 | } | ||
585 | |||
586 | #ifdef CONFIG_MIGRATION | ||
587 | static inline void move_to_lru(struct page *page) | ||
588 | { | ||
589 | list_del(&page->lru); | ||
590 | if (PageActive(page)) { | ||
591 | /* | ||
592 | * lru_cache_add_active checks that | ||
593 | * the PG_active bit is off. | ||
594 | */ | ||
595 | ClearPageActive(page); | ||
596 | lru_cache_add_active(page); | ||
597 | } else { | ||
598 | lru_cache_add(page); | ||
599 | } | ||
600 | put_page(page); | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * Add isolated pages on the list back to the LRU. | ||
605 | * | ||
606 | * returns the number of pages put back. | ||
607 | */ | ||
608 | int putback_lru_pages(struct list_head *l) | ||
609 | { | ||
610 | struct page *page; | ||
611 | struct page *page2; | ||
612 | int count = 0; | ||
613 | |||
614 | list_for_each_entry_safe(page, page2, l, lru) { | ||
615 | move_to_lru(page); | ||
616 | count++; | ||
617 | } | ||
618 | return count; | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * Non migratable page | ||
623 | */ | ||
624 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
625 | { | ||
626 | return -EIO; | ||
627 | } | ||
628 | EXPORT_SYMBOL(fail_migrate_page); | ||
629 | |||
630 | /* | ||
631 | * swapout a single page | ||
632 | * page is locked upon entry, unlocked on exit | ||
633 | */ | ||
634 | static int swap_page(struct page *page) | ||
635 | { | ||
636 | struct address_space *mapping = page_mapping(page); | ||
637 | |||
638 | if (page_mapped(page) && mapping) | ||
639 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | ||
640 | goto unlock_retry; | ||
641 | |||
642 | if (PageDirty(page)) { | ||
643 | /* Page is dirty, try to write it out here */ | ||
644 | switch(pageout(page, mapping)) { | ||
645 | case PAGE_KEEP: | ||
646 | case PAGE_ACTIVATE: | ||
647 | goto unlock_retry; | ||
648 | |||
649 | case PAGE_SUCCESS: | ||
650 | goto retry; | ||
651 | |||
652 | case PAGE_CLEAN: | ||
653 | ; /* try to free the page below */ | ||
654 | } | ||
655 | } | ||
656 | |||
657 | if (PagePrivate(page)) { | ||
658 | if (!try_to_release_page(page, GFP_KERNEL) || | ||
659 | (!mapping && page_count(page) == 1)) | ||
660 | goto unlock_retry; | ||
661 | } | ||
662 | |||
663 | if (remove_mapping(mapping, page)) { | ||
664 | /* Success */ | ||
665 | unlock_page(page); | ||
666 | return 0; | ||
667 | } | ||
668 | |||
669 | unlock_retry: | ||
670 | unlock_page(page); | ||
671 | |||
672 | retry: | ||
673 | return -EAGAIN; | ||
674 | } | ||
675 | EXPORT_SYMBOL(swap_page); | ||
676 | |||
677 | /* | ||
678 | * Page migration was first developed in the context of the memory hotplug | ||
679 | * project. The main authors of the migration code are: | ||
680 | * | ||
681 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | ||
682 | * Hirokazu Takahashi <taka@valinux.co.jp> | ||
683 | * Dave Hansen <haveblue@us.ibm.com> | ||
684 | * Christoph Lameter <clameter@sgi.com> | ||
685 | */ | ||
686 | |||
687 | /* | ||
688 | * Remove references for a page and establish the new page with the correct | ||
689 | * basic settings to be able to stop accesses to the page. | ||
690 | */ | ||
691 | int migrate_page_remove_references(struct page *newpage, | ||
692 | struct page *page, int nr_refs) | ||
693 | { | ||
694 | struct address_space *mapping = page_mapping(page); | ||
695 | struct page **radix_pointer; | ||
696 | |||
697 | /* | ||
698 | * Avoid doing any of the following work if the page count | ||
699 | * indicates that the page is in use or truncate has removed | ||
700 | * the page. | ||
701 | */ | ||
702 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
703 | return -EAGAIN; | ||
704 | |||
705 | /* | ||
706 | * Establish swap ptes for anonymous pages or destroy pte | ||
707 | * maps for files. | ||
708 | * | ||
709 | * In order to reestablish file backed mappings the fault handlers | ||
710 | * will take the radix tree_lock which may then be used to stop | ||
711 | * processes from accessing this page until the new page is ready. | ||
712 | * | ||
713 | * A process accessing via a swap pte (an anonymous page) will take a | ||
714 | * page_lock on the old page which will block the process until the | ||
715 | * migration attempt is complete. At that time the PageSwapCache bit | ||
716 | * will be examined. If the page was migrated then the PageSwapCache | ||
717 | * bit will be clear and the operation to retrieve the page will be | ||
718 | * retried which will find the new page in the radix tree. Then a new | ||
719 | * direct mapping may be generated based on the radix tree contents. | ||
720 | * | ||
721 | * If the page was not migrated then the PageSwapCache bit | ||
722 | * is still set and the operation may continue. | ||
723 | */ | ||
724 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
725 | /* A vma has VM_LOCKED set -> Permanent failure */ | ||
726 | return -EPERM; | ||
727 | |||
728 | /* | ||
729 | * Give up if we were unable to remove all mappings. | ||
730 | */ | ||
731 | if (page_mapcount(page)) | ||
732 | return -EAGAIN; | ||
733 | |||
734 | write_lock_irq(&mapping->tree_lock); | ||
735 | |||
736 | radix_pointer = (struct page **)radix_tree_lookup_slot( | ||
737 | &mapping->page_tree, | ||
738 | page_index(page)); | ||
739 | |||
740 | if (!page_mapping(page) || page_count(page) != nr_refs || | ||
741 | *radix_pointer != page) { | ||
742 | write_unlock_irq(&mapping->tree_lock); | ||
743 | return -EAGAIN; | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * Now we know that no one else is looking at the page. | ||
748 | * | ||
749 | * Certain minimal information about a page must be available | ||
750 | * in order for other subsystems to properly handle the page if they | ||
751 | * find it through the radix tree update before we are finished | ||
752 | * copying the page. | ||
753 | */ | ||
754 | get_page(newpage); | ||
755 | newpage->index = page->index; | ||
756 | newpage->mapping = page->mapping; | ||
757 | if (PageSwapCache(page)) { | ||
758 | SetPageSwapCache(newpage); | ||
759 | set_page_private(newpage, page_private(page)); | ||
760 | } | ||
761 | |||
762 | *radix_pointer = newpage; | ||
763 | __put_page(page); | ||
764 | write_unlock_irq(&mapping->tree_lock); | ||
765 | |||
766 | return 0; | ||
767 | } | ||
768 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
769 | |||
770 | /* | ||
771 | * Copy the page to its new location | ||
772 | */ | ||
773 | void migrate_page_copy(struct page *newpage, struct page *page) | ||
774 | { | ||
775 | copy_highpage(newpage, page); | ||
776 | |||
777 | if (PageError(page)) | ||
778 | SetPageError(newpage); | ||
779 | if (PageReferenced(page)) | ||
780 | SetPageReferenced(newpage); | ||
781 | if (PageUptodate(page)) | ||
782 | SetPageUptodate(newpage); | ||
783 | if (PageActive(page)) | ||
784 | SetPageActive(newpage); | ||
785 | if (PageChecked(page)) | ||
786 | SetPageChecked(newpage); | ||
787 | if (PageMappedToDisk(page)) | ||
788 | SetPageMappedToDisk(newpage); | ||
789 | |||
790 | if (PageDirty(page)) { | ||
791 | clear_page_dirty_for_io(page); | ||
792 | set_page_dirty(newpage); | ||
793 | } | ||
794 | |||
795 | ClearPageSwapCache(page); | ||
796 | ClearPageActive(page); | ||
797 | ClearPagePrivate(page); | ||
798 | set_page_private(page, 0); | ||
799 | page->mapping = NULL; | ||
800 | |||
801 | /* | ||
802 | * If any waiters have accumulated on the new page then | ||
803 | * wake them up. | ||
804 | */ | ||
805 | if (PageWriteback(newpage)) | ||
806 | end_page_writeback(newpage); | ||
807 | } | ||
808 | EXPORT_SYMBOL(migrate_page_copy); | ||
809 | |||
810 | /* | ||
811 | * Common logic to directly migrate a single page suitable for | ||
812 | * pages that do not use PagePrivate. | ||
813 | * | ||
814 | * Pages are locked upon entry and exit. | ||
815 | */ | ||
816 | int migrate_page(struct page *newpage, struct page *page) | ||
817 | { | ||
818 | int rc; | ||
819 | |||
820 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | ||
821 | |||
822 | rc = migrate_page_remove_references(newpage, page, 2); | ||
823 | |||
824 | if (rc) | ||
825 | return rc; | ||
826 | |||
827 | migrate_page_copy(newpage, page); | ||
828 | |||
829 | /* | ||
830 | * Remove auxiliary swap entries and replace | ||
831 | * them with real ptes. | ||
832 | * | ||
833 | * Note that a real pte entry will allow processes that are not | ||
834 | * waiting on the page lock to use the new page via the page tables | ||
835 | * before the new page is unlocked. | ||
836 | */ | ||
837 | remove_from_swap(newpage); | ||
838 | return 0; | ||
839 | } | 559 | } |
840 | EXPORT_SYMBOL(migrate_page); | ||
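
migrate_page() above is the generic helper for mappings whose pages carry no fs-private state; migrate_pages() below reaches it through mapping->a_ops->migratepage. As a rough, hypothetical sketch of how a filesystem would opt in (the example_* names are placeholders, not part of this patch):

static struct address_space_operations example_aops = {
        .readpage       = example_readpage,     /* placeholder fs callbacks */
        .writepage      = example_writepage,
        /* use the common migration path instead of the dirty/buffer fallback */
        .migratepage    = migrate_page,
};
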
841 | |||
842 | /* | ||
843 | * migrate_pages | ||
844 | * | ||
845 | * Two lists are passed to this function. The first list | ||
846 | * contains the pages isolated from the LRU to be migrated. | ||
847 | * The second list contains new pages that the pages isolated | ||
848 | * can be moved to. If the second list is NULL then all | ||
849 | * pages are swapped out. | ||
850 | * | ||
851 | * The function returns after 10 attempts or if no pages | ||
852 | * are movable anymore because the "to" list has become empty | ||
853 | * or no retryable pages exist anymore. | ||
854 | * | ||
855 | * Return: Number of pages not migrated when "to" ran empty. | ||
856 | */ | ||
857 | int migrate_pages(struct list_head *from, struct list_head *to, | ||
858 | struct list_head *moved, struct list_head *failed) | ||
859 | { | ||
860 | int retry; | ||
861 | int nr_failed = 0; | ||
862 | int pass = 0; | ||
863 | struct page *page; | ||
864 | struct page *page2; | ||
865 | int swapwrite = current->flags & PF_SWAPWRITE; | ||
866 | int rc; | ||
867 | |||
868 | if (!swapwrite) | ||
869 | current->flags |= PF_SWAPWRITE; | ||
870 | |||
871 | redo: | ||
872 | retry = 0; | ||
873 | |||
874 | list_for_each_entry_safe(page, page2, from, lru) { | ||
875 | struct page *newpage = NULL; | ||
876 | struct address_space *mapping; | ||
877 | |||
878 | cond_resched(); | ||
879 | |||
880 | rc = 0; | ||
881 | if (page_count(page) == 1) | ||
882 | /* page was freed from under us. So we are done. */ | ||
883 | goto next; | ||
884 | |||
885 | if (to && list_empty(to)) | ||
886 | break; | ||
887 | |||
888 | /* | ||
889 | * Skip locked pages during the first two passes to give the | ||
890 | * functions holding the lock time to release the page. Later we | ||
891 | * use lock_page() to have a higher chance of acquiring the | ||
892 | * lock. | ||
893 | */ | ||
894 | rc = -EAGAIN; | ||
895 | if (pass > 2) | ||
896 | lock_page(page); | ||
897 | else | ||
898 | if (TestSetPageLocked(page)) | ||
899 | goto next; | ||
900 | |||
901 | /* | ||
902 | * Only wait on writeback if we have already done a pass where | ||
903 | * we may have triggered writeouts for lots of pages. | ||
904 | */ | ||
905 | if (pass > 0) { | ||
906 | wait_on_page_writeback(page); | ||
907 | } else { | ||
908 | if (PageWriteback(page)) | ||
909 | goto unlock_page; | ||
910 | } | ||
911 | |||
912 | /* | ||
913 | * Anonymous pages must have swap cache references otherwise | ||
914 | * the information contained in the page maps cannot be | ||
915 | * preserved. | ||
916 | */ | ||
917 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
918 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
919 | rc = -ENOMEM; | ||
920 | goto unlock_page; | ||
921 | } | ||
922 | } | ||
923 | |||
924 | if (!to) { | ||
925 | rc = swap_page(page); | ||
926 | goto next; | ||
927 | } | ||
928 | |||
929 | newpage = lru_to_page(to); | ||
930 | lock_page(newpage); | ||
931 | |||
932 | /* | ||
933 | * Pages are properly locked and writeback is complete. | ||
934 | * Try to migrate the page. | ||
935 | */ | ||
936 | mapping = page_mapping(page); | ||
937 | if (!mapping) | ||
938 | goto unlock_both; | ||
939 | |||
940 | if (mapping->a_ops->migratepage) { | ||
941 | /* | ||
942 | * Most pages have a mapping and most filesystems | ||
943 | * should provide a migration function. Anonymous | ||
944 | * pages are part of swap space which also has its | ||
945 | * own migration function. This is the most common | ||
946 | * path for page migration. | ||
947 | */ | ||
948 | rc = mapping->a_ops->migratepage(newpage, page); | ||
949 | goto unlock_both; | ||
950 | } | ||
951 | |||
952 | /* | ||
953 | * Default handling if a filesystem does not provide | ||
954 | * a migration function. We can only migrate clean | ||
955 | * pages so try to write out any dirty pages first. | ||
956 | */ | ||
957 | if (PageDirty(page)) { | ||
958 | switch (pageout(page, mapping)) { | ||
959 | case PAGE_KEEP: | ||
960 | case PAGE_ACTIVATE: | ||
961 | goto unlock_both; | ||
962 | |||
963 | case PAGE_SUCCESS: | ||
964 | unlock_page(newpage); | ||
965 | goto next; | ||
966 | |||
967 | case PAGE_CLEAN: | ||
968 | ; /* try to migrate the page below */ | ||
969 | } | ||
970 | } | ||
971 | |||
972 | /* | ||
973 | * Buffers are managed in a filesystem specific way. | ||
974 | * We must have no buffers or drop them. | ||
975 | */ | ||
976 | if (!page_has_buffers(page) || | ||
977 | try_to_release_page(page, GFP_KERNEL)) { | ||
978 | rc = migrate_page(newpage, page); | ||
979 | goto unlock_both; | ||
980 | } | ||
981 | |||
982 | /* | ||
983 | * On early passes with mapped pages simply | ||
984 | * retry. There may be a lock held for some | ||
985 | * buffers that may go away. Later | ||
986 | * swap them out. | ||
987 | */ | ||
988 | if (pass > 4) { | ||
989 | /* | ||
990 | * Persistently unable to drop buffers... As a | ||
991 | * measure of last resort we fall back to | ||
992 | * swap_page(). | ||
993 | */ | ||
994 | unlock_page(newpage); | ||
995 | newpage = NULL; | ||
996 | rc = swap_page(page); | ||
997 | goto next; | ||
998 | } | ||
999 | |||
1000 | unlock_both: | ||
1001 | unlock_page(newpage); | ||
1002 | |||
1003 | unlock_page: | ||
1004 | unlock_page(page); | ||
1005 | |||
1006 | next: | ||
1007 | if (rc == -EAGAIN) { | ||
1008 | retry++; | ||
1009 | } else if (rc) { | ||
1010 | /* Permanent failure */ | ||
1011 | list_move(&page->lru, failed); | ||
1012 | nr_failed++; | ||
1013 | } else { | ||
1014 | if (newpage) { | ||
1015 | /* Successful migration. Return page to LRU */ | ||
1016 | move_to_lru(newpage); | ||
1017 | } | ||
1018 | list_move(&page->lru, moved); | ||
1019 | } | ||
1020 | } | ||
1021 | if (retry && pass++ < 10) | ||
1022 | goto redo; | ||
1023 | |||
1024 | if (!swapwrite) | ||
1025 | current->flags &= ~PF_SWAPWRITE; | ||
1026 | |||
1027 | return nr_failed + retry; | ||
1028 | } | ||
1029 | |||
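
Stripped of the locking and writeback details, the control flow of migrate_pages() is a bounded retry loop: each pass revisits only the pages that failed with -EAGAIN, permanent failures move to the failed list, and the whole thing gives up after ten passes. A standalone userspace sketch of that shape (all names invented for illustration):

#include <stdio.h>

#define NR_ITEMS   8
#define MAX_PASSES 10

/* attempt() stands in for one per-page migration attempt:
 * 0 = success, -1 = transient failure (retry later), -2 = permanent failure. */
static int attempt(int i, int pass)
{
        if (i % 4 == 3)
                return -2;              /* e.g. a VM_LOCKED mapping */
        if (pass < i % 3)
                return -1;              /* e.g. page locked or under writeback */
        return 0;
}

int main(void)
{
        int state[NR_ITEMS] = { 0 };    /* 0 = pending, 1 = moved, 2 = failed */
        int pass = 0, retry, nr_failed = 0;

        do {
                retry = 0;
                for (int i = 0; i < NR_ITEMS; i++) {
                        if (state[i])
                                continue;       /* already taken off the list */
                        switch (attempt(i, pass)) {
                        case -1:
                                retry++;        /* leave it for the next pass */
                                break;
                        case -2:
                                state[i] = 2;   /* move to the "failed" list */
                                nr_failed++;
                                break;
                        default:
                                state[i] = 1;   /* move to the "moved" list */
                        }
                }
        } while (retry && pass++ < MAX_PASSES);

        /* Like migrate_pages(): report pages neither migrated nor retried out. */
        printf("nr_failed=%d retry=%d\n", nr_failed, retry);
        return 0;
}
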
1030 | /* | ||
1031 | * Isolate one page from the LRU lists and put it on the | ||
1032 | * indicated list with elevated refcount. | ||
1033 | * | ||
1034 | * Result: | ||
1035 | * 0 = page not on LRU list | ||
1036 | * 1 = page removed from LRU list and added to the specified list. | ||
1037 | */ | ||
1038 | int isolate_lru_page(struct page *page) | ||
1039 | { | ||
1040 | int ret = 0; | ||
1041 | |||
1042 | if (PageLRU(page)) { | ||
1043 | struct zone *zone = page_zone(page); | ||
1044 | spin_lock_irq(&zone->lru_lock); | ||
1045 | if (TestClearPageLRU(page)) { | ||
1046 | ret = 1; | ||
1047 | get_page(page); | ||
1048 | if (PageActive(page)) | ||
1049 | del_page_from_active_list(zone, page); | ||
1050 | else | ||
1051 | del_page_from_inactive_list(zone, page); | ||
1052 | } | ||
1053 | spin_unlock_irq(&zone->lru_lock); | ||
1054 | } | ||
1055 | |||
1056 | return ret; | ||
1057 | } | ||
1058 | #endif | ||
1059 | 560 | ||
1060 | /* | 561 | /* |
1061 | * zone->lru_lock is heavily contended. Some of the functions that | 562 | * zone->lru_lock is heavily contended. Some of the functions that |
@@ -1074,32 +575,35 @@ int isolate_lru_page(struct page *page) | |||
1074 | * | 575 | * |
1075 | * returns how many pages were moved onto *@dst. | 576 | * returns how many pages were moved onto *@dst. |
1076 | */ | 577 | */ |
1077 | static int isolate_lru_pages(int nr_to_scan, struct list_head *src, | 578 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, |
1078 | struct list_head *dst, int *scanned) | 579 | struct list_head *src, struct list_head *dst, |
580 | unsigned long *scanned) | ||
1079 | { | 581 | { |
1080 | int nr_taken = 0; | 582 | unsigned long nr_taken = 0; |
1081 | struct page *page; | 583 | struct page *page; |
1082 | int scan = 0; | 584 | unsigned long scan; |
1083 | 585 | ||
1084 | while (scan++ < nr_to_scan && !list_empty(src)) { | 586 | for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { |
587 | struct list_head *target; | ||
1085 | page = lru_to_page(src); | 588 | page = lru_to_page(src); |
1086 | prefetchw_prev_lru_page(page, src, flags); | 589 | prefetchw_prev_lru_page(page, src, flags); |
1087 | 590 | ||
1088 | if (!TestClearPageLRU(page)) | 591 | BUG_ON(!PageLRU(page)); |
1089 | BUG(); | 592 | |
1090 | list_del(&page->lru); | 593 | list_del(&page->lru); |
1091 | if (get_page_testone(page)) { | 594 | target = src; |
595 | if (likely(get_page_unless_zero(page))) { | ||
1092 | /* | 596 | /* |
1093 | * It is being freed elsewhere | 597 | * Be careful not to clear PageLRU until after we're |
598 | * sure the page is not being freed elsewhere -- the | ||
599 | * page release code relies on it. | ||
1094 | */ | 600 | */ |
1095 | __put_page(page); | 601 | ClearPageLRU(page); |
1096 | SetPageLRU(page); | 602 | target = dst; |
1097 | list_add(&page->lru, src); | ||
1098 | continue; | ||
1099 | } else { | ||
1100 | list_add(&page->lru, dst); | ||
1101 | nr_taken++; | 603 | nr_taken++; |
1102 | } | 604 | } /* else it is being freed elsewhere */ |
605 | |||
606 | list_add(&page->lru, target); | ||
1103 | } | 607 | } |
1104 | 608 | ||
1105 | *scanned = scan; | 609 | *scanned = scan; |
@@ -1107,23 +611,26 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, | |||
1107 | } | 611 | } |
1108 | 612 | ||
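
The isolate_lru_pages() rewrite above replaces get_page_testone() with get_page_unless_zero() and only clears PageLRU once the reference has actually been obtained, so a page whose count already hit zero is simply left for the freeing path. A userspace sketch of that take-a-reference-unless-dying primitive, using C11 atomics (names invented here):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the count has not already dropped to zero,
 * i.e. only if the object is not being freed elsewhere. */
static bool get_ref_unless_zero(atomic_int *refcount)
{
        int old = atomic_load(refcount);

        while (old != 0) {
                if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                        return true;            /* reference taken */
                /* CAS failed: old now holds the current value, retry */
        }
        return false;                           /* hit zero: leave it for the freer */
}

int main(void)
{
        atomic_int count = 1;
        return get_ref_unless_zero(&count) ? 0 : 1;     /* takes the ref, exits 0 */
}
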
1109 | /* | 613 | /* |
1110 | * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed | 614 | * shrink_inactive_list() is a helper for shrink_zone(). It returns the number |
615 | * of reclaimed pages | ||
1111 | */ | 616 | */ |
1112 | static void shrink_cache(struct zone *zone, struct scan_control *sc) | 617 | static unsigned long shrink_inactive_list(unsigned long max_scan, |
618 | struct zone *zone, struct scan_control *sc) | ||
1113 | { | 619 | { |
1114 | LIST_HEAD(page_list); | 620 | LIST_HEAD(page_list); |
1115 | struct pagevec pvec; | 621 | struct pagevec pvec; |
1116 | int max_scan = sc->nr_to_scan; | 622 | unsigned long nr_scanned = 0; |
623 | unsigned long nr_reclaimed = 0; | ||
1117 | 624 | ||
1118 | pagevec_init(&pvec, 1); | 625 | pagevec_init(&pvec, 1); |
1119 | 626 | ||
1120 | lru_add_drain(); | 627 | lru_add_drain(); |
1121 | spin_lock_irq(&zone->lru_lock); | 628 | spin_lock_irq(&zone->lru_lock); |
1122 | while (max_scan > 0) { | 629 | do { |
1123 | struct page *page; | 630 | struct page *page; |
1124 | int nr_taken; | 631 | unsigned long nr_taken; |
1125 | int nr_scan; | 632 | unsigned long nr_scan; |
1126 | int nr_freed; | 633 | unsigned long nr_freed; |
1127 | 634 | ||
1128 | nr_taken = isolate_lru_pages(sc->swap_cluster_max, | 635 | nr_taken = isolate_lru_pages(sc->swap_cluster_max, |
1129 | &zone->inactive_list, | 636 | &zone->inactive_list, |
@@ -1132,12 +639,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
1132 | zone->pages_scanned += nr_scan; | 639 | zone->pages_scanned += nr_scan; |
1133 | spin_unlock_irq(&zone->lru_lock); | 640 | spin_unlock_irq(&zone->lru_lock); |
1134 | 641 | ||
1135 | if (nr_taken == 0) | 642 | nr_scanned += nr_scan; |
1136 | goto done; | 643 | nr_freed = shrink_page_list(&page_list, sc); |
1137 | 644 | nr_reclaimed += nr_freed; | |
1138 | max_scan -= nr_scan; | ||
1139 | nr_freed = shrink_list(&page_list, sc); | ||
1140 | |||
1141 | local_irq_disable(); | 645 | local_irq_disable(); |
1142 | if (current_is_kswapd()) { | 646 | if (current_is_kswapd()) { |
1143 | __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); | 647 | __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); |
@@ -1146,14 +650,17 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
1146 | __mod_page_state_zone(zone, pgscan_direct, nr_scan); | 650 | __mod_page_state_zone(zone, pgscan_direct, nr_scan); |
1147 | __mod_page_state_zone(zone, pgsteal, nr_freed); | 651 | __mod_page_state_zone(zone, pgsteal, nr_freed); |
1148 | 652 | ||
653 | if (nr_taken == 0) | ||
654 | goto done; | ||
655 | |||
1149 | spin_lock(&zone->lru_lock); | 656 | spin_lock(&zone->lru_lock); |
1150 | /* | 657 | /* |
1151 | * Put back any unfreeable pages. | 658 | * Put back any unfreeable pages. |
1152 | */ | 659 | */ |
1153 | while (!list_empty(&page_list)) { | 660 | while (!list_empty(&page_list)) { |
1154 | page = lru_to_page(&page_list); | 661 | page = lru_to_page(&page_list); |
1155 | if (TestSetPageLRU(page)) | 662 | BUG_ON(PageLRU(page)); |
1156 | BUG(); | 663 | SetPageLRU(page); |
1157 | list_del(&page->lru); | 664 | list_del(&page->lru); |
1158 | if (PageActive(page)) | 665 | if (PageActive(page)) |
1159 | add_page_to_active_list(zone, page); | 666 | add_page_to_active_list(zone, page); |
@@ -1165,10 +672,12 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
1165 | spin_lock_irq(&zone->lru_lock); | 672 | spin_lock_irq(&zone->lru_lock); |
1166 | } | 673 | } |
1167 | } | 674 | } |
1168 | } | 675 | } while (nr_scanned < max_scan); |
1169 | spin_unlock_irq(&zone->lru_lock); | 676 | spin_unlock(&zone->lru_lock); |
1170 | done: | 677 | done: |
678 | local_irq_enable(); | ||
1171 | pagevec_release(&pvec); | 679 | pagevec_release(&pvec); |
680 | return nr_reclaimed; | ||
1172 | } | 681 | } |
1173 | 682 | ||
1174 | /* | 683 | /* |
@@ -1188,13 +697,12 @@ done: | |||
1188 | * The downside is that we have to touch page->_count against each page. | 697 | * The downside is that we have to touch page->_count against each page. |
1189 | * But we had to alter page->flags anyway. | 698 | * But we had to alter page->flags anyway. |
1190 | */ | 699 | */ |
1191 | static void | 700 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, |
1192 | refill_inactive_zone(struct zone *zone, struct scan_control *sc) | 701 | struct scan_control *sc) |
1193 | { | 702 | { |
1194 | int pgmoved; | 703 | unsigned long pgmoved; |
1195 | int pgdeactivate = 0; | 704 | int pgdeactivate = 0; |
1196 | int pgscanned; | 705 | unsigned long pgscanned; |
1197 | int nr_pages = sc->nr_to_scan; | ||
1198 | LIST_HEAD(l_hold); /* The pages which were snipped off */ | 706 | LIST_HEAD(l_hold); /* The pages which were snipped off */ |
1199 | LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */ | 707 | LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */ |
1200 | LIST_HEAD(l_active); /* Pages to go onto the active_list */ | 708 | LIST_HEAD(l_active); /* Pages to go onto the active_list */ |
@@ -1202,7 +710,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
1202 | struct pagevec pvec; | 710 | struct pagevec pvec; |
1203 | int reclaim_mapped = 0; | 711 | int reclaim_mapped = 0; |
1204 | 712 | ||
1205 | if (unlikely(sc->may_swap)) { | 713 | if (sc->may_swap) { |
1206 | long mapped_ratio; | 714 | long mapped_ratio; |
1207 | long distress; | 715 | long distress; |
1208 | long swap_tendency; | 716 | long swap_tendency; |
@@ -1272,10 +780,11 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
1272 | while (!list_empty(&l_inactive)) { | 780 | while (!list_empty(&l_inactive)) { |
1273 | page = lru_to_page(&l_inactive); | 781 | page = lru_to_page(&l_inactive); |
1274 | prefetchw_prev_lru_page(page, &l_inactive, flags); | 782 | prefetchw_prev_lru_page(page, &l_inactive, flags); |
1275 | if (TestSetPageLRU(page)) | 783 | BUG_ON(PageLRU(page)); |
1276 | BUG(); | 784 | SetPageLRU(page); |
1277 | if (!TestClearPageActive(page)) | 785 | BUG_ON(!PageActive(page)); |
1278 | BUG(); | 786 | ClearPageActive(page); |
787 | |||
1279 | list_move(&page->lru, &zone->inactive_list); | 788 | list_move(&page->lru, &zone->inactive_list); |
1280 | pgmoved++; | 789 | pgmoved++; |
1281 | if (!pagevec_add(&pvec, page)) { | 790 | if (!pagevec_add(&pvec, page)) { |
@@ -1301,8 +810,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
1301 | while (!list_empty(&l_active)) { | 810 | while (!list_empty(&l_active)) { |
1302 | page = lru_to_page(&l_active); | 811 | page = lru_to_page(&l_active); |
1303 | prefetchw_prev_lru_page(page, &l_active, flags); | 812 | prefetchw_prev_lru_page(page, &l_active, flags); |
1304 | if (TestSetPageLRU(page)) | 813 | BUG_ON(PageLRU(page)); |
1305 | BUG(); | 814 | SetPageLRU(page); |
1306 | BUG_ON(!PageActive(page)); | 815 | BUG_ON(!PageActive(page)); |
1307 | list_move(&page->lru, &zone->active_list); | 816 | list_move(&page->lru, &zone->active_list); |
1308 | pgmoved++; | 817 | pgmoved++; |
@@ -1327,11 +836,13 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
1327 | /* | 836 | /* |
1328 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. | 837 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. |
1329 | */ | 838 | */ |
1330 | static void | 839 | static unsigned long shrink_zone(int priority, struct zone *zone, |
1331 | shrink_zone(struct zone *zone, struct scan_control *sc) | 840 | struct scan_control *sc) |
1332 | { | 841 | { |
1333 | unsigned long nr_active; | 842 | unsigned long nr_active; |
1334 | unsigned long nr_inactive; | 843 | unsigned long nr_inactive; |
844 | unsigned long nr_to_scan; | ||
845 | unsigned long nr_reclaimed = 0; | ||
1335 | 846 | ||
1336 | atomic_inc(&zone->reclaim_in_progress); | 847 | atomic_inc(&zone->reclaim_in_progress); |
1337 | 848 | ||
@@ -1339,14 +850,14 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
1339 | * Add one to `nr_to_scan' just to make sure that the kernel will | 850 | * Add one to `nr_to_scan' just to make sure that the kernel will |
1340 | * slowly sift through the active list. | 851 | * slowly sift through the active list. |
1341 | */ | 852 | */ |
1342 | zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1; | 853 | zone->nr_scan_active += (zone->nr_active >> priority) + 1; |
1343 | nr_active = zone->nr_scan_active; | 854 | nr_active = zone->nr_scan_active; |
1344 | if (nr_active >= sc->swap_cluster_max) | 855 | if (nr_active >= sc->swap_cluster_max) |
1345 | zone->nr_scan_active = 0; | 856 | zone->nr_scan_active = 0; |
1346 | else | 857 | else |
1347 | nr_active = 0; | 858 | nr_active = 0; |
1348 | 859 | ||
1349 | zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1; | 860 | zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; |
1350 | nr_inactive = zone->nr_scan_inactive; | 861 | nr_inactive = zone->nr_scan_inactive; |
1351 | if (nr_inactive >= sc->swap_cluster_max) | 862 | if (nr_inactive >= sc->swap_cluster_max) |
1352 | zone->nr_scan_inactive = 0; | 863 | zone->nr_scan_inactive = 0; |
@@ -1355,23 +866,25 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
1355 | 866 | ||
1356 | while (nr_active || nr_inactive) { | 867 | while (nr_active || nr_inactive) { |
1357 | if (nr_active) { | 868 | if (nr_active) { |
1358 | sc->nr_to_scan = min(nr_active, | 869 | nr_to_scan = min(nr_active, |
1359 | (unsigned long)sc->swap_cluster_max); | 870 | (unsigned long)sc->swap_cluster_max); |
1360 | nr_active -= sc->nr_to_scan; | 871 | nr_active -= nr_to_scan; |
1361 | refill_inactive_zone(zone, sc); | 872 | shrink_active_list(nr_to_scan, zone, sc); |
1362 | } | 873 | } |
1363 | 874 | ||
1364 | if (nr_inactive) { | 875 | if (nr_inactive) { |
1365 | sc->nr_to_scan = min(nr_inactive, | 876 | nr_to_scan = min(nr_inactive, |
1366 | (unsigned long)sc->swap_cluster_max); | 877 | (unsigned long)sc->swap_cluster_max); |
1367 | nr_inactive -= sc->nr_to_scan; | 878 | nr_inactive -= nr_to_scan; |
1368 | shrink_cache(zone, sc); | 879 | nr_reclaimed += shrink_inactive_list(nr_to_scan, zone, |
880 | sc); | ||
1369 | } | 881 | } |
1370 | } | 882 | } |
1371 | 883 | ||
1372 | throttle_vm_writeout(); | 884 | throttle_vm_writeout(); |
1373 | 885 | ||
1374 | atomic_dec(&zone->reclaim_in_progress); | 886 | atomic_dec(&zone->reclaim_in_progress); |
887 | return nr_reclaimed; | ||
1375 | } | 888 | } |
1376 | 889 | ||
1377 | /* | 890 | /* |
@@ -1390,9 +903,10 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
1390 | * If a zone is deemed to be full of pinned pages then just give it a light | 903 | * If a zone is deemed to be full of pinned pages then just give it a light |
1391 | * scan then give up on it. | 904 | * scan then give up on it. |
1392 | */ | 905 | */ |
1393 | static void | 906 | static unsigned long shrink_zones(int priority, struct zone **zones, |
1394 | shrink_caches(struct zone **zones, struct scan_control *sc) | 907 | struct scan_control *sc) |
1395 | { | 908 | { |
909 | unsigned long nr_reclaimed = 0; | ||
1396 | int i; | 910 | int i; |
1397 | 911 | ||
1398 | for (i = 0; zones[i] != NULL; i++) { | 912 | for (i = 0; zones[i] != NULL; i++) { |
@@ -1404,15 +918,16 @@ shrink_caches(struct zone **zones, struct scan_control *sc) | |||
1404 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 918 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
1405 | continue; | 919 | continue; |
1406 | 920 | ||
1407 | zone->temp_priority = sc->priority; | 921 | zone->temp_priority = priority; |
1408 | if (zone->prev_priority > sc->priority) | 922 | if (zone->prev_priority > priority) |
1409 | zone->prev_priority = sc->priority; | 923 | zone->prev_priority = priority; |
1410 | 924 | ||
1411 | if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY) | 925 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
1412 | continue; /* Let kswapd poll it */ | 926 | continue; /* Let kswapd poll it */ |
1413 | 927 | ||
1414 | shrink_zone(zone, sc); | 928 | nr_reclaimed += shrink_zone(priority, zone, sc); |
1415 | } | 929 | } |
930 | return nr_reclaimed; | ||
1416 | } | 931 | } |
1417 | 932 | ||
1418 | /* | 933 | /* |
@@ -1428,19 +943,21 @@ shrink_caches(struct zone **zones, struct scan_control *sc) | |||
1428 | * holds filesystem locks which prevent writeout this might not work, and the | 943 | * holds filesystem locks which prevent writeout this might not work, and the |
1429 | * allocation attempt will fail. | 944 | * allocation attempt will fail. |
1430 | */ | 945 | */ |
1431 | int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | 946 | unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) |
1432 | { | 947 | { |
1433 | int priority; | 948 | int priority; |
1434 | int ret = 0; | 949 | int ret = 0; |
1435 | int total_scanned = 0, total_reclaimed = 0; | 950 | unsigned long total_scanned = 0; |
951 | unsigned long nr_reclaimed = 0; | ||
1436 | struct reclaim_state *reclaim_state = current->reclaim_state; | 952 | struct reclaim_state *reclaim_state = current->reclaim_state; |
1437 | struct scan_control sc; | ||
1438 | unsigned long lru_pages = 0; | 953 | unsigned long lru_pages = 0; |
1439 | int i; | 954 | int i; |
1440 | 955 | struct scan_control sc = { | |
1441 | sc.gfp_mask = gfp_mask; | 956 | .gfp_mask = gfp_mask, |
1442 | sc.may_writepage = !laptop_mode; | 957 | .may_writepage = !laptop_mode, |
1443 | sc.may_swap = 1; | 958 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
959 | .may_swap = 1, | ||
960 | }; | ||
1444 | 961 | ||
1445 | inc_page_state(allocstall); | 962 | inc_page_state(allocstall); |
1446 | 963 | ||
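
try_to_free_pages() (and balance_pgdat() further down) now fills its scan_control with C99 designated initializers rather than field-by-field assignments; members that are not named start out zero. A small self-contained illustration of that behavior, with the struct abbreviated and constants stubbed:

#include <stdio.h>

/* Abbreviated stand-in for the kernel's struct scan_control. */
struct scan_control {
        unsigned long nr_scanned;
        unsigned long nr_mapped;
        int may_writepage;
        int may_swap;
        unsigned int swap_cluster_max;
};

int main(void)
{
        /* Only the named members are set; everything else starts at zero. */
        struct scan_control sc = {
                .may_writepage = 1,
                .swap_cluster_max = 32,         /* stand-in for SWAP_CLUSTER_MAX */
                .may_swap = 1,
        };

        printf("nr_scanned=%lu may_swap=%d max=%u\n",
               sc.nr_scanned, sc.may_swap, sc.swap_cluster_max);
        return 0;
}
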
@@ -1457,20 +974,16 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
1457 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 974 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
1458 | sc.nr_mapped = read_page_state(nr_mapped); | 975 | sc.nr_mapped = read_page_state(nr_mapped); |
1459 | sc.nr_scanned = 0; | 976 | sc.nr_scanned = 0; |
1460 | sc.nr_reclaimed = 0; | ||
1461 | sc.priority = priority; | ||
1462 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | ||
1463 | if (!priority) | 977 | if (!priority) |
1464 | disable_swap_token(); | 978 | disable_swap_token(); |
1465 | shrink_caches(zones, &sc); | 979 | nr_reclaimed += shrink_zones(priority, zones, &sc); |
1466 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); | 980 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); |
1467 | if (reclaim_state) { | 981 | if (reclaim_state) { |
1468 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | 982 | nr_reclaimed += reclaim_state->reclaimed_slab; |
1469 | reclaim_state->reclaimed_slab = 0; | 983 | reclaim_state->reclaimed_slab = 0; |
1470 | } | 984 | } |
1471 | total_scanned += sc.nr_scanned; | 985 | total_scanned += sc.nr_scanned; |
1472 | total_reclaimed += sc.nr_reclaimed; | 986 | if (nr_reclaimed >= sc.swap_cluster_max) { |
1473 | if (total_reclaimed >= sc.swap_cluster_max) { | ||
1474 | ret = 1; | 987 | ret = 1; |
1475 | goto out; | 988 | goto out; |
1476 | } | 989 | } |
@@ -1482,7 +995,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
1482 | * that's undesirable in laptop mode, where we *want* lumpy | 995 | * that's undesirable in laptop mode, where we *want* lumpy |
1483 | * writeout. So in laptop mode, write out the whole world. | 996 | * writeout. So in laptop mode, write out the whole world. |
1484 | */ | 997 | */ |
1485 | if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) { | 998 | if (total_scanned > sc.swap_cluster_max + |
999 | sc.swap_cluster_max / 2) { | ||
1486 | wakeup_pdflush(laptop_mode ? 0 : total_scanned); | 1000 | wakeup_pdflush(laptop_mode ? 0 : total_scanned); |
1487 | sc.may_writepage = 1; | 1001 | sc.may_writepage = 1; |
1488 | } | 1002 | } |
@@ -1528,22 +1042,26 @@ out: | |||
1528 | * the page allocator fallback scheme to ensure that aging of pages is balanced | 1042 | * the page allocator fallback scheme to ensure that aging of pages is balanced |
1529 | * across the zones. | 1043 | * across the zones. |
1530 | */ | 1044 | */ |
1531 | static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order) | 1045 | static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages, |
1046 | int order) | ||
1532 | { | 1047 | { |
1533 | int to_free = nr_pages; | 1048 | unsigned long to_free = nr_pages; |
1534 | int all_zones_ok; | 1049 | int all_zones_ok; |
1535 | int priority; | 1050 | int priority; |
1536 | int i; | 1051 | int i; |
1537 | int total_scanned, total_reclaimed; | 1052 | unsigned long total_scanned; |
1053 | unsigned long nr_reclaimed; | ||
1538 | struct reclaim_state *reclaim_state = current->reclaim_state; | 1054 | struct reclaim_state *reclaim_state = current->reclaim_state; |
1539 | struct scan_control sc; | 1055 | struct scan_control sc = { |
1056 | .gfp_mask = GFP_KERNEL, | ||
1057 | .may_swap = 1, | ||
1058 | .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX, | ||
1059 | }; | ||
1540 | 1060 | ||
1541 | loop_again: | 1061 | loop_again: |
1542 | total_scanned = 0; | 1062 | total_scanned = 0; |
1543 | total_reclaimed = 0; | 1063 | nr_reclaimed = 0; |
1544 | sc.gfp_mask = GFP_KERNEL; | 1064 | sc.may_writepage = !laptop_mode, |
1545 | sc.may_writepage = !laptop_mode; | ||
1546 | sc.may_swap = 1; | ||
1547 | sc.nr_mapped = read_page_state(nr_mapped); | 1065 | sc.nr_mapped = read_page_state(nr_mapped); |
1548 | 1066 | ||
1549 | inc_page_state(pageoutrun); | 1067 | inc_page_state(pageoutrun); |
@@ -1624,15 +1142,11 @@ scan: | |||
1624 | if (zone->prev_priority > priority) | 1142 | if (zone->prev_priority > priority) |
1625 | zone->prev_priority = priority; | 1143 | zone->prev_priority = priority; |
1626 | sc.nr_scanned = 0; | 1144 | sc.nr_scanned = 0; |
1627 | sc.nr_reclaimed = 0; | 1145 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
1628 | sc.priority = priority; | ||
1629 | sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX; | ||
1630 | shrink_zone(zone, &sc); | ||
1631 | reclaim_state->reclaimed_slab = 0; | 1146 | reclaim_state->reclaimed_slab = 0; |
1632 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, | 1147 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, |
1633 | lru_pages); | 1148 | lru_pages); |
1634 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | 1149 | nr_reclaimed += reclaim_state->reclaimed_slab; |
1635 | total_reclaimed += sc.nr_reclaimed; | ||
1636 | total_scanned += sc.nr_scanned; | 1150 | total_scanned += sc.nr_scanned; |
1637 | if (zone->all_unreclaimable) | 1151 | if (zone->all_unreclaimable) |
1638 | continue; | 1152 | continue; |
@@ -1645,10 +1159,10 @@ scan: | |||
1645 | * even in laptop mode | 1159 | * even in laptop mode |
1646 | */ | 1160 | */ |
1647 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && | 1161 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && |
1648 | total_scanned > total_reclaimed+total_reclaimed/2) | 1162 | total_scanned > nr_reclaimed + nr_reclaimed / 2) |
1649 | sc.may_writepage = 1; | 1163 | sc.may_writepage = 1; |
1650 | } | 1164 | } |
1651 | if (nr_pages && to_free > total_reclaimed) | 1165 | if (nr_pages && to_free > nr_reclaimed) |
1652 | continue; /* swsusp: need to do more work */ | 1166 | continue; /* swsusp: need to do more work */ |
1653 | if (all_zones_ok) | 1167 | if (all_zones_ok) |
1654 | break; /* kswapd: all done */ | 1168 | break; /* kswapd: all done */ |
@@ -1665,7 +1179,7 @@ scan: | |||
1665 | * matches the direct reclaim path behaviour in terms of impact | 1179 | * matches the direct reclaim path behaviour in terms of impact |
1666 | * on zone->*_priority. | 1180 | * on zone->*_priority. |
1667 | */ | 1181 | */ |
1668 | if ((total_reclaimed >= SWAP_CLUSTER_MAX) && (!nr_pages)) | 1182 | if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages) |
1669 | break; | 1183 | break; |
1670 | } | 1184 | } |
1671 | out: | 1185 | out: |
@@ -1679,7 +1193,7 @@ out: | |||
1679 | goto loop_again; | 1193 | goto loop_again; |
1680 | } | 1194 | } |
1681 | 1195 | ||
1682 | return total_reclaimed; | 1196 | return nr_reclaimed; |
1683 | } | 1197 | } |
1684 | 1198 | ||
1685 | /* | 1199 | /* |
@@ -1779,24 +1293,31 @@ void wakeup_kswapd(struct zone *zone, int order) | |||
1779 | * Try to free `nr_pages' of memory, system-wide. Returns the number of freed | 1293 | * Try to free `nr_pages' of memory, system-wide. Returns the number of freed |
1780 | * pages. | 1294 | * pages. |
1781 | */ | 1295 | */ |
1782 | int shrink_all_memory(int nr_pages) | 1296 | unsigned long shrink_all_memory(unsigned long nr_pages) |
1783 | { | 1297 | { |
1784 | pg_data_t *pgdat; | 1298 | pg_data_t *pgdat; |
1785 | int nr_to_free = nr_pages; | 1299 | unsigned long nr_to_free = nr_pages; |
1786 | int ret = 0; | 1300 | unsigned long ret = 0; |
1301 | unsigned retry = 2; | ||
1787 | struct reclaim_state reclaim_state = { | 1302 | struct reclaim_state reclaim_state = { |
1788 | .reclaimed_slab = 0, | 1303 | .reclaimed_slab = 0, |
1789 | }; | 1304 | }; |
1790 | 1305 | ||
1791 | current->reclaim_state = &reclaim_state; | 1306 | current->reclaim_state = &reclaim_state; |
1307 | repeat: | ||
1792 | for_each_pgdat(pgdat) { | 1308 | for_each_pgdat(pgdat) { |
1793 | int freed; | 1309 | unsigned long freed; |
1310 | |||
1794 | freed = balance_pgdat(pgdat, nr_to_free, 0); | 1311 | freed = balance_pgdat(pgdat, nr_to_free, 0); |
1795 | ret += freed; | 1312 | ret += freed; |
1796 | nr_to_free -= freed; | 1313 | nr_to_free -= freed; |
1797 | if (nr_to_free <= 0) | 1314 | if ((long)nr_to_free <= 0) |
1798 | break; | 1315 | break; |
1799 | } | 1316 | } |
1317 | if (retry-- && ret < nr_pages) { | ||
1318 | blk_congestion_wait(WRITE, HZ/5); | ||
1319 | goto repeat; | ||
1320 | } | ||
1800 | current->reclaim_state = NULL; | 1321 | current->reclaim_state = NULL; |
1801 | return ret; | 1322 | return ret; |
1802 | } | 1323 | } |
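
With nr_to_free now an unsigned long, the old exit test in shrink_all_memory() could never fire once balance_pgdat() freed more than was requested, because the subtraction wraps around to a huge positive value; the (long) cast restores the intended went-past-zero check. A minimal standalone demonstration:

#include <stdio.h>

int main(void)
{
        unsigned long nr_to_free = 16;
        unsigned long freed = 20;       /* balance_pgdat() freed more than asked for */

        nr_to_free -= freed;            /* unsigned wrap: now a huge positive value */

        printf("old test (unsigned <= 0): %d\n", nr_to_free <= 0);       /* 0: never true */
        printf("new test ((long)  <= 0): %d\n", (long)nr_to_free <= 0);  /* 1: loop exits */
        return 0;
}
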
@@ -1808,8 +1329,7 @@ int shrink_all_memory(int nr_pages) | |||
1808 | away, we get changed to run anywhere: as the first one comes back, | 1329 | away, we get changed to run anywhere: as the first one comes back, |
1809 | restore their cpu bindings. */ | 1330 | restore their cpu bindings. */ |
1810 | static int __devinit cpu_callback(struct notifier_block *nfb, | 1331 | static int __devinit cpu_callback(struct notifier_block *nfb, |
1811 | unsigned long action, | 1332 | unsigned long action, void *hcpu) |
1812 | void *hcpu) | ||
1813 | { | 1333 | { |
1814 | pg_data_t *pgdat; | 1334 | pg_data_t *pgdat; |
1815 | cpumask_t mask; | 1335 | cpumask_t mask; |
@@ -1829,10 +1349,15 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
1829 | static int __init kswapd_init(void) | 1349 | static int __init kswapd_init(void) |
1830 | { | 1350 | { |
1831 | pg_data_t *pgdat; | 1351 | pg_data_t *pgdat; |
1352 | |||
1832 | swap_setup(); | 1353 | swap_setup(); |
1833 | for_each_pgdat(pgdat) | 1354 | for_each_pgdat(pgdat) { |
1834 | pgdat->kswapd | 1355 | pid_t pid; |
1835 | = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); | 1356 | |
1357 | pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); | ||
1358 | BUG_ON(pid < 0); | ||
1359 | pgdat->kswapd = find_task_by_pid(pid); | ||
1360 | } | ||
1836 | total_memory = nr_free_pagecache_pages(); | 1361 | total_memory = nr_free_pagecache_pages(); |
1837 | hotcpu_notifier(cpu_callback, 0); | 1362 | hotcpu_notifier(cpu_callback, 0); |
1838 | return 0; | 1363 | return 0; |
@@ -1874,46 +1399,24 @@ int zone_reclaim_interval __read_mostly = 30*HZ; | |||
1874 | /* | 1399 | /* |
1875 | * Try to free up some pages from this zone through reclaim. | 1400 | * Try to free up some pages from this zone through reclaim. |
1876 | */ | 1401 | */ |
1877 | int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | 1402 | static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) |
1878 | { | 1403 | { |
1879 | int nr_pages; | 1404 | /* Minimum pages needed in order to stay on node */ |
1405 | const unsigned long nr_pages = 1 << order; | ||
1880 | struct task_struct *p = current; | 1406 | struct task_struct *p = current; |
1881 | struct reclaim_state reclaim_state; | 1407 | struct reclaim_state reclaim_state; |
1882 | struct scan_control sc; | 1408 | int priority; |
1883 | cpumask_t mask; | 1409 | unsigned long nr_reclaimed = 0; |
1884 | int node_id; | 1410 | struct scan_control sc = { |
1885 | 1411 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), | |
1886 | if (time_before(jiffies, | 1412 | .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), |
1887 | zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) | 1413 | .nr_mapped = read_page_state(nr_mapped), |
1888 | return 0; | 1414 | .swap_cluster_max = max_t(unsigned long, nr_pages, |
1889 | 1415 | SWAP_CLUSTER_MAX), | |
1890 | if (!(gfp_mask & __GFP_WAIT) || | 1416 | .gfp_mask = gfp_mask, |
1891 | zone->all_unreclaimable || | 1417 | }; |
1892 | atomic_read(&zone->reclaim_in_progress) > 0 || | ||
1893 | (p->flags & PF_MEMALLOC)) | ||
1894 | return 0; | ||
1895 | |||
1896 | node_id = zone->zone_pgdat->node_id; | ||
1897 | mask = node_to_cpumask(node_id); | ||
1898 | if (!cpus_empty(mask) && node_id != numa_node_id()) | ||
1899 | return 0; | ||
1900 | |||
1901 | sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE); | ||
1902 | sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP); | ||
1903 | sc.nr_scanned = 0; | ||
1904 | sc.nr_reclaimed = 0; | ||
1905 | sc.priority = ZONE_RECLAIM_PRIORITY + 1; | ||
1906 | sc.nr_mapped = read_page_state(nr_mapped); | ||
1907 | sc.gfp_mask = gfp_mask; | ||
1908 | 1418 | ||
1909 | disable_swap_token(); | 1419 | disable_swap_token(); |
1910 | |||
1911 | nr_pages = 1 << order; | ||
1912 | if (nr_pages > SWAP_CLUSTER_MAX) | ||
1913 | sc.swap_cluster_max = nr_pages; | ||
1914 | else | ||
1915 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | ||
1916 | |||
1917 | cond_resched(); | 1420 | cond_resched(); |
1918 | /* | 1421 | /* |
1919 | * We need to be able to allocate from the reserves for RECLAIM_SWAP | 1422 | * We need to be able to allocate from the reserves for RECLAIM_SWAP |
@@ -1928,17 +1431,20 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1928 | * Free memory by calling shrink zone with increasing priorities | 1431 | * Free memory by calling shrink zone with increasing priorities |
1929 | * until we have enough memory freed. | 1432 | * until we have enough memory freed. |
1930 | */ | 1433 | */ |
1434 | priority = ZONE_RECLAIM_PRIORITY; | ||
1931 | do { | 1435 | do { |
1932 | sc.priority--; | 1436 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
1933 | shrink_zone(zone, &sc); | 1437 | priority--; |
1438 | } while (priority >= 0 && nr_reclaimed < nr_pages); | ||
1934 | 1439 | ||
1935 | } while (sc.nr_reclaimed < nr_pages && sc.priority > 0); | 1440 | if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { |
1936 | |||
1937 | if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { | ||
1938 | /* | 1441 | /* |
1939 | * shrink_slab does not currently allow us to determine | 1442 | * shrink_slab() does not currently allow us to determine how |
1940 | * how many pages were freed in the zone. So we just | 1443 | * many pages were freed in this zone. So we just shake the slab |
1941 | * shake the slab and then go offnode for a single allocation. | 1444 | * a bit and then go off node for this particular allocation |
1445 | * despite possibly having freed enough memory to allocate in | ||
1446 | * this zone. If we freed local memory then the next | ||
1447 | * allocations will be local again. | ||
1942 | * | 1448 | * |
1943 | * shrink_slab will free memory on all zones and may take | 1449 | * shrink_slab will free memory on all zones and may take |
1944 | * a long time. | 1450 | * a long time. |
@@ -1949,10 +1455,54 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1949 | p->reclaim_state = NULL; | 1455 | p->reclaim_state = NULL; |
1950 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); | 1456 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); |
1951 | 1457 | ||
1952 | if (sc.nr_reclaimed == 0) | 1458 | if (nr_reclaimed == 0) { |
1459 | /* | ||
1460 | * We were unable to reclaim enough pages to stay on node. We | ||
1461 | * now allow off node accesses for a certain time period before | ||
1462 | * trying again to reclaim pages from the local zone. | ||
1463 | */ | ||
1953 | zone->last_unsuccessful_zone_reclaim = jiffies; | 1464 | zone->last_unsuccessful_zone_reclaim = jiffies; |
1465 | } | ||
1954 | 1466 | ||
1955 | return sc.nr_reclaimed >= nr_pages; | 1467 | return nr_reclaimed >= nr_pages; |
1956 | } | 1468 | } |
1957 | #endif | ||
1958 | 1469 | ||
1470 | int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | ||
1471 | { | ||
1472 | cpumask_t mask; | ||
1473 | int node_id; | ||
1474 | |||
1475 | /* | ||
1476 | * Do not reclaim if there was a recent unsuccessful attempt at zone | ||
1477 | * reclaim. In that case we let allocations go off node for the | ||
1478 | * zone_reclaim_interval. Otherwise we would scan for each off-node | ||
1479 | * page allocation. | ||
1480 | */ | ||
1481 | if (time_before(jiffies, | ||
1482 | zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) | ||
1483 | return 0; | ||
1484 | |||
1485 | /* | ||
1486 | * Avoid concurrent zone reclaims, do not reclaim in a zone that does | ||
1487 | * not have reclaimable pages and if we should not delay the allocation | ||
1488 | * then do not scan. | ||
1489 | */ | ||
1490 | if (!(gfp_mask & __GFP_WAIT) || | ||
1491 | zone->all_unreclaimable || | ||
1492 | atomic_read(&zone->reclaim_in_progress) > 0 || | ||
1493 | (current->flags & PF_MEMALLOC)) | ||
1494 | return 0; | ||
1495 | |||
1496 | /* | ||
1497 | * Only run zone reclaim on the local zone or on zones that do not | ||
1498 | * have associated processors. This will favor the local processor | ||
1499 | * over remote processors and spread off node memory allocations | ||
1500 | * as wide as possible. | ||
1501 | */ | ||
1502 | node_id = zone->zone_pgdat->node_id; | ||
1503 | mask = node_to_cpumask(node_id); | ||
1504 | if (!cpus_empty(mask) && node_id != numa_node_id()) | ||
1505 | return 0; | ||
1506 | return __zone_reclaim(zone, gfp_mask, order); | ||
1507 | } | ||
1508 | #endif | ||
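
zone_reclaim() gates the expensive __zone_reclaim() path on time_before(jiffies, last_unsuccessful_zone_reclaim + zone_reclaim_interval), which stays correct even when jiffies wraps because the comparison is done as a signed difference. A minimal userspace sketch of the idea (the real kernel macro also adds type checking):

#include <stdio.h>
#include <limits.h>

/* Wraparound-safe "a is before b", the idea behind the kernel's time_before(). */
#define time_before(a, b)   ((long)((a) - (b)) < 0)

int main(void)
{
        unsigned long jiffies  = ULONG_MAX - 16;        /* counter about to wrap */
        unsigned long deadline = jiffies + 32;          /* wraps past zero */

        /* A plain "<" thinks the deadline is already in the past; the signed
         * difference still sees it as 32 ticks in the future. */
        printf("naive:       %d\n", jiffies < deadline);
        printf("time_before: %d\n", time_before(jiffies, deadline));
        return 0;
}
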
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 74cb79eb917e..f6940618e345 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c | |||
@@ -16,11 +16,12 @@ | |||
16 | #include <linux/keyctl.h> | 16 | #include <linux/keyctl.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
19 | #include <linux/mutex.h> | ||
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
20 | #include "internal.h" | 21 | #include "internal.h" |
21 | 22 | ||
22 | /* session keyring create vs join semaphore */ | 23 | /* session keyring create vs join semaphore */ |
23 | static DECLARE_MUTEX(key_session_sem); | 24 | static DEFINE_MUTEX(key_session_mutex); |
24 | 25 | ||
25 | /* the root user's tracking struct */ | 26 | /* the root user's tracking struct */ |
26 | struct key_user root_key_user = { | 27 | struct key_user root_key_user = { |
@@ -711,7 +712,7 @@ long join_session_keyring(const char *name) | |||
711 | } | 712 | } |
712 | 713 | ||
713 | /* allow the user to join or create a named keyring */ | 714 | /* allow the user to join or create a named keyring */ |
714 | down(&key_session_sem); | 715 | mutex_lock(&key_session_mutex); |
715 | 716 | ||
716 | /* look for an existing keyring of this name */ | 717 | /* look for an existing keyring of this name */ |
717 | keyring = find_keyring_by_name(name, 0); | 718 | keyring = find_keyring_by_name(name, 0); |
@@ -737,7 +738,7 @@ long join_session_keyring(const char *name) | |||
737 | key_put(keyring); | 738 | key_put(keyring); |
738 | 739 | ||
739 | error2: | 740 | error2: |
740 | up(&key_session_sem); | 741 | mutex_unlock(&key_session_mutex); |
741 | error: | 742 | error: |
742 | return ret; | 743 | return ret; |
743 | 744 | ||
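
The process_keys.c hunk above, like the selinuxfs.c hunks below, is part of the tree-wide conversion of semaphores used as plain locks to struct mutex. Schematically, with a placeholder lock name rather than any specific one from this patch (kernel-style sketch, not a standalone compilable unit):

#include <linux/mutex.h>

static DEFINE_MUTEX(example_mutex);     /* was: static DECLARE_MUTEX(example_sem); */

static void example_serialized_op(void)
{
        mutex_lock(&example_mutex);     /* was: down(&example_sem); */
        /* ... work that must not run concurrently ... */
        mutex_unlock(&example_mutex);   /* was: up(&example_sem); */
}
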
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b16196f2823..ccaf988f3729 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -117,6 +117,8 @@ static struct security_operations *secondary_ops = NULL; | |||
117 | static LIST_HEAD(superblock_security_head); | 117 | static LIST_HEAD(superblock_security_head); |
118 | static DEFINE_SPINLOCK(sb_security_lock); | 118 | static DEFINE_SPINLOCK(sb_security_lock); |
119 | 119 | ||
120 | static kmem_cache_t *sel_inode_cache; | ||
121 | |||
120 | /* Allocate and free functions for each kind of security blob. */ | 122 | /* Allocate and free functions for each kind of security blob. */ |
121 | 123 | ||
122 | static int task_alloc_security(struct task_struct *task) | 124 | static int task_alloc_security(struct task_struct *task) |
@@ -146,10 +148,11 @@ static int inode_alloc_security(struct inode *inode) | |||
146 | struct task_security_struct *tsec = current->security; | 148 | struct task_security_struct *tsec = current->security; |
147 | struct inode_security_struct *isec; | 149 | struct inode_security_struct *isec; |
148 | 150 | ||
149 | isec = kzalloc(sizeof(struct inode_security_struct), GFP_KERNEL); | 151 | isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL); |
150 | if (!isec) | 152 | if (!isec) |
151 | return -ENOMEM; | 153 | return -ENOMEM; |
152 | 154 | ||
155 | memset(isec, 0, sizeof(*isec)); | ||
153 | init_MUTEX(&isec->sem); | 156 | init_MUTEX(&isec->sem); |
154 | INIT_LIST_HEAD(&isec->list); | 157 | INIT_LIST_HEAD(&isec->list); |
155 | isec->inode = inode; | 158 | isec->inode = inode; |
@@ -172,7 +175,7 @@ static void inode_free_security(struct inode *inode) | |||
172 | spin_unlock(&sbsec->isec_lock); | 175 | spin_unlock(&sbsec->isec_lock); |
173 | 176 | ||
174 | inode->i_security = NULL; | 177 | inode->i_security = NULL; |
175 | kfree(isec); | 178 | kmem_cache_free(sel_inode_cache, isec); |
176 | } | 179 | } |
177 | 180 | ||
178 | static int file_alloc_security(struct file *file) | 181 | static int file_alloc_security(struct file *file) |
@@ -1929,7 +1932,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
1929 | struct task_security_struct *tsec; | 1932 | struct task_security_struct *tsec; |
1930 | struct inode_security_struct *dsec; | 1933 | struct inode_security_struct *dsec; |
1931 | struct superblock_security_struct *sbsec; | 1934 | struct superblock_security_struct *sbsec; |
1932 | struct inode_security_struct *isec; | ||
1933 | u32 newsid, clen; | 1935 | u32 newsid, clen; |
1934 | int rc; | 1936 | int rc; |
1935 | char *namep = NULL, *context; | 1937 | char *namep = NULL, *context; |
@@ -1937,7 +1939,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
1937 | tsec = current->security; | 1939 | tsec = current->security; |
1938 | dsec = dir->i_security; | 1940 | dsec = dir->i_security; |
1939 | sbsec = dir->i_sb->s_security; | 1941 | sbsec = dir->i_sb->s_security; |
1940 | isec = inode->i_security; | ||
1941 | 1942 | ||
1942 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { | 1943 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { |
1943 | newsid = tsec->create_sid; | 1944 | newsid = tsec->create_sid; |
@@ -1957,7 +1958,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
1957 | 1958 | ||
1958 | inode_security_set_sid(inode, newsid); | 1959 | inode_security_set_sid(inode, newsid); |
1959 | 1960 | ||
1960 | if (sbsec->behavior == SECURITY_FS_USE_MNTPOINT) | 1961 | if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT) |
1961 | return -EOPNOTSUPP; | 1962 | return -EOPNOTSUPP; |
1962 | 1963 | ||
1963 | if (name) { | 1964 | if (name) { |
@@ -4408,6 +4409,9 @@ static __init int selinux_init(void) | |||
4408 | tsec = current->security; | 4409 | tsec = current->security; |
4409 | tsec->osid = tsec->sid = SECINITSID_KERNEL; | 4410 | tsec->osid = tsec->sid = SECINITSID_KERNEL; |
4410 | 4411 | ||
4412 | sel_inode_cache = kmem_cache_create("selinux_inode_security", | ||
4413 | sizeof(struct inode_security_struct), | ||
4414 | 0, SLAB_PANIC, NULL, NULL); | ||
4411 | avc_init(); | 4415 | avc_init(); |
4412 | 4416 | ||
4413 | original_ops = secondary_ops = security_ops; | 4417 | original_ops = secondary_ops = security_ops; |
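
The hooks.c hunks above move the inode security blob from kzalloc()/kfree() onto a dedicated slab cache created at init time. Condensed into one place from the calls visible in this patch (the wrapper function names are invented; SLAB_PANIC makes a failed cache creation panic, so no error path is needed, and the explicit memset() replaces the zeroing that kzalloc() used to do):

static kmem_cache_t *sel_inode_cache;

static void sel_cache_init(void)                /* done once, in selinux_init() */
{
        sel_inode_cache = kmem_cache_create("selinux_inode_security",
                                sizeof(struct inode_security_struct),
                                0, SLAB_PANIC, NULL, NULL);
}

static struct inode_security_struct *sel_blob_alloc(void)      /* inode_alloc_security() */
{
        struct inode_security_struct *isec;

        isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL);
        if (!isec)
                return NULL;
        memset(isec, 0, sizeof(*isec));         /* cache alloc does not zero */
        return isec;
}

static void sel_blob_free(struct inode_security_struct *isec)  /* inode_free_security() */
{
        kmem_cache_free(sel_inode_cache, isec);
}
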
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b5fa02d17b1e..f5d78365488f 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/mutex.h> | ||
18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
19 | #include <linux/string.h> | 20 | #include <linux/string.h> |
20 | #include <linux/security.h> | 21 | #include <linux/security.h> |
@@ -44,7 +45,7 @@ static int __init checkreqprot_setup(char *str) | |||
44 | __setup("checkreqprot=", checkreqprot_setup); | 45 | __setup("checkreqprot=", checkreqprot_setup); |
45 | 46 | ||
46 | 47 | ||
47 | static DECLARE_MUTEX(sel_sem); | 48 | static DEFINE_MUTEX(sel_mutex); |
48 | 49 | ||
49 | /* global data for booleans */ | 50 | /* global data for booleans */ |
50 | static struct dentry *bool_dir = NULL; | 51 | static struct dentry *bool_dir = NULL; |
@@ -230,7 +231,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf, | |||
230 | ssize_t length; | 231 | ssize_t length; |
231 | void *data = NULL; | 232 | void *data = NULL; |
232 | 233 | ||
233 | down(&sel_sem); | 234 | mutex_lock(&sel_mutex); |
234 | 235 | ||
235 | length = task_has_security(current, SECURITY__LOAD_POLICY); | 236 | length = task_has_security(current, SECURITY__LOAD_POLICY); |
236 | if (length) | 237 | if (length) |
@@ -262,7 +263,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf, | |||
262 | else | 263 | else |
263 | length = count; | 264 | length = count; |
264 | out: | 265 | out: |
265 | up(&sel_sem); | 266 | mutex_unlock(&sel_mutex); |
266 | vfree(data); | 267 | vfree(data); |
267 | return length; | 268 | return length; |
268 | } | 269 | } |
@@ -709,12 +710,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, | |||
709 | { | 710 | { |
710 | char *page = NULL; | 711 | char *page = NULL; |
711 | ssize_t length; | 712 | ssize_t length; |
712 | ssize_t end; | ||
713 | ssize_t ret; | 713 | ssize_t ret; |
714 | int cur_enforcing; | 714 | int cur_enforcing; |
715 | struct inode *inode; | 715 | struct inode *inode; |
716 | 716 | ||
717 | down(&sel_sem); | 717 | mutex_lock(&sel_mutex); |
718 | 718 | ||
719 | ret = -EFAULT; | 719 | ret = -EFAULT; |
720 | 720 | ||
@@ -740,26 +740,9 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, | |||
740 | 740 | ||
741 | length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, | 741 | length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, |
742 | bool_pending_values[inode->i_ino - BOOL_INO_OFFSET]); | 742 | bool_pending_values[inode->i_ino - BOOL_INO_OFFSET]); |
743 | if (length < 0) { | 743 | ret = simple_read_from_buffer(buf, count, ppos, page, length); |
744 | ret = length; | ||
745 | goto out; | ||
746 | } | ||
747 | |||
748 | if (*ppos >= length) { | ||
749 | ret = 0; | ||
750 | goto out; | ||
751 | } | ||
752 | if (count + *ppos > length) | ||
753 | count = length - *ppos; | ||
754 | end = count + *ppos; | ||
755 | if (copy_to_user(buf, (char *) page + *ppos, count)) { | ||
756 | ret = -EFAULT; | ||
757 | goto out; | ||
758 | } | ||
759 | *ppos = end; | ||
760 | ret = count; | ||
761 | out: | 744 | out: |
762 | up(&sel_sem); | 745 | mutex_unlock(&sel_mutex); |
763 | if (page) | 746 | if (page) |
764 | free_page((unsigned long)page); | 747 | free_page((unsigned long)page); |
765 | return ret; | 748 | return ret; |
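
sel_read_bool() now hands the *ppos bookkeeping to simple_read_from_buffer() instead of open-coding the length/offset checks and copy_to_user(). Roughly what that helper does, approximated as standalone userspace code with memcpy() in place of copy_to_user() and details simplified:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

typedef long long loff_t_;      /* stand-in for the kernel's loff_t */

static ssize_t read_from_buffer(char *to, size_t count, loff_t_ *ppos,
                                const char *from, size_t available)
{
        loff_t_ pos = *ppos;

        if (pos < 0)
                return -1;                      /* invalid offset */
        if ((size_t)pos >= available)
                return 0;                       /* at or past EOF */
        if (count > available - (size_t)pos)
                count = available - (size_t)pos;
        memcpy(to, from + pos, count);          /* kernel: copy_to_user() */
        *ppos = pos + count;
        return (ssize_t)count;
}

int main(void)
{
        char page[] = "1 1";                    /* "<current> <pending>" as formatted above */
        char buf[8];
        loff_t_ pos = 0;
        ssize_t n = read_from_buffer(buf, sizeof(buf), &pos, page, strlen(page));

        printf("read %zd bytes, pos now %lld\n", n, pos);
        return 0;
}
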
@@ -773,7 +756,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, | |||
773 | int new_value; | 756 | int new_value; |
774 | struct inode *inode; | 757 | struct inode *inode; |
775 | 758 | ||
776 | down(&sel_sem); | 759 | mutex_lock(&sel_mutex); |
777 | 760 | ||
778 | length = task_has_security(current, SECURITY__SETBOOL); | 761 | length = task_has_security(current, SECURITY__SETBOOL); |
779 | if (length) | 762 | if (length) |
@@ -812,7 +795,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, | |||
812 | length = count; | 795 | length = count; |
813 | 796 | ||
814 | out: | 797 | out: |
815 | up(&sel_sem); | 798 | mutex_unlock(&sel_mutex); |
816 | if (page) | 799 | if (page) |
817 | free_page((unsigned long) page); | 800 | free_page((unsigned long) page); |
818 | return length; | 801 | return length; |
@@ -831,7 +814,7 @@ static ssize_t sel_commit_bools_write(struct file *filep, | |||
831 | ssize_t length = -EFAULT; | 814 | ssize_t length = -EFAULT; |
832 | int new_value; | 815 | int new_value; |
833 | 816 | ||
834 | down(&sel_sem); | 817 | mutex_lock(&sel_mutex); |
835 | 818 | ||
836 | length = task_has_security(current, SECURITY__SETBOOL); | 819 | length = task_has_security(current, SECURITY__SETBOOL); |
837 | if (length) | 820 | if (length) |
@@ -869,7 +852,7 @@ static ssize_t sel_commit_bools_write(struct file *filep, | |||
869 | length = count; | 852 | length = count; |
870 | 853 | ||
871 | out: | 854 | out: |
872 | up(&sel_sem); | 855 | mutex_unlock(&sel_mutex); |
873 | if (page) | 856 | if (page) |
874 | free_page((unsigned long) page); | 857 | free_page((unsigned long) page); |
875 | return length; | 858 | return length; |
@@ -987,7 +970,7 @@ out: | |||
987 | return ret; | 970 | return ret; |
988 | err: | 971 | err: |
989 | kfree(values); | 972 | kfree(values); |
990 | d_genocide(dir); | 973 | sel_remove_bools(dir); |
991 | ret = -ENOMEM; | 974 | ret = -ENOMEM; |
992 | goto out; | 975 | goto out; |
993 | } | 976 | } |
@@ -1168,37 +1151,38 @@ static int sel_make_avc_files(struct dentry *dir) | |||
1168 | dentry = d_alloc_name(dir, files[i].name); | 1151 | dentry = d_alloc_name(dir, files[i].name); |
1169 | if (!dentry) { | 1152 | if (!dentry) { |
1170 | ret = -ENOMEM; | 1153 | ret = -ENOMEM; |
1171 | goto err; | 1154 | goto out; |
1172 | } | 1155 | } |
1173 | 1156 | ||
1174 | inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); | 1157 | inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); |
1175 | if (!inode) { | 1158 | if (!inode) { |
1176 | ret = -ENOMEM; | 1159 | ret = -ENOMEM; |
1177 | goto err; | 1160 | goto out; |
1178 | } | 1161 | } |
1179 | inode->i_fop = files[i].ops; | 1162 | inode->i_fop = files[i].ops; |
1180 | d_add(dentry, inode); | 1163 | d_add(dentry, inode); |
1181 | } | 1164 | } |
1182 | out: | 1165 | out: |
1183 | return ret; | 1166 | return ret; |
1184 | err: | ||
1185 | d_genocide(dir); | ||
1186 | goto out; | ||
1187 | } | 1167 | } |
1188 | 1168 | ||
1189 | static int sel_make_dir(struct super_block *sb, struct dentry *dentry) | 1169 | static int sel_make_dir(struct inode *dir, struct dentry *dentry) |
1190 | { | 1170 | { |
1191 | int ret = 0; | 1171 | int ret = 0; |
1192 | struct inode *inode; | 1172 | struct inode *inode; |
1193 | 1173 | ||
1194 | inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); | 1174 | inode = sel_make_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); |
1195 | if (!inode) { | 1175 | if (!inode) { |
1196 | ret = -ENOMEM; | 1176 | ret = -ENOMEM; |
1197 | goto out; | 1177 | goto out; |
1198 | } | 1178 | } |
1199 | inode->i_op = &simple_dir_inode_operations; | 1179 | inode->i_op = &simple_dir_inode_operations; |
1200 | inode->i_fop = &simple_dir_operations; | 1180 | inode->i_fop = &simple_dir_operations; |
1181 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | ||
1182 | inode->i_nlink++; | ||
1201 | d_add(dentry, inode); | 1183 | d_add(dentry, inode); |
1184 | /* bump link count on parent directory, too */ | ||
1185 | dir->i_nlink++; | ||
1202 | out: | 1186 | out: |
1203 | return ret; | 1187 | return ret; |
1204 | } | 1188 | } |
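sel_make_dir() now takes the parent inode rather than the super block so it can keep directory link counts consistent: the new directory starts with i_nlink == 2, and the parent gains a link for the child's "..". A sketch of the same pattern for a generic pseudo-filesystem directory (new_inode() stands in for sel_make_inode(); names are illustrative):

	#include <linux/fs.h>

	static int make_pseudo_dir(struct inode *parent, struct dentry *dentry)
	{
		struct inode *inode = new_inode(parent->i_sb);

		if (!inode)
			return -ENOMEM;

		inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		inode->i_nlink++;	/* "." plus the entry in the parent => 2 */
		d_add(dentry, inode);
		parent->i_nlink++;	/* the new child's ".." points back at the parent */
		return 0;
	}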
@@ -1207,7 +1191,7 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
1207 | { | 1191 | { |
1208 | int ret; | 1192 | int ret; |
1209 | struct dentry *dentry; | 1193 | struct dentry *dentry; |
1210 | struct inode *inode; | 1194 | struct inode *inode, *root_inode; |
1211 | struct inode_security_struct *isec; | 1195 | struct inode_security_struct *isec; |
1212 | 1196 | ||
1213 | static struct tree_descr selinux_files[] = { | 1197 | static struct tree_descr selinux_files[] = { |
@@ -1228,30 +1212,33 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
1228 | }; | 1212 | }; |
1229 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); | 1213 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); |
1230 | if (ret) | 1214 | if (ret) |
1231 | return ret; | 1215 | goto err; |
1216 | |||
1217 | root_inode = sb->s_root->d_inode; | ||
1232 | 1218 | ||
1233 | dentry = d_alloc_name(sb->s_root, BOOL_DIR_NAME); | 1219 | dentry = d_alloc_name(sb->s_root, BOOL_DIR_NAME); |
1234 | if (!dentry) | 1220 | if (!dentry) { |
1235 | return -ENOMEM; | 1221 | ret = -ENOMEM; |
1222 | goto err; | ||
1223 | } | ||
1236 | 1224 | ||
1237 | inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); | 1225 | ret = sel_make_dir(root_inode, dentry); |
1238 | if (!inode) | ||
1239 | goto out; | ||
1240 | inode->i_op = &simple_dir_inode_operations; | ||
1241 | inode->i_fop = &simple_dir_operations; | ||
1242 | d_add(dentry, inode); | ||
1243 | bool_dir = dentry; | ||
1244 | ret = sel_make_bools(); | ||
1245 | if (ret) | 1226 | if (ret) |
1246 | goto out; | 1227 | goto err; |
1228 | |||
1229 | bool_dir = dentry; | ||
1247 | 1230 | ||
1248 | dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME); | 1231 | dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME); |
1249 | if (!dentry) | 1232 | if (!dentry) { |
1250 | return -ENOMEM; | 1233 | ret = -ENOMEM; |
1234 | goto err; | ||
1235 | } | ||
1251 | 1236 | ||
1252 | inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO); | 1237 | inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO); |
1253 | if (!inode) | 1238 | if (!inode) { |
1254 | goto out; | 1239 | ret = -ENOMEM; |
1240 | goto err; | ||
1241 | } | ||
1255 | isec = (struct inode_security_struct*)inode->i_security; | 1242 | isec = (struct inode_security_struct*)inode->i_security; |
1256 | isec->sid = SECINITSID_DEVNULL; | 1243 | isec->sid = SECINITSID_DEVNULL; |
1257 | isec->sclass = SECCLASS_CHR_FILE; | 1244 | isec->sclass = SECCLASS_CHR_FILE; |
@@ -1262,22 +1249,23 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
1262 | selinux_null = dentry; | 1249 | selinux_null = dentry; |
1263 | 1250 | ||
1264 | dentry = d_alloc_name(sb->s_root, "avc"); | 1251 | dentry = d_alloc_name(sb->s_root, "avc"); |
1265 | if (!dentry) | 1252 | if (!dentry) { |
1266 | return -ENOMEM; | 1253 | ret = -ENOMEM; |
1254 | goto err; | ||
1255 | } | ||
1267 | 1256 | ||
1268 | ret = sel_make_dir(sb, dentry); | 1257 | ret = sel_make_dir(root_inode, dentry); |
1269 | if (ret) | 1258 | if (ret) |
1270 | goto out; | 1259 | goto err; |
1271 | 1260 | ||
1272 | ret = sel_make_avc_files(dentry); | 1261 | ret = sel_make_avc_files(dentry); |
1273 | if (ret) | 1262 | if (ret) |
1274 | goto out; | 1263 | goto err; |
1275 | |||
1276 | return 0; | ||
1277 | out: | 1264 | out: |
1278 | dput(dentry); | 1265 | return ret; |
1266 | err: | ||
1279 | printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__); | 1267 | printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__); |
1280 | return -ENOMEM; | 1268 | goto out; |
1281 | } | 1269 | } |
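The sel_fill_super() rework above replaces the scattered early returns and the lone dput() with a single err: path, so every allocation failure is reported and funneled through one exit. A bare sketch of that goto-based single-exit shape (do_more_setup() is a hypothetical helper, not selinuxfs code):

	#include <linux/fs.h>
	#include <linux/kernel.h>

	static int do_more_setup(struct dentry *dentry)
	{
		return 0;	/* hypothetical next step, stubbed for the sketch */
	}

	static int fill_example(struct super_block *sb)
	{
		struct dentry *dentry;
		int ret;

		dentry = d_alloc_name(sb->s_root, "example");
		if (!dentry) {
			ret = -ENOMEM;
			goto err;
		}

		ret = do_more_setup(dentry);
		if (ret)
			goto err;
	out:
		return ret;
	err:
		printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__);
		goto out;
	}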
1282 | 1270 | ||
1283 | static struct super_block *sel_get_sb(struct file_system_type *fs_type, | 1271 | static struct super_block *sel_get_sb(struct file_system_type *fs_type, |
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8a764928ff4b..63e0b7f29cb5 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c | |||
@@ -27,7 +27,8 @@ | |||
27 | #include <linux/in.h> | 27 | #include <linux/in.h> |
28 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
29 | #include <linux/audit.h> | 29 | #include <linux/audit.h> |
30 | #include <asm/semaphore.h> | 30 | #include <linux/mutex.h> |
31 | |||
31 | #include "flask.h" | 32 | #include "flask.h" |
32 | #include "avc.h" | 33 | #include "avc.h" |
33 | #include "avc_ss.h" | 34 | #include "avc_ss.h" |
@@ -48,9 +49,9 @@ static DEFINE_RWLOCK(policy_rwlock); | |||
48 | #define POLICY_RDUNLOCK read_unlock(&policy_rwlock) | 49 | #define POLICY_RDUNLOCK read_unlock(&policy_rwlock) |
49 | #define POLICY_WRUNLOCK write_unlock_irq(&policy_rwlock) | 50 | #define POLICY_WRUNLOCK write_unlock_irq(&policy_rwlock) |
50 | 51 | ||
51 | static DECLARE_MUTEX(load_sem); | 52 | static DEFINE_MUTEX(load_mutex); |
52 | #define LOAD_LOCK down(&load_sem) | 53 | #define LOAD_LOCK mutex_lock(&load_mutex) |
53 | #define LOAD_UNLOCK up(&load_sem) | 54 | #define LOAD_UNLOCK mutex_unlock(&load_mutex) |
54 | 55 | ||
55 | static struct sidtab sidtab; | 56 | static struct sidtab sidtab; |
56 | struct policydb policydb; | 57 | struct policydb policydb; |
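services.c makes the matching change for the policy-load path: the declared semaphore becomes a statically defined mutex, and the existing LOAD_LOCK/LOAD_UNLOCK macros keep their call sites unchanged. A sketch of how those macros bracket a load operation (load_policy_example() is illustrative, not the services.c function):

	#include <linux/mutex.h>

	static DEFINE_MUTEX(load_mutex);
	#define LOAD_LOCK	mutex_lock(&load_mutex)
	#define LOAD_UNLOCK	mutex_unlock(&load_mutex)

	static int load_policy_example(void *data, size_t len)
	{
		int rc = 0;

		LOAD_LOCK;	/* serialize concurrent policy loads */
		/* ... parse the binary policy and swap in the new policydb ... */
		LOAD_UNLOCK;
		return rc;
	}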