175 files changed, 6839 insertions, 5527 deletions
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index 4ef9f7cd5dc3..944aa55e79f8 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
| @@ -1,16 +1,17 @@ | |||
| 1 | Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters | 1 | Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters |
| 2 | ============================================================== | 2 | ============================================================== |
| 3 | 3 | ||
| 4 | November 17, 2004 | 4 | November 15, 2005 |
| 5 | |||
| 6 | 5 | ||
| 7 | Contents | 6 | Contents |
| 8 | ======== | 7 | ======== |
| 9 | 8 | ||
| 10 | - In This Release | 9 | - In This Release |
| 11 | - Identifying Your Adapter | 10 | - Identifying Your Adapter |
| 11 | - Building and Installation | ||
| 12 | - Driver Configuration Parameters | 12 | - Driver Configuration Parameters |
| 13 | - Additional Configurations | 13 | - Additional Configurations |
| 14 | - Known Issues | ||
| 14 | - Support | 15 | - Support |
| 15 | 16 | ||
| 16 | 17 | ||
| @@ -18,18 +19,30 @@ In This Release | |||
| 18 | =============== | 19 | =============== |
| 19 | 20 | ||
| 20 | This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of | 21 | This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of |
| 21 | Adapters, version 3.3.x. This driver supports 2.4.x and 2.6.x kernels. | 22 | Adapters. This driver includes support for Itanium(R)2-based systems. |
| 23 | |||
| 24 | For questions related to hardware requirements, refer to the documentation | ||
| 25 | supplied with your Intel PRO/100 adapter. | ||
| 26 | |||
| 27 | The following features are now available in supported kernels: | ||
| 28 | - Native VLANs | ||
| 29 | - Channel Bonding (teaming) | ||
| 30 | - SNMP | ||
| 31 | |||
| 32 | Channel Bonding documentation can be found in the Linux kernel source: | ||
| 33 | /Documentation/networking/bonding.txt | ||
| 34 | |||
| 22 | 35 | ||
| 23 | Identifying Your Adapter | 36 | Identifying Your Adapter |
| 24 | ======================== | 37 | ======================== |
| 25 | 38 | ||
| 26 | For more information on how to identify your adapter, go to the Adapter & | 39 | For more information on how to identify your adapter, go to the Adapter & |
| 27 | Driver ID Guide at: | 40 | Driver ID Guide at: |
| 28 | 41 | ||
| 29 | http://support.intel.com/support/network/adapter/pro100/21397.htm | 42 | http://support.intel.com/support/network/adapter/pro100/21397.htm |
| 30 | 43 | ||
| 31 | For the latest Intel network drivers for Linux, refer to the following | 44 | For the latest Intel network drivers for Linux, refer to the following |
| 32 | website. In the search field, enter your adapter name or type, or use the | 45 | website. In the search field, enter your adapter name or type, or use the |
| 33 | networking link on the left to search for your adapter: | 46 | networking link on the left to search for your adapter: |
| 34 | 47 | ||
| 35 | http://downloadfinder.intel.com/scripts-df/support_intel.asp | 48 | http://downloadfinder.intel.com/scripts-df/support_intel.asp |
| @@ -40,73 +53,75 @@ Driver Configuration Parameters | |||
| 40 | The default value for each parameter is generally the recommended setting, | 53 | The default value for each parameter is generally the recommended setting, |
| 41 | unless otherwise noted. | 54 | unless otherwise noted. |
| 42 | 55 | ||
| 43 | Rx Descriptors: Number of receive descriptors. A receive descriptor is a data | 56 | Rx Descriptors: Number of receive descriptors. A receive descriptor is a data |
| 44 | structure that describes a receive buffer and its attributes to the network | 57 | structure that describes a receive buffer and its attributes to the network |
| 45 | controller. The data in the descriptor is used by the controller to write | 58 | controller. The data in the descriptor is used by the controller to write |
| 46 | data from the controller to host memory. In the 3.0.x driver the valid | 59 | data from the controller to host memory. In the 3.x.x driver the valid range |
| 47 | range for this parameter is 64-256. The default value is 64. This parameter | 60 | for this parameter is 64-256. The default value is 64. This parameter can be |
| 48 | can be changed using the command | 61 | changed using the command: |
| 49 | 62 | ||
| 50 | ethtool -G eth? rx n, where n is the number of desired rx descriptors. | 63 | ethtool -G eth? rx n, where n is the number of desired rx descriptors. |
| 51 | 64 | ||
| 52 | Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a | 65 | Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data |
| 53 | data structure that describes a transmit buffer and its attributes to the | 66 | structure that describes a transmit buffer and its attributes to the network |
| 54 | network controller. The data in the descriptor is used by the controller to | 67 | controller. The data in the descriptor is used by the controller to read |
| 55 | read data from the host memory to the controller. In the 3.0.x driver the | 68 | data from the host memory to the controller. In the 3.x.x driver the valid |
| 56 | valid range for this parameter is 64-256. The default value is 64. This | 69 | range for this parameter is 64-256. The default value is 64. This parameter |
| 57 | parameter can be changed using the command | 70 | can be changed using the command: |
| 58 | 71 | ||
| 59 | ethtool -G eth? tx n, where n is the number of desired tx descriptors. | 72 | ethtool -G eth? tx n, where n is the number of desired tx descriptors. |
| 60 | 73 | ||
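For example (a sketch only; the interface name eth0 and the value 256 are placeholders), the current and maximum ring sizes can be queried and both rings resized in a single command:

    ethtool -g eth0
    ethtool -G eth0 rx 256 tx 256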
| 61 | Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by | 74 | Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by |
| 62 | default. Ethtool can be used as follows to force speed/duplex. | 75 | default. Ethtool can be used as follows to force speed/duplex. |
| 63 | 76 | ||
| 64 | ethtool -s eth? autoneg off speed {10|100} duplex {full|half} | 77 | ethtool -s eth? autoneg off speed {10|100} duplex {full|half} |
| 65 | 78 | ||
| 66 | NOTE: setting the speed/duplex to incorrect values will cause the link to | 79 | NOTE: setting the speed/duplex to incorrect values will cause the link to |
| 67 | fail. | 80 | fail. |
| 68 | 81 | ||
| 69 | Event Log Message Level: The driver uses the message level flag to log events | 82 | Event Log Message Level: The driver uses the message level flag to log events |
| 70 | to syslog. The message level can be set at driver load time. It can also be | 83 | to syslog. The message level can be set at driver load time. It can also be |
| 71 | set using the command | 84 | set using the command: |
| 72 | 85 | ||
| 73 | ethtool -s eth? msglvl n | 86 | ethtool -s eth? msglvl n |
| 74 | 87 | ||
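As a hedged illustration, assuming the interface is eth0 and that 100 Mbps full-duplex and message level 7 are the desired values:

    ethtool -s eth0 autoneg off speed 100 duplex full
    ethtool -s eth0 msglvl 7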
| 88 | |||
| 75 | Additional Configurations | 89 | Additional Configurations |
| 76 | ========================= | 90 | ========================= |
| 77 | 91 | ||
| 78 | Configuring the Driver on Different Distributions | 92 | Configuring the Driver on Different Distributions |
| 79 | ------------------------------------------------- | 93 | ------------------------------------------------- |
| 80 | 94 | ||
| 81 | Configuring a network driver to load properly when the system is started is | 95 | Configuring a network driver to load properly when the system is started is |
| 82 | distribution dependent. Typically, the configuration process involves adding | 96 | distribution dependent. Typically, the configuration process involves adding |
| 83 | an alias line to /etc/modules.conf as well as editing other system startup | 97 | an alias line to /etc/modules.conf or /etc/modprobe.conf as well as editing |
| 84 | scripts and/or configuration files. Many popular Linux distributions ship | 98 | other system startup scripts and/or configuration files. Many popular Linux |
| 85 | with tools to make these changes for you. To learn the proper way to | 99 | distributions ship with tools to make these changes for you. To learn the |
| 86 | configure a network device for your system, refer to your distribution | 100 | proper way to configure a network device for your system, refer to your |
| 87 | documentation. If during this process you are asked for the driver or module | 101 | distribution documentation. If during this process you are asked for the |
| 88 | name, the name for the Linux Base Driver for the Intel PRO/100 Family of | 102 | driver or module name, the name for the Linux Base Driver for the Intel |
| 89 | Adapters is e100. | 103 | PRO/100 Family of Adapters is e100. |
| 90 | 104 | ||
| 91 | As an example, if you install the e100 driver for two PRO/100 adapters | 105 | As an example, if you install the e100 driver for two PRO/100 adapters |
| 92 | (eth0 and eth1), add the following to modules.conf: | 106 | (eth0 and eth1), add the following to modules.conf or modprobe.conf: |
| 93 | 107 | ||
| 94 | alias eth0 e100 | 108 | alias eth0 e100 |
| 95 | alias eth1 e100 | 109 | alias eth1 e100 |
| 96 | 110 | ||
| 97 | Viewing Link Messages | 111 | Viewing Link Messages |
| 98 | --------------------- | 112 | --------------------- |
| 99 | In order to see link messages and other Intel driver information on your | 113 | In order to see link messages and other Intel driver information on your |
| 100 | console, you must set the dmesg level up to six. This can be done by | 114 | console, you must set the dmesg level up to six. This can be done by |
| 101 | entering the following on the command line before loading the e100 driver: | 115 | entering the following on the command line before loading the e100 driver: |
| 102 | 116 | ||
| 103 | dmesg -n 8 | 117 | dmesg -n 8 |
| 104 | 118 | ||
| 105 | If you wish to see all messages issued by the driver, including debug | 119 | If you wish to see all messages issued by the driver, including debug |
| 106 | messages, set the dmesg level to eight. | 120 | messages, set the dmesg level to eight. |
| 107 | 121 | ||
| 108 | NOTE: This setting is not saved across reboots. | 122 | NOTE: This setting is not saved across reboots. |
| 109 | 123 | ||
| 124 | |||
| 110 | Ethtool | 125 | Ethtool |
| 111 | ------- | 126 | ------- |
| 112 | 127 | ||
| @@ -114,29 +129,27 @@ Additional Configurations | |||
| 114 | diagnostics, as well as displaying statistical information. Ethtool | 129 | diagnostics, as well as displaying statistical information. Ethtool |
| 115 | version 1.6 or later is required for this functionality. | 130 | version 1.6 or later is required for this functionality. |
| 116 | 131 | ||
| 117 | The latest release of ethtool can be found at: | 132 | The latest release of ethtool can be found from |
| 118 | http://sf.net/projects/gkernel. | 133 | http://sourceforge.net/projects/gkernel. |
| 119 | 134 | ||
| 120 | NOTE: This driver uses mii support from the kernel. As a result, when | 135 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support |
| 121 | there is no link, ethtool will report speed/duplex to be 10/half. | 136 | for a more complete ethtool feature set can be enabled by upgrading |
| 137 | ethtool to ethtool-1.8.1. | ||
| 122 | 138 | ||
| 123 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support | ||
| 124 | for a more complete ethtool feature set can be enabled by upgrading | ||
| 125 | ethtool to ethtool-1.8.1. | ||
| 126 | 139 | ||
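For instance (eth0 is a placeholder interface name), the driver name and version, and the current link settings, can be shown with:

    ethtool -i eth0
    ethtool eth0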
| 127 | Enabling Wake on LAN* (WoL) | 140 | Enabling Wake on LAN* (WoL) |
| 128 | --------------------------- | 141 | --------------------------- |
| 129 | WoL is provided through the Ethtool* utility. Ethtool is included with Red | 142 | WoL is provided through the Ethtool* utility. Ethtool is included with Red |
| 130 | Hat* 8.0. For other Linux distributions, download and install Ethtool from | 143 | Hat* 8.0. For other Linux distributions, download and install Ethtool from |
| 131 | the following website: http://sourceforge.net/projects/gkernel. | 144 | the following website: http://sourceforge.net/projects/gkernel. |
| 132 | 145 | ||
| 133 | For instructions on enabling WoL with Ethtool, refer to the Ethtool man | 146 | For instructions on enabling WoL with Ethtool, refer to the Ethtool man page. |
| 134 | page. | ||
| 135 | 147 | ||
| 136 | WoL will be enabled on the system during the next shut down or reboot. For | 148 | WoL will be enabled on the system during the next shut down or reboot. For |
| 137 | this driver version, in order to enable WoL, the e100 driver must be | 149 | this driver version, in order to enable WoL, the e100 driver must be |
| 138 | loaded when shutting down or rebooting the system. | 150 | loaded when shutting down or rebooting the system. |
| 139 | 151 | ||
| 152 | |||
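A minimal sketch, assuming the adapter is eth0 and wake-on-magic-packet is wanted; the "Wake-on:" line in the second command's output confirms the setting:

    ethtool -s eth0 wol g
    ethtool eth0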
| 140 | NAPI | 153 | NAPI |
| 141 | ---- | 154 | ---- |
| 142 | 155 | ||
| @@ -144,6 +157,25 @@ Additional Configurations | |||
| 144 | 157 | ||
| 145 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | 158 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. |
| 146 | 159 | ||
| 160 | Multiple Interfaces on Same Ethernet Broadcast Network | ||
| 161 | ------------------------------------------------------ | ||
| 162 | |||
| 163 | Due to the default ARP behavior on Linux, it is not possible to have | ||
| 164 | one system on two IP networks in the same Ethernet broadcast domain | ||
| 165 | (non-partitioned switch) behave as expected. All Ethernet interfaces | ||
| 166 | will respond to IP traffic for any IP address assigned to the system. | ||
| 167 | This results in unbalanced receive traffic. | ||
| 168 | |||
| 169 | If you have multiple interfaces in a server, either turn on ARP | ||
| 170 | filtering by | ||
| 171 | |||
| 172 | (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter | ||
| 173 | (this only works if your kernel's version is higher than 2.4.5), or | ||
| 174 | |||
| 175 | (2) installing the interfaces in separate broadcast domains (either | ||
| 176 | in different switches or in a switch partitioned to VLANs). | ||
| 177 | |||
| 178 | |||
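A hedged sketch of making the ARP filter setting survive a reboot (exact file locations vary by distribution):

    sysctl -w net.ipv4.conf.all.arp_filter=1
    echo "net.ipv4.conf.all.arp_filter = 1" >> /etc/sysctl.conf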
| 147 | Support | 179 | Support |
| 148 | ======= | 180 | ======= |
| 149 | 181 | ||
| @@ -151,20 +183,24 @@ For general information, go to the Intel support website at: | |||
| 151 | 183 | ||
| 152 | http://support.intel.com | 184 | http://support.intel.com |
| 153 | 185 | ||
| 186 | or the Intel Wired Networking project hosted by Sourceforge at: | ||
| 187 | |||
| 188 | http://sourceforge.net/projects/e1000 | ||
| 189 | |||
| 154 | If an issue is identified with the released source code on the supported | 190 | If an issue is identified with the released source code on the supported |
| 155 | kernel with a supported adapter, email the specific information related to | 191 | kernel with a supported adapter, email the specific information related to the |
| 156 | the issue to linux.nics@intel.com. | 192 | issue to e1000-devel@lists.sourceforge.net. |
| 157 | 193 | ||
| 158 | 194 | ||
| 159 | License | 195 | License |
| 160 | ======= | 196 | ======= |
| 161 | 197 | ||
| 162 | This software program is released under the terms of a license agreement | 198 | This software program is released under the terms of a license agreement |
| 163 | between you ('Licensee') and Intel. Do not use or load this software or any | 199 | between you ('Licensee') and Intel. Do not use or load this software or any |
| 164 | associated materials (collectively, the 'Software') until you have carefully | 200 | associated materials (collectively, the 'Software') until you have carefully |
| 165 | read the full terms and conditions of the LICENSE located in this software | 201 | read the full terms and conditions of the file COPYING located in this software |
| 166 | package. By loading or using the Software, you agree to the terms of this | 202 | package. By loading or using the Software, you agree to the terms of this |
| 167 | Agreement. If you do not agree with the terms of this Agreement, do not | 203 | Agreement. If you do not agree with the terms of this Agreement, do not install |
| 168 | install or use the Software. | 204 | or use the Software. |
| 169 | 205 | ||
| 170 | * Other names and brands may be claimed as the property of others. | 206 | * Other names and brands may be claimed as the property of others. |
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
index 2ebd4058d46d..71fe15af356c 100644
--- a/Documentation/networking/e1000.txt
+++ b/Documentation/networking/e1000.txt
| @@ -1,7 +1,7 @@ | |||
| 1 | Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters | 1 | Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters |
| 2 | =============================================================== | 2 | =============================================================== |
| 3 | 3 | ||
| 4 | November 17, 2004 | 4 | November 15, 2005 |
| 5 | 5 | ||
| 6 | 6 | ||
| 7 | Contents | 7 | Contents |
| @@ -20,254 +20,316 @@ In This Release | |||
| 20 | =============== | 20 | =============== |
| 21 | 21 | ||
| 22 | This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family | 22 | This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family |
| 23 | of Adapters, version 5.x.x. | 23 | of Adapters. This driver includes support for Itanium(R)2-based systems. |
| 24 | 24 | ||
| 25 | For questions related to hardware requirements, refer to the documentation | 25 | For questions related to hardware requirements, refer to the documentation |
| 26 | supplied with your Intel PRO/1000 adapter. All hardware requirements listed | 26 | supplied with your Intel PRO/1000 adapter. All hardware requirements listed |
| 27 | apply to use with Linux. | 27 | apply to use with Linux. |
| 28 | 28 | ||
| 29 | Native VLANs are now available with supported kernels. | 29 | The following features are now available in supported kernels: |
| 30 | - Native VLANs | ||
| 31 | - Channel Bonding (teaming) | ||
| 32 | - SNMP | ||
| 33 | |||
| 34 | Channel Bonding documentation can be found in the Linux kernel source: | ||
| 35 | /Documentation/networking/bonding.txt | ||
| 36 | |||
| 37 | The driver information previously displayed in the /proc filesystem is not | ||
| 38 | supported in this release. Alternatively, you can use ethtool (version 1.6 | ||
| 39 | or later), lspci, and ifconfig to obtain the same information. | ||
| 40 | |||
| 41 | Instructions on updating ethtool can be found in the section "Additional | ||
| 42 | Configurations" later in this document. | ||
| 43 | |||
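For example (eth1 is a placeholder interface name), the information formerly found under /proc can be gathered with:

    ethtool -i eth1
    ethtool eth1
    lspci | grep -i ethernet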
| 30 | 44 | ||
| 31 | Identifying Your Adapter | 45 | Identifying Your Adapter |
| 32 | ======================== | 46 | ======================== |
| 33 | 47 | ||
| 34 | For more information on how to identify your adapter, go to the Adapter & | 48 | For more information on how to identify your adapter, go to the Adapter & |
| 35 | Driver ID Guide at: | 49 | Driver ID Guide at: |
| 36 | 50 | ||
| 37 | http://support.intel.com/support/network/adapter/pro100/21397.htm | 51 | http://support.intel.com/support/network/adapter/pro100/21397.htm |
| 38 | 52 | ||
| 39 | For the latest Intel network drivers for Linux, refer to the following | 53 | For the latest Intel network drivers for Linux, refer to the following |
| 40 | website. In the search field, enter your adapter name or type, or use the | 54 | website. In the search field, enter your adapter name or type, or use the |
| 41 | networking link on the left to search for your adapter: | 55 | networking link on the left to search for your adapter: |
| 42 | 56 | ||
| 43 | http://downloadfinder.intel.com/scripts-df/support_intel.asp | 57 | http://downloadfinder.intel.com/scripts-df/support_intel.asp |
| 44 | 58 | ||
| 45 | Command Line Parameters | ||
| 46 | ======================= | ||
| 47 | 59 | ||
| 48 | If the driver is built as a module, the following optional parameters are | 60 | Command Line Parameters ======================= |
| 49 | used by entering them on the command line with the modprobe or insmod command | 61 | |
| 50 | using this syntax: | 62 | If the driver is built as a module, the following optional parameters |
| 63 | are used by entering them on the command line with the modprobe or insmod | ||
| 64 | command using this syntax: | ||
| 51 | 65 | ||
| 52 | modprobe e1000 [<option>=<VAL1>,<VAL2>,...] | 66 | modprobe e1000 [<option>=<VAL1>,<VAL2>,...] |
| 53 | 67 | ||
| 54 | insmod e1000 [<option>=<VAL1>,<VAL2>,...] | 68 | insmod e1000 [<option>=<VAL1>,<VAL2>,...] |
| 55 | 69 | ||
| 56 | For example, with two PRO/1000 PCI adapters, entering: | 70 | For example, with two PRO/1000 PCI adapters, entering: |
| 57 | 71 | ||
| 58 | insmod e1000 TxDescriptors=80,128 | 72 | insmod e1000 TxDescriptors=80,128 |
| 59 | 73 | ||
| 60 | loads the e1000 driver with 80 TX descriptors for the first adapter and 128 TX | 74 | loads the e1000 driver with 80 TX descriptors for the first adapter and 128 |
| 61 | descriptors for the second adapter. | 75 | TX descriptors for the second adapter. |
| 62 | 76 | ||
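To carry the same options across reboots, an options line can be added to modules.conf or modprobe.conf; this is a sketch only, and the exact file depends on the distribution:

    alias eth0 e1000
    options e1000 TxDescriptors=80,128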
| 63 | The default value for each parameter is generally the recommended setting, | 77 | The default value for each parameter is generally the recommended setting, |
| 64 | unless otherwise noted. Also, if the driver is statically built into the | 78 | unless otherwise noted. |
| 65 | kernel, the driver is loaded with the default values for all the parameters. | 79 | |
| 66 | Ethtool can be used to change some of the parameters at runtime. | 80 | NOTES: For more information about the AutoNeg, Duplex, and Speed |
| 81 | parameters, see the "Speed and Duplex Configuration" section in | ||
| 82 | this document. | ||
| 67 | 83 | ||
| 68 | NOTES: For more information about the AutoNeg, Duplex, and Speed | 84 | For more information about the InterruptThrottleRate, |
| 69 | parameters, see the "Speed and Duplex Configuration" section in | 85 | RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay |
| 70 | this document. | 86 | parameters, see the application note at: |
| 87 | http://www.intel.com/design/network/applnots/ap450.htm | ||
| 71 | 88 | ||
| 72 | For more information about the InterruptThrottleRate, RxIntDelay, | 89 | A descriptor describes a data buffer and attributes related to |
| 73 | TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay parameters, see the | 90 | the data buffer. This information is accessed by the hardware. |
| 74 | application note at: | ||
| 75 | http://www.intel.com/design/network/applnots/ap450.htm | ||
| 76 | 91 | ||
| 77 | A descriptor describes a data buffer and attributes related to the | ||
| 78 | data buffer. This information is accessed by the hardware. | ||
| 79 | 92 | ||
| 80 | AutoNeg (adapters using copper connections only) | 93 | AutoNeg |
| 81 | Valid Range: 0x01-0x0F, 0x20-0x2F | 94 | ------- |
| 95 | (Supported only on adapters with copper connections) | ||
| 96 | Valid Range: 0x01-0x0F, 0x20-0x2F | ||
| 82 | Default Value: 0x2F | 97 | Default Value: 0x2F |
| 83 | This parameter is a bit mask that specifies which speed and duplex | 98 | |
| 84 | settings the board advertises. When this parameter is used, the Speed and | 99 | This parameter is a bit mask that specifies which speed and duplex |
| 85 | Duplex parameters must not be specified. | 100 | settings the board advertises. When this parameter is used, the Speed |
| 86 | NOTE: Refer to the Speed and Duplex section of this readme for more | 101 | and Duplex parameters must not be specified. |
| 87 | information on the AutoNeg parameter. | 102 | |
| 88 | 103 | NOTE: Refer to the Speed and Duplex section of this readme for more | |
| 89 | Duplex (adapters using copper connections only) | 104 | information on the AutoNeg parameter. |
| 90 | Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) | 105 | |
| 106 | |||
| 107 | Duplex | ||
| 108 | ------ | ||
| 109 | (Supported only on adapters with copper connections) | ||
| 110 | Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) | ||
| 91 | Default Value: 0 | 111 | Default Value: 0 |
| 92 | Defines the direction in which data is allowed to flow. Can be either one | 112 | |
| 93 | or two-directional. If both Duplex and the link partner are set to auto- | 113 | Defines the direction in which data is allowed to flow. Can be either |
| 94 | negotiate, the board auto-detects the correct duplex. If the link partner | 114 | one or two-directional. If both Duplex and the link partner are set to |
| 95 | is forced (either full or half), Duplex defaults to half-duplex. | 115 | auto-negotiate, the board auto-detects the correct duplex. If the link |
| 116 | partner is forced (either full or half), Duplex defaults to half-duplex. | ||
| 117 | |||
| 96 | 118 | ||
| 97 | FlowControl | 119 | FlowControl |
| 98 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) | 120 | ---------- |
| 99 | Default: Read flow control settings from the EEPROM | 121 | Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) |
| 100 | This parameter controls the automatic generation(Tx) and response(Rx) to | 122 | Default Value: Reads flow control settings from the EEPROM |
| 101 | Ethernet PAUSE frames. | 123 | |
| 124 | This parameter controls the automatic generation(Tx) and response(Rx) | ||
| 125 | to Ethernet PAUSE frames. | ||
| 126 | |||
| 102 | 127 | ||
| 103 | InterruptThrottleRate | 128 | InterruptThrottleRate |
| 104 | Valid Range: 100-100000 (0=off, 1=dynamic) | 129 | --------------------- |
| 130 | (not supported on Intel 82542, 82543 or 82544-based adapters) | ||
| 131 | Valid Range: 100-100000 (0=off, 1=dynamic) | ||
| 105 | Default Value: 8000 | 132 | Default Value: 8000 |
| 106 | This value represents the maximum number of interrupts per second the | 133 | |
| 107 | controller generates. InterruptThrottleRate is another setting used in | 134 | This value represents the maximum number of interrupts per second the |
| 108 | interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust | 135 | controller generates. InterruptThrottleRate is another setting used in |
| 109 | InterruptThrottleRate based on the current traffic load. | 136 | interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust |
| 110 | Un-supported Adapters: InterruptThrottleRate is NOT supported by 82542, 82543 | 137 | InterruptThrottleRate based on the current traffic load. |
| 111 | or 82544-based adapters. | 138 | |
| 112 | 139 | NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and | |
| 113 | NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and | 140 | RxAbsIntDelay parameters. In other words, minimizing the receive |
| 114 | RxAbsIntDelay parameters. In other words, minimizing the receive | 141 | and/or transmit absolute delays does not force the controller to |
| 115 | and/or transmit absolute delays does not force the controller to | 142 | generate more interrupts than what the Interrupt Throttle Rate |
| 116 | generate more interrupts than what the Interrupt Throttle Rate | 143 | allows. |
| 117 | allows. | 144 | |
| 118 | CAUTION: If you are using the Intel PRO/1000 CT Network Connection | 145 | CAUTION: If you are using the Intel PRO/1000 CT Network Connection |
| 119 | (controller 82547), setting InterruptThrottleRate to a value | 146 | (controller 82547), setting InterruptThrottleRate to a value |
| 120 | greater than 75,000, may hang (stop transmitting) adapters under | 147 | greater than 75,000, may hang (stop transmitting) adapters |
| 121 | certain network conditions. If this occurs a NETDEV WATCHDOG | 148 | under certain network conditions. If this occurs a NETDEV |
| 122 | message is logged in the system event log. In addition, the | 149 | WATCHDOG message is logged in the system event log. In |
| 123 | controller is automatically reset, restoring the network | 150 | addition, the controller is automatically reset, restoring |
| 124 | connection. To eliminate the potential for the hang, ensure | 151 | the network connection. To eliminate the potential for the |
| 125 | that InterruptThrottleRate is set no greater than 75,000 and is | 152 | hang, ensure that InterruptThrottleRate is set no greater |
| 126 | not set to 0. | 153 | than 75,000 and is not set to 0. |
| 127 | NOTE: When e1000 is loaded with default settings and multiple adapters are | 154 | |
| 128 | in use simultaneously, the CPU utilization may increase non-linearly. | 155 | NOTE: When e1000 is loaded with default settings and multiple adapters |
| 129 | In order to limit the CPU utilization without impacting the overall | 156 | are in use simultaneously, the CPU utilization may increase non- |
| 130 | throughput, we recommend that you load the driver as follows: | 157 | linearly. In order to limit the CPU utilization without impacting |
| 131 | 158 | the overall throughput, we recommend that you load the driver as | |
| 132 | insmod e1000.o InterruptThrottleRate=3000,3000,3000 | 159 | follows: |
| 133 | 160 | ||
| 134 | This sets the InterruptThrottleRate to 3000 interrupts/sec for the | 161 | insmod e1000.o InterruptThrottleRate=3000,3000,3000 |
| 135 | first, second, and third instances of the driver. The range of 2000 to | 162 | |
| 136 | 3000 interrupts per second works on a majority of systems and is a | 163 | This sets the InterruptThrottleRate to 3000 interrupts/sec for |
| 137 | good starting point, but the optimal value will be platform-specific. | 164 | the first, second, and third instances of the driver. The range |
| 138 | If CPU utilization is not a concern, use RX_POLLING (NAPI) and default | 165 | of 2000 to 3000 interrupts per second works on a majority of |
| 139 | driver settings. | 166 | systems and is a good starting point, but the optimal value will |
| 167 | be platform-specific. If CPU utilization is not a concern, use | ||
| 168 | RX_POLLING (NAPI) and default driver settings. | ||
| 169 | |||
| 140 | 170 | ||
| 141 | RxDescriptors | 171 | RxDescriptors |
| 142 | Valid Range: 80-256 for 82542 and 82543-based adapters | 172 | ------------- |
| 143 | 80-4096 for all other supported adapters | 173 | Valid Range: 80-256 for 82542 and 82543-based adapters |
| 174 | 80-4096 for all other supported adapters | ||
| 144 | Default Value: 256 | 175 | Default Value: 256 |
| 145 | This value is the number of receive descriptors allocated by the driver. | ||
| 146 | Increasing this value allows the driver to buffer more incoming packets. | ||
| 147 | Each descriptor is 16 bytes. A receive buffer is allocated for each | ||
| 148 | descriptor and can either be 2048 or 4096 bytes long, depending on the MTU | ||
| 149 | 176 | ||
| 150 | setting. An incoming packet can span one or more receive descriptors. | 177 | This value specifies the number of receive descriptors allocated by the |
| 151 | The maximum MTU size is 16110. | 178 | driver. Increasing this value allows the driver to buffer more incoming |
| 179 | packets. Each descriptor is 16 bytes. A receive buffer is also | ||
| 180 | allocated for each descriptor and is 2048. | ||
| 152 | 181 | ||
| 153 | NOTE: MTU designates the frame size. It only needs to be set for Jumbo | ||
| 154 | Frames. | ||
| 155 | NOTE: Depending on the available system resources, the request for a | ||
| 156 | higher number of receive descriptors may be denied. In this case, | ||
| 157 | use a lower number. | ||
| 158 | 182 | ||
| 159 | RxIntDelay | 183 | RxIntDelay |
| 160 | Valid Range: 0-65535 (0=off) | 184 | ---------- |
| 185 | Valid Range: 0-65535 (0=off) | ||
| 161 | Default Value: 0 | 186 | Default Value: 0 |
| 162 | This value delays the generation of receive interrupts in units of 1.024 | 187 | |
| 163 | microseconds. Receive interrupt reduction can improve CPU efficiency if | 188 | This value delays the generation of receive interrupts in units of 1.024 |
| 164 | properly tuned for specific network traffic. Increasing this value adds | 189 | microseconds. Receive interrupt reduction can improve CPU efficiency if |
| 165 | extra latency to frame reception and can end up decreasing the throughput | 190 | properly tuned for specific network traffic. Increasing this value adds |
| 166 | of TCP traffic. If the system is reporting dropped receives, this value | 191 | extra latency to frame reception and can end up decreasing the throughput |
| 167 | may be set too high, causing the driver to run out of available receive | 192 | of TCP traffic. If the system is reporting dropped receives, this value |
| 168 | descriptors. | 193 | may be set too high, causing the driver to run out of available receive |
| 169 | 194 | descriptors. | |
| 170 | CAUTION: When setting RxIntDelay to a value other than 0, adapters may | 195 | |
| 171 | hang (stop transmitting) under certain network conditions. If | 196 | CAUTION: When setting RxIntDelay to a value other than 0, adapters may |
| 172 | this occurs a NETDEV WATCHDOG message is logged in the system | 197 | hang (stop transmitting) under certain network conditions. If |
| 173 | event log. In addition, the controller is automatically reset, | 198 | this occurs a NETDEV WATCHDOG message is logged in the system |
| 174 | restoring the network connection. To eliminate the potential for | 199 | event log. In addition, the controller is automatically reset, |
| 175 | the hang ensure that RxIntDelay is set to 0. | 200 | restoring the network connection. To eliminate the potential |
| 176 | 201 | for the hang ensure that RxIntDelay is set to 0. | |
| 177 | RxAbsIntDelay (82540, 82545 and later adapters only) | 202 | |
| 178 | Valid Range: 0-65535 (0=off) | 203 | |
| 204 | RxAbsIntDelay | ||
| 205 | ------------- | ||
| 206 | (This parameter is supported only on 82540, 82545 and later adapters.) | ||
| 207 | Valid Range: 0-65535 (0=off) | ||
| 179 | Default Value: 128 | 208 | Default Value: 128 |
| 180 | This value, in units of 1.024 microseconds, limits the delay in which a | 209 | |
| 181 | receive interrupt is generated. Useful only if RxIntDelay is non-zero, | 210 | This value, in units of 1.024 microseconds, limits the delay in which a |
| 182 | this value ensures that an interrupt is generated after the initial | 211 | receive interrupt is generated. Useful only if RxIntDelay is non-zero, |
| 183 | packet is received within the set amount of time. Proper tuning, | 212 | this value ensures that an interrupt is generated after the initial |
| 184 | along with RxIntDelay, may improve traffic throughput in specific network | 213 | packet is received within the set amount of time. Proper tuning, |
| 185 | conditions. | 214 | along with RxIntDelay, may improve traffic throughput in specific network |
| 186 | 215 | conditions. | |
| 187 | Speed (adapters using copper connections only) | 216 | |
| 217 | |||
| 218 | Speed | ||
| 219 | ----- | ||
| 220 | (This parameter is supported only on adapters with copper connections.) | ||
| 188 | Valid Settings: 0, 10, 100, 1000 | 221 | Valid Settings: 0, 10, 100, 1000 |
| 189 | Default Value: 0 (auto-negotiate at all supported speeds) | 222 | Default Value: 0 (auto-negotiate at all supported speeds) |
| 190 | Speed forces the line speed to the specified value in megabits per second | 223 | |
| 191 | (Mbps). If this parameter is not specified or is set to 0 and the link | 224 | Speed forces the line speed to the specified value in megabits per second |
| 192 | partner is set to auto-negotiate, the board will auto-detect the correct | 225 | (Mbps). If this parameter is not specified or is set to 0 and the link |
| 193 | speed. Duplex should also be set when Speed is set to either 10 or 100. | 226 | partner is set to auto-negotiate, the board will auto-detect the correct |
| 227 | speed. Duplex should also be set when Speed is set to either 10 or 100. | ||
| 228 | |||
| 194 | 229 | ||
| 195 | TxDescriptors | 230 | TxDescriptors |
| 196 | Valid Range: 80-256 for 82542 and 82543-based adapters | 231 | ------------- |
| 197 | 80-4096 for all other supported adapters | 232 | Valid Range: 80-256 for 82542 and 82543-based adapters |
| 233 | 80-4096 for all other supported adapters | ||
| 198 | Default Value: 256 | 234 | Default Value: 256 |
| 199 | This value is the number of transmit descriptors allocated by the driver. | ||
| 200 | Increasing this value allows the driver to queue more transmits. Each | ||
| 201 | descriptor is 16 bytes. | ||
| 202 | 235 | ||
| 203 | NOTE: Depending on the available system resources, the request for a | 236 | This value is the number of transmit descriptors allocated by the driver. |
| 204 | higher number of transmit descriptors may be denied. In this case, | 237 | Increasing this value allows the driver to queue more transmits. Each |
| 205 | use a lower number. | 238 | descriptor is 16 bytes. |
| 239 | |||
| 240 | NOTE: Depending on the available system resources, the request for a | ||
| 241 | higher number of transmit descriptors may be denied. In this case, | ||
| 242 | use a lower number. | ||
| 243 | |||
| 206 | 244 | ||
| 207 | TxIntDelay | 245 | TxIntDelay |
| 208 | Valid Range: 0-65535 (0=off) | 246 | ---------- |
| 247 | Valid Range: 0-65535 (0=off) | ||
| 209 | Default Value: 64 | 248 | Default Value: 64 |
| 210 | This value delays the generation of transmit interrupts in units of | 249 | |
| 211 | 1.024 microseconds. Transmit interrupt reduction can improve CPU | 250 | This value delays the generation of transmit interrupts in units of |
| 212 | efficiency if properly tuned for specific network traffic. If the | 251 | 1.024 microseconds. Transmit interrupt reduction can improve CPU |
| 213 | system is reporting dropped transmits, this value may be set too high | 252 | efficiency if properly tuned for specific network traffic. If the |
| 214 | causing the driver to run out of available transmit descriptors. | 253 | system is reporting dropped transmits, this value may be set too high |
| 215 | 254 | causing the driver to run out of available transmit descriptors. | |
| 216 | TxAbsIntDelay (82540, 82545 and later adapters only) | 255 | |
| 217 | Valid Range: 0-65535 (0=off) | 256 | |
| 257 | TxAbsIntDelay | ||
| 258 | ------------- | ||
| 259 | (This parameter is supported only on 82540, 82545 and later adapters.) | ||
| 260 | Valid Range: 0-65535 (0=off) | ||
| 218 | Default Value: 64 | 261 | Default Value: 64 |
| 219 | This value, in units of 1.024 microseconds, limits the delay in which a | 262 | |
| 220 | transmit interrupt is generated. Useful only if TxIntDelay is non-zero, | 263 | This value, in units of 1.024 microseconds, limits the delay in which a |
| 221 | this value ensures that an interrupt is generated after the initial | 264 | transmit interrupt is generated. Useful only if TxIntDelay is non-zero, |
| 222 | packet is sent on the wire within the set amount of time. Proper tuning, | 265 | this value ensures that an interrupt is generated after the initial |
| 223 | along with TxIntDelay, may improve traffic throughput in specific | 266 | packet is sent on the wire within the set amount of time. Proper tuning, |
| 224 | network conditions. | 267 | along with TxIntDelay, may improve traffic throughput in specific |
| 225 | 268 | network conditions. | |
| 226 | XsumRX (not available on the 82542-based adapter) | 269 | |
| 227 | Valid Range: 0-1 | 270 | XsumRX |
| 271 | ------ | ||
| 272 | (This parameter is NOT supported on the 82542-based adapter.) | ||
| 273 | Valid Range: 0-1 | ||
| 228 | Default Value: 1 | 274 | Default Value: 1 |
| 229 | A value of '1' indicates that the driver should enable IP checksum | 275 | |
| 230 | offload for received packets (both UDP and TCP) to the adapter hardware. | 276 | A value of '1' indicates that the driver should enable IP checksum |
| 277 | offload for received packets (both UDP and TCP) to the adapter hardware. | ||
| 278 | |||
| 231 | 279 | ||
| 232 | Speed and Duplex Configuration | 280 | Speed and Duplex Configuration |
| 233 | ============================== | 281 | ============================== |
| 234 | 282 | ||
| 235 | Three keywords are used to control the speed and duplex configuration. These | 283 | Three keywords are used to control the speed and duplex configuration. |
| 236 | keywords are Speed, Duplex, and AutoNeg. | 284 | These keywords are Speed, Duplex, and AutoNeg. |
| 237 | 285 | ||
| 238 | If the board uses a fiber interface, these keywords are ignored, and the | 286 | If the board uses a fiber interface, these keywords are ignored, and the |
| 239 | fiber interface board only links at 1000 Mbps full-duplex. | 287 | fiber interface board only links at 1000 Mbps full-duplex. |
| 240 | 288 | ||
| 241 | For copper-based boards, the keywords interact as follows: | 289 | For copper-based boards, the keywords interact as follows: |
| 242 | 290 | ||
| 243 | The default operation is auto-negotiate. The board advertises all supported | 291 | The default operation is auto-negotiate. The board advertises all |
| 244 | speed and duplex combinations, and it links at the highest common speed and | 292 | supported speed and duplex combinations, and it links at the highest |
| 245 | duplex mode IF the link partner is set to auto-negotiate. | 293 | common speed and duplex mode IF the link partner is set to auto-negotiate. |
| 246 | 294 | ||
| 247 | If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps is | 295 | If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps |
| 248 | advertised (The 1000BaseT spec requires auto-negotiation.) | 296 | is advertised (The 1000BaseT spec requires auto-negotiation.) |
| 249 | 297 | ||
| 250 | If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- | 298 | If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- |
| 251 | negotiation is disabled, and the AutoNeg parameter is ignored. Partner SHOULD | 299 | negotiation is disabled, and the AutoNeg parameter is ignored. Partner |
| 252 | also be forced. | 300 | SHOULD also be forced. |
| 301 | |||
| 302 | The AutoNeg parameter is used when more control is required over the | ||
| 303 | auto-negotiation process. It should be used when you wish to control which | ||
| 304 | speed and duplex combinations are advertised during the auto-negotiation | ||
| 305 | process. | ||
| 306 | |||
| 307 | The parameter may be specified as either a decimal or hexadecimal value as | ||
| 308 | determined by the bitmap below. | ||
| 253 | 309 | ||
| 254 | The AutoNeg parameter is used when more control is required over the auto- | 310 | Bit position 7 6 5 4 3 2 1 0 |
| 255 | negotiation process. When this parameter is used, Speed and Duplex parameters | 311 | Decimal Value 128 64 32 16 8 4 2 1 |
| 256 | must not be specified. The following table describes supported values for the | 312 | Hex value 80 40 20 10 8 4 2 1 |
| 257 | AutoNeg parameter: | 313 | Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 |
| 314 | Duplex Full Full Half Full Half | ||
| 258 | 315 | ||
| 259 | Speed (Mbps) 1000 100 100 10 10 | 316 | Some examples of using AutoNeg: |
| 260 | Duplex Full Full Half Full Half | ||
| 261 | Value (in base 16) 0x20 0x08 0x04 0x02 0x01 | ||
| 262 | 317 | ||
| 263 | Example: insmod e1000 AutoNeg=0x03, loads e1000 and specifies (10 full duplex, | 318 | modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half) |
| 264 | 10 half duplex) for negotiation with the peer. | 319 | modprobe e1000 AutoNeg=1 (Same as above) |
| 320 | modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full) | ||
| 321 | modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full) | ||
| 322 | modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half) | ||
| 323 | modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100 | ||
| 324 | Half) | ||
| 325 | modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full) | ||
| 326 | modprobe e1000 AutoNeg=32 (Same as above) | ||
| 265 | 327 | ||
| 266 | Note that setting AutoNeg does not guarantee that the board will link at the | 328 | Note that when this parameter is used, Speed and Duplex must not be specified. |
| 267 | highest specified speed or duplex mode, but the board will link at the | 329 | |
| 268 | highest possible speed/duplex of the link partner IF the link partner is also | 330 | If the link partner is forced to a specific speed and duplex, then this |
| 269 | set to auto-negotiate. If the link partner is forced speed/duplex, the | 331 | parameter should not be used. Instead, use the Speed and Duplex parameters |
| 270 | adapter MUST be forced to the same speed/duplex. | 332 | previously mentioned to force the adapter to the same speed and duplex. |
| 271 | 333 | ||
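As a hypothetical example, forcing 100 Mbps full-duplex on a copper adapter (the link partner must be forced to the same values):

    modprobe e1000 Speed=100 Duplex=2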
| 272 | 334 | ||
| 273 | Additional Configurations | 335 | Additional Configurations |
| @@ -276,19 +338,19 @@ Additional Configurations | |||
| 276 | Configuring the Driver on Different Distributions | 338 | Configuring the Driver on Different Distributions |
| 277 | ------------------------------------------------- | 339 | ------------------------------------------------- |
| 278 | 340 | ||
| 279 | Configuring a network driver to load properly when the system is started is | 341 | Configuring a network driver to load properly when the system is started |
| 280 | distribution dependent. Typically, the configuration process involves adding | 342 | is distribution dependent. Typically, the configuration process involves |
| 281 | an alias line to /etc/modules.conf as well as editing other system startup | 343 | adding an alias line to /etc/modules.conf or /etc/modprobe.conf as well |
| 282 | scripts and/or configuration files. Many popular Linux distributions ship | 344 | as editing other system startup scripts and/or configuration files. Many |
| 283 | with tools to make these changes for you. To learn the proper way to | 345 | popular Linux distributions ship with tools to make these changes for you. |
| 284 | configure a network device for your system, refer to your distribution | 346 | To learn the proper way to configure a network device for your system, |
| 285 | documentation. If during this process you are asked for the driver or module | 347 | refer to your distribution documentation. If during this process you are |
| 286 | name, the name for the Linux Base Driver for the Intel PRO/1000 Family of | 348 | asked for the driver or module name, the name for the Linux Base Driver |
| 287 | Adapters is e1000. | 349 | for the Intel PRO/1000 Family of Adapters is e1000. |
| 288 | 350 | ||
| 289 | As an example, if you install the e1000 driver for two PRO/1000 adapters | 351 | As an example, if you install the e1000 driver for two PRO/1000 adapters |
| 290 | (eth0 and eth1) and set the speed and duplex to 10full and 100half, add the | 352 | (eth0 and eth1) and set the speed and duplex to 10full and 100half, add |
| 291 | following to modules.conf: | 353 | the following to modules.conf or modprobe.conf: |
| 292 | 354 | ||
| 293 | alias eth0 e1000 | 355 | alias eth0 e1000 |
| 294 | alias eth1 e1000 | 356 | alias eth1 e1000 |
| @@ -297,9 +359,9 @@ Additional Configurations | |||
| 297 | Viewing Link Messages | 359 | Viewing Link Messages |
| 298 | --------------------- | 360 | --------------------- |
| 299 | 361 | ||
| 300 | Link messages will not be displayed to the console if the distribution is | 362 | Link messages will not be displayed to the console if the distribution is |
| 301 | restricting system messages. In order to see network driver link messages on | 363 | restricting system messages. In order to see network driver link messages |
| 302 | your console, set dmesg to eight by entering the following: | 364 | on your console, set dmesg to eight by entering the following: |
| 303 | 365 | ||
| 304 | dmesg -n 8 | 366 | dmesg -n 8 |
| 305 | 367 | ||
| @@ -308,22 +370,42 @@ Additional Configurations | |||
| 308 | Jumbo Frames | 370 | Jumbo Frames |
| 309 | ------------ | 371 | ------------ |
| 310 | 372 | ||
| 311 | The driver supports Jumbo Frames for all adapters except 82542-based | 373 | The driver supports Jumbo Frames for all adapters except 82542 and |
| 312 | adapters. Jumbo Frames support is enabled by changing the MTU to a value | 374 | 82573-based adapters. Jumbo Frames support is enabled by changing the |
| 313 | larger than the default of 1500. Use the ifconfig command to increase the | 375 | MTU to a value larger than the default of 1500. Use the ifconfig command |
| 314 | MTU size. For example: | 376 | to increase the MTU size. For example: |
| 377 | |||
| 378 | ifconfig eth<x> mtu 9000 up | ||
| 379 | |||
| 380 | This setting is not saved across reboots. It can be made permanent if | ||
| 381 | you add: | ||
| 382 | |||
| 383 | MTU=9000 | ||
| 315 | 384 | ||
| 316 | ifconfig ethx mtu 9000 up | 385 | to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>. This example |
| 386 | applies to the Red Hat distributions; other distributions may store this | ||
| 387 | setting in a different location. | ||
| 317 | 388 | ||
| 318 | The maximum MTU setting for Jumbo Frames is 16110. This value coincides | 389 | Notes: |
| 319 | with the maximum Jumbo Frames size of 16128. | ||
| 320 | 390 | ||
| 321 | NOTE: Jumbo Frames are supported at 1000 Mbps only. Using Jumbo Frames at | 391 | - To enable Jumbo Frames, increase the MTU size on the interface beyond |
| 322 | 10 or 100 Mbps may result in poor performance or loss of link. | 392 | 1500. |
| 393 | - The maximum MTU setting for Jumbo Frames is 16110. This value coincides | ||
| 394 | with the maximum Jumbo Frames size of 16128. | ||
| 395 | - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or | ||
| 396 | loss of link. | ||
| 397 | - Some Intel gigabit adapters that support Jumbo Frames have a frame size | ||
| 398 | limit of 9238 bytes, with a corresponding MTU size limit of 9216 bytes. | ||
| 399 | The adapters with this limitation are based on the Intel 82571EB and | ||
| 400 | 82572EI controllers, which correspond to these product names: | ||
| 401 | Intel® PRO/1000 PT Dual Port Server Adapter | ||
| 402 | Intel® PRO/1000 PF Dual Port Server Adapter | ||
| 403 | Intel® PRO/1000 PT Server Adapter | ||
| 404 | Intel® PRO/1000 PT Desktop Adapter | ||
| 405 | Intel® PRO/1000 PF Server Adapter | ||
| 323 | 406 | ||
| 407 | - The Intel PRO/1000 PM Network Connection does not support jumbo frames. | ||
| 324 | 408 | ||
| 325 | NOTE: MTU designates the frame size. To enable Jumbo Frames, increase the | ||
| 326 | MTU size on the interface beyond 1500. | ||
| 327 | 409 | ||
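After raising the MTU, the new value can be confirmed (eth0 is a placeholder interface name):

    ifconfig eth0 | grep -i mtu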
| 328 | Ethtool | 410 | Ethtool |
| 329 | ------- | 411 | ------- |
| @@ -333,32 +415,41 @@ Additional Configurations | |||
| 333 | version 1.6 or later is required for this functionality. | 415 | version 1.6 or later is required for this functionality. |
| 334 | 416 | ||
| 335 | The latest release of ethtool can be found from | 417 | The latest release of ethtool can be found from |
| 336 | http://sf.net/projects/gkernel. | 418 | http://sourceforge.net/projects/gkernel. |
| 337 | 419 | ||
| 338 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support | 420 | NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support |
| 339 | for a more complete ethtool feature set can be enabled by upgrading | 421 | for a more complete ethtool feature set can be enabled by upgrading |
| 340 | ethtool to ethtool-1.8.1. | 422 | ethtool to ethtool-1.8.1. |
| 341 | 423 | ||
| 342 | Enabling Wake on LAN* (WoL) | 424 | Enabling Wake on LAN* (WoL) |
| 343 | --------------------------- | 425 | --------------------------- |
| 344 | 426 | ||
| 345 | WoL is configured through the Ethtool* utility. Ethtool is included with | 427 | WoL is configured through the Ethtool* utility. Ethtool is included with |
| 346 | all versions of Red Hat after Red Hat 7.2. For other Linux distributions, | 428 | all versions of Red Hat after Red Hat 7.2. For other Linux distributions, |
| 347 | download and install Ethtool from the following website: | 429 | download and install Ethtool from the following website: |
| 348 | http://sourceforge.net/projects/gkernel. | 430 | http://sourceforge.net/projects/gkernel. |
| 349 | 431 | ||
| 350 | For instructions on enabling WoL with Ethtool, refer to the website listed | 432 | For instructions on enabling WoL with Ethtool, refer to the website listed |
| 351 | above. | 433 | above. |
| 352 | 434 | ||
| 353 | WoL will be enabled on the system during the next shut down or reboot. | 435 | WoL will be enabled on the system during the next shut down or reboot. |
| 354 | For this driver version, in order to enable WoL, the e1000 driver must be | 436 | For this driver version, in order to enable WoL, the e1000 driver must be |
| 355 | loaded when shutting down or rebooting the system. | 437 | loaded when shutting down or rebooting the system. |
| 356 | 438 | ||
| 357 | NAPI | 439 | NAPI |
| 358 | ---- | 440 | ---- |
| 359 | 441 | ||
| 360 | NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled | 442 | NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled |
| 361 | or disabled based on the configuration of the kernel. | 443 | or disabled based on the configuration of the kernel. To override |
| 444 | the default, use the following compile-time flags. | ||
| 445 | |||
| 446 | To enable NAPI, compile the driver module, passing in a configuration option: | ||
| 447 | |||
| 448 | make CFLAGS_EXTRA=-DE1000_NAPI install | ||
| 449 | |||
| 450 | To disable NAPI, compile the driver module, passing in a configuration option: | ||
| 451 | |||
| 452 | make CFLAGS_EXTRA=-DE1000_NO_NAPI install | ||
| 362 | 453 | ||
| 363 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. | 454 | See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. |
| 364 | 455 | ||
| @@ -369,10 +460,85 @@ Known Issues | |||
| 369 | Jumbo Frames System Requirement | 460 | Jumbo Frames System Requirement |
| 370 | ------------------------------- | 461 | ------------------------------- |
| 371 | 462 | ||
| 372 | Memory allocation failures have been observed on Linux systems with 64 MB | 463 | Memory allocation failures have been observed on Linux systems with 64 MB |
| 373 | of RAM or less that are running Jumbo Frames. If you are using Jumbo Frames, | 464 | of RAM or less that are running Jumbo Frames. If you are using Jumbo |
| 374 | your system may require more than the advertised minimum requirement of 64 MB | 465 | Frames, your system may require more than the advertised minimum |
| 375 | of system memory. | 466 | requirement of 64 MB of system memory. |
| 467 | |||
| 468 | Performance Degradation with Jumbo Frames | ||
| 469 | ----------------------------------------- | ||
| 470 | |||
| 471 | Degradation in throughput performance may be observed in some Jumbo frames | ||
| 472 | environments. If this is observed, increasing the application's socket | ||
| 473 | buffer size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values | ||
| 474 | may help. See the specific application manual and | ||
| 475 | /usr/src/linux*/Documentation/networking/ip-sysctl.txt | ||
| 476 | for more details. | ||
| 477 | |||
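A hedged example of raising the TCP memory limits at runtime; the values shown are illustrative only and should be sized for the system:

    echo "4096 87380 4194304" > /proc/sys/net/ipv4/tcp_rmem
    echo "4096 65536 4194304" > /proc/sys/net/ipv4/tcp_wmem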
| 478 | Jumbo frames on Foundry BigIron 8000 switch | ||
| 479 | ------------------------------------------- | ||
| 480 | There is a known issue using Jumbo frames when connected to a Foundry | ||
| 481 | BigIron 8000 switch. This is a 3rd party limitation. If you experience | ||
| 482 | loss of packets, lower the MTU size. | ||
| 483 | |||
| 484 | Multiple Interfaces on Same Ethernet Broadcast Network | ||
| 485 | ------------------------------------------------------ | ||
| 486 | |||
| 487 | Due to the default ARP behavior on Linux, it is not possible to have | ||
| 488 | one system on two IP networks in the same Ethernet broadcast domain | ||
| 489 | (non-partitioned switch) behave as expected. All Ethernet interfaces | ||
| 490 | will respond to IP traffic for any IP address assigned to the system. | ||
| 491 | This results in unbalanced receive traffic. | ||
| 492 | |||
| 493 | If you have multiple interfaces in a server, either turn on ARP | ||
| 494 | filtering by entering: | ||
| 495 | |||
| 496 | echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter | ||
| 497 | (this only works if your kernel's version is higher than 2.4.5), | ||
| 498 | |||
| 499 | NOTE: This setting is not saved across reboots. The configuration | ||
| 500 | change can be made permanent by adding the line: | ||
| 501 | net.ipv4.conf.all.arp_filter = 1 | ||
| 502 | to the file /etc/sysctl.conf | ||
| 503 | |||
| 504 | or, | ||
| 505 | |||
| 506 | install the interfaces in separate broadcast domains (either in | ||
| 507 | different switches or in a switch partitioned to VLANs). | ||
| 508 | |||
| 509 | 82541/82547 can't link or are slow to link with some link partners | ||
| 510 | ----------------------------------------------------------------- | ||
| 511 | |||
| 512 | There is a known compatibility issue with 82541/82547 and some | ||
| 513 | low-end switches where the link will not be established, or will | ||
| 514 | be slow to establish. In particular, these switches are known to | ||
| 515 | be incompatible with 82541/82547: | ||
| 516 | |||
| 517 | Planex FXG-08TE | ||
| 518 | I-O Data ETG-SH8 | ||
| 519 | |||
| 520 | To work around this issue, the driver can be compiled with an override | ||
| 521 | of the PHY's master/slave setting. Forcing master or forcing slave | ||
| 522 | mode will improve time-to-link. | ||
| 523 | |||
| 524 | # make EXTRA_CFLAGS=-DE1000_MASTER_SLAVE=<n> | ||
| 525 | |||
| 526 | Where <n> is: | ||
| 527 | |||
| 528 | 0 = Hardware default | ||
| 529 | 1 = Master mode | ||
| 530 | 2 = Slave mode | ||
| 531 | 3 = Auto master/slave | ||
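
For example, to force the PHY into slave mode (value 2 from the list above;
shown here as an illustrative invocation of the same make command):

   # make EXTRA_CFLAGS=-DE1000_MASTER_SLAVE=2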
| 532 | |||
| 533 | Disable rx flow control with ethtool | ||
| 534 | ------------------------------------ | ||
| 535 | |||
| 536 | In order to disable receive flow control using ethtool, you must turn | ||
| 537 | off auto-negotiation on the same command line. | ||
| 538 | |||
| 539 | For example: | ||
| 540 | |||
| 541 | ethtool -A eth? autoneg off rx off | ||
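
The resulting pause parameter settings can be checked with ethtool's query
option, replacing eth? with the actual interface name (eth0 is used here
only as an example):

   ethtool -a eth0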
| 376 | 542 | ||
| 377 | 543 | ||
| 378 | Support | 544 | Support |
| @@ -382,20 +548,24 @@ For general information, go to the Intel support website at: | |||
| 382 | 548 | ||
| 383 | http://support.intel.com | 549 | http://support.intel.com |
| 384 | 550 | ||
| 551 | or the Intel Wired Networking project hosted on SourceForge at: | ||
| 552 | |||
| 553 | http://sourceforge.net/projects/e1000 | ||
| 554 | |||
| 385 | If an issue is identified with the released source code on the supported | 555 | If an issue is identified with the released source code on the supported |
| 386 | kernel with a supported adapter, email the specific information related to | 556 | kernel with a supported adapter, email the specific information related |
| 387 | the issue to linux.nics@intel.com. | 557 | to the issue to e1000-devel@lists.sourceforge.net |
| 388 | 558 | ||
| 389 | 559 | ||
| 390 | License | 560 | License |
| 391 | ======= | 561 | ======= |
| 392 | 562 | ||
| 393 | This software program is released under the terms of a license agreement | 563 | This software program is released under the terms of a license agreement |
| 394 | between you ('Licensee') and Intel. Do not use or load this software or any | 564 | between you ('Licensee') and Intel. Do not use or load this software or any |
| 395 | associated materials (collectively, the 'Software') until you have carefully | 565 | associated materials (collectively, the 'Software') until you have carefully |
| 396 | read the full terms and conditions of the LICENSE located in this software | 566 | read the full terms and conditions of the file COPYING located in this software |
| 397 | package. By loading or using the Software, you agree to the terms of this | 567 | package. By loading or using the Software, you agree to the terms of this |
| 398 | Agreement. If you do not agree with the terms of this Agreement, do not | 568 | Agreement. If you do not agree with the terms of this Agreement, do not |
| 399 | install or use the Software. | 569 | install or use the Software. |
| 400 | 570 | ||
| 401 | * Other names and brands may be claimed as the property of others. | 571 | * Other names and brands may be claimed as the property of others. |
diff --git a/MAINTAINERS b/MAINTAINERS index b0dc75a5e74e..dd1351dc32b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -1349,10 +1349,10 @@ S: Maintained | |||
| 1349 | INTEL PRO/100 ETHERNET SUPPORT | 1349 | INTEL PRO/100 ETHERNET SUPPORT |
| 1350 | P: John Ronciak | 1350 | P: John Ronciak |
| 1351 | M: john.ronciak@intel.com | 1351 | M: john.ronciak@intel.com |
| 1352 | P: Ganesh Venkatesan | ||
| 1353 | M: ganesh.venkatesan@intel.com | ||
| 1354 | P: Jesse Brandeburg | 1352 | P: Jesse Brandeburg |
| 1355 | M: jesse.brandeburg@intel.com | 1353 | M: jesse.brandeburg@intel.com |
| 1354 | P: Jeff Kirsher | ||
| 1355 | M: jeffrey.t.kirsher@intel.com | ||
| 1356 | W: http://sourceforge.net/projects/e1000/ | 1356 | W: http://sourceforge.net/projects/e1000/ |
| 1357 | S: Supported | 1357 | S: Supported |
| 1358 | 1358 | ||
| @@ -1361,18 +1361,22 @@ P: Jeb Cramer | |||
| 1361 | M: cramerj@intel.com | 1361 | M: cramerj@intel.com |
| 1362 | P: John Ronciak | 1362 | P: John Ronciak |
| 1363 | M: john.ronciak@intel.com | 1363 | M: john.ronciak@intel.com |
| 1364 | P: Ganesh Venkatesan | 1364 | P: Jesse Brandeburg |
| 1365 | M: ganesh.venkatesan@intel.com | 1365 | M: jesse.brandeburg@intel.com |
| 1366 | P: Jeff Kirsher | ||
| 1367 | M: jeffrey.t.kirsher@intel.com | ||
| 1366 | W: http://sourceforge.net/projects/e1000/ | 1368 | W: http://sourceforge.net/projects/e1000/ |
| 1367 | S: Supported | 1369 | S: Supported |
| 1368 | 1370 | ||
| 1369 | INTEL PRO/10GbE SUPPORT | 1371 | INTEL PRO/10GbE SUPPORT |
| 1372 | P: Jeff Kirsher | ||
| 1373 | M: jeffrey.t.kirsher@intel.com | ||
| 1370 | P: Ayyappan Veeraiyan | 1374 | P: Ayyappan Veeraiyan |
| 1371 | M: ayyappan.veeraiyan@intel.com | 1375 | M: ayyappan.veeraiyan@intel.com |
| 1372 | P: Ganesh Venkatesan | ||
| 1373 | M: ganesh.venkatesan@intel.com | ||
| 1374 | P: John Ronciak | 1376 | P: John Ronciak |
| 1375 | M: john.ronciak@intel.com | 1377 | M: john.ronciak@intel.com |
| 1378 | P: Jesse Brandeburg | ||
| 1379 | M: jesse.brandeburg@intel.com | ||
| 1376 | W: http://sourceforge.net/projects/e1000/ | 1380 | W: http://sourceforge.net/projects/e1000/ |
| 1377 | S: Supported | 1381 | S: Supported |
| 1378 | 1382 | ||
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 486d7945583d..544ac5dc09eb 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c | |||
| @@ -357,7 +357,7 @@ free_reserved_mem(void *start, void *end) | |||
| 357 | void *__start = start; | 357 | void *__start = start; |
| 358 | for (; __start < end; __start += PAGE_SIZE) { | 358 | for (; __start < end; __start += PAGE_SIZE) { |
| 359 | ClearPageReserved(virt_to_page(__start)); | 359 | ClearPageReserved(virt_to_page(__start)); |
| 360 | set_page_count(virt_to_page(__start), 1); | 360 | init_page_count(virt_to_page(__start)); |
| 361 | free_page((long)__start); | 361 | free_page((long)__start); |
| 362 | totalram_pages++; | 362 | totalram_pages++; |
| 363 | } | 363 | } |
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index c2ee18d2075e..8a1bfcd50087 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c | |||
| @@ -223,6 +223,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
| 223 | pte = consistent_pte[idx] + off; | 223 | pte = consistent_pte[idx] + off; |
| 224 | c->vm_pages = page; | 224 | c->vm_pages = page; |
| 225 | 225 | ||
| 226 | split_page(page, order); | ||
| 227 | |||
| 226 | /* | 228 | /* |
| 227 | * Set the "dma handle" | 229 | * Set the "dma handle" |
| 228 | */ | 230 | */ |
| @@ -231,7 +233,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
| 231 | do { | 233 | do { |
| 232 | BUG_ON(!pte_none(*pte)); | 234 | BUG_ON(!pte_none(*pte)); |
| 233 | 235 | ||
| 234 | set_page_count(page, 1); | ||
| 235 | /* | 236 | /* |
| 236 | * x86 does not mark the pages reserved... | 237 | * x86 does not mark the pages reserved... |
| 237 | */ | 238 | */ |
| @@ -250,7 +251,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
| 250 | * Free the otherwise unused pages. | 251 | * Free the otherwise unused pages. |
| 251 | */ | 252 | */ |
| 252 | while (page < end) { | 253 | while (page < end) { |
| 253 | set_page_count(page, 1); | ||
| 254 | __free_page(page); | 254 | __free_page(page); |
| 255 | page++; | 255 | page++; |
| 256 | } | 256 | } |
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8b276ee38acf..b0321e943b76 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c | |||
| @@ -531,7 +531,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s) | |||
| 531 | for (; addr < end; addr += PAGE_SIZE) { | 531 | for (; addr < end; addr += PAGE_SIZE) { |
| 532 | struct page *page = virt_to_page(addr); | 532 | struct page *page = virt_to_page(addr); |
| 533 | ClearPageReserved(page); | 533 | ClearPageReserved(page); |
| 534 | set_page_count(page, 1); | 534 | init_page_count(page); |
| 535 | free_page(addr); | 535 | free_page(addr); |
| 536 | totalram_pages++; | 536 | totalram_pages++; |
| 537 | } | 537 | } |
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index 1f09a9d0fb83..e3ecaa453747 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c | |||
| @@ -324,7 +324,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s) | |||
| 324 | for (; addr < end; addr += PAGE_SIZE) { | 324 | for (; addr < end; addr += PAGE_SIZE) { |
| 325 | struct page *page = virt_to_page(addr); | 325 | struct page *page = virt_to_page(addr); |
| 326 | ClearPageReserved(page); | 326 | ClearPageReserved(page); |
| 327 | set_page_count(page, 1); | 327 | init_page_count(page); |
| 328 | free_page(addr); | 328 | free_page(addr); |
| 329 | totalram_pages++; | 329 | totalram_pages++; |
| 330 | } | 330 | } |
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 31a0018b525a..b7842ff213a6 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c | |||
| @@ -216,7 +216,7 @@ free_initmem(void) | |||
| 216 | addr = (unsigned long)(&__init_begin); | 216 | addr = (unsigned long)(&__init_begin); |
| 217 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 217 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 218 | ClearPageReserved(virt_to_page(addr)); | 218 | ClearPageReserved(virt_to_page(addr)); |
| 219 | set_page_count(virt_to_page(addr), 1); | 219 | init_page_count(virt_to_page(addr)); |
| 220 | free_page(addr); | 220 | free_page(addr); |
| 221 | totalram_pages++; | 221 | totalram_pages++; |
| 222 | } | 222 | } |
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c index 0f1c6cbc4f50..aa6b7d0a2109 100644 --- a/arch/frv/kernel/frv_ksyms.c +++ b/arch/frv/kernel/frv_ksyms.c | |||
| @@ -27,6 +27,7 @@ EXPORT_SYMBOL(__ioremap); | |||
| 27 | EXPORT_SYMBOL(iounmap); | 27 | EXPORT_SYMBOL(iounmap); |
| 28 | 28 | ||
| 29 | EXPORT_SYMBOL(strnlen); | 29 | EXPORT_SYMBOL(strnlen); |
| 30 | EXPORT_SYMBOL(strpbrk); | ||
| 30 | EXPORT_SYMBOL(strrchr); | 31 | EXPORT_SYMBOL(strrchr); |
| 31 | EXPORT_SYMBOL(strstr); | 32 | EXPORT_SYMBOL(strstr); |
| 32 | EXPORT_SYMBOL(strchr); | 33 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c index 342823aad758..636b2f8b5d98 100644 --- a/arch/frv/mm/dma-alloc.c +++ b/arch/frv/mm/dma-alloc.c | |||
| @@ -115,9 +115,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) | |||
| 115 | */ | 115 | */ |
| 116 | if (order > 0) { | 116 | if (order > 0) { |
| 117 | struct page *rpage = virt_to_page(page); | 117 | struct page *rpage = virt_to_page(page); |
| 118 | 118 | split_page(rpage, order); | |
| 119 | for (i = 1; i < (1 << order); i++) | ||
| 120 | set_page_count(rpage + i, 1); | ||
| 121 | } | 119 | } |
| 122 | 120 | ||
| 123 | err = 0; | 121 | err = 0; |
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 765088ea8a50..8899aa1a4f06 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c | |||
| @@ -169,7 +169,7 @@ void __init mem_init(void) | |||
| 169 | struct page *page = &mem_map[pfn]; | 169 | struct page *page = &mem_map[pfn]; |
| 170 | 170 | ||
| 171 | ClearPageReserved(page); | 171 | ClearPageReserved(page); |
| 172 | set_page_count(page, 1); | 172 | init_page_count(page); |
| 173 | __free_page(page); | 173 | __free_page(page); |
| 174 | totalram_pages++; | 174 | totalram_pages++; |
| 175 | } | 175 | } |
| @@ -210,7 +210,7 @@ void __init free_initmem(void) | |||
| 210 | /* next to check that the page we free is not a partial page */ | 210 | /* next to check that the page we free is not a partial page */ |
| 211 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 211 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
| 212 | ClearPageReserved(virt_to_page(addr)); | 212 | ClearPageReserved(virt_to_page(addr)); |
| 213 | set_page_count(virt_to_page(addr), 1); | 213 | init_page_count(virt_to_page(addr)); |
| 214 | free_page(addr); | 214 | free_page(addr); |
| 215 | totalram_pages++; | 215 | totalram_pages++; |
| 216 | } | 216 | } |
| @@ -230,7 +230,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) | |||
| 230 | int pages = 0; | 230 | int pages = 0; |
| 231 | for (; start < end; start += PAGE_SIZE) { | 231 | for (; start < end; start += PAGE_SIZE) { |
| 232 | ClearPageReserved(virt_to_page(start)); | 232 | ClearPageReserved(virt_to_page(start)); |
| 233 | set_page_count(virt_to_page(start), 1); | 233 | init_page_count(virt_to_page(start)); |
| 234 | free_page(start); | 234 | free_page(start); |
| 235 | totalram_pages++; | 235 | totalram_pages++; |
| 236 | pages++; | 236 | pages++; |
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c index 5cc76efaf7aa..69d6ad32d56c 100644 --- a/arch/h8300/kernel/h8300_ksyms.c +++ b/arch/h8300/kernel/h8300_ksyms.c | |||
| @@ -25,6 +25,7 @@ extern char h8300_debug_device[]; | |||
| 25 | /* platform dependent support */ | 25 | /* platform dependent support */ |
| 26 | 26 | ||
| 27 | EXPORT_SYMBOL(strnlen); | 27 | EXPORT_SYMBOL(strnlen); |
| 28 | EXPORT_SYMBOL(strpbrk); | ||
| 28 | EXPORT_SYMBOL(strrchr); | 29 | EXPORT_SYMBOL(strrchr); |
| 29 | EXPORT_SYMBOL(strstr); | 30 | EXPORT_SYMBOL(strstr); |
| 30 | EXPORT_SYMBOL(strchr); | 31 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 1e0929ddc8c4..09efc4b1f038 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c | |||
| @@ -196,7 +196,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 196 | int pages = 0; | 196 | int pages = 0; |
| 197 | for (; start < end; start += PAGE_SIZE) { | 197 | for (; start < end; start += PAGE_SIZE) { |
| 198 | ClearPageReserved(virt_to_page(start)); | 198 | ClearPageReserved(virt_to_page(start)); |
| 199 | set_page_count(virt_to_page(start), 1); | 199 | init_page_count(virt_to_page(start)); |
| 200 | free_page(start); | 200 | free_page(start); |
| 201 | totalram_pages++; | 201 | totalram_pages++; |
| 202 | pages++; | 202 | pages++; |
| @@ -219,7 +219,7 @@ free_initmem() | |||
| 219 | /* next to check that the page we free is not a partial page */ | 219 | /* next to check that the page we free is not a partial page */ |
| 220 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { | 220 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { |
| 221 | ClearPageReserved(virt_to_page(addr)); | 221 | ClearPageReserved(virt_to_page(addr)); |
| 222 | set_page_count(virt_to_page(addr), 1); | 222 | init_page_count(virt_to_page(addr)); |
| 223 | free_page(addr); | 223 | free_page(addr); |
| 224 | totalram_pages++; | 224 | totalram_pages++; |
| 225 | } | 225 | } |
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c9cad7ba0d2d..aeabb4196861 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c | |||
| @@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) | |||
| 115 | unsigned long cr4; | 115 | unsigned long cr4; |
| 116 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | 116 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); |
| 117 | 117 | ||
| 118 | cpu_gdt_descr->address = __va(cpu_gdt_descr->address); | 118 | cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); |
| 119 | load_gdt(cpu_gdt_descr); | 119 | load_gdt(cpu_gdt_descr); |
| 120 | 120 | ||
| 121 | cr4 = read_cr4(); | 121 | cr4 = read_cr4(); |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 218d725a5a1e..d134e9643a58 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
| @@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void) | |||
| 504 | spin_unlock_irq(&call_lock); | 504 | spin_unlock_irq(&call_lock); |
| 505 | } | 505 | } |
| 506 | 506 | ||
| 507 | static struct call_data_struct * call_data; | 507 | static struct call_data_struct *call_data; |
| 508 | 508 | ||
| 509 | /* | 509 | /** |
| 510 | * this function sends a 'generic call function' IPI to all other CPUs | 510 | * smp_call_function(): Run a function on all other CPUs. |
| 511 | * in the system. | 511 | * @func: The function to run. This must be fast and non-blocking. |
| 512 | */ | 512 | * @info: An arbitrary pointer to pass to the function. |
| 513 | 513 | * @nonatomic: currently unused. | |
| 514 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 514 | * @wait: If true, wait (atomically) until function has completed on other CPUs. |
| 515 | int wait) | 515 | * |
| 516 | /* | 516 | * Returns 0 on success, else a negative status code. Does not return until |
| 517 | * [SUMMARY] Run a function on all other CPUs. | ||
| 518 | * <func> The function to run. This must be fast and non-blocking. | ||
| 519 | * <info> An arbitrary pointer to pass to the function. | ||
| 520 | * <nonatomic> currently unused. | ||
| 521 | * <wait> If true, wait (atomically) until function has completed on other CPUs. | ||
| 522 | * [RETURNS] 0 on success, else a negative status code. Does not return until | ||
| 523 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. | 517 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. |
| 524 | * | 518 | * |
| 525 | * You must not call this function with disabled interrupts or from a | 519 | * You must not call this function with disabled interrupts or from a |
| 526 | * hardware interrupt handler or from a bottom half handler. | 520 | * hardware interrupt handler or from a bottom half handler. |
| 527 | */ | 521 | */ |
| 522 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | ||
| 523 | int wait) | ||
| 528 | { | 524 | { |
| 529 | struct call_data_struct data; | 525 | struct call_data_struct data; |
| 530 | int cpus; | 526 | int cpus; |
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index a4a61976ecb9..8fdb1fb17a5f 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c | |||
| @@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) | |||
| 40 | return error; | 40 | return error; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | /* common code for old and new mmaps */ | 43 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, |
| 44 | static inline long do_mmap2( | 44 | unsigned long prot, unsigned long flags, |
| 45 | unsigned long addr, unsigned long len, | 45 | unsigned long fd, unsigned long pgoff) |
| 46 | unsigned long prot, unsigned long flags, | ||
| 47 | unsigned long fd, unsigned long pgoff) | ||
| 48 | { | 46 | { |
| 49 | int error = -EBADF; | 47 | int error = -EBADF; |
| 50 | struct file * file = NULL; | 48 | struct file *file = NULL; |
| 49 | struct mm_struct *mm = current->mm; | ||
| 51 | 50 | ||
| 52 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | 51 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); |
| 53 | if (!(flags & MAP_ANONYMOUS)) { | 52 | if (!(flags & MAP_ANONYMOUS)) { |
| @@ -56,9 +55,9 @@ static inline long do_mmap2( | |||
| 56 | goto out; | 55 | goto out; |
| 57 | } | 56 | } |
| 58 | 57 | ||
| 59 | down_write(¤t->mm->mmap_sem); | 58 | down_write(&mm->mmap_sem); |
| 60 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | 59 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); |
| 61 | up_write(¤t->mm->mmap_sem); | 60 | up_write(&mm->mmap_sem); |
| 62 | 61 | ||
| 63 | if (file) | 62 | if (file) |
| 64 | fput(file); | 63 | fput(file); |
| @@ -66,13 +65,6 @@ out: | |||
| 66 | return error; | 65 | return error; |
| 67 | } | 66 | } |
| 68 | 67 | ||
| 69 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | ||
| 70 | unsigned long prot, unsigned long flags, | ||
| 71 | unsigned long fd, unsigned long pgoff) | ||
| 72 | { | ||
| 73 | return do_mmap2(addr, len, prot, flags, fd, pgoff); | ||
| 74 | } | ||
| 75 | |||
| 76 | /* | 68 | /* |
| 77 | * Perform the select(nd, in, out, ex, tv) and mmap() system | 69 | * Perform the select(nd, in, out, ex, tv) and mmap() system |
| 78 | * calls. Linux/i386 didn't use to be able to handle more than | 70 | * calls. Linux/i386 didn't use to be able to handle more than |
| @@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | |||
| 101 | if (a.offset & ~PAGE_MASK) | 93 | if (a.offset & ~PAGE_MASK) |
| 102 | goto out; | 94 | goto out; |
| 103 | 95 | ||
| 104 | err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); | 96 | err = sys_mmap2(a.addr, a.len, a.prot, a.flags, |
| 97 | a.fd, a.offset >> PAGE_SHIFT); | ||
| 105 | out: | 98 | out: |
| 106 | return err; | 99 | return err; |
| 107 | } | 100 | } |
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index be242723c339..17a6fe7166e7 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c | |||
| @@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | |||
| 46 | * | 46 | * |
| 47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
| 48 | */ | 48 | */ |
| 49 | static unsigned long cyc2ns_scale; | 49 | static unsigned long cyc2ns_scale __read_mostly; |
| 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
| 51 | 51 | ||
| 52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a7f5a2aceba2..5e41ee29c8cf 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c | |||
| @@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation); | |||
| 74 | * | 74 | * |
| 75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
| 76 | */ | 76 | */ |
| 77 | static unsigned long cyc2ns_scale; | 77 | static unsigned long cyc2ns_scale __read_mostly; |
| 78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
| 79 | 79 | ||
| 80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index d524127c9afc..a7d891585411 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c | |||
| @@ -48,18 +48,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
| 48 | return (pte_t *) pmd; | 48 | return (pte_t *) pmd; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | /* | ||
| 52 | * This function checks for proper alignment of input addr and len parameters. | ||
| 53 | */ | ||
| 54 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
| 55 | { | ||
| 56 | if (len & ~HPAGE_MASK) | ||
| 57 | return -EINVAL; | ||
| 58 | if (addr & ~HPAGE_MASK) | ||
| 59 | return -EINVAL; | ||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | #if 0 /* This is just for testing */ | 51 | #if 0 /* This is just for testing */ |
| 64 | struct page * | 52 | struct page * |
| 65 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 53 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 2700f01994ba..7ba55a6e2dbc 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
| @@ -270,7 +270,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
| 270 | 270 | ||
| 271 | static void __meminit free_new_highpage(struct page *page) | 271 | static void __meminit free_new_highpage(struct page *page) |
| 272 | { | 272 | { |
| 273 | set_page_count(page, 1); | 273 | init_page_count(page); |
| 274 | __free_page(page); | 274 | __free_page(page); |
| 275 | totalhigh_pages++; | 275 | totalhigh_pages++; |
| 276 | } | 276 | } |
| @@ -727,7 +727,7 @@ void free_initmem(void) | |||
| 727 | addr = (unsigned long)(&__init_begin); | 727 | addr = (unsigned long)(&__init_begin); |
| 728 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 728 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 729 | ClearPageReserved(virt_to_page(addr)); | 729 | ClearPageReserved(virt_to_page(addr)); |
| 730 | set_page_count(virt_to_page(addr), 1); | 730 | init_page_count(virt_to_page(addr)); |
| 731 | memset((void *)addr, 0xcc, PAGE_SIZE); | 731 | memset((void *)addr, 0xcc, PAGE_SIZE); |
| 732 | free_page(addr); | 732 | free_page(addr); |
| 733 | totalram_pages++; | 733 | totalram_pages++; |
| @@ -766,7 +766,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 766 | printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 766 | printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 767 | for (; start < end; start += PAGE_SIZE) { | 767 | for (; start < end; start += PAGE_SIZE) { |
| 768 | ClearPageReserved(virt_to_page(start)); | 768 | ClearPageReserved(virt_to_page(start)); |
| 769 | set_page_count(virt_to_page(start), 1); | 769 | init_page_count(virt_to_page(start)); |
| 770 | free_page(start); | 770 | free_page(start); |
| 771 | totalram_pages++; | 771 | totalram_pages++; |
| 772 | } | 772 | } |
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index d0cadb33b54c..92c3d9f0e731 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c | |||
| @@ -51,6 +51,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
| 51 | if (!base) | 51 | if (!base) |
| 52 | return NULL; | 52 | return NULL; |
| 53 | 53 | ||
| 54 | /* | ||
| 55 | * page_private is used to track the number of entries in | ||
| 56 | * the page table page that have non standard attributes. | ||
| 57 | */ | ||
| 58 | SetPagePrivate(base); | ||
| 59 | page_private(base) = 0; | ||
| 60 | |||
| 54 | address = __pa(address); | 61 | address = __pa(address); |
| 55 | addr = address & LARGE_PAGE_MASK; | 62 | addr = address & LARGE_PAGE_MASK; |
| 56 | pbase = (pte_t *)page_address(base); | 63 | pbase = (pte_t *)page_address(base); |
| @@ -143,11 +150,12 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
| 143 | return -ENOMEM; | 150 | return -ENOMEM; |
| 144 | set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); | 151 | set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); |
| 145 | kpte_page = split; | 152 | kpte_page = split; |
| 146 | } | 153 | } |
| 147 | get_page(kpte_page); | 154 | page_private(kpte_page)++; |
| 148 | } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { | 155 | } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { |
| 149 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); | 156 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); |
| 150 | __put_page(kpte_page); | 157 | BUG_ON(page_private(kpte_page) == 0); |
| 158 | page_private(kpte_page)--; | ||
| 151 | } else | 159 | } else |
| 152 | BUG(); | 160 | BUG(); |
| 153 | 161 | ||
| @@ -157,10 +165,8 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
| 157 | * replace it with a largepage. | 165 | * replace it with a largepage. |
| 158 | */ | 166 | */ |
| 159 | if (!PageReserved(kpte_page)) { | 167 | if (!PageReserved(kpte_page)) { |
| 160 | /* memleak and potential failed 2M page regeneration */ | 168 | if (cpu_has_pse && (page_private(kpte_page) == 0)) { |
| 161 | BUG_ON(!page_count(kpte_page)); | 169 | ClearPagePrivate(kpte_page); |
| 162 | |||
| 163 | if (cpu_has_pse && (page_count(kpte_page) == 1)) { | ||
| 164 | list_add(&kpte_page->lru, &df_list); | 170 | list_add(&kpte_page->lru, &df_list); |
| 165 | revert_page(kpte_page, address); | 171 | revert_page(kpte_page, address); |
| 166 | } | 172 | } |
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a85ea9d37f05..ff7ae6b664e8 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
| @@ -271,6 +271,25 @@ config SCHED_SMT | |||
| 271 | Intel IA64 chips with MultiThreading at a cost of slightly increased | 271 | Intel IA64 chips with MultiThreading at a cost of slightly increased |
| 272 | overhead in some places. If unsure say N here. | 272 | overhead in some places. If unsure say N here. |
| 273 | 273 | ||
| 274 | config PERMIT_BSP_REMOVE | ||
| 275 | bool "Support removal of Bootstrap Processor" | ||
| 276 | depends on HOTPLUG_CPU | ||
| 277 | default n | ||
| 278 | ---help--- | ||
| 279 | Say Y here if your platform SAL supports removal of the BSP when HOTPLUG_CPU | ||
| 280 | is enabled. | ||
| 281 | |||
| 282 | config FORCE_CPEI_RETARGET | ||
| 283 | bool "Force assumption that CPEI can be re-targetted" | ||
| 284 | depends on PERMIT_BSP_REMOVE | ||
| 285 | default n | ||
| 286 | ---help--- | ||
| 287 | Say Y if you need to force the assumption that CPEI can be re-targeted to | ||
| 288 | any CPU in the system. This hint is available via the ACPI 3.0 specification. | ||
| 289 | Tiger4 systems are capable of re-directing CPEI to any CPU other than the BSP. | ||
| 290 | This option is useful for enabling this feature on older BIOSes as well. | ||
| 291 | You can also enable this by using boot command line option force_cpei=1. | ||
| 292 | |||
| 274 | config PREEMPT | 293 | config PREEMPT |
| 275 | bool "Preemptible Kernel" | 294 | bool "Preemptible Kernel" |
| 276 | help | 295 | help |
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 125568118b84..766bf4955432 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig | |||
| @@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17 | |||
| 116 | CONFIG_SMP=y | 116 | CONFIG_SMP=y |
| 117 | CONFIG_NR_CPUS=4 | 117 | CONFIG_NR_CPUS=4 |
| 118 | CONFIG_HOTPLUG_CPU=y | 118 | CONFIG_HOTPLUG_CPU=y |
| 119 | CONFIG_PERMIT_BSP_REMOVE=y | ||
| 120 | CONFIG_FORCE_CPEI_RETARGET=y | ||
| 119 | # CONFIG_SCHED_SMT is not set | 121 | # CONFIG_SCHED_SMT is not set |
| 120 | # CONFIG_PREEMPT is not set | 122 | # CONFIG_PREEMPT is not set |
| 121 | CONFIG_SELECT_MEMORY_MODEL=y | 123 | CONFIG_SELECT_MEMORY_MODEL=y |
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bdc8394..4722ec51c70c 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c | |||
| @@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header, | |||
| 284 | return 0; | 284 | return 0; |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 287 | unsigned int can_cpei_retarget(void) | 288 | unsigned int can_cpei_retarget(void) |
| 288 | { | 289 | { |
| 289 | extern int cpe_vector; | 290 | extern int cpe_vector; |
| 291 | extern unsigned int force_cpei_retarget; | ||
| 290 | 292 | ||
| 291 | /* | 293 | /* |
| 292 | * Only if CPEI is supported and the override flag | 294 | * Only if CPEI is supported and the override flag |
| 293 | * is present, otherwise return that its re-targettable | 295 | * is present, otherwise return that its re-targettable |
| 294 | * if we are in polling mode. | 296 | * if we are in polling mode. |
| 295 | */ | 297 | */ |
| 296 | if (cpe_vector > 0 && !acpi_cpei_override) | 298 | if (cpe_vector > 0) { |
| 297 | return 0; | 299 | if (acpi_cpei_override || force_cpei_retarget) |
| 298 | else | 300 | return 1; |
| 299 | return 1; | 301 | else |
| 302 | return 0; | ||
| 303 | } | ||
| 304 | return 1; | ||
| 300 | } | 305 | } |
| 301 | 306 | ||
| 302 | unsigned int is_cpu_cpei_target(unsigned int cpu) | 307 | unsigned int is_cpu_cpei_target(unsigned int cpu) |
| @@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu) | |||
| 315 | { | 320 | { |
| 316 | acpi_cpei_phys_cpuid = cpu_physical_id(cpu); | 321 | acpi_cpei_phys_cpuid = cpu_physical_id(cpu); |
| 317 | } | 322 | } |
| 323 | #endif | ||
| 318 | 324 | ||
| 319 | unsigned int get_cpei_target_cpu(void) | 325 | unsigned int get_cpei_target_cpu(void) |
| 320 | { | 326 | { |
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfca6ddb..0e3eda99e549 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
| @@ -1102,9 +1102,6 @@ skip_rbs_switch: | |||
| 1102 | st8 [r2]=r8 | 1102 | st8 [r2]=r8 |
| 1103 | st8 [r3]=r10 | 1103 | st8 [r3]=r10 |
| 1104 | .work_pending: | 1104 | .work_pending: |
| 1105 | tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? | ||
| 1106 | (p6) br.cond.sptk.few .sigdelayed | ||
| 1107 | ;; | ||
| 1108 | tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? | 1105 | tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? |
| 1109 | (p6) br.cond.sptk.few .notify | 1106 | (p6) br.cond.sptk.few .notify |
| 1110 | #ifdef CONFIG_PREEMPT | 1107 | #ifdef CONFIG_PREEMPT |
| @@ -1131,17 +1128,6 @@ skip_rbs_switch: | |||
| 1131 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | 1128 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end |
| 1132 | br.cond.sptk.many .work_processed_kernel // don't re-check | 1129 | br.cond.sptk.many .work_processed_kernel // don't re-check |
| 1133 | 1130 | ||
| 1134 | // There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where | ||
| 1135 | // it could not be delivered. Deliver it now. The signal might be for us and | ||
| 1136 | // may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed | ||
| 1137 | // signal. | ||
| 1138 | |||
| 1139 | .sigdelayed: | ||
| 1140 | br.call.sptk.many rp=do_sigdelayed | ||
| 1141 | cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check | ||
| 1142 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | ||
| 1143 | br.cond.sptk.many .work_processed_kernel // re-check | ||
| 1144 | |||
| 1145 | .work_pending_syscall_end: | 1131 | .work_pending_syscall_end: |
| 1146 | adds r2=PT(R8)+16,r12 | 1132 | adds r2=PT(R8)+16,r12 |
| 1147 | adds r3=PT(R10)+16,r12 | 1133 | adds r3=PT(R10)+16,r12 |
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f343fa..8832c553230a 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c | |||
| @@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector) | |||
| 631 | { | 631 | { |
| 632 | #ifdef CONFIG_SMP | 632 | #ifdef CONFIG_SMP |
| 633 | static int cpu = -1; | 633 | static int cpu = -1; |
| 634 | extern int cpe_vector; | ||
| 634 | 635 | ||
| 635 | /* | 636 | /* |
| 636 | * In case of vector shared by multiple RTEs, all RTEs that | 637 | * In case of vector shared by multiple RTEs, all RTEs that |
| @@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector) | |||
| 653 | if (!cpu_online(smp_processor_id())) | 654 | if (!cpu_online(smp_processor_id())) |
| 654 | return cpu_physical_id(smp_processor_id()); | 655 | return cpu_physical_id(smp_processor_id()); |
| 655 | 656 | ||
| 657 | #ifdef CONFIG_ACPI | ||
| 658 | if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) | ||
| 659 | return get_cpei_target_cpu(); | ||
| 660 | #endif | ||
| 661 | |||
| 656 | #ifdef CONFIG_NUMA | 662 | #ifdef CONFIG_NUMA |
| 657 | { | 663 | { |
| 658 | int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; | 664 | int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; |
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c32759..5ce908ef9c95 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c | |||
| @@ -163,8 +163,19 @@ void fixup_irqs(void) | |||
| 163 | { | 163 | { |
| 164 | unsigned int irq; | 164 | unsigned int irq; |
| 165 | extern void ia64_process_pending_intr(void); | 165 | extern void ia64_process_pending_intr(void); |
| 166 | extern void ia64_disable_timer(void); | ||
| 167 | extern volatile int time_keeper_id; | ||
| 168 | |||
| 169 | ia64_disable_timer(); | ||
| 170 | |||
| 171 | /* | ||
| 172 | * Find a new timesync master | ||
| 173 | */ | ||
| 174 | if (smp_processor_id() == time_keeper_id) { | ||
| 175 | time_keeper_id = first_cpu(cpu_online_map); | ||
| 176 | printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); | ||
| 177 | } | ||
| 166 | 178 | ||
| 167 | ia64_set_itv(1<<16); | ||
| 168 | /* | 179 | /* |
| 169 | * Phase 1: Locate irq's bound to this cpu and | 180 | * Phase 1: Locate irq's bound to this cpu and |
| 170 | * relocate them for cpu removal. | 181 | * relocate them for cpu removal. |
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9ee576..b57e723f194c 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
| @@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type) | |||
| 281 | ia64_sal_clear_state_info(sal_info_type); | 281 | ia64_sal_clear_state_info(sal_info_type); |
| 282 | } | 282 | } |
| 283 | 283 | ||
| 284 | /* | ||
| 285 | * platform dependent error handling | ||
| 286 | */ | ||
| 287 | #ifndef PLATFORM_MCA_HANDLERS | ||
| 288 | |||
| 289 | #ifdef CONFIG_ACPI | 284 | #ifdef CONFIG_ACPI |
| 290 | 285 | ||
| 291 | int cpe_vector = -1; | 286 | int cpe_vector = -1; |
| 287 | int ia64_cpe_irq = -1; | ||
| 292 | 288 | ||
| 293 | static irqreturn_t | 289 | static irqreturn_t |
| 294 | ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) | 290 | ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) |
| @@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev) | |||
| 377 | } | 373 | } |
| 378 | #endif /* CONFIG_ACPI */ | 374 | #endif /* CONFIG_ACPI */ |
| 379 | 375 | ||
| 380 | #endif /* PLATFORM_MCA_HANDLERS */ | ||
| 381 | |||
| 382 | /* | 376 | /* |
| 383 | * ia64_mca_cmc_vector_setup | 377 | * ia64_mca_cmc_vector_setup |
| 384 | * | 378 | * |
| @@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) | |||
| 630 | *tnat |= (nat << tslot); | 624 | *tnat |= (nat << tslot); |
| 631 | } | 625 | } |
| 632 | 626 | ||
| 627 | /* Change the comm field on the MCA/INT task to include the pid that | ||
| 628 | * was interrupted, it makes for easier debugging. If that pid was 0 | ||
| 629 | * (swapper or nested MCA/INIT) then use the start of the previous comm | ||
| 630 | * field suffixed with its cpu. | ||
| 631 | */ | ||
| 632 | |||
| 633 | static void | ||
| 634 | ia64_mca_modify_comm(const task_t *previous_current) | ||
| 635 | { | ||
| 636 | char *p, comm[sizeof(current->comm)]; | ||
| 637 | if (previous_current->pid) | ||
| 638 | snprintf(comm, sizeof(comm), "%s %d", | ||
| 639 | current->comm, previous_current->pid); | ||
| 640 | else { | ||
| 641 | int l; | ||
| 642 | if ((p = strchr(previous_current->comm, ' '))) | ||
| 643 | l = p - previous_current->comm; | ||
| 644 | else | ||
| 645 | l = strlen(previous_current->comm); | ||
| 646 | snprintf(comm, sizeof(comm), "%s %*s %d", | ||
| 647 | current->comm, l, previous_current->comm, | ||
| 648 | task_thread_info(previous_current)->cpu); | ||
| 649 | } | ||
| 650 | memcpy(current->comm, comm, sizeof(current->comm)); | ||
| 651 | } | ||
| 652 | |||
| 633 | /* On entry to this routine, we are running on the per cpu stack, see | 653 | /* On entry to this routine, we are running on the per cpu stack, see |
| 634 | * mca_asm.h. The original stack has not been touched by this event. Some of | 654 | * mca_asm.h. The original stack has not been touched by this event. Some of |
| 635 | * the original stack's registers will be in the RBS on this stack. This stack | 655 | * the original stack's registers will be in the RBS on this stack. This stack |
| @@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
| 648 | struct ia64_sal_os_state *sos, | 668 | struct ia64_sal_os_state *sos, |
| 649 | const char *type) | 669 | const char *type) |
| 650 | { | 670 | { |
| 651 | char *p, comm[sizeof(current->comm)]; | 671 | char *p; |
| 652 | ia64_va va; | 672 | ia64_va va; |
| 653 | extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ | 673 | extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ |
| 654 | const pal_min_state_area_t *ms = sos->pal_min_state; | 674 | const pal_min_state_area_t *ms = sos->pal_min_state; |
| @@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
| 721 | /* Verify the previous stack state before we change it */ | 741 | /* Verify the previous stack state before we change it */ |
| 722 | if (user_mode(regs)) { | 742 | if (user_mode(regs)) { |
| 723 | msg = "occurred in user space"; | 743 | msg = "occurred in user space"; |
| 744 | /* previous_current is guaranteed to be valid when the task was | ||
| 745 | * in user space, so ... | ||
| 746 | */ | ||
| 747 | ia64_mca_modify_comm(previous_current); | ||
| 724 | goto no_mod; | 748 | goto no_mod; |
| 725 | } | 749 | } |
| 726 | if (r13 != sos->prev_IA64_KR_CURRENT) { | 750 | if (r13 != sos->prev_IA64_KR_CURRENT) { |
| @@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, | |||
| 750 | goto no_mod; | 774 | goto no_mod; |
| 751 | } | 775 | } |
| 752 | 776 | ||
| 753 | /* Change the comm field on the MCA/INT task to include the pid that | 777 | ia64_mca_modify_comm(previous_current); |
| 754 | * was interrupted, it makes for easier debugging. If that pid was 0 | ||
| 755 | * (swapper or nested MCA/INIT) then use the start of the previous comm | ||
| 756 | * field suffixed with its cpu. | ||
| 757 | */ | ||
| 758 | if (previous_current->pid) | ||
| 759 | snprintf(comm, sizeof(comm), "%s %d", | ||
| 760 | current->comm, previous_current->pid); | ||
| 761 | else { | ||
| 762 | int l; | ||
| 763 | if ((p = strchr(previous_current->comm, ' '))) | ||
| 764 | l = p - previous_current->comm; | ||
| 765 | else | ||
| 766 | l = strlen(previous_current->comm); | ||
| 767 | snprintf(comm, sizeof(comm), "%s %*s %d", | ||
| 768 | current->comm, l, previous_current->comm, | ||
| 769 | task_thread_info(previous_current)->cpu); | ||
| 770 | } | ||
| 771 | memcpy(current->comm, comm, sizeof(current->comm)); | ||
| 772 | 778 | ||
| 773 | /* Make the original task look blocked. First stack a struct pt_regs, | 779 | /* Make the original task look blocked. First stack a struct pt_regs, |
| 774 | * describing the state at the time of interrupt. mca_asm.S built a | 780 | * describing the state at the time of interrupt. mca_asm.S built a |
| @@ -908,7 +914,7 @@ no_mod: | |||
| 908 | static void | 914 | static void |
| 909 | ia64_wait_for_slaves(int monarch) | 915 | ia64_wait_for_slaves(int monarch) |
| 910 | { | 916 | { |
| 911 | int c, wait = 0; | 917 | int c, wait = 0, missing = 0; |
| 912 | for_each_online_cpu(c) { | 918 | for_each_online_cpu(c) { |
| 913 | if (c == monarch) | 919 | if (c == monarch) |
| 914 | continue; | 920 | continue; |
| @@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch) | |||
| 919 | } | 925 | } |
| 920 | } | 926 | } |
| 921 | if (!wait) | 927 | if (!wait) |
| 922 | return; | 928 | goto all_in; |
| 923 | for_each_online_cpu(c) { | 929 | for_each_online_cpu(c) { |
| 924 | if (c == monarch) | 930 | if (c == monarch) |
| 925 | continue; | 931 | continue; |
| 926 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { | 932 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { |
| 927 | udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ | 933 | udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ |
| 934 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | ||
| 935 | missing = 1; | ||
| 928 | break; | 936 | break; |
| 929 | } | 937 | } |
| 930 | } | 938 | } |
| 939 | if (!missing) | ||
| 940 | goto all_in; | ||
| 941 | printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); | ||
| 942 | for_each_online_cpu(c) { | ||
| 943 | if (c == monarch) | ||
| 944 | continue; | ||
| 945 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | ||
| 946 | printk(" %d", c); | ||
| 947 | } | ||
| 948 | printk("\n"); | ||
| 949 | return; | ||
| 950 | |||
| 951 | all_in: | ||
| 952 | printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); | ||
| 953 | return; | ||
| 931 | } | 954 | } |
| 932 | 955 | ||
| 933 | /* | 956 | /* |
| @@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 953 | task_t *previous_current; | 976 | task_t *previous_current; |
| 954 | 977 | ||
| 955 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 978 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ |
| 979 | console_loglevel = 15; /* make sure printks make it to console */ | ||
| 980 | printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", | ||
| 981 | sos->proc_state_param, cpu, sos->monarch); | ||
| 982 | |||
| 956 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 983 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
| 957 | monarch_cpu = cpu; | 984 | monarch_cpu = cpu; |
| 958 | if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) | 985 | if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) |
| @@ -1444,11 +1471,13 @@ void __devinit | |||
| 1444 | ia64_mca_cpu_init(void *cpu_data) | 1471 | ia64_mca_cpu_init(void *cpu_data) |
| 1445 | { | 1472 | { |
| 1446 | void *pal_vaddr; | 1473 | void *pal_vaddr; |
| 1474 | static int first_time = 1; | ||
| 1447 | 1475 | ||
| 1448 | if (smp_processor_id() == 0) { | 1476 | if (first_time) { |
| 1449 | void *mca_data; | 1477 | void *mca_data; |
| 1450 | int cpu; | 1478 | int cpu; |
| 1451 | 1479 | ||
| 1480 | first_time = 0; | ||
| 1452 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) | 1481 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) |
| 1453 | * NR_CPUS + KERNEL_STACK_SIZE); | 1482 | * NR_CPUS + KERNEL_STACK_SIZE); |
| 1454 | mca_data = (void *)(((unsigned long)mca_data + | 1483 | mca_data = (void *)(((unsigned long)mca_data + |
| @@ -1704,6 +1733,7 @@ ia64_mca_late_init(void) | |||
| 1704 | desc = irq_descp(irq); | 1733 | desc = irq_descp(irq); |
| 1705 | desc->status |= IRQ_PER_CPU; | 1734 | desc->status |= IRQ_PER_CPU; |
| 1706 | setup_irq(irq, &mca_cpe_irqaction); | 1735 | setup_irq(irq, &mca_cpe_irqaction); |
| 1736 | ia64_cpe_irq = irq; | ||
| 1707 | } | 1737 | } |
| 1708 | ia64_mca_register_cpev(cpe_vector); | 1738 | ia64_mca_register_cpev(cpe_vector); |
| 1709 | IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); | 1739 | IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); |
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b385da..077f21216b65 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c | |||
| @@ -6722,6 +6722,7 @@ __initcall(pfm_init); | |||
| 6722 | void | 6722 | void |
| 6723 | pfm_init_percpu (void) | 6723 | pfm_init_percpu (void) |
| 6724 | { | 6724 | { |
| 6725 | static int first_time=1; | ||
| 6725 | /* | 6726 | /* |
| 6726 | * make sure no measurement is active | 6727 | * make sure no measurement is active |
| 6727 | * (may inherit programmed PMCs from EFI). | 6728 | * (may inherit programmed PMCs from EFI). |
| @@ -6734,8 +6735,10 @@ pfm_init_percpu (void) | |||
| 6734 | */ | 6735 | */ |
| 6735 | pfm_unfreeze_pmu(); | 6736 | pfm_unfreeze_pmu(); |
| 6736 | 6737 | ||
| 6737 | if (smp_processor_id() == 0) | 6738 | if (first_time) { |
| 6738 | register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); | 6739 | register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); |
| 6740 | first_time=0; | ||
| 6741 | } | ||
| 6739 | 6742 | ||
| 6740 | ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); | 6743 | ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); |
| 6741 | ia64_srlz_d(); | 6744 | ia64_srlz_d(); |
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb44d07..1d7903ee2126 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c | |||
| @@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) | |||
| 588 | } | 588 | } |
| 589 | return 0; | 589 | return 0; |
| 590 | } | 590 | } |
| 591 | |||
| 592 | /* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it | ||
| 593 | * could not be delivered. It is important that the target process is not | ||
| 594 | * allowed to do any more work in user space. Possible cases for the target | ||
| 595 | * process: | ||
| 596 | * | ||
| 597 | * - It is sleeping and will wake up soon. Store the data in the current task, | ||
| 598 | * the signal will be sent when the current task returns from the next | ||
| 599 | * interrupt. | ||
| 600 | * | ||
| 601 | * - It is running in user context. Store the data in the current task, the | ||
| 602 | * signal will be sent when the current task returns from the next interrupt. | ||
| 603 | * | ||
| 604 | * - It is running in kernel context on this or another cpu and will return to | ||
| 605 | * user context. Store the data in the target task, the signal will be sent | ||
| 606 | * to itself when the target task returns to user space. | ||
| 607 | * | ||
| 608 | * - It is running in kernel context on this cpu and will sleep before | ||
| 609 | * returning to user context. Because this is also the current task, the | ||
| 610 | * signal will not get delivered and the task could sleep indefinitely. | ||
| 611 | * Store the data in the idle task for this cpu, the signal will be sent | ||
| 612 | * after the idle task processes its next interrupt. | ||
| 613 | * | ||
| 614 | * To cover all cases, store the data in the target task, the current task and | ||
| 615 | * the idle task on this cpu. Whatever happens, the signal will be delivered | ||
| 616 | * to the target task before it can do any useful user space work. Multiple | ||
| 617 | * deliveries have no unwanted side effects. | ||
| 618 | * | ||
| 619 | * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts | ||
| 620 | * disabled. It must not take any locks nor use kernel structures or services | ||
| 621 | * that require locks. | ||
| 622 | */ | ||
| 623 | |||
| 624 | /* To ensure that we get the right pid, check its start time. To avoid extra | ||
| 625 | * include files in thread_info.h, convert the task start_time to unsigned long, | ||
| 626 | * giving us a cycle time of > 580 years. | ||
| 627 | */ | ||
| 628 | static inline unsigned long | ||
| 629 | start_time_ul(const struct task_struct *t) | ||
| 630 | { | ||
| 631 | return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; | ||
| 632 | } | ||
| 633 | |||
| 634 | void | ||
| 635 | set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) | ||
| 636 | { | ||
| 637 | struct task_struct *t; | ||
| 638 | unsigned long start_time = 0; | ||
| 639 | int i; | ||
| 640 | |||
| 641 | for (i = 1; i <= 3; ++i) { | ||
| 642 | switch (i) { | ||
| 643 | case 1: | ||
| 644 | t = find_task_by_pid(pid); | ||
| 645 | if (t) | ||
| 646 | start_time = start_time_ul(t); | ||
| 647 | break; | ||
| 648 | case 2: | ||
| 649 | t = current; | ||
| 650 | break; | ||
| 651 | default: | ||
| 652 | t = idle_task(smp_processor_id()); | ||
| 653 | break; | ||
| 654 | } | ||
| 655 | |||
| 656 | if (!t) | ||
| 657 | return; | ||
| 658 | task_thread_info(t)->sigdelayed.signo = signo; | ||
| 659 | task_thread_info(t)->sigdelayed.code = code; | ||
| 660 | task_thread_info(t)->sigdelayed.addr = addr; | ||
| 661 | task_thread_info(t)->sigdelayed.start_time = start_time; | ||
| 662 | task_thread_info(t)->sigdelayed.pid = pid; | ||
| 663 | wmb(); | ||
| 664 | set_tsk_thread_flag(t, TIF_SIGDELAYED); | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | /* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that | ||
| 669 | * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. | ||
| 670 | */ | ||
| 671 | |||
| 672 | void | ||
| 673 | do_sigdelayed(void) | ||
| 674 | { | ||
| 675 | struct siginfo siginfo; | ||
| 676 | pid_t pid; | ||
| 677 | struct task_struct *t; | ||
| 678 | |||
| 679 | clear_thread_flag(TIF_SIGDELAYED); | ||
| 680 | memset(&siginfo, 0, sizeof(siginfo)); | ||
| 681 | siginfo.si_signo = current_thread_info()->sigdelayed.signo; | ||
| 682 | siginfo.si_code = current_thread_info()->sigdelayed.code; | ||
| 683 | siginfo.si_addr = current_thread_info()->sigdelayed.addr; | ||
| 684 | pid = current_thread_info()->sigdelayed.pid; | ||
| 685 | t = find_task_by_pid(pid); | ||
| 686 | if (!t) | ||
| 687 | return; | ||
| 688 | if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) | ||
| 689 | return; | ||
| 690 | force_sig_info(siginfo.si_signo, &siginfo, t); | ||
| 691 | } | ||
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef34a86e..c4b633b36dab 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c | |||
| @@ -70,6 +70,12 @@ | |||
| 70 | #endif | 70 | #endif |
| 71 | 71 | ||
| 72 | #ifdef CONFIG_HOTPLUG_CPU | 72 | #ifdef CONFIG_HOTPLUG_CPU |
| 73 | #ifdef CONFIG_PERMIT_BSP_REMOVE | ||
| 74 | #define bsp_remove_ok 1 | ||
| 75 | #else | ||
| 76 | #define bsp_remove_ok 0 | ||
| 77 | #endif | ||
| 78 | |||
| 73 | /* | 79 | /* |
| 74 | * Store all idle threads, this can be reused instead of creating | 80 | * Store all idle threads, this can be reused instead of creating |
| 75 | * a new thread. Also avoids complicated thread destroy functionality | 81 | * a new thread. Also avoids complicated thread destroy functionality |
| @@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; | |||
| 104 | /* | 110 | /* |
| 105 | * ITC synchronization related stuff: | 111 | * ITC synchronization related stuff: |
| 106 | */ | 112 | */ |
| 107 | #define MASTER 0 | 113 | #define MASTER (0) |
| 108 | #define SLAVE (SMP_CACHE_BYTES/8) | 114 | #define SLAVE (SMP_CACHE_BYTES/8) |
| 109 | 115 | ||
| 110 | #define NUM_ROUNDS 64 /* magic value */ | 116 | #define NUM_ROUNDS 64 /* magic value */ |
| @@ -151,6 +157,27 @@ char __initdata no_int_routing; | |||
| 151 | 157 | ||
| 152 | unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ | 158 | unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ |
| 153 | 159 | ||
| 160 | #ifdef CONFIG_FORCE_CPEI_RETARGET | ||
| 161 | #define CPEI_OVERRIDE_DEFAULT (1) | ||
| 162 | #else | ||
| 163 | #define CPEI_OVERRIDE_DEFAULT (0) | ||
| 164 | #endif | ||
| 165 | |||
| 166 | unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; | ||
| 167 | |||
| 168 | static int __init | ||
| 169 | cmdl_force_cpei(char *str) | ||
| 170 | { | ||
| 171 | int value=0; | ||
| 172 | |||
| 173 | get_option (&str, &value); | ||
| 174 | force_cpei_retarget = value; | ||
| 175 | |||
| 176 | return 1; | ||
| 177 | } | ||
| 178 | |||
| 179 | __setup("force_cpei=", cmdl_force_cpei); | ||
| 180 | |||
| 154 | static int __init | 181 | static int __init |
| 155 | nointroute (char *str) | 182 | nointroute (char *str) |
| 156 | { | 183 | { |
| @@ -161,6 +188,27 @@ nointroute (char *str) | |||
| 161 | 188 | ||
| 162 | __setup("nointroute", nointroute); | 189 | __setup("nointroute", nointroute); |
| 163 | 190 | ||
| 191 | static void fix_b0_for_bsp(void) | ||
| 192 | { | ||
| 193 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 194 | int cpuid; | ||
| 195 | static int fix_bsp_b0 = 1; | ||
| 196 | |||
| 197 | cpuid = smp_processor_id(); | ||
| 198 | |||
| 199 | /* | ||
| 200 | * Cache the b0 value on the first AP that comes up | ||
| 201 | */ | ||
| 202 | if (!(fix_bsp_b0 && cpuid)) | ||
| 203 | return; | ||
| 204 | |||
| 205 | sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; | ||
| 206 | printk ("Fixed BSP b0 value from CPU %d\n", cpuid); | ||
| 207 | |||
| 208 | fix_bsp_b0 = 0; | ||
| 209 | #endif | ||
| 210 | } | ||
| 211 | |||
| 164 | void | 212 | void |
| 165 | sync_master (void *arg) | 213 | sync_master (void *arg) |
| 166 | { | 214 | { |
| @@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) | |||
| 327 | static void __devinit | 375 | static void __devinit |
| 328 | smp_callin (void) | 376 | smp_callin (void) |
| 329 | { | 377 | { |
| 330 | int cpuid, phys_id; | 378 | int cpuid, phys_id, itc_master; |
| 331 | extern void ia64_init_itm(void); | 379 | extern void ia64_init_itm(void); |
| 380 | extern volatile int time_keeper_id; | ||
| 332 | 381 | ||
| 333 | #ifdef CONFIG_PERFMON | 382 | #ifdef CONFIG_PERFMON |
| 334 | extern void pfm_init_percpu(void); | 383 | extern void pfm_init_percpu(void); |
| @@ -336,6 +385,7 @@ smp_callin (void) | |||
| 336 | 385 | ||
| 337 | cpuid = smp_processor_id(); | 386 | cpuid = smp_processor_id(); |
| 338 | phys_id = hard_smp_processor_id(); | 387 | phys_id = hard_smp_processor_id(); |
| 388 | itc_master = time_keeper_id; | ||
| 339 | 389 | ||
| 340 | if (cpu_online(cpuid)) { | 390 | if (cpu_online(cpuid)) { |
| 341 | printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", | 391 | printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", |
| @@ -343,6 +393,8 @@ smp_callin (void) | |||
| 343 | BUG(); | 393 | BUG(); |
| 344 | } | 394 | } |
| 345 | 395 | ||
| 396 | fix_b0_for_bsp(); | ||
| 397 | |||
| 346 | lock_ipi_calllock(); | 398 | lock_ipi_calllock(); |
| 347 | cpu_set(cpuid, cpu_online_map); | 399 | cpu_set(cpuid, cpu_online_map); |
| 348 | unlock_ipi_calllock(); | 400 | unlock_ipi_calllock(); |
| @@ -365,8 +417,8 @@ smp_callin (void) | |||
| 365 | * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls | 417 | * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls |
| 366 | * local_bh_enable(), which bugs out if irqs are not enabled... | 418 | * local_bh_enable(), which bugs out if irqs are not enabled... |
| 367 | */ | 419 | */ |
| 368 | Dprintk("Going to syncup ITC with BP.\n"); | 420 | Dprintk("Going to syncup ITC with ITC Master.\n"); |
| 369 | ia64_sync_itc(0); | 421 | ia64_sync_itc(itc_master); |
| 370 | } | 422 | } |
| 371 | 423 | ||
| 372 | /* | 424 | /* |
| @@ -635,6 +687,47 @@ remove_siblinginfo(int cpu) | |||
| 635 | } | 687 | } |
| 636 | 688 | ||
| 637 | extern void fixup_irqs(void); | 689 | extern void fixup_irqs(void); |
| 690 | |||
| 691 | int migrate_platform_irqs(unsigned int cpu) | ||
| 692 | { | ||
| 693 | int new_cpei_cpu; | ||
| 694 | irq_desc_t *desc = NULL; | ||
| 695 | cpumask_t mask; | ||
| 696 | int retval = 0; | ||
| 697 | |||
| 698 | /* | ||
| 699 | * don't permit the CPEI target to be removed. | ||
| 700 | */ | ||
| 701 | if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { | ||
| 702 | printk ("CPU (%d) is CPEI Target\n", cpu); | ||
| 703 | if (can_cpei_retarget()) { | ||
| 704 | /* | ||
| 705 | * Now re-target the CPEI to a different processor | ||
| 706 | */ | ||
| 707 | new_cpei_cpu = any_online_cpu(cpu_online_map); | ||
| 708 | mask = cpumask_of_cpu(new_cpei_cpu); | ||
| 709 | set_cpei_target_cpu(new_cpei_cpu); | ||
| 710 | desc = irq_descp(ia64_cpe_irq); | ||
| 711 | /* | ||
| 712 | * Switch now, immediately; we need to fake this interrupt as we do | ||
| 713 | * other interrupts, but we need to study CPEI behaviour with | ||
| 714 | * polling before making changes. | ||
| 715 | */ | ||
| 716 | if (desc) { | ||
| 717 | desc->handler->disable(ia64_cpe_irq); | ||
| 718 | desc->handler->set_affinity(ia64_cpe_irq, mask); | ||
| 719 | desc->handler->enable(ia64_cpe_irq); | ||
| 720 | printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); | ||
| 721 | } | ||
| 722 | } | ||
| 723 | if (!desc) { | ||
| 724 | printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); | ||
| 725 | retval = -EBUSY; | ||
| 726 | } | ||
| 727 | } | ||
| 728 | return retval; | ||
| 729 | } | ||
| 730 | |||
| 638 | /* must be called with cpucontrol mutex held */ | 731 | /* must be called with cpucontrol mutex held */ |
| 639 | int __cpu_disable(void) | 732 | int __cpu_disable(void) |
| 640 | { | 733 | { |
| @@ -643,8 +736,17 @@ int __cpu_disable(void) | |||
| 643 | /* | 736 | /* |
| 644 | * don't permit removing the boot processor for now | 737 | * don't permit removing the boot processor for now |
| 645 | */ | 738 | */ |
| 646 | if (cpu == 0) | 739 | if (cpu == 0 && !bsp_remove_ok) { |
| 647 | return -EBUSY; | 740 | printk ("Your platform does not support removal of BSP\n"); |
| 741 | return (-EBUSY); | ||
| 742 | } | ||
| 743 | |||
| 744 | cpu_clear(cpu, cpu_online_map); | ||
| 745 | |||
| 746 | if (migrate_platform_irqs(cpu)) { | ||
| 747 | cpu_set(cpu, cpu_online_map); | ||
| 748 | return (-EBUSY); | ||
| 749 | } | ||
| 648 | 750 | ||
| 649 | remove_siblinginfo(cpu); | 751 | remove_siblinginfo(cpu); |
| 650 | cpu_clear(cpu, cpu_online_map); | 752 | cpu_clear(cpu, cpu_online_map); |
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e15b2e..ac167436e936 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c | |||
| @@ -32,7 +32,7 @@ | |||
| 32 | 32 | ||
| 33 | extern unsigned long wall_jiffies; | 33 | extern unsigned long wall_jiffies; |
| 34 | 34 | ||
| 35 | #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ | 35 | volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ |
| 36 | 36 | ||
| 37 | #ifdef CONFIG_IA64_DEBUG_IRQ | 37 | #ifdef CONFIG_IA64_DEBUG_IRQ |
| 38 | 38 | ||
| @@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) | |||
| 71 | 71 | ||
| 72 | new_itm += local_cpu_data->itm_delta; | 72 | new_itm += local_cpu_data->itm_delta; |
| 73 | 73 | ||
| 74 | if (smp_processor_id() == TIME_KEEPER_ID) { | 74 | if (smp_processor_id() == time_keeper_id) { |
| 75 | /* | 75 | /* |
| 76 | * Here we are in the timer irq handler. We have irqs locally | 76 | * Here we are in the timer irq handler. We have irqs locally |
| 77 | * disabled, but we don't know if the timer_bh is running on | 77 | * disabled, but we don't know if the timer_bh is running on |
| @@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = { | |||
| 236 | .name = "timer" | 236 | .name = "timer" |
| 237 | }; | 237 | }; |
| 238 | 238 | ||
| 239 | void __devinit ia64_disable_timer(void) | ||
| 240 | { | ||
| 241 | ia64_set_itv(1 << 16); | ||
| 242 | } | ||
| 243 | |||
| 239 | void __init | 244 | void __init |
| 240 | time_init (void) | 245 | time_init (void) |
| 241 | { | 246 | { |
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea19fa67..3b6fd798c4d6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c | |||
| @@ -36,7 +36,7 @@ int arch_register_cpu(int num) | |||
| 36 | parent = &sysfs_nodes[cpu_to_node(num)]; | 36 | parent = &sysfs_nodes[cpu_to_node(num)]; |
| 37 | #endif /* CONFIG_NUMA */ | 37 | #endif /* CONFIG_NUMA */ |
| 38 | 38 | ||
| 39 | #ifdef CONFIG_ACPI | 39 | #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) |
| 40 | /* | 40 | /* |
| 41 | * If CPEI cannot be re-targetted, and this is | 41 | * If CPEI cannot be re-targetted, and this is |
| 42 | * CPEI target, then dont create the control file | 42 | * CPEI target, then dont create the control file |
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index acaaec4e4681..9855ba318094 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c | |||
| @@ -181,13 +181,15 @@ per_cpu_init (void) | |||
| 181 | { | 181 | { |
| 182 | void *cpu_data; | 182 | void *cpu_data; |
| 183 | int cpu; | 183 | int cpu; |
| 184 | static int first_time=1; | ||
| 184 | 185 | ||
| 185 | /* | 186 | /* |
| 186 | * get_free_pages() cannot be used before cpu_init() done. BSP | 187 | * get_free_pages() cannot be used before cpu_init() done. BSP |
| 187 | * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls | 188 | * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls |
| 188 | * get_zeroed_page(). | 189 | * get_zeroed_page(). |
| 189 | */ | 190 | */ |
| 190 | if (smp_processor_id() == 0) { | 191 | if (first_time) { |
| 192 | first_time=0; | ||
| 191 | cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, | 193 | cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, |
| 192 | PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | 194 | PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); |
| 193 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 195 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c87d6d1d5813..573d5cc63e2b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c | |||
| @@ -528,12 +528,17 @@ void __init find_memory(void) | |||
| 528 | void *per_cpu_init(void) | 528 | void *per_cpu_init(void) |
| 529 | { | 529 | { |
| 530 | int cpu; | 530 | int cpu; |
| 531 | static int first_time = 1; | ||
| 532 | |||
| 531 | 533 | ||
| 532 | if (smp_processor_id() != 0) | 534 | if (smp_processor_id() != 0) |
| 533 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; | 535 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; |
| 534 | 536 | ||
| 535 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 537 | if (first_time) { |
| 536 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; | 538 | first_time = 0; |
| 539 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
| 540 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; | ||
| 541 | } | ||
| 537 | 542 | ||
| 538 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; | 543 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; |
| 539 | } | 544 | } |
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 2d13889d0a99..9dbc7dadd165 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c | |||
| @@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) | |||
| 68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } | 68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } |
| 69 | 69 | ||
| 70 | /* | 70 | /* |
| 71 | * This function checks for proper alignment of input addr and len parameters. | 71 | * Don't actually need to do any preparation, but need to make sure |
| 72 | * the address is in the right region. | ||
| 72 | */ | 73 | */ |
| 73 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | 74 | int prepare_hugepage_range(unsigned long addr, unsigned long len) |
| 74 | { | 75 | { |
| 75 | if (len & ~HPAGE_MASK) | 76 | if (len & ~HPAGE_MASK) |
| 76 | return -EINVAL; | 77 | return -EINVAL; |
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b38b6d213c15..08d94e6bfa18 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c | |||
| @@ -197,7 +197,7 @@ free_initmem (void) | |||
| 197 | eaddr = (unsigned long) ia64_imva(__init_end); | 197 | eaddr = (unsigned long) ia64_imva(__init_end); |
| 198 | while (addr < eaddr) { | 198 | while (addr < eaddr) { |
| 199 | ClearPageReserved(virt_to_page(addr)); | 199 | ClearPageReserved(virt_to_page(addr)); |
| 200 | set_page_count(virt_to_page(addr), 1); | 200 | init_page_count(virt_to_page(addr)); |
| 201 | free_page(addr); | 201 | free_page(addr); |
| 202 | ++totalram_pages; | 202 | ++totalram_pages; |
| 203 | addr += PAGE_SIZE; | 203 | addr += PAGE_SIZE; |
| @@ -252,7 +252,7 @@ free_initrd_mem (unsigned long start, unsigned long end) | |||
| 252 | continue; | 252 | continue; |
| 253 | page = virt_to_page(start); | 253 | page = virt_to_page(start); |
| 254 | ClearPageReserved(page); | 254 | ClearPageReserved(page); |
| 255 | set_page_count(page, 1); | 255 | init_page_count(page); |
| 256 | free_page(start); | 256 | free_page(start); |
| 257 | ++totalram_pages; | 257 | ++totalram_pages; |
| 258 | } | 258 | } |
| @@ -640,7 +640,7 @@ mem_init (void) | |||
| 640 | void online_page(struct page *page) | 640 | void online_page(struct page *page) |
| 641 | { | 641 | { |
| 642 | ClearPageReserved(page); | 642 | ClearPageReserved(page); |
| 643 | set_page_count(page, 1); | 643 | init_page_count(page); |
| 644 | __free_page(page); | 644 | __free_page(page); |
| 645 | totalram_pages++; | 645 | totalram_pages++; |
| 646 | num_physpages++; | 646 | num_physpages++; |
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 3e9b4eea7418..ab9c48c88012 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile | |||
| @@ -10,7 +10,8 @@ | |||
| 10 | CPPFLAGS += -I$(srctree)/arch/ia64/sn/include | 10 | CPPFLAGS += -I$(srctree)/arch/ia64/sn/include |
| 11 | 11 | ||
| 12 | obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ | 12 | obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ |
| 13 | huberror.o io_init.o iomv.o klconflib.o sn2/ | 13 | huberror.o io_init.o iomv.o klconflib.o pio_phys.o \ |
| 14 | sn2/ | ||
| 14 | obj-$(CONFIG_IA64_GENERIC) += machvec.o | 15 | obj-$(CONFIG_IA64_GENERIC) += machvec.o |
| 15 | obj-$(CONFIG_SGI_TIOCX) += tiocx.o | 16 | obj-$(CONFIG_SGI_TIOCX) += tiocx.o |
| 16 | obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o | 17 | obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o |
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 000000000000..3c7d48d6ecb8 --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | /* | ||
| 2 | * This file is subject to the terms and conditions of the GNU General Public | ||
| 3 | * License. See the file "COPYING" in the main directory of this archive | ||
| 4 | * for more details. | ||
| 5 | * | ||
| 6 | * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. | ||
| 7 | * | ||
| 8 | * This file contains macros used to access MMR registers via | ||
| 9 | * uncached physical addresses. | ||
| 10 | * pio_phys_read_mmr - read an MMR | ||
| 11 | * pio_phys_write_mmr - write an MMR | ||
| 12 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 | ||
| 13 | * Second MMR will be skipped if address is NULL | ||
| 14 | * | ||
| 15 | * Addresses passed to these routines should be uncached physical addresses | ||
| 16 | * i.e., 0x80000.... | ||
| 17 | */ | ||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | #include <asm/asmmacro.h> | ||
| 22 | #include <asm/page.h> | ||
| 23 | |||
| 24 | GLOBAL_ENTRY(pio_phys_read_mmr) | ||
| 25 | .prologue | ||
| 26 | .regstk 1,0,0,0 | ||
| 27 | .body | ||
| 28 | mov r2=psr | ||
| 29 | rsm psr.i | psr.dt | ||
| 30 | ;; | ||
| 31 | srlz.d | ||
| 32 | ld8.acq r8=[r32] | ||
| 33 | ;; | ||
| 34 | mov psr.l=r2;; | ||
| 35 | srlz.d | ||
| 36 | br.ret.sptk.many rp | ||
| 37 | END(pio_phys_read_mmr) | ||
| 38 | |||
| 39 | GLOBAL_ENTRY(pio_phys_write_mmr) | ||
| 40 | .prologue | ||
| 41 | .regstk 2,0,0,0 | ||
| 42 | .body | ||
| 43 | mov r2=psr | ||
| 44 | rsm psr.i | psr.dt | ||
| 45 | ;; | ||
| 46 | srlz.d | ||
| 47 | st8.rel [r32]=r33 | ||
| 48 | ;; | ||
| 49 | mov psr.l=r2;; | ||
| 50 | srlz.d | ||
| 51 | br.ret.sptk.many rp | ||
| 52 | END(pio_phys_write_mmr) | ||
| 53 | |||
| 54 | GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) | ||
| 55 | .prologue | ||
| 56 | .regstk 4,0,0,0 | ||
| 57 | .body | ||
| 58 | mov r2=psr | ||
| 59 | cmp.ne p9,p0=r34,r0; | ||
| 60 | rsm psr.i | psr.dt | psr.ic | ||
| 61 | ;; | ||
| 62 | srlz.d | ||
| 63 | st8.rel [r32]=r33 | ||
| 64 | (p9) st8.rel [r34]=r35 | ||
| 65 | ;; | ||
| 66 | mov psr.l=r2;; | ||
| 67 | srlz.d | ||
| 68 | br.ret.sptk.many rp | ||
| 69 | END(pio_atomic_phys_write_mmrs) | ||
| 70 | |||
| 71 | |||
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 5b84836c2171..8b6d5c844708 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
| 4 | * for more details. | 4 | * for more details. |
| 5 | * | 5 | * |
| 6 | * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. | 6 | * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
| @@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p) | |||
| 498 | * for sn. | 498 | * for sn. |
| 499 | */ | 499 | */ |
| 500 | pm_power_off = ia64_sn_power_down; | 500 | pm_power_off = ia64_sn_power_down; |
| 501 | current->thread.flags |= IA64_THREAD_MIGRATION; | ||
| 501 | } | 502 | } |
| 502 | 503 | ||
| 503 | /** | 504 | /** |
| @@ -660,7 +661,8 @@ void __init sn_cpu_init(void) | |||
| 660 | SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; | 661 | SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; |
| 661 | u64 *pio; | 662 | u64 *pio; |
| 662 | pio = is_shub1() ? pio1 : pio2; | 663 | pio = is_shub1() ? pio1 : pio2; |
| 663 | pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); | 664 | pda->pio_write_status_addr = |
| 665 | (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); | ||
| 664 | pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; | 666 | pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; |
| 665 | } | 667 | } |
| 666 | 668 | ||
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index b2e1e746b47f..d9d306c79f2d 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
| @@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void) | |||
| 93 | return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; | 93 | return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | /** | ||
| 97 | * sn_migrate - SN-specific task migration actions | ||
| 98 | * @task: Task being migrated to new CPU | ||
| 99 | * | ||
| 100 | * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. | ||
| 101 | * Context switching user threads which have memory-mapped MMIO may cause | ||
| 102 | * PIOs to issue from separate CPUs, thus the PIO writes must be drained | ||
| 103 | * from the previous CPU's Shub before execution resumes on the new CPU. | ||
| 104 | */ | ||
| 105 | void sn_migrate(struct task_struct *task) | ||
| 106 | { | ||
| 107 | pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); | ||
| 108 | volatile unsigned long *adr = last_pda->pio_write_status_addr; | ||
| 109 | unsigned long val = last_pda->pio_write_status_val; | ||
| 110 | |||
| 111 | /* Drain PIO writes from old CPU's Shub */ | ||
| 112 | while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) | ||
| 113 | != val)) | ||
| 114 | cpu_relax(); | ||
| 115 | } | ||
| 116 | |||
| 96 | void sn_tlb_migrate_finish(struct mm_struct *mm) | 117 | void sn_tlb_migrate_finish(struct mm_struct *mm) |
| 97 | { | 118 | { |
| 98 | /* flush_tlb_mm is inefficient if more than 1 users of mm */ | 119 | /* flush_tlb_mm is inefficient if more than 1 users of mm */ |
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index cdf6856ce089..d0abddd9ffe6 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
| 22 | #include <linux/cache.h> | 22 | #include <linux/cache.h> |
| 23 | #include <linux/interrupt.h> | 23 | #include <linux/interrupt.h> |
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
| 26 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
| 27 | #include <asm/sn/bte.h> | 26 | #include <asm/sn/bte.h> |
| @@ -30,6 +29,31 @@ | |||
| 30 | 29 | ||
| 31 | 30 | ||
| 32 | /* | 31 | /* |
| 32 | * Guarantee that the kzalloc'd memory is cacheline aligned. | ||
| 33 | */ | ||
| 34 | static void * | ||
| 35 | xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
| 36 | { | ||
| 37 | /* see if kzalloc will give us cacheline aligned memory by default */ | ||
| 38 | *base = kzalloc(size, flags); | ||
| 39 | if (*base == NULL) { | ||
| 40 | return NULL; | ||
| 41 | } | ||
| 42 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
| 43 | return *base; | ||
| 44 | } | ||
| 45 | kfree(*base); | ||
| 46 | |||
| 47 | /* nope, we'll have to do it ourselves */ | ||
| 48 | *base = kzalloc(size + L1_CACHE_BYTES, flags); | ||
| 49 | if (*base == NULL) { | ||
| 50 | return NULL; | ||
| 51 | } | ||
| 52 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
| 53 | } | ||
| 54 | |||
| 55 | |||
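A note on the pattern above: callers use the aligned pointer, but the raw kzalloc() pointer is reported back through *base and is what must eventually be passed to kfree(), as the error paths below do with the *_base fields. A rough user-space sketch of the same idea (illustrative only; calloc() stands in for kzalloc(), and a 128-byte cache line is assumed):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define L1_CACHE_BYTES 128UL
#define L1_CACHE_ALIGN(x) (((x) + L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1))

/* always over-allocates, unlike the kernel helper above which tries a
 * plain allocation first and only falls back when it is misaligned */
static void *zalloc_cacheline_aligned(size_t size, void **base)
{
	*base = calloc(1, size + L1_CACHE_BYTES);
	if (*base == NULL)
		return NULL;
	return (void *)L1_CACHE_ALIGN((uintptr_t)*base);
}

int main(void)
{
	void *gps_base;
	void *gps = zalloc_cacheline_aligned(4096, &gps_base);

	if (gps == NULL)
		return 1;
	printf("aligned %p from raw %p\n", gps, gps_base);
	free(gps_base);		/* free the raw pointer, never the aligned one */
	return 0;
}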
| 56 | /* | ||
| 33 | * Set up the initial values for the XPartition Communication channels. | 57 | * Set up the initial values for the XPartition Communication channels. |
| 34 | */ | 58 | */ |
| 35 | static void | 59 | static void |
| @@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
| 93 | * Allocate all of the channel structures as a contiguous chunk of | 117 | * Allocate all of the channel structures as a contiguous chunk of |
| 94 | * memory. | 118 | * memory. |
| 95 | */ | 119 | */ |
| 96 | part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, | 120 | part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, |
| 97 | GFP_KERNEL); | 121 | GFP_KERNEL); |
| 98 | if (part->channels == NULL) { | 122 | if (part->channels == NULL) { |
| 99 | dev_err(xpc_chan, "can't get memory for channels\n"); | 123 | dev_err(xpc_chan, "can't get memory for channels\n"); |
| 100 | return xpcNoMemory; | 124 | return xpcNoMemory; |
| 101 | } | 125 | } |
| 102 | memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS); | ||
| 103 | 126 | ||
| 104 | part->nchannels = XPC_NCHANNELS; | 127 | part->nchannels = XPC_NCHANNELS; |
| 105 | 128 | ||
| 106 | 129 | ||
| 107 | /* allocate all the required GET/PUT values */ | 130 | /* allocate all the required GET/PUT values */ |
| 108 | 131 | ||
| 109 | part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, | 132 | part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, |
| 110 | GFP_KERNEL, &part->local_GPs_base); | 133 | GFP_KERNEL, &part->local_GPs_base); |
| 111 | if (part->local_GPs == NULL) { | 134 | if (part->local_GPs == NULL) { |
| 112 | kfree(part->channels); | 135 | kfree(part->channels); |
| @@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
| 115 | "values\n"); | 138 | "values\n"); |
| 116 | return xpcNoMemory; | 139 | return xpcNoMemory; |
| 117 | } | 140 | } |
| 118 | memset(part->local_GPs, 0, XPC_GP_SIZE); | ||
| 119 | 141 | ||
| 120 | part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, | 142 | part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, |
| 121 | GFP_KERNEL, &part->remote_GPs_base); | 143 | GFP_KERNEL, &part->remote_GPs_base); |
| 122 | if (part->remote_GPs == NULL) { | 144 | if (part->remote_GPs == NULL) { |
| 123 | kfree(part->channels); | ||
| 124 | part->channels = NULL; | ||
| 125 | kfree(part->local_GPs_base); | ||
| 126 | part->local_GPs = NULL; | ||
| 127 | dev_err(xpc_chan, "can't get memory for remote get/put " | 145 | dev_err(xpc_chan, "can't get memory for remote get/put " |
| 128 | "values\n"); | 146 | "values\n"); |
| 147 | kfree(part->local_GPs_base); | ||
| 148 | part->local_GPs = NULL; | ||
| 149 | kfree(part->channels); | ||
| 150 | part->channels = NULL; | ||
| 129 | return xpcNoMemory; | 151 | return xpcNoMemory; |
| 130 | } | 152 | } |
| 131 | memset(part->remote_GPs, 0, XPC_GP_SIZE); | ||
| 132 | 153 | ||
| 133 | 154 | ||
| 134 | /* allocate all the required open and close args */ | 155 | /* allocate all the required open and close args */ |
| 135 | 156 | ||
| 136 | part->local_openclose_args = xpc_kmalloc_cacheline_aligned( | 157 | part->local_openclose_args = xpc_kzalloc_cacheline_aligned( |
| 137 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, | 158 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, |
| 138 | &part->local_openclose_args_base); | 159 | &part->local_openclose_args_base); |
| 139 | if (part->local_openclose_args == NULL) { | 160 | if (part->local_openclose_args == NULL) { |
| 140 | kfree(part->channels); | 161 | dev_err(xpc_chan, "can't get memory for local connect args\n"); |
| 141 | part->channels = NULL; | ||
| 142 | kfree(part->local_GPs_base); | ||
| 143 | part->local_GPs = NULL; | ||
| 144 | kfree(part->remote_GPs_base); | 162 | kfree(part->remote_GPs_base); |
| 145 | part->remote_GPs = NULL; | 163 | part->remote_GPs = NULL; |
| 146 | dev_err(xpc_chan, "can't get memory for local connect args\n"); | 164 | kfree(part->local_GPs_base); |
| 165 | part->local_GPs = NULL; | ||
| 166 | kfree(part->channels); | ||
| 167 | part->channels = NULL; | ||
| 147 | return xpcNoMemory; | 168 | return xpcNoMemory; |
| 148 | } | 169 | } |
| 149 | memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); | ||
| 150 | 170 | ||
| 151 | part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( | 171 | part->remote_openclose_args = xpc_kzalloc_cacheline_aligned( |
| 152 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, | 172 | XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, |
| 153 | &part->remote_openclose_args_base); | 173 | &part->remote_openclose_args_base); |
| 154 | if (part->remote_openclose_args == NULL) { | 174 | if (part->remote_openclose_args == NULL) { |
| 155 | kfree(part->channels); | 175 | dev_err(xpc_chan, "can't get memory for remote connect args\n"); |
| 156 | part->channels = NULL; | ||
| 157 | kfree(part->local_GPs_base); | ||
| 158 | part->local_GPs = NULL; | ||
| 159 | kfree(part->remote_GPs_base); | ||
| 160 | part->remote_GPs = NULL; | ||
| 161 | kfree(part->local_openclose_args_base); | 176 | kfree(part->local_openclose_args_base); |
| 162 | part->local_openclose_args = NULL; | 177 | part->local_openclose_args = NULL; |
| 163 | dev_err(xpc_chan, "can't get memory for remote connect args\n"); | 178 | kfree(part->remote_GPs_base); |
| 179 | part->remote_GPs = NULL; | ||
| 180 | kfree(part->local_GPs_base); | ||
| 181 | part->local_GPs = NULL; | ||
| 182 | kfree(part->channels); | ||
| 183 | part->channels = NULL; | ||
| 164 | return xpcNoMemory; | 184 | return xpcNoMemory; |
| 165 | } | 185 | } |
| 166 | memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); | ||
| 167 | 186 | ||
| 168 | 187 | ||
| 169 | xpc_initialize_channels(part, partid); | 188 | xpc_initialize_channels(part, partid); |
| @@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part) | |||
| 186 | ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, | 205 | ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, |
| 187 | part->IPI_owner, (void *) (u64) partid); | 206 | part->IPI_owner, (void *) (u64) partid); |
| 188 | if (ret != 0) { | 207 | if (ret != 0) { |
| 189 | kfree(part->channels); | ||
| 190 | part->channels = NULL; | ||
| 191 | kfree(part->local_GPs_base); | ||
| 192 | part->local_GPs = NULL; | ||
| 193 | kfree(part->remote_GPs_base); | ||
| 194 | part->remote_GPs = NULL; | ||
| 195 | kfree(part->local_openclose_args_base); | ||
| 196 | part->local_openclose_args = NULL; | ||
| 197 | kfree(part->remote_openclose_args_base); | ||
| 198 | part->remote_openclose_args = NULL; | ||
| 199 | dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " | 208 | dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " |
| 200 | "errno=%d\n", -ret); | 209 | "errno=%d\n", -ret); |
| 210 | kfree(part->remote_openclose_args_base); | ||
| 211 | part->remote_openclose_args = NULL; | ||
| 212 | kfree(part->local_openclose_args_base); | ||
| 213 | part->local_openclose_args = NULL; | ||
| 214 | kfree(part->remote_GPs_base); | ||
| 215 | part->remote_GPs = NULL; | ||
| 216 | kfree(part->local_GPs_base); | ||
| 217 | part->local_GPs = NULL; | ||
| 218 | kfree(part->channels); | ||
| 219 | part->channels = NULL; | ||
| 201 | return xpcLackOfResources; | 220 | return xpcLackOfResources; |
| 202 | } | 221 | } |
| 203 | 222 | ||
| @@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch) | |||
| 446 | for (nentries = ch->local_nentries; nentries > 0; nentries--) { | 465 | for (nentries = ch->local_nentries; nentries > 0; nentries--) { |
| 447 | 466 | ||
| 448 | nbytes = nentries * ch->msg_size; | 467 | nbytes = nentries * ch->msg_size; |
| 449 | ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, | 468 | ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, |
| 450 | GFP_KERNEL, | 469 | GFP_KERNEL, |
| 451 | &ch->local_msgqueue_base); | 470 | &ch->local_msgqueue_base); |
| 452 | if (ch->local_msgqueue == NULL) { | 471 | if (ch->local_msgqueue == NULL) { |
| 453 | continue; | 472 | continue; |
| 454 | } | 473 | } |
| 455 | memset(ch->local_msgqueue, 0, nbytes); | ||
| 456 | 474 | ||
| 457 | nbytes = nentries * sizeof(struct xpc_notify); | 475 | nbytes = nentries * sizeof(struct xpc_notify); |
| 458 | ch->notify_queue = kmalloc(nbytes, GFP_KERNEL); | 476 | ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); |
| 459 | if (ch->notify_queue == NULL) { | 477 | if (ch->notify_queue == NULL) { |
| 460 | kfree(ch->local_msgqueue_base); | 478 | kfree(ch->local_msgqueue_base); |
| 461 | ch->local_msgqueue = NULL; | 479 | ch->local_msgqueue = NULL; |
| 462 | continue; | 480 | continue; |
| 463 | } | 481 | } |
| 464 | memset(ch->notify_queue, 0, nbytes); | ||
| 465 | 482 | ||
| 466 | spin_lock_irqsave(&ch->lock, irq_flags); | 483 | spin_lock_irqsave(&ch->lock, irq_flags); |
| 467 | if (nentries < ch->local_nentries) { | 484 | if (nentries < ch->local_nentries) { |
| @@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch) | |||
| 501 | for (nentries = ch->remote_nentries; nentries > 0; nentries--) { | 518 | for (nentries = ch->remote_nentries; nentries > 0; nentries--) { |
| 502 | 519 | ||
| 503 | nbytes = nentries * ch->msg_size; | 520 | nbytes = nentries * ch->msg_size; |
| 504 | ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, | 521 | ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, |
| 505 | GFP_KERNEL, | 522 | GFP_KERNEL, |
| 506 | &ch->remote_msgqueue_base); | 523 | &ch->remote_msgqueue_base); |
| 507 | if (ch->remote_msgqueue == NULL) { | 524 | if (ch->remote_msgqueue == NULL) { |
| 508 | continue; | 525 | continue; |
| 509 | } | 526 | } |
| 510 | memset(ch->remote_msgqueue, 0, nbytes); | ||
| 511 | 527 | ||
| 512 | spin_lock_irqsave(&ch->lock, irq_flags); | 528 | spin_lock_irqsave(&ch->lock, irq_flags); |
| 513 | if (nentries < ch->remote_nentries) { | 529 | if (nentries < ch->remote_nentries) { |
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 8cbf16432570..99b123a6421a 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c | |||
| @@ -52,7 +52,6 @@ | |||
| 52 | #include <linux/syscalls.h> | 52 | #include <linux/syscalls.h> |
| 53 | #include <linux/cache.h> | 53 | #include <linux/cache.h> |
| 54 | #include <linux/interrupt.h> | 54 | #include <linux/interrupt.h> |
| 55 | #include <linux/slab.h> | ||
| 56 | #include <linux/delay.h> | 55 | #include <linux/delay.h> |
| 57 | #include <linux/reboot.h> | 56 | #include <linux/reboot.h> |
| 58 | #include <linux/completion.h> | 57 | #include <linux/completion.h> |
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 88a730e6cfdb..94211429fd0c 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c | |||
| @@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE + | |||
| 81 | 81 | ||
| 82 | 82 | ||
| 83 | /* | 83 | /* |
| 84 | * Guarantee that the kmalloc'd memory is cacheline aligned. | ||
| 85 | */ | ||
| 86 | static void * | ||
| 87 | xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
| 88 | { | ||
| 89 | /* see if kmalloc will give us cacheline aligned memory by default */ | ||
| 90 | *base = kmalloc(size, flags); | ||
| 91 | if (*base == NULL) { | ||
| 92 | return NULL; | ||
| 93 | } | ||
| 94 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
| 95 | return *base; | ||
| 96 | } | ||
| 97 | kfree(*base); | ||
| 98 | |||
| 99 | /* nope, we'll have to do it ourselves */ | ||
| 100 | *base = kmalloc(size + L1_CACHE_BYTES, flags); | ||
| 101 | if (*base == NULL) { | ||
| 102 | return NULL; | ||
| 103 | } | ||
| 104 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
| 105 | } | ||
| 106 | |||
| 107 | |||
| 108 | /* | ||
| 84 | * Given a nasid, get the physical address of the partition's reserved page | 109 | * Given a nasid, get the physical address of the partition's reserved page |
| 85 | * for that nasid. This function returns 0 on any error. | 110 | * for that nasid. This function returns 0 on any error. |
| 86 | */ | 111 | */ |
| @@ -1038,13 +1063,12 @@ xpc_discovery(void) | |||
| 1038 | remote_vars = (struct xpc_vars *) remote_rp; | 1063 | remote_vars = (struct xpc_vars *) remote_rp; |
| 1039 | 1064 | ||
| 1040 | 1065 | ||
| 1041 | discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words, | 1066 | discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, |
| 1042 | GFP_KERNEL); | 1067 | GFP_KERNEL); |
| 1043 | if (discovered_nasids == NULL) { | 1068 | if (discovered_nasids == NULL) { |
| 1044 | kfree(remote_rp_base); | 1069 | kfree(remote_rp_base); |
| 1045 | return; | 1070 | return; |
| 1046 | } | 1071 | } |
| 1047 | memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words); | ||
| 1048 | 1072 | ||
| 1049 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; | 1073 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; |
| 1050 | 1074 | ||
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index e52831ed93eb..fa073cc4b565 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c | |||
| @@ -15,6 +15,124 @@ | |||
| 15 | #include <asm/sn/pcidev.h> | 15 | #include <asm/sn/pcidev.h> |
| 16 | #include <asm/sn/pcibus_provider_defs.h> | 16 | #include <asm/sn/pcibus_provider_defs.h> |
| 17 | #include <asm/sn/tioce_provider.h> | 17 | #include <asm/sn/tioce_provider.h> |
| 18 | #include <asm/sn/sn2/sn_hwperf.h> | ||
| 19 | |||
| 20 | /* | ||
| 21 | * 1/26/2006 | ||
| 22 | * | ||
| 23 | * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe | ||
| 24 | * (taken from the above PV) before and after accessing tioce internal MMR's | ||
| 25 | * to avoid tioce lockups. | ||
| 26 | * | ||
| 27 | * The recipe as taken from the PV: | ||
| 28 | * | ||
| 29 | * if(mmr address < 0x45000) { | ||
| 30 | * if(mmr address == 0 or 0x80) | ||
| 31 | * mmr wrt or read address 0xc0 | ||
| 32 | * else if(mmr address == 0x148 or 0x200) | ||
| 33 | * mmr wrt or read address 0x28 | ||
| 34 | * else | ||
| 35 | * mmr wrt or read address 0x158 | ||
| 36 | * | ||
| 37 | * do desired mmr access (rd or wrt) | ||
| 38 | * | ||
| 39 | * if(mmr address == 0x100) | ||
| 40 | * mmr wrt or read address 0x38 | ||
| 41 | * mmr wrt or read address 0xb050 | ||
| 42 | * } else | ||
| 43 | * do desired mmr access | ||
| 44 | * | ||
| 45 | * According to hw, we can use reads instead of writes to the above addresses | ||
| 46 | * | ||
| 47 | * Note this WAR can only be used for accessing internal MMR's in the | ||
| 48 | * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the | ||
| 49 | * "Local CE Registers and Memories" and "PCI Compatible Config Space" address | ||
| 50 | * spaces from table 2-1 of the "CE Programmer's Reference Overview" document. | ||
| 51 | * | ||
| 52 | * All registers defined in struct tioce will meet that criteria. | ||
| 53 | */ | ||
| 54 | |||
| 55 | static void inline | ||
| 56 | tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) | ||
| 57 | { | ||
| 58 | u64 mmr_base; | ||
| 59 | u64 mmr_offset; | ||
| 60 | |||
| 61 | if (kern->ce_common->ce_rev != TIOCE_REV_A) | ||
| 62 | return; | ||
| 63 | |||
| 64 | mmr_base = kern->ce_common->ce_pcibus.bs_base; | ||
| 65 | mmr_offset = (u64)mmr_addr - mmr_base; | ||
| 66 | |||
| 67 | if (mmr_offset < 0x45000) { | ||
| 68 | u64 mmr_war_offset; | ||
| 69 | |||
| 70 | if (mmr_offset == 0 || mmr_offset == 0x80) | ||
| 71 | mmr_war_offset = 0xc0; | ||
| 72 | else if (mmr_offset == 0x148 || mmr_offset == 0x200) | ||
| 73 | mmr_war_offset = 0x28; | ||
| 74 | else | ||
| 75 | mmr_war_offset = 0x158; | ||
| 76 | |||
| 77 | readq_relaxed((void *)(mmr_base + mmr_war_offset)); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | static void inline | ||
| 82 | tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) | ||
| 83 | { | ||
| 84 | u64 mmr_base; | ||
| 85 | u64 mmr_offset; | ||
| 86 | |||
| 87 | if (kern->ce_common->ce_rev != TIOCE_REV_A) | ||
| 88 | return; | ||
| 89 | |||
| 90 | mmr_base = kern->ce_common->ce_pcibus.bs_base; | ||
| 91 | mmr_offset = (u64)mmr_addr - mmr_base; | ||
| 92 | |||
| 93 | if (mmr_offset < 0x45000) { | ||
| 94 | if (mmr_offset == 0x100) | ||
| 95 | readq_relaxed((void *)(mmr_base + 0x38)); | ||
| 96 | readq_relaxed((void *)(mmr_base + 0xb050)); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | /* load mmr contents into a variable */ | ||
| 101 | #define tioce_mmr_load(kern, mmrp, varp) do {\ | ||
| 102 | tioce_mmr_war_pre(kern, mmrp); \ | ||
| 103 | *(varp) = readq_relaxed(mmrp); \ | ||
| 104 | tioce_mmr_war_post(kern, mmrp); \ | ||
| 105 | } while (0) | ||
| 106 | |||
| 107 | /* store variable contents into mmr */ | ||
| 108 | #define tioce_mmr_store(kern, mmrp, varp) do {\ | ||
| 109 | tioce_mmr_war_pre(kern, mmrp); \ | ||
| 110 | writeq(*varp, mmrp); \ | ||
| 111 | tioce_mmr_war_post(kern, mmrp); \ | ||
| 112 | } while (0) | ||
| 113 | |||
| 114 | /* store immediate value into mmr */ | ||
| 115 | #define tioce_mmr_storei(kern, mmrp, val) do {\ | ||
| 116 | tioce_mmr_war_pre(kern, mmrp); \ | ||
| 117 | writeq(val, mmrp); \ | ||
| 118 | tioce_mmr_war_post(kern, mmrp); \ | ||
| 119 | } while (0) | ||
| 120 | |||
| 121 | /* set bits (immediate value) into mmr */ | ||
| 122 | #define tioce_mmr_seti(kern, mmrp, bits) do {\ | ||
| 123 | u64 tmp; \ | ||
| 124 | tioce_mmr_load(kern, mmrp, &tmp); \ | ||
| 125 | tmp |= (bits); \ | ||
| 126 | tioce_mmr_store(kern, mmrp, &tmp); \ | ||
| 127 | } while (0) | ||
| 128 | |||
| 129 | /* clear bits (immediate value) into mmr */ | ||
| 130 | #define tioce_mmr_clri(kern, mmrp, bits) do { \ | ||
| 131 | u64 tmp; \ | ||
| 132 | tioce_mmr_load(kern, mmrp, &tmp); \ | ||
| 133 | tmp &= ~(bits); \ | ||
| 134 | tioce_mmr_store(kern, mmrp, &tmp); \ | ||
| 135 | } while (0) | ||
| 18 | 136 | ||
| 19 | /** | 137 | /** |
| 20 | * Bus address ranges for the 5 flavors of TIOCE DMA | 138 | * Bus address ranges for the 5 flavors of TIOCE DMA |
| @@ -62,9 +180,9 @@ | |||
| 62 | #define TIOCE_ATE_M40 2 | 180 | #define TIOCE_ATE_M40 2 |
| 63 | #define TIOCE_ATE_M40S 3 | 181 | #define TIOCE_ATE_M40S 3 |
| 64 | 182 | ||
| 65 | #define KB(x) ((x) << 10) | 183 | #define KB(x) ((u64)(x) << 10) |
| 66 | #define MB(x) ((x) << 20) | 184 | #define MB(x) ((u64)(x) << 20) |
| 67 | #define GB(x) ((x) << 30) | 185 | #define GB(x) ((u64)(x) << 30) |
| 68 | 186 | ||
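The (u64) casts added to KB/MB/GB above matter because the macro arguments are plain ints; without the cast, GB(x) is evaluated in 32-bit int arithmetic and overflows. A small stand-alone check (illustrative only, using <stdint.h> types in place of the kernel's u64):

#include <stdio.h>
#include <stdint.h>

#define GB_OLD(x) ((x) << 30)            /* int arithmetic: overflows for x >= 2 */
#define GB_NEW(x) ((uint64_t)(x) << 30)  /* 64-bit arithmetic, as in the fixed macro */

int main(void)
{
	/* GB_OLD(4) would shift a 32-bit int past its width (undefined
	 * behaviour, typically wrapping to 0); GB_NEW(4) yields 4 GiB. */
	printf("GB_NEW(4) = %llu\n", (unsigned long long)GB_NEW(4));
	return 0;
}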
| 69 | /** | 187 | /** |
| 70 | * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode | 188 | * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode |
| @@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, | |||
| 151 | int last; | 269 | int last; |
| 152 | int entries; | 270 | int entries; |
| 153 | int nates; | 271 | int nates; |
| 154 | int pagesize; | 272 | u64 pagesize; |
| 155 | u64 *ate_shadow; | 273 | u64 *ate_shadow; |
| 156 | u64 *ate_reg; | 274 | u64 *ate_reg; |
| 157 | u64 addr; | 275 | u64 addr; |
| @@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, | |||
| 228 | 346 | ||
| 229 | ate = ATE_MAKE(addr, pagesize); | 347 | ate = ATE_MAKE(addr, pagesize); |
| 230 | ate_shadow[i + j] = ate; | 348 | ate_shadow[i + j] = ate; |
| 231 | writeq(ate, &ate_reg[i + j]); | 349 | tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); |
| 232 | addr += pagesize; | 350 | addr += pagesize; |
| 233 | } | 351 | } |
| 234 | 352 | ||
| @@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) | |||
| 272 | u64 tmp; | 390 | u64 tmp; |
| 273 | 391 | ||
| 274 | ce_kern->ce_port[port].dirmap_shadow = ct_upper; | 392 | ce_kern->ce_port[port].dirmap_shadow = ct_upper; |
| 275 | writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]); | 393 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], |
| 394 | ct_upper); | ||
| 276 | tmp = ce_mmr->ce_ure_dir_map[port]; | 395 | tmp = ce_mmr->ce_ure_dir_map[port]; |
| 277 | dma_ok = 1; | 396 | dma_ok = 1; |
| 278 | } else | 397 | } else |
| @@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) | |||
| 344 | if (TIOCE_D32_ADDR(bus_addr)) { | 463 | if (TIOCE_D32_ADDR(bus_addr)) { |
| 345 | if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { | 464 | if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { |
| 346 | ce_kern->ce_port[port].dirmap_shadow = 0; | 465 | ce_kern->ce_port[port].dirmap_shadow = 0; |
| 347 | writeq(0, &ce_mmr->ce_ure_dir_map[port]); | 466 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], |
| 467 | 0); | ||
| 348 | } | 468 | } |
| 349 | } else { | 469 | } else { |
| 350 | struct tioce_dmamap *map; | 470 | struct tioce_dmamap *map; |
| @@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) | |||
| 365 | } else if (--map->refcnt == 0) { | 485 | } else if (--map->refcnt == 0) { |
| 366 | for (i = 0; i < map->ate_count; i++) { | 486 | for (i = 0; i < map->ate_count; i++) { |
| 367 | map->ate_shadow[i] = 0; | 487 | map->ate_shadow[i] = 0; |
| 368 | map->ate_hw[i] = 0; | 488 | tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); |
| 369 | } | 489 | } |
| 370 | 490 | ||
| 371 | list_del(&map->ce_dmamap_list); | 491 | list_del(&map->ce_dmamap_list); |
| @@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, | |||
| 486 | spin_unlock_irqrestore(&ce_kern->ce_lock, flags); | 606 | spin_unlock_irqrestore(&ce_kern->ce_lock, flags); |
| 487 | 607 | ||
| 488 | dma_map_done: | 608 | dma_map_done: |
| 489 | if (mapaddr & barrier) | 609 | if (mapaddr && barrier) |
| 490 | mapaddr = tioce_dma_barrier(mapaddr, 1); | 610 | mapaddr = tioce_dma_barrier(mapaddr, 1); |
| 491 | 611 | ||
| 492 | return mapaddr; | 612 | return mapaddr; |
| @@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt) | |||
| 541 | soft->ce_pcibus.bs_persist_segment, | 661 | soft->ce_pcibus.bs_persist_segment, |
| 542 | soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); | 662 | soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); |
| 543 | 663 | ||
| 664 | if (ret_stuff.v0) | ||
| 665 | panic("tioce_error_intr_handler: Fatal TIOCE error"); | ||
| 666 | |||
| 544 | return IRQ_HANDLED; | 667 | return IRQ_HANDLED; |
| 545 | } | 668 | } |
| 546 | 669 | ||
| 547 | /** | 670 | /** |
| 671 | * tioce_reserve_m32 - reserve M32 ate's for the indicated address range | ||
| 672 | * @ce_kern: TIOCE context to reserve ate's for | ||
| 673 | * @base: starting bus address to reserve | ||
| 674 | * @limit: last bus address to reserve | ||
| 675 | * | ||
| 676 | * If base/limit falls within the range of bus space mapped through the | ||
| 677 | * M32 space, reserve the resources corresponding to the range. | ||
| 678 | */ | ||
| 679 | static void | ||
| 680 | tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) | ||
| 681 | { | ||
| 682 | int ate_index, last_ate, ps; | ||
| 683 | struct tioce *ce_mmr; | ||
| 684 | |||
| 685 | if (!TIOCE_M32_ADDR(base)) | ||
| 686 | return; | ||
| 687 | |||
| 688 | ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; | ||
| 689 | ps = ce_kern->ce_ate3240_pagesize; | ||
| 690 | ate_index = ATE_PAGE(base, ps); | ||
| 691 | last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; | ||
| 692 | |||
| 693 | if (ate_index < 64) | ||
| 694 | ate_index = 64; | ||
| 695 | |||
| 696 | while (ate_index <= last_ate) { | ||
| 697 | u64 ate; | ||
| 698 | |||
| 699 | ate = ATE_MAKE(0xdeadbeef, ps); | ||
| 700 | ce_kern->ce_ate3240_shadow[ate_index] = ate; | ||
| 701 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], | ||
| 702 | ate); | ||
| 703 | ate_index++; | ||
| 704 | } | ||
| 705 | } | ||
| 706 | |||
| 707 | /** | ||
| 548 | * tioce_kern_init - init kernel structures related to a given TIOCE | 708 | * tioce_kern_init - init kernel structures related to a given TIOCE |
| 549 | * @tioce_common: ptr to a cached tioce_common struct that originated in prom | 709 | * @tioce_common: ptr to a cached tioce_common struct that originated in prom |
| 550 | */ static struct tioce_kernel * | 710 | */ |
| 711 | static struct tioce_kernel * | ||
| 551 | tioce_kern_init(struct tioce_common *tioce_common) | 712 | tioce_kern_init(struct tioce_common *tioce_common) |
| 552 | { | 713 | { |
| 553 | int i; | 714 | int i; |
| 715 | int ps; | ||
| 716 | int dev; | ||
| 554 | u32 tmp; | 717 | u32 tmp; |
| 718 | unsigned int seg, bus; | ||
| 555 | struct tioce *tioce_mmr; | 719 | struct tioce *tioce_mmr; |
| 556 | struct tioce_kernel *tioce_kern; | 720 | struct tioce_kernel *tioce_kern; |
| 557 | 721 | ||
| @@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common) | |||
| 572 | * here to use pci_read_config_xxx() so use the raw_pci_ops vector. | 736 | * here to use pci_read_config_xxx() so use the raw_pci_ops vector. |
| 573 | */ | 737 | */ |
| 574 | 738 | ||
| 575 | raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, | 739 | seg = tioce_common->ce_pcibus.bs_persist_segment; |
| 576 | tioce_common->ce_pcibus.bs_persist_busnum, | 740 | bus = tioce_common->ce_pcibus.bs_persist_busnum; |
| 577 | PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); | 741 | |
| 742 | raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); | ||
| 578 | tioce_kern->ce_port1_secondary = (u8) tmp; | 743 | tioce_kern->ce_port1_secondary = (u8) tmp; |
| 579 | 744 | ||
| 580 | /* | 745 | /* |
| @@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common) | |||
| 583 | */ | 748 | */ |
| 584 | 749 | ||
| 585 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; | 750 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; |
| 586 | __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); | 751 | tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, |
| 587 | __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE); | 752 | CE_URE_PAGESIZE_MASK); |
| 588 | tioce_kern->ce_ate3240_pagesize = KB(256); | 753 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, |
| 754 | CE_URE_256K_PAGESIZE); | ||
| 755 | ps = tioce_kern->ce_ate3240_pagesize = KB(256); | ||
| 589 | 756 | ||
| 590 | for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { | 757 | for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { |
| 591 | tioce_kern->ce_ate40_shadow[i] = 0; | 758 | tioce_kern->ce_ate40_shadow[i] = 0; |
| 592 | writeq(0, &tioce_mmr->ce_ure_ate40[i]); | 759 | tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); |
| 593 | } | 760 | } |
| 594 | 761 | ||
| 595 | for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { | 762 | for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { |
| 596 | tioce_kern->ce_ate3240_shadow[i] = 0; | 763 | tioce_kern->ce_ate3240_shadow[i] = 0; |
| 597 | writeq(0, &tioce_mmr->ce_ure_ate3240[i]); | 764 | tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); |
| 765 | } | ||
| 766 | |||
| 767 | /* | ||
| 768 | * Reserve ATE's corresponding to reserved address ranges. These | ||
| 769 | * include: | ||
| 770 | * | ||
| 771 | * Memory space covered by each PPB mem base/limit register | ||
| 772 | * Memory space covered by each PPB prefetch base/limit register | ||
| 773 | * | ||
| 774 | * These bus ranges are for pio (downstream) traffic only, and so | ||
| 775 | * cannot be used for DMA. | ||
| 776 | */ | ||
| 777 | |||
| 778 | for (dev = 1; dev <= 2; dev++) { | ||
| 779 | u64 base, limit; | ||
| 780 | |||
| 781 | /* mem base/limit */ | ||
| 782 | |||
| 783 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 784 | PCI_MEMORY_BASE, 2, &tmp); | ||
| 785 | base = (u64)tmp << 16; | ||
| 786 | |||
| 787 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 788 | PCI_MEMORY_LIMIT, 2, &tmp); | ||
| 789 | limit = (u64)tmp << 16; | ||
| 790 | limit |= 0xfffffUL; | ||
| 791 | |||
| 792 | if (base < limit) | ||
| 793 | tioce_reserve_m32(tioce_kern, base, limit); | ||
| 794 | |||
| 795 | /* | ||
| 796 | * prefetch mem base/limit. The tioce ppb's have 64-bit | ||
| 797 | * decoders, so read the upper portions w/o checking the | ||
| 798 | * attributes. | ||
| 799 | */ | ||
| 800 | |||
| 801 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 802 | PCI_PREF_MEMORY_BASE, 2, &tmp); | ||
| 803 | base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; | ||
| 804 | |||
| 805 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 806 | PCI_PREF_BASE_UPPER32, 4, &tmp); | ||
| 807 | base |= (u64)tmp << 32; | ||
| 808 | |||
| 809 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 810 | PCI_PREF_MEMORY_LIMIT, 2, &tmp); | ||
| 811 | |||
| 812 | limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; | ||
| 813 | limit |= 0xfffffUL; | ||
| 814 | |||
| 815 | raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), | ||
| 816 | PCI_PREF_LIMIT_UPPER32, 4, &tmp); | ||
| 817 | limit |= (u64)tmp << 32; | ||
| 818 | |||
| 819 | if ((base < limit) && TIOCE_M32_ADDR(base)) | ||
| 820 | tioce_reserve_m32(tioce_kern, base, limit); | ||
| 598 | } | 821 | } |
| 599 | 822 | ||
| 600 | return tioce_kern; | 823 | return tioce_kern; |
| @@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 614 | { | 837 | { |
| 615 | struct pcidev_info *pcidev_info; | 838 | struct pcidev_info *pcidev_info; |
| 616 | struct tioce_common *ce_common; | 839 | struct tioce_common *ce_common; |
| 840 | struct tioce_kernel *ce_kern; | ||
| 617 | struct tioce *ce_mmr; | 841 | struct tioce *ce_mmr; |
| 618 | u64 force_int_val; | 842 | u64 force_int_val; |
| 619 | 843 | ||
| @@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 629 | 853 | ||
| 630 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; | 854 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; |
| 631 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; | 855 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; |
| 856 | ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; | ||
| 857 | |||
| 858 | /* | ||
| 859 | * TIOCE Rev A workaround (PV 945826), force an interrupt by writing | ||
| 860 | * the TIO_INTx register directly (1/26/2006) | ||
| 861 | */ | ||
| 862 | if (ce_common->ce_rev == TIOCE_REV_A) { | ||
| 863 | u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); | ||
| 864 | u64 status; | ||
| 865 | |||
| 866 | tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); | ||
| 867 | if (status & int_bit_mask) { | ||
| 868 | u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; | ||
| 869 | u64 ctalk = sn_irq_info->irq_xtalkaddr; | ||
| 870 | u64 nasid, offset; | ||
| 871 | |||
| 872 | nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; | ||
| 873 | offset = (ctalk & CTALK_NODE_OFFSET); | ||
| 874 | HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); | ||
| 875 | } | ||
| 876 | |||
| 877 | return; | ||
| 878 | } | ||
| 632 | 879 | ||
| 633 | /* | 880 | /* |
| 634 | * irq_int_bit is originally set up by prom, and holds the interrupt | 881 | * irq_int_bit is originally set up by prom, and holds the interrupt |
| @@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 666 | default: | 913 | default: |
| 667 | return; | 914 | return; |
| 668 | } | 915 | } |
| 669 | writeq(force_int_val, &ce_mmr->ce_adm_force_int); | 916 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); |
| 670 | } | 917 | } |
| 671 | 918 | ||
| 672 | /** | 919 | /** |
| @@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 685 | { | 932 | { |
| 686 | struct pcidev_info *pcidev_info; | 933 | struct pcidev_info *pcidev_info; |
| 687 | struct tioce_common *ce_common; | 934 | struct tioce_common *ce_common; |
| 935 | struct tioce_kernel *ce_kern; | ||
| 688 | struct tioce *ce_mmr; | 936 | struct tioce *ce_mmr; |
| 689 | int bit; | 937 | int bit; |
| 690 | u64 vector; | 938 | u64 vector; |
| @@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 695 | 943 | ||
| 696 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; | 944 | ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; |
| 697 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; | 945 | ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; |
| 946 | ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; | ||
| 698 | 947 | ||
| 699 | bit = sn_irq_info->irq_int_bit; | 948 | bit = sn_irq_info->irq_int_bit; |
| 700 | 949 | ||
| 701 | __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); | 950 | tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); |
| 702 | vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; | 951 | vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; |
| 703 | vector |= sn_irq_info->irq_xtalkaddr; | 952 | vector |= sn_irq_info->irq_xtalkaddr; |
| 704 | writeq(vector, &ce_mmr->ce_adm_int_dest[bit]); | 953 | tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); |
| 705 | __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); | 954 | tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); |
| 706 | 955 | ||
| 707 | tioce_force_interrupt(sn_irq_info); | 956 | tioce_force_interrupt(sn_irq_info); |
| 708 | } | 957 | } |
| @@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) | |||
| 721 | static void * | 970 | static void * |
| 722 | tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) | 971 | tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) |
| 723 | { | 972 | { |
| 973 | int my_nasid; | ||
| 974 | cnodeid_t my_cnode, mem_cnode; | ||
| 724 | struct tioce_common *tioce_common; | 975 | struct tioce_common *tioce_common; |
| 976 | struct tioce_kernel *tioce_kern; | ||
| 977 | struct tioce *tioce_mmr; | ||
| 725 | 978 | ||
| 726 | /* | 979 | /* |
| 727 | * Allocate kernel bus soft and copy from prom. | 980 | * Allocate kernel bus soft and copy from prom. |
| @@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont | |||
| 734 | memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); | 987 | memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); |
| 735 | tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; | 988 | tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; |
| 736 | 989 | ||
| 737 | if (tioce_kern_init(tioce_common) == NULL) { | 990 | tioce_kern = tioce_kern_init(tioce_common); |
| 991 | if (tioce_kern == NULL) { | ||
| 738 | kfree(tioce_common); | 992 | kfree(tioce_common); |
| 739 | return NULL; | 993 | return NULL; |
| 740 | } | 994 | } |
| 741 | 995 | ||
| 996 | /* | ||
| 997 | * Clear out any transient errors before registering the error | ||
| 998 | * interrupt handler. | ||
| 999 | */ | ||
| 1000 | |||
| 1001 | tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; | ||
| 1002 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); | ||
| 1003 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, | ||
| 1004 | ~0ULL); | ||
| 1005 | tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); | ||
| 1006 | |||
| 742 | if (request_irq(SGI_PCIASIC_ERROR, | 1007 | if (request_irq(SGI_PCIASIC_ERROR, |
| 743 | tioce_error_intr_handler, | 1008 | tioce_error_intr_handler, |
| 744 | SA_SHIRQ, "TIOCE error", (void *)tioce_common)) | 1009 | SA_SHIRQ, "TIOCE error", (void *)tioce_common)) |
| @@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont | |||
| 750 | tioce_common->ce_pcibus.bs_persist_segment, | 1015 | tioce_common->ce_pcibus.bs_persist_segment, |
| 751 | tioce_common->ce_pcibus.bs_persist_busnum); | 1016 | tioce_common->ce_pcibus.bs_persist_busnum); |
| 752 | 1017 | ||
| 1018 | /* | ||
| 1019 | * identify closest nasid for memory allocations | ||
| 1020 | */ | ||
| 1021 | |||
| 1022 | my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base); | ||
| 1023 | my_cnode = nasid_to_cnodeid(my_nasid); | ||
| 1024 | |||
| 1025 | if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) { | ||
| 1026 | printk(KERN_WARNING "tioce_bus_fixup: failed to find " | ||
| 1027 | "closest node with MEM to TIO node %d\n", my_cnode); | ||
| 1028 | mem_cnode = (cnodeid_t)-1; /* use any node */ | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | controller->node = mem_cnode; | ||
| 1032 | |||
| 753 | return tioce_common; | 1033 | return tioce_common; |
| 754 | } | 1034 | } |
| 755 | 1035 | ||
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 6facf15b04f3..c9e7dad860b7 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c | |||
| @@ -226,7 +226,7 @@ void free_initmem(void) | |||
| 226 | addr = (unsigned long)(&__init_begin); | 226 | addr = (unsigned long)(&__init_begin); |
| 227 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 227 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 228 | ClearPageReserved(virt_to_page(addr)); | 228 | ClearPageReserved(virt_to_page(addr)); |
| 229 | set_page_count(virt_to_page(addr), 1); | 229 | init_page_count(virt_to_page(addr)); |
| 230 | free_page(addr); | 230 | free_page(addr); |
| 231 | totalram_pages++; | 231 | totalram_pages++; |
| 232 | } | 232 | } |
| @@ -244,7 +244,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 244 | unsigned long p; | 244 | unsigned long p; |
| 245 | for (p = start; p < end; p += PAGE_SIZE) { | 245 | for (p = start; p < end; p += PAGE_SIZE) { |
| 246 | ClearPageReserved(virt_to_page(p)); | 246 | ClearPageReserved(virt_to_page(p)); |
| 247 | set_page_count(virt_to_page(p), 1); | 247 | init_page_count(virt_to_page(p)); |
| 248 | free_page(p); | 248 | free_page(p); |
| 249 | totalram_pages++; | 249 | totalram_pages++; |
| 250 | } | 250 | } |
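This m32r hunk is the first of many identical conversions in this series: every open-coded set_page_count(page, 1) on a page that is about to be handed back to the allocator becomes init_page_count(page). The helper lives in include/linux/mm.h; a sketch of what it amounts to (the in-tree definition may differ in detail):

	/* Sketch of the helper the following hunks switch to; see
	 * include/linux/mm.h for the authoritative definition. */
	static inline void init_page_count(struct page *page)
	{
		/* A page being (re)initialised starts life with one
		 * reference, so the following free_page()/__free_page()
		 * drops it to zero and returns it to the buddy allocator. */
		atomic_set(&page->_count, 1);
	}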
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index c45beb955943..a190e39c907a 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c | |||
| @@ -137,7 +137,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 137 | int pages = 0; | 137 | int pages = 0; |
| 138 | for (; start < end; start += PAGE_SIZE) { | 138 | for (; start < end; start += PAGE_SIZE) { |
| 139 | ClearPageReserved(virt_to_page(start)); | 139 | ClearPageReserved(virt_to_page(start)); |
| 140 | set_page_count(virt_to_page(start), 1); | 140 | init_page_count(virt_to_page(start)); |
| 141 | free_page(start); | 141 | free_page(start); |
| 142 | totalram_pages++; | 142 | totalram_pages++; |
| 143 | pages++; | 143 | pages++; |
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c index 559942ce0e1e..d6d582a5abb0 100644 --- a/arch/m68k/mm/memory.c +++ b/arch/m68k/mm/memory.c | |||
| @@ -54,7 +54,7 @@ void __init init_pointer_table(unsigned long ptable) | |||
| 54 | 54 | ||
| 55 | /* unreserve the page so it's possible to free that page */ | 55 | /* unreserve the page so it's possible to free that page */ |
| 56 | PD_PAGE(dp)->flags &= ~(1 << PG_reserved); | 56 | PD_PAGE(dp)->flags &= ~(1 << PG_reserved); |
| 57 | set_page_count(PD_PAGE(dp), 1); | 57 | init_page_count(PD_PAGE(dp)); |
| 58 | 58 | ||
| 59 | return; | 59 | return; |
| 60 | } | 60 | } |
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index d855fec26317..afb57eeafdcb 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c | |||
| @@ -276,7 +276,7 @@ void free_initmem(void) | |||
| 276 | addr = (unsigned long)&__init_begin; | 276 | addr = (unsigned long)&__init_begin; |
| 277 | for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) { | 277 | for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) { |
| 278 | virt_to_page(addr)->flags &= ~(1 << PG_reserved); | 278 | virt_to_page(addr)->flags &= ~(1 << PG_reserved); |
| 279 | set_page_count(virt_to_page(addr), 1); | 279 | init_page_count(virt_to_page(addr)); |
| 280 | free_page(addr); | 280 | free_page(addr); |
| 281 | totalram_pages++; | 281 | totalram_pages++; |
| 282 | } | 282 | } |
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c index eddb8d3e130a..d844c755945a 100644 --- a/arch/m68knommu/kernel/m68k_ksyms.c +++ b/arch/m68knommu/kernel/m68k_ksyms.c | |||
| @@ -26,6 +26,7 @@ EXPORT_SYMBOL(__ioremap); | |||
| 26 | EXPORT_SYMBOL(iounmap); | 26 | EXPORT_SYMBOL(iounmap); |
| 27 | EXPORT_SYMBOL(dump_fpu); | 27 | EXPORT_SYMBOL(dump_fpu); |
| 28 | EXPORT_SYMBOL(strnlen); | 28 | EXPORT_SYMBOL(strnlen); |
| 29 | EXPORT_SYMBOL(strpbrk); | ||
| 29 | EXPORT_SYMBOL(strrchr); | 30 | EXPORT_SYMBOL(strrchr); |
| 30 | EXPORT_SYMBOL(strstr); | 31 | EXPORT_SYMBOL(strstr); |
| 31 | EXPORT_SYMBOL(strchr); | 32 | EXPORT_SYMBOL(strchr); |
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index 89f0b554ffb7..d79503fe6e42 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c | |||
| @@ -195,7 +195,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 195 | int pages = 0; | 195 | int pages = 0; |
| 196 | for (; start < end; start += PAGE_SIZE) { | 196 | for (; start < end; start += PAGE_SIZE) { |
| 197 | ClearPageReserved(virt_to_page(start)); | 197 | ClearPageReserved(virt_to_page(start)); |
| 198 | set_page_count(virt_to_page(start), 1); | 198 | init_page_count(virt_to_page(start)); |
| 199 | free_page(start); | 199 | free_page(start); |
| 200 | totalram_pages++; | 200 | totalram_pages++; |
| 201 | pages++; | 201 | pages++; |
| @@ -218,7 +218,7 @@ free_initmem() | |||
| 218 | /* next to check that the page we free is not a partial page */ | 218 | /* next to check that the page we free is not a partial page */ |
| 219 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { | 219 | for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) { |
| 220 | ClearPageReserved(virt_to_page(addr)); | 220 | ClearPageReserved(virt_to_page(addr)); |
| 221 | set_page_count(virt_to_page(addr), 1); | 221 | init_page_count(virt_to_page(addr)); |
| 222 | free_page(addr); | 222 | free_page(addr); |
| 223 | totalram_pages++; | 223 | totalram_pages++; |
| 224 | } | 224 | } |
diff --git a/arch/mips/arc/memory.c b/arch/mips/arc/memory.c index 958d2eb78862..8a9ef58cc399 100644 --- a/arch/mips/arc/memory.c +++ b/arch/mips/arc/memory.c | |||
| @@ -158,7 +158,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
| 158 | while (addr < boot_mem_map.map[i].addr | 158 | while (addr < boot_mem_map.map[i].addr |
| 159 | + boot_mem_map.map[i].size) { | 159 | + boot_mem_map.map[i].size) { |
| 160 | ClearPageReserved(virt_to_page(__va(addr))); | 160 | ClearPageReserved(virt_to_page(__va(addr))); |
| 161 | set_page_count(virt_to_page(__va(addr)), 1); | 161 | init_page_count(virt_to_page(__va(addr))); |
| 162 | free_page((unsigned long)__va(addr)); | 162 | free_page((unsigned long)__va(addr)); |
| 163 | addr += PAGE_SIZE; | 163 | addr += PAGE_SIZE; |
| 164 | freed += PAGE_SIZE; | 164 | freed += PAGE_SIZE; |
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c index 81cb5a76cfb7..1edaf3074ee9 100644 --- a/arch/mips/dec/prom/memory.c +++ b/arch/mips/dec/prom/memory.c | |||
| @@ -118,7 +118,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
| 118 | addr = PAGE_SIZE; | 118 | addr = PAGE_SIZE; |
| 119 | while (addr < end) { | 119 | while (addr < end) { |
| 120 | ClearPageReserved(virt_to_page(__va(addr))); | 120 | ClearPageReserved(virt_to_page(__va(addr))); |
| 121 | set_page_count(virt_to_page(__va(addr)), 1); | 121 | init_page_count(virt_to_page(__va(addr))); |
| 122 | free_page((unsigned long)__va(addr)); | 122 | free_page((unsigned long)__va(addr)); |
| 123 | addr += PAGE_SIZE; | 123 | addr += PAGE_SIZE; |
| 124 | } | 124 | } |
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c index 2c8afd77a20b..ee5e70c95cf3 100644 --- a/arch/mips/mips-boards/generic/memory.c +++ b/arch/mips/mips-boards/generic/memory.c | |||
| @@ -174,7 +174,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
| 174 | while (addr < boot_mem_map.map[i].addr | 174 | while (addr < boot_mem_map.map[i].addr |
| 175 | + boot_mem_map.map[i].size) { | 175 | + boot_mem_map.map[i].size) { |
| 176 | ClearPageReserved(virt_to_page(__va(addr))); | 176 | ClearPageReserved(virt_to_page(__va(addr))); |
| 177 | set_page_count(virt_to_page(__va(addr)), 1); | 177 | init_page_count(virt_to_page(__va(addr))); |
| 178 | free_page((unsigned long)__va(addr)); | 178 | free_page((unsigned long)__va(addr)); |
| 179 | addr += PAGE_SIZE; | 179 | addr += PAGE_SIZE; |
| 180 | freed += PAGE_SIZE; | 180 | freed += PAGE_SIZE; |
diff --git a/arch/mips/mips-boards/sim/sim_mem.c b/arch/mips/mips-boards/sim/sim_mem.c index 0dbd7435bb2a..1ec4e75656bd 100644 --- a/arch/mips/mips-boards/sim/sim_mem.c +++ b/arch/mips/mips-boards/sim/sim_mem.c | |||
| @@ -117,7 +117,7 @@ unsigned long __init prom_free_prom_memory(void) | |||
| 117 | while (addr < boot_mem_map.map[i].addr | 117 | while (addr < boot_mem_map.map[i].addr |
| 118 | + boot_mem_map.map[i].size) { | 118 | + boot_mem_map.map[i].size) { |
| 119 | ClearPageReserved(virt_to_page(__va(addr))); | 119 | ClearPageReserved(virt_to_page(__va(addr))); |
| 120 | set_page_count(virt_to_page(__va(addr)), 1); | 120 | init_page_count(virt_to_page(__va(addr))); |
| 121 | free_page((unsigned long)__va(addr)); | 121 | free_page((unsigned long)__va(addr)); |
| 122 | addr += PAGE_SIZE; | 122 | addr += PAGE_SIZE; |
| 123 | freed += PAGE_SIZE; | 123 | freed += PAGE_SIZE; |
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0ff9a348b843..52f7d59fe612 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c | |||
| @@ -54,7 +54,8 @@ unsigned long empty_zero_page, zero_page_mask; | |||
| 54 | */ | 54 | */ |
| 55 | unsigned long setup_zero_pages(void) | 55 | unsigned long setup_zero_pages(void) |
| 56 | { | 56 | { |
| 57 | unsigned long order, size; | 57 | unsigned int order; |
| 58 | unsigned long size; | ||
| 58 | struct page *page; | 59 | struct page *page; |
| 59 | 60 | ||
| 60 | if (cpu_has_vce) | 61 | if (cpu_has_vce) |
| @@ -67,9 +68,9 @@ unsigned long setup_zero_pages(void) | |||
| 67 | panic("Oh boy, that early out of memory?"); | 68 | panic("Oh boy, that early out of memory?"); |
| 68 | 69 | ||
| 69 | page = virt_to_page(empty_zero_page); | 70 | page = virt_to_page(empty_zero_page); |
| 71 | split_page(page, order); | ||
| 70 | while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { | 72 | while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { |
| 71 | SetPageReserved(page); | 73 | SetPageReserved(page); |
| 72 | set_page_count(page, 1); | ||
| 73 | page++; | 74 | page++; |
| 74 | } | 75 | } |
| 75 | 76 | ||
| @@ -244,7 +245,7 @@ void __init mem_init(void) | |||
| 244 | #ifdef CONFIG_LIMITED_DMA | 245 | #ifdef CONFIG_LIMITED_DMA |
| 245 | set_page_address(page, lowmem_page_address(page)); | 246 | set_page_address(page, lowmem_page_address(page)); |
| 246 | #endif | 247 | #endif |
| 247 | set_page_count(page, 1); | 248 | init_page_count(page); |
| 248 | __free_page(page); | 249 | __free_page(page); |
| 249 | totalhigh_pages++; | 250 | totalhigh_pages++; |
| 250 | } | 251 | } |
| @@ -291,7 +292,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 291 | 292 | ||
| 292 | for (; start < end; start += PAGE_SIZE) { | 293 | for (; start < end; start += PAGE_SIZE) { |
| 293 | ClearPageReserved(virt_to_page(start)); | 294 | ClearPageReserved(virt_to_page(start)); |
| 294 | set_page_count(virt_to_page(start), 1); | 295 | init_page_count(virt_to_page(start)); |
| 295 | free_page(start); | 296 | free_page(start); |
| 296 | totalram_pages++; | 297 | totalram_pages++; |
| 297 | } | 298 | } |
| @@ -314,7 +315,7 @@ void free_initmem(void) | |||
| 314 | page = addr; | 315 | page = addr; |
| 315 | #endif | 316 | #endif |
| 316 | ClearPageReserved(virt_to_page(page)); | 317 | ClearPageReserved(virt_to_page(page)); |
| 317 | set_page_count(virt_to_page(page), 1); | 318 | init_page_count(virt_to_page(page)); |
| 318 | free_page(page); | 319 | free_page(page); |
| 319 | totalram_pages++; | 320 | totalram_pages++; |
| 320 | freed += PAGE_SIZE; | 321 | freed += PAGE_SIZE; |
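setup_zero_pages() above now calls split_page() on the freshly allocated block instead of hand-setting the count of each constituent page. split_page() turns a higher-order allocation into 1 << order independent order-0 pages; roughly as below (the real function in mm/page_alloc.c carries extra debug checks):

	/* Rough shape of split_page() from mm/page_alloc.c. */
	void split_page(struct page *page, unsigned int order)
	{
		int i;

		/* The head page already carries the reference returned by
		 * the allocator; give every tail page a reference of its
		 * own so each page can be kept or freed individually. */
		for (i = 1; i < (1 << order); i++)
			set_page_count(page + i, 1);
	}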
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index ed93a9792959..e0d095daa5ed 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c | |||
| @@ -559,7 +559,7 @@ void __init mem_init(void) | |||
| 559 | /* if (!page_is_ram(pgnr)) continue; */ | 559 | /* if (!page_is_ram(pgnr)) continue; */ |
| 560 | /* commented out until page_is_ram works */ | 560 | /* commented out until page_is_ram works */ |
| 561 | ClearPageReserved(p); | 561 | ClearPageReserved(p); |
| 562 | set_page_count(p, 1); | 562 | init_page_count(p); |
| 563 | __free_page(p); | 563 | __free_page(p); |
| 564 | totalram_pages++; | 564 | totalram_pages++; |
| 565 | } | 565 | } |
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 7847ca13d6c2..852eda3953dc 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
| @@ -398,7 +398,7 @@ void free_initmem(void) | |||
| 398 | addr = (unsigned long)(&__init_begin); | 398 | addr = (unsigned long)(&__init_begin); |
| 399 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 399 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 400 | ClearPageReserved(virt_to_page(addr)); | 400 | ClearPageReserved(virt_to_page(addr)); |
| 401 | set_page_count(virt_to_page(addr), 1); | 401 | init_page_count(virt_to_page(addr)); |
| 402 | free_page(addr); | 402 | free_page(addr); |
| 403 | num_physpages++; | 403 | num_physpages++; |
| 404 | totalram_pages++; | 404 | totalram_pages++; |
| @@ -1018,7 +1018,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 1018 | printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 1018 | printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 1019 | for (; start < end; start += PAGE_SIZE) { | 1019 | for (; start < end; start += PAGE_SIZE) { |
| 1020 | ClearPageReserved(virt_to_page(start)); | 1020 | ClearPageReserved(virt_to_page(start)); |
| 1021 | set_page_count(virt_to_page(start), 1); | 1021 | init_page_count(virt_to_page(start)); |
| 1022 | free_page(start); | 1022 | free_page(start); |
| 1023 | num_physpages++; | 1023 | num_physpages++; |
| 1024 | totalram_pages++; | 1024 | totalram_pages++; |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b51bb28c054b..7370f9f33e29 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
| @@ -133,21 +133,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
| 133 | return __pte(old); | 133 | return __pte(old); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | /* | ||
| 137 | * This function checks for proper alignment of input addr and len parameters. | ||
| 138 | */ | ||
| 139 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
| 140 | { | ||
| 141 | if (len & ~HPAGE_MASK) | ||
| 142 | return -EINVAL; | ||
| 143 | if (addr & ~HPAGE_MASK) | ||
| 144 | return -EINVAL; | ||
| 145 | if (! (within_hugepage_low_range(addr, len) | ||
| 146 | || within_hugepage_high_range(addr, len)) ) | ||
| 147 | return -EINVAL; | ||
| 148 | return 0; | ||
| 149 | } | ||
| 150 | |||
| 151 | struct slb_flush_info { | 136 | struct slb_flush_info { |
| 152 | struct mm_struct *mm; | 137 | struct mm_struct *mm; |
| 153 | u16 newareas; | 138 | u16 newareas; |
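The powerpc copy of is_aligned_hugepage_range() is deleted here, and the sh, sh64 and sparc64 hunks below drop their identical copies. The per-arch bodies being removed all reduce to the same mask check, so presumably a single generic helper takes over; a sketch based directly on the deleted code:

	/* Generic form of the alignment check the per-arch copies
	 * implemented (sketch; powerpc additionally validated the
	 * low/high hugepage areas). */
	static inline int is_aligned_hugepage_range(unsigned long addr,
						    unsigned long len)
	{
		if (len & ~HPAGE_MASK)
			return -EINVAL;
		if (addr & ~HPAGE_MASK)
			return -EINVAL;
		return 0;
	}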
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 7d0d75c11848..b57fb3a2b7bb 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c | |||
| @@ -216,7 +216,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name) | |||
| 216 | 216 | ||
| 217 | while (start < end) { | 217 | while (start < end) { |
| 218 | ClearPageReserved(virt_to_page(start)); | 218 | ClearPageReserved(virt_to_page(start)); |
| 219 | set_page_count(virt_to_page(start), 1); | 219 | init_page_count(virt_to_page(start)); |
| 220 | free_page(start); | 220 | free_page(start); |
| 221 | cnt++; | 221 | cnt++; |
| 222 | start += PAGE_SIZE; | 222 | start += PAGE_SIZE; |
| @@ -248,7 +248,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 248 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 248 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 249 | for (; start < end; start += PAGE_SIZE) { | 249 | for (; start < end; start += PAGE_SIZE) { |
| 250 | ClearPageReserved(virt_to_page(start)); | 250 | ClearPageReserved(virt_to_page(start)); |
| 251 | set_page_count(virt_to_page(start), 1); | 251 | init_page_count(virt_to_page(start)); |
| 252 | free_page(start); | 252 | free_page(start); |
| 253 | totalram_pages++; | 253 | totalram_pages++; |
| 254 | } | 254 | } |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 81cfb0c2ec58..bacb71c89811 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
| @@ -140,7 +140,7 @@ void free_initmem(void) | |||
| 140 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { | 140 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { |
| 141 | memset((void *)addr, 0xcc, PAGE_SIZE); | 141 | memset((void *)addr, 0xcc, PAGE_SIZE); |
| 142 | ClearPageReserved(virt_to_page(addr)); | 142 | ClearPageReserved(virt_to_page(addr)); |
| 143 | set_page_count(virt_to_page(addr), 1); | 143 | init_page_count(virt_to_page(addr)); |
| 144 | free_page(addr); | 144 | free_page(addr); |
| 145 | totalram_pages++; | 145 | totalram_pages++; |
| 146 | } | 146 | } |
| @@ -155,7 +155,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 155 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 155 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 156 | for (; start < end; start += PAGE_SIZE) { | 156 | for (; start < end; start += PAGE_SIZE) { |
| 157 | ClearPageReserved(virt_to_page(start)); | 157 | ClearPageReserved(virt_to_page(start)); |
| 158 | set_page_count(virt_to_page(start), 1); | 158 | init_page_count(virt_to_page(start)); |
| 159 | free_page(start); | 159 | free_page(start); |
| 160 | totalram_pages++; | 160 | totalram_pages++; |
| 161 | } | 161 | } |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 550517c2dd42..454cac01d8cc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
| @@ -108,8 +108,8 @@ EXPORT_SYMBOL(phys_mem_access_prot); | |||
| 108 | void online_page(struct page *page) | 108 | void online_page(struct page *page) |
| 109 | { | 109 | { |
| 110 | ClearPageReserved(page); | 110 | ClearPageReserved(page); |
| 111 | set_page_count(page, 0); | 111 | init_page_count(page); |
| 112 | free_cold_page(page); | 112 | __free_page(page); |
| 113 | totalram_pages++; | 113 | totalram_pages++; |
| 114 | num_physpages++; | 114 | num_physpages++; |
| 115 | } | 115 | } |
| @@ -376,7 +376,7 @@ void __init mem_init(void) | |||
| 376 | struct page *page = pfn_to_page(pfn); | 376 | struct page *page = pfn_to_page(pfn); |
| 377 | 377 | ||
| 378 | ClearPageReserved(page); | 378 | ClearPageReserved(page); |
| 379 | set_page_count(page, 1); | 379 | init_page_count(page); |
| 380 | __free_page(page); | 380 | __free_page(page); |
| 381 | totalhigh_pages++; | 381 | totalhigh_pages++; |
| 382 | } | 382 | } |
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index b33a4443f5a9..fec8e65b36ea 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c | |||
| @@ -115,7 +115,7 @@ static void __init cell_spuprop_present(struct device_node *spe, | |||
| 115 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | 115 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
| 116 | struct page *page = pfn_to_page(pfn); | 116 | struct page *page = pfn_to_page(pfn); |
| 117 | set_page_links(page, ZONE_DMA, node_id, pfn); | 117 | set_page_links(page, ZONE_DMA, node_id, pfn); |
| 118 | set_page_count(page, 1); | 118 | init_page_count(page); |
| 119 | reset_page_mapcount(page); | 119 | reset_page_mapcount(page); |
| 120 | SetPageReserved(page); | 120 | SetPageReserved(page); |
| 121 | INIT_LIST_HEAD(&page->lru); | 121 | INIT_LIST_HEAD(&page->lru); |
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index 685fd0defe23..61465ec88bc7 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c | |||
| @@ -223,6 +223,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
| 223 | pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); | 223 | pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); |
| 224 | struct page *end = page + (1 << order); | 224 | struct page *end = page + (1 << order); |
| 225 | 225 | ||
| 226 | split_page(page, order); | ||
| 227 | |||
| 226 | /* | 228 | /* |
| 227 | * Set the "dma handle" | 229 | * Set the "dma handle" |
| 228 | */ | 230 | */ |
| @@ -231,7 +233,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
| 231 | do { | 233 | do { |
| 232 | BUG_ON(!pte_none(*pte)); | 234 | BUG_ON(!pte_none(*pte)); |
| 233 | 235 | ||
| 234 | set_page_count(page, 1); | ||
| 235 | SetPageReserved(page); | 236 | SetPageReserved(page); |
| 236 | set_pte_at(&init_mm, vaddr, | 237 | set_pte_at(&init_mm, vaddr, |
| 237 | pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); | 238 | pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); |
| @@ -244,7 +245,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) | |||
| 244 | * Free the otherwise unused pages. | 245 | * Free the otherwise unused pages. |
| 245 | */ | 246 | */ |
| 246 | while (page < end) { | 247 | while (page < end) { |
| 247 | set_page_count(page, 1); | ||
| 248 | __free_page(page); | 248 | __free_page(page); |
| 249 | page++; | 249 | page++; |
| 250 | } | 250 | } |
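This coherent-DMA hunk, and the sh consistent.c hunk below, show the usage pattern that split_page() enables: allocate a power-of-two block, split it, keep the pages actually needed and free the tail individually. A self-contained sketch of that pattern using standard page-allocator calls (an illustration, not the driver's code):

	/* Allocate 'size' bytes worth of pages and return exactly that
	 * many, freeing the unused remainder of the power-of-two block. */
	static void *alloc_pages_exact_sketch(size_t size, gfp_t gfp)
	{
		unsigned int order = get_order(size);
		unsigned long nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
		struct page *page, *p, *end;

		page = alloc_pages(gfp, order);
		if (!page)
			return NULL;

		split_page(page, order);  /* every page now stands alone */

		p = page + nr;            /* first page beyond what we need */
		end = page + (1UL << order);
		while (p < end)
			__free_page(p++);

		return page_address(page);
	}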
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 134db5c04203..cb1c294fb932 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c | |||
| @@ -140,7 +140,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name) | |||
| 140 | 140 | ||
| 141 | while (start < end) { | 141 | while (start < end) { |
| 142 | ClearPageReserved(virt_to_page(start)); | 142 | ClearPageReserved(virt_to_page(start)); |
| 143 | set_page_count(virt_to_page(start), 1); | 143 | init_page_count(virt_to_page(start)); |
| 144 | free_page(start); | 144 | free_page(start); |
| 145 | cnt++; | 145 | cnt++; |
| 146 | start += PAGE_SIZE; | 146 | start += PAGE_SIZE; |
| @@ -172,7 +172,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 172 | 172 | ||
| 173 | for (; start < end; start += PAGE_SIZE) { | 173 | for (; start < end; start += PAGE_SIZE) { |
| 174 | ClearPageReserved(virt_to_page(start)); | 174 | ClearPageReserved(virt_to_page(start)); |
| 175 | set_page_count(virt_to_page(start), 1); | 175 | init_page_count(virt_to_page(start)); |
| 176 | free_page(start); | 176 | free_page(start); |
| 177 | totalram_pages++; | 177 | totalram_pages++; |
| 178 | } | 178 | } |
| @@ -441,7 +441,7 @@ void __init mem_init(void) | |||
| 441 | struct page *page = mem_map + pfn; | 441 | struct page *page = mem_map + pfn; |
| 442 | 442 | ||
| 443 | ClearPageReserved(page); | 443 | ClearPageReserved(page); |
| 444 | set_page_count(page, 1); | 444 | init_page_count(page); |
| 445 | __free_page(page); | 445 | __free_page(page); |
| 446 | totalhigh_pages++; | 446 | totalhigh_pages++; |
| 447 | } | 447 | } |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index df953383724d..a055894f3bd8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
| @@ -292,7 +292,7 @@ void free_initmem(void) | |||
| 292 | addr = (unsigned long)(&__init_begin); | 292 | addr = (unsigned long)(&__init_begin); |
| 293 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 293 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 294 | ClearPageReserved(virt_to_page(addr)); | 294 | ClearPageReserved(virt_to_page(addr)); |
| 295 | set_page_count(virt_to_page(addr), 1); | 295 | init_page_count(virt_to_page(addr)); |
| 296 | free_page(addr); | 296 | free_page(addr); |
| 297 | totalram_pages++; | 297 | totalram_pages++; |
| 298 | } | 298 | } |
| @@ -307,7 +307,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 307 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 307 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 308 | for (; start < end; start += PAGE_SIZE) { | 308 | for (; start < end; start += PAGE_SIZE) { |
| 309 | ClearPageReserved(virt_to_page(start)); | 309 | ClearPageReserved(virt_to_page(start)); |
| 310 | set_page_count(virt_to_page(start), 1); | 310 | init_page_count(virt_to_page(start)); |
| 311 | free_page(start); | 311 | free_page(start); |
| 312 | totalram_pages++; | 312 | totalram_pages++; |
| 313 | } | 313 | } |
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index df3a9e452cc5..ee73e30263af 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c | |||
| @@ -23,6 +23,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle) | |||
| 23 | page = alloc_pages(gfp, order); | 23 | page = alloc_pages(gfp, order); |
| 24 | if (!page) | 24 | if (!page) |
| 25 | return NULL; | 25 | return NULL; |
| 26 | split_page(page, order); | ||
| 26 | 27 | ||
| 27 | ret = page_address(page); | 28 | ret = page_address(page); |
| 28 | *handle = virt_to_phys(ret); | 29 | *handle = virt_to_phys(ret); |
| @@ -37,8 +38,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle) | |||
| 37 | end = page + (1 << order); | 38 | end = page + (1 << order); |
| 38 | 39 | ||
| 39 | while (++page < end) { | 40 | while (++page < end) { |
| 40 | set_page_count(page, 1); | ||
| 41 | |||
| 42 | /* Free any unused pages */ | 41 | /* Free any unused pages */ |
| 43 | if (page >= free) { | 42 | if (page >= free) { |
| 44 | __free_page(page); | 43 | __free_page(page); |
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 6b7a7688c98e..a3568fd51508 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c | |||
| @@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
| 84 | return entry; | 84 | return entry; |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | /* | ||
| 88 | * This function checks for proper alignment of input addr and len parameters. | ||
| 89 | */ | ||
| 90 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
| 91 | { | ||
| 92 | if (len & ~HPAGE_MASK) | ||
| 93 | return -EINVAL; | ||
| 94 | if (addr & ~HPAGE_MASK) | ||
| 95 | return -EINVAL; | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | struct page *follow_huge_addr(struct mm_struct *mm, | 87 | struct page *follow_huge_addr(struct mm_struct *mm, |
| 100 | unsigned long address, int write) | 88 | unsigned long address, int write) |
| 101 | { | 89 | { |
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index e342565f75fb..77b4a838fe10 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c | |||
| @@ -273,7 +273,7 @@ void free_initmem(void) | |||
| 273 | addr = (unsigned long)(&__init_begin); | 273 | addr = (unsigned long)(&__init_begin); |
| 274 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 274 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 275 | ClearPageReserved(virt_to_page(addr)); | 275 | ClearPageReserved(virt_to_page(addr)); |
| 276 | set_page_count(virt_to_page(addr), 1); | 276 | init_page_count(virt_to_page(addr)); |
| 277 | free_page(addr); | 277 | free_page(addr); |
| 278 | totalram_pages++; | 278 | totalram_pages++; |
| 279 | } | 279 | } |
| @@ -286,7 +286,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 286 | unsigned long p; | 286 | unsigned long p; |
| 287 | for (p = start; p < end; p += PAGE_SIZE) { | 287 | for (p = start; p < end; p += PAGE_SIZE) { |
| 288 | ClearPageReserved(virt_to_page(p)); | 288 | ClearPageReserved(virt_to_page(p)); |
| 289 | set_page_count(virt_to_page(p), 1); | 289 | init_page_count(virt_to_page(p)); |
| 290 | free_page(p); | 290 | free_page(p); |
| 291 | totalram_pages++; | 291 | totalram_pages++; |
| 292 | } | 292 | } |
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index ed6a505b3ee2..3d89f2a6c785 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c | |||
| @@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
| 84 | return entry; | 84 | return entry; |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | /* | ||
| 88 | * This function checks for proper alignment of input addr and len parameters. | ||
| 89 | */ | ||
| 90 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
| 91 | { | ||
| 92 | if (len & ~HPAGE_MASK) | ||
| 93 | return -EINVAL; | ||
| 94 | if (addr & ~HPAGE_MASK) | ||
| 95 | return -EINVAL; | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | struct page *follow_huge_addr(struct mm_struct *mm, | 87 | struct page *follow_huge_addr(struct mm_struct *mm, |
| 100 | unsigned long address, int write) | 88 | unsigned long address, int write) |
| 101 | { | 89 | { |
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c index a65e8bb2c3cc..1169757fb38b 100644 --- a/arch/sh64/mm/init.c +++ b/arch/sh64/mm/init.c | |||
| @@ -173,7 +173,7 @@ void free_initmem(void) | |||
| 173 | addr = (unsigned long)(&__init_begin); | 173 | addr = (unsigned long)(&__init_begin); |
| 174 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 174 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 175 | ClearPageReserved(virt_to_page(addr)); | 175 | ClearPageReserved(virt_to_page(addr)); |
| 176 | set_page_count(virt_to_page(addr), 1); | 176 | init_page_count(virt_to_page(addr)); |
| 177 | free_page(addr); | 177 | free_page(addr); |
| 178 | totalram_pages++; | 178 | totalram_pages++; |
| 179 | } | 179 | } |
| @@ -186,7 +186,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 186 | unsigned long p; | 186 | unsigned long p; |
| 187 | for (p = start; p < end; p += PAGE_SIZE) { | 187 | for (p = start; p < end; p += PAGE_SIZE) { |
| 188 | ClearPageReserved(virt_to_page(p)); | 188 | ClearPageReserved(virt_to_page(p)); |
| 189 | set_page_count(virt_to_page(p), 1); | 189 | init_page_count(virt_to_page(p)); |
| 190 | free_page(p); | 190 | free_page(p); |
| 191 | totalram_pages++; | 191 | totalram_pages++; |
| 192 | } | 192 | } |
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 40d426cce824..4219dd2ce3a2 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c | |||
| @@ -266,19 +266,19 @@ void __init smp4d_boot_cpus(void) | |||
| 266 | 266 | ||
| 267 | /* Free unneeded trap tables */ | 267 | /* Free unneeded trap tables */ |
| 268 | ClearPageReserved(virt_to_page(trapbase_cpu1)); | 268 | ClearPageReserved(virt_to_page(trapbase_cpu1)); |
| 269 | set_page_count(virt_to_page(trapbase_cpu1), 1); | 269 | init_page_count(virt_to_page(trapbase_cpu1)); |
| 270 | free_page((unsigned long)trapbase_cpu1); | 270 | free_page((unsigned long)trapbase_cpu1); |
| 271 | totalram_pages++; | 271 | totalram_pages++; |
| 272 | num_physpages++; | 272 | num_physpages++; |
| 273 | 273 | ||
| 274 | ClearPageReserved(virt_to_page(trapbase_cpu2)); | 274 | ClearPageReserved(virt_to_page(trapbase_cpu2)); |
| 275 | set_page_count(virt_to_page(trapbase_cpu2), 1); | 275 | init_page_count(virt_to_page(trapbase_cpu2)); |
| 276 | free_page((unsigned long)trapbase_cpu2); | 276 | free_page((unsigned long)trapbase_cpu2); |
| 277 | totalram_pages++; | 277 | totalram_pages++; |
| 278 | num_physpages++; | 278 | num_physpages++; |
| 279 | 279 | ||
| 280 | ClearPageReserved(virt_to_page(trapbase_cpu3)); | 280 | ClearPageReserved(virt_to_page(trapbase_cpu3)); |
| 281 | set_page_count(virt_to_page(trapbase_cpu3), 1); | 281 | init_page_count(virt_to_page(trapbase_cpu3)); |
| 282 | free_page((unsigned long)trapbase_cpu3); | 282 | free_page((unsigned long)trapbase_cpu3); |
| 283 | totalram_pages++; | 283 | totalram_pages++; |
| 284 | num_physpages++; | 284 | num_physpages++; |
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index a21f27d10e55..fbbd8a474c4c 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c | |||
| @@ -233,21 +233,21 @@ void __init smp4m_boot_cpus(void) | |||
| 233 | /* Free unneeded trap tables */ | 233 | /* Free unneeded trap tables */ |
| 234 | if (!cpu_isset(i, cpu_present_map)) { | 234 | if (!cpu_isset(i, cpu_present_map)) { |
| 235 | ClearPageReserved(virt_to_page(trapbase_cpu1)); | 235 | ClearPageReserved(virt_to_page(trapbase_cpu1)); |
| 236 | set_page_count(virt_to_page(trapbase_cpu1), 1); | 236 | init_page_count(virt_to_page(trapbase_cpu1)); |
| 237 | free_page((unsigned long)trapbase_cpu1); | 237 | free_page((unsigned long)trapbase_cpu1); |
| 238 | totalram_pages++; | 238 | totalram_pages++; |
| 239 | num_physpages++; | 239 | num_physpages++; |
| 240 | } | 240 | } |
| 241 | if (!cpu_isset(2, cpu_present_map)) { | 241 | if (!cpu_isset(2, cpu_present_map)) { |
| 242 | ClearPageReserved(virt_to_page(trapbase_cpu2)); | 242 | ClearPageReserved(virt_to_page(trapbase_cpu2)); |
| 243 | set_page_count(virt_to_page(trapbase_cpu2), 1); | 243 | init_page_count(virt_to_page(trapbase_cpu2)); |
| 244 | free_page((unsigned long)trapbase_cpu2); | 244 | free_page((unsigned long)trapbase_cpu2); |
| 245 | totalram_pages++; | 245 | totalram_pages++; |
| 246 | num_physpages++; | 246 | num_physpages++; |
| 247 | } | 247 | } |
| 248 | if (!cpu_isset(3, cpu_present_map)) { | 248 | if (!cpu_isset(3, cpu_present_map)) { |
| 249 | ClearPageReserved(virt_to_page(trapbase_cpu3)); | 249 | ClearPageReserved(virt_to_page(trapbase_cpu3)); |
| 250 | set_page_count(virt_to_page(trapbase_cpu3), 1); | 250 | init_page_count(virt_to_page(trapbase_cpu3)); |
| 251 | free_page((unsigned long)trapbase_cpu3); | 251 | free_page((unsigned long)trapbase_cpu3); |
| 252 | totalram_pages++; | 252 | totalram_pages++; |
| 253 | num_physpages++; | 253 | num_physpages++; |
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index c03babaa0498..898669732466 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c | |||
| @@ -383,7 +383,7 @@ void map_high_region(unsigned long start_pfn, unsigned long end_pfn) | |||
| 383 | struct page *page = pfn_to_page(tmp); | 383 | struct page *page = pfn_to_page(tmp); |
| 384 | 384 | ||
| 385 | ClearPageReserved(page); | 385 | ClearPageReserved(page); |
| 386 | set_page_count(page, 1); | 386 | init_page_count(page); |
| 387 | __free_page(page); | 387 | __free_page(page); |
| 388 | totalhigh_pages++; | 388 | totalhigh_pages++; |
| 389 | } | 389 | } |
| @@ -480,7 +480,7 @@ void free_initmem (void) | |||
| 480 | p = virt_to_page(addr); | 480 | p = virt_to_page(addr); |
| 481 | 481 | ||
| 482 | ClearPageReserved(p); | 482 | ClearPageReserved(p); |
| 483 | set_page_count(p, 1); | 483 | init_page_count(p); |
| 484 | __free_page(p); | 484 | __free_page(p); |
| 485 | totalram_pages++; | 485 | totalram_pages++; |
| 486 | num_physpages++; | 486 | num_physpages++; |
| @@ -497,7 +497,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 497 | struct page *p = virt_to_page(start); | 497 | struct page *p = virt_to_page(start); |
| 498 | 498 | ||
| 499 | ClearPageReserved(p); | 499 | ClearPageReserved(p); |
| 500 | set_page_count(p, 1); | 500 | init_page_count(p); |
| 501 | __free_page(p); | 501 | __free_page(p); |
| 502 | num_physpages++; | 502 | num_physpages++; |
| 503 | } | 503 | } |
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index a7a24869d045..280dc7958a13 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c | |||
| @@ -263,18 +263,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
| 263 | return entry; | 263 | return entry; |
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | /* | ||
| 267 | * This function checks for proper alignment of input addr and len parameters. | ||
| 268 | */ | ||
| 269 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ||
| 270 | { | ||
| 271 | if (len & ~HPAGE_MASK) | ||
| 272 | return -EINVAL; | ||
| 273 | if (addr & ~HPAGE_MASK) | ||
| 274 | return -EINVAL; | ||
| 275 | return 0; | ||
| 276 | } | ||
| 277 | |||
| 278 | struct page *follow_huge_addr(struct mm_struct *mm, | 266 | struct page *follow_huge_addr(struct mm_struct *mm, |
| 279 | unsigned long address, int write) | 267 | unsigned long address, int write) |
| 280 | { | 268 | { |
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index c2b556106fc1..2ae143ba50d8 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c | |||
| @@ -1461,7 +1461,7 @@ void free_initmem(void) | |||
| 1461 | p = virt_to_page(page); | 1461 | p = virt_to_page(page); |
| 1462 | 1462 | ||
| 1463 | ClearPageReserved(p); | 1463 | ClearPageReserved(p); |
| 1464 | set_page_count(p, 1); | 1464 | init_page_count(p); |
| 1465 | __free_page(p); | 1465 | __free_page(p); |
| 1466 | num_physpages++; | 1466 | num_physpages++; |
| 1467 | totalram_pages++; | 1467 | totalram_pages++; |
| @@ -1477,7 +1477,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 1477 | struct page *p = virt_to_page(start); | 1477 | struct page *p = virt_to_page(start); |
| 1478 | 1478 | ||
| 1479 | ClearPageReserved(p); | 1479 | ClearPageReserved(p); |
| 1480 | set_page_count(p, 1); | 1480 | init_page_count(p); |
| 1481 | __free_page(p); | 1481 | __free_page(p); |
| 1482 | num_physpages++; | 1482 | num_physpages++; |
| 1483 | totalram_pages++; | 1483 | totalram_pages++; |
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index fa4f915be5c5..92cce96b5e24 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c | |||
| @@ -57,7 +57,7 @@ static void setup_highmem(unsigned long highmem_start, | |||
| 57 | for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ | 57 | for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ |
| 58 | page = &mem_map[highmem_pfn + i]; | 58 | page = &mem_map[highmem_pfn + i]; |
| 59 | ClearPageReserved(page); | 59 | ClearPageReserved(page); |
| 60 | set_page_count(page, 1); | 60 | init_page_count(page); |
| 61 | __free_page(page); | 61 | __free_page(page); |
| 62 | } | 62 | } |
| 63 | } | 63 | } |
| @@ -296,7 +296,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 296 | (end - start) >> 10); | 296 | (end - start) >> 10); |
| 297 | for (; start < end; start += PAGE_SIZE) { | 297 | for (; start < end; start += PAGE_SIZE) { |
| 298 | ClearPageReserved(virt_to_page(start)); | 298 | ClearPageReserved(virt_to_page(start)); |
| 299 | set_page_count(virt_to_page(start), 1); | 299 | init_page_count(virt_to_page(start)); |
| 300 | free_page(start); | 300 | free_page(start); |
| 301 | totalram_pages++; | 301 | totalram_pages++; |
| 302 | } | 302 | } |
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 544665e04513..0e65340eee33 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c | |||
| @@ -279,7 +279,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) | |||
| 279 | 279 | ||
| 280 | for(i = 0; i < total_pages; i++){ | 280 | for(i = 0; i < total_pages; i++){ |
| 281 | p = &map[i]; | 281 | p = &map[i]; |
| 282 | set_page_count(p, 0); | 282 | memset(p, 0, sizeof(struct page)); |
| 283 | SetPageReserved(p); | 283 | SetPageReserved(p); |
| 284 | INIT_LIST_HEAD(&p->lru); | 284 | INIT_LIST_HEAD(&p->lru); |
| 285 | } | 285 | } |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3080f84bf7b7..ee5ce3d3cbc3 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
| @@ -477,7 +477,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
| 477 | return IRQ_HANDLED; | 477 | return IRQ_HANDLED; |
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | static unsigned int cyc2ns_scale; | 480 | static unsigned int cyc2ns_scale __read_mostly; |
| 481 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 481 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
| 482 | 482 | ||
| 483 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 483 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
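Marking cyc2ns_scale __read_mostly groups it with other rarely written variables in a dedicated section, so a hot read-only value does not share a cache line with frequently written data and bounce between CPUs. For reference, the annotation is just a section attribute; roughly as below (see the arch cache.h headers; on some configurations it may expand to nothing):

	/* Approximate expansion of the annotation. */
	#define __read_mostly __attribute__((__section__(".data.read_mostly")))

	static unsigned int cyc2ns_scale __read_mostly;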
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 3496abc8d372..c9dc7e46731e 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
| @@ -124,6 +124,7 @@ extern void * __memcpy(void *,const void *,__kernel_size_t); | |||
| 124 | 124 | ||
| 125 | EXPORT_SYMBOL(memset); | 125 | EXPORT_SYMBOL(memset); |
| 126 | EXPORT_SYMBOL(strlen); | 126 | EXPORT_SYMBOL(strlen); |
| 127 | EXPORT_SYMBOL(strpbrk); | ||
| 127 | EXPORT_SYMBOL(memmove); | 128 | EXPORT_SYMBOL(memmove); |
| 128 | EXPORT_SYMBOL(memcpy); | 129 | EXPORT_SYMBOL(memcpy); |
| 129 | EXPORT_SYMBOL(__memcpy); | 130 | EXPORT_SYMBOL(__memcpy); |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 7af1742aa958..40ed13d263cd 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
| @@ -486,7 +486,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
| 486 | void online_page(struct page *page) | 486 | void online_page(struct page *page) |
| 487 | { | 487 | { |
| 488 | ClearPageReserved(page); | 488 | ClearPageReserved(page); |
| 489 | set_page_count(page, 1); | 489 | init_page_count(page); |
| 490 | __free_page(page); | 490 | __free_page(page); |
| 491 | totalram_pages++; | 491 | totalram_pages++; |
| 492 | num_physpages++; | 492 | num_physpages++; |
| @@ -592,7 +592,7 @@ void free_initmem(void) | |||
| 592 | addr = (unsigned long)(&__init_begin); | 592 | addr = (unsigned long)(&__init_begin); |
| 593 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { | 593 | for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { |
| 594 | ClearPageReserved(virt_to_page(addr)); | 594 | ClearPageReserved(virt_to_page(addr)); |
| 595 | set_page_count(virt_to_page(addr), 1); | 595 | init_page_count(virt_to_page(addr)); |
| 596 | memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); | 596 | memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); |
| 597 | free_page(addr); | 597 | free_page(addr); |
| 598 | totalram_pages++; | 598 | totalram_pages++; |
| @@ -632,7 +632,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
| 632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
| 633 | for (; start < end; start += PAGE_SIZE) { | 633 | for (; start < end; start += PAGE_SIZE) { |
| 634 | ClearPageReserved(virt_to_page(start)); | 634 | ClearPageReserved(virt_to_page(start)); |
| 635 | set_page_count(virt_to_page(start), 1); | 635 | init_page_count(virt_to_page(start)); |
| 636 | free_page(start); | 636 | free_page(start); |
| 637 | totalram_pages++; | 637 | totalram_pages++; |
| 638 | } | 638 | } |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 35f1f1aab063..531ad21447b1 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
| @@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
| 45 | pte_t *pbase; | 45 | pte_t *pbase; |
| 46 | if (!base) | 46 | if (!base) |
| 47 | return NULL; | 47 | return NULL; |
| 48 | /* | ||
| 49 | * page_private is used to track the number of entries in | ||
| 50 | * the page table page have non standard attributes. | ||
| 51 | */ | ||
| 52 | SetPagePrivate(base); | ||
| 53 | page_private(base) = 0; | ||
| 54 | |||
| 48 | address = __pa(address); | 55 | address = __pa(address); |
| 49 | addr = address & LARGE_PAGE_MASK; | 56 | addr = address & LARGE_PAGE_MASK; |
| 50 | pbase = (pte_t *)page_address(base); | 57 | pbase = (pte_t *)page_address(base); |
| @@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address) | |||
| 77 | on_each_cpu(flush_kernel_map, (void *)address, 1, 1); | 84 | on_each_cpu(flush_kernel_map, (void *)address, 1, 1); |
| 78 | } | 85 | } |
| 79 | 86 | ||
| 80 | struct deferred_page { | 87 | static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ |
| 81 | struct deferred_page *next; | ||
| 82 | struct page *fpage; | ||
| 83 | unsigned long address; | ||
| 84 | }; | ||
| 85 | static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ | ||
| 86 | 88 | ||
| 87 | static inline void save_page(unsigned long address, struct page *fpage) | 89 | static inline void save_page(struct page *fpage) |
| 88 | { | 90 | { |
| 89 | struct deferred_page *df; | 91 | fpage->lru.next = (struct list_head *)deferred_pages; |
| 90 | df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); | 92 | deferred_pages = fpage; |
| 91 | if (!df) { | ||
| 92 | flush_map(address); | ||
| 93 | __free_page(fpage); | ||
| 94 | } else { | ||
| 95 | df->next = df_list; | ||
| 96 | df->fpage = fpage; | ||
| 97 | df->address = address; | ||
| 98 | df_list = df; | ||
| 99 | } | ||
| 100 | } | 93 | } |
| 101 | 94 | ||
| 102 | /* | 95 | /* |
| @@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
| 138 | set_pte(kpte, pfn_pte(pfn, prot)); | 131 | set_pte(kpte, pfn_pte(pfn, prot)); |
| 139 | } else { | 132 | } else { |
| 140 | /* | 133 | /* |
| 141 | * split_large_page will take the reference for this change_page_attr | 134 | * split_large_page will take the reference for this |
| 142 | * on the split page. | 135 | * change_page_attr on the split page. |
| 143 | */ | 136 | */ |
| 144 | 137 | ||
| 145 | struct page *split; | 138 | struct page *split; |
| @@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
| 151 | set_pte(kpte,mk_pte(split, ref_prot2)); | 144 | set_pte(kpte,mk_pte(split, ref_prot2)); |
| 152 | kpte_page = split; | 145 | kpte_page = split; |
| 153 | } | 146 | } |
| 154 | get_page(kpte_page); | 147 | page_private(kpte_page)++; |
| 155 | } else if ((kpte_flags & _PAGE_PSE) == 0) { | 148 | } else if ((kpte_flags & _PAGE_PSE) == 0) { |
| 156 | set_pte(kpte, pfn_pte(pfn, ref_prot)); | 149 | set_pte(kpte, pfn_pte(pfn, ref_prot)); |
| 157 | __put_page(kpte_page); | 150 | BUG_ON(page_private(kpte_page) == 0); |
| 151 | page_private(kpte_page)--; | ||
| 158 | } else | 152 | } else |
| 159 | BUG(); | 153 | BUG(); |
| 160 | 154 | ||
| 161 | /* on x86-64 the direct mapping set at boot is not using 4k pages */ | 155 | /* on x86-64 the direct mapping set at boot is not using 4k pages */ |
| 162 | BUG_ON(PageReserved(kpte_page)); | 156 | BUG_ON(PageReserved(kpte_page)); |
| 163 | 157 | ||
| 164 | switch (page_count(kpte_page)) { | 158 | if (page_private(kpte_page) == 0) { |
| 165 | case 1: | 159 | save_page(kpte_page); |
| 166 | save_page(address, kpte_page); | ||
| 167 | revert_page(address, ref_prot); | 160 | revert_page(address, ref_prot); |
| 168 | break; | ||
| 169 | case 0: | ||
| 170 | BUG(); /* memleak and failed 2M page regeneration */ | ||
| 171 | } | 161 | } |
| 172 | return 0; | 162 | return 0; |
| 173 | } | 163 | } |
| @@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) | |||
| 220 | 210 | ||
| 221 | void global_flush_tlb(void) | 211 | void global_flush_tlb(void) |
| 222 | { | 212 | { |
| 223 | struct deferred_page *df, *next_df; | 213 | struct page *dpage; |
| 224 | 214 | ||
| 225 | down_read(&init_mm.mmap_sem); | 215 | down_read(&init_mm.mmap_sem); |
| 226 | df = xchg(&df_list, NULL); | 216 | dpage = xchg(&deferred_pages, NULL); |
| 227 | up_read(&init_mm.mmap_sem); | 217 | up_read(&init_mm.mmap_sem); |
| 228 | flush_map((df && !df->next) ? df->address : 0); | 218 | |
| 229 | for (; df; df = next_df) { | 219 | flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); |
| 230 | next_df = df->next; | 220 | while (dpage) { |
| 231 | if (df->fpage) | 221 | struct page *tmp = dpage; |
| 232 | __free_page(df->fpage); | 222 | dpage = (struct page *)dpage->lru.next; |
| 233 | kfree(df); | 223 | ClearPagePrivate(tmp); |
| 224 | __free_page(tmp); | ||
| 234 | } | 225 | } |
| 235 | } | 226 | } |
| 236 | 227 | ||
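Two independent techniques appear in the pageattr.c rewrite above. First, the number of PTEs with non-default attributes in a split page-table page is now tracked in page_private() instead of being piggy-backed on the page refcount. Second, pages waiting for a deferred TLB flush are chained through their own lru.next field, which removes the kmalloc() of a struct deferred_page and its failure path. A compact sketch of that second trick:

	/* Chain free-standing pages into a singly linked list without any
	 * extra allocation: a page removed from the allocator owns its
	 * lru field, so it can double as the list pointer (sketch). */
	static struct page *deferred_pages;	/* caller provides locking */

	static void defer_free(struct page *page)
	{
		page->lru.next = (struct list_head *)deferred_pages;
		deferred_pages = page;
	}

	static void flush_deferred(void)
	{
		struct page *page = deferred_pages;

		deferred_pages = NULL;
		while (page) {
			struct page *next = (struct page *)page->lru.next;

			__free_page(page);
			page = next;
		}
	}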
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 5a91d6c9e66d..e1be4235f367 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c | |||
| @@ -272,7 +272,7 @@ free_reserved_mem(void *start, void *end) | |||
| 272 | { | 272 | { |
| 273 | for (; start < end; start += PAGE_SIZE) { | 273 | for (; start < end; start += PAGE_SIZE) { |
| 274 | ClearPageReserved(virt_to_page(start)); | 274 | ClearPageReserved(virt_to_page(start)); |
| 275 | set_page_count(virt_to_page(start), 1); | 275 | init_page_count(virt_to_page(start)); |
| 276 | free_page((unsigned long)start); | 276 | free_page((unsigned long)start); |
| 277 | totalram_pages++; | 277 | totalram_pages++; |
| 278 | } | 278 | } |
diff --git a/arch/xtensa/mm/pgtable.c b/arch/xtensa/mm/pgtable.c index e5e119c820e4..7d28914d11cb 100644 --- a/arch/xtensa/mm/pgtable.c +++ b/arch/xtensa/mm/pgtable.c | |||
| @@ -14,25 +14,21 @@ | |||
| 14 | 14 | ||
| 15 | pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 15 | pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
| 16 | { | 16 | { |
| 17 | pte_t *pte, p; | 17 | pte_t *pte = NULL, *p; |
| 18 | int color = ADDR_COLOR(address); | 18 | int color = ADDR_COLOR(address); |
| 19 | int i; | 19 | int i; |
| 20 | 20 | ||
| 21 | p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER); | 21 | p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER); |
| 22 | 22 | ||
| 23 | if (likely(p)) { | 23 | if (likely(p)) { |
| 24 | struct page *page; | 24 | split_page(virt_to_page(p), COLOR_ORDER); |
| 25 | |||
| 26 | for (i = 0; i < COLOR_SIZE; i++, p++) { | ||
| 27 | page = virt_to_page(pte); | ||
| 28 | |||
| 29 | set_page_count(page, 1); | ||
| 30 | ClearPageCompound(page); | ||
| 31 | 25 | ||
| 26 | for (i = 0; i < COLOR_SIZE; i++) { | ||
| 32 | if (ADDR_COLOR(p) == color) | 27 | if (ADDR_COLOR(p) == color) |
| 33 | pte = p; | 28 | pte = p; |
| 34 | else | 29 | else |
| 35 | free_page(p); | 30 | free_page(p); |
| 31 | p += PTRS_PER_PTE; | ||
| 36 | } | 32 | } |
| 37 | clear_page(pte); | 33 | clear_page(pte); |
| 38 | } | 34 | } |
| @@ -49,20 +45,20 @@ int flush; | |||
| 49 | 45 | ||
| 50 | struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address) | 46 | struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address) |
| 51 | { | 47 | { |
| 52 | struct page *page, p; | 48 | struct page *page = NULL, *p; |
| 53 | int color = ADDR_COLOR(address); | 49 | int color = ADDR_COLOR(address); |
| 54 | 50 | ||
| 55 | p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); | 51 | p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); |
| 56 | 52 | ||
| 57 | if (likely(p)) { | 53 | if (likely(p)) { |
| 58 | for (i = 0; i < PAGE_ORDER; i++) { | 54 | split_page(p, COLOR_ORDER); |
| 59 | set_page_count(p, 1); | ||
| 60 | ClearPageCompound(p); | ||
| 61 | 55 | ||
| 62 | if (PADDR_COLOR(page_address(pg)) == color) | 56 | for (i = 0; i < PAGE_ORDER; i++) { |
| 57 | if (PADDR_COLOR(page_address(p)) == color) | ||
| 63 | page = p; | 58 | page = p; |
| 64 | else | 59 | else |
| 65 | free_page(p); | 60 | __free_page(p); |
| 61 | p++; | ||
| 66 | } | 62 | } |
| 67 | clear_highpage(page); | 63 | clear_highpage(page); |
| 68 | } | 64 | } |
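The xtensa PTE allocators above were previously broken (pte was read before being assigned and the loop advanced the wrong variable); the rewrite allocates a block covering every cache color, split_page()s it, keeps the page whose color matches the faulting address and frees the rest. A sketch of that selection, where ADDR_COLOR(), COLOR_ORDER and COLOR_SIZE stand in for the arch helpers used in the hunk:

	/* Pick the page whose cache color matches 'address'; free the
	 * others from the split block (sketch). */
	static unsigned long alloc_colored_page(unsigned long address)
	{
		unsigned long chosen = 0;
		unsigned long p = __get_free_pages(GFP_KERNEL, COLOR_ORDER);
		int i;

		if (!p)
			return 0;

		split_page(virt_to_page(p), COLOR_ORDER);

		for (i = 0; i < COLOR_SIZE; i++, p += PAGE_SIZE) {
			if (!chosen && ADDR_COLOR(p) == ADDR_COLOR(address))
				chosen = p;
			else
				free_page(p);
		}
		return chosen;
	}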
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h index a9efc13cc858..8a98169b60c1 100644 --- a/drivers/char/snsc.h +++ b/drivers/char/snsc.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * License. See the file "COPYING" in the main directory of this archive | 5 | * License. See the file "COPYING" in the main directory of this archive |
| 6 | * for more details. | 6 | * for more details. |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | /* | 11 | /* |
| @@ -70,6 +70,9 @@ struct sysctl_data_s { | |||
| 70 | #define EV_CLASS_TEST_WARNING 0x6000ul | 70 | #define EV_CLASS_TEST_WARNING 0x6000ul |
| 71 | #define EV_CLASS_PWRD_NOTIFY 0x8000ul | 71 | #define EV_CLASS_PWRD_NOTIFY 0x8000ul |
| 72 | 72 | ||
| 73 | /* ENV class codes */ | ||
| 74 | #define ENV_PWRDN_PEND 0x4101ul | ||
| 75 | |||
| 73 | #define EV_SEVERITY_POWER_STABLE 0x0000ul | 76 | #define EV_SEVERITY_POWER_STABLE 0x0000ul |
| 74 | #define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul | 77 | #define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul |
| 75 | #define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul | 78 | #define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul |
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index baaa365285fa..a4fa507eed9e 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * License. See the file "COPYING" in the main directory of this archive | 5 | * License. See the file "COPYING" in the main directory of this archive |
| 6 | * for more details. | 6 | * for more details. |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | /* | 11 | /* |
| @@ -187,7 +187,8 @@ scdrv_event_severity(int code) | |||
| 187 | static void | 187 | static void |
| 188 | scdrv_dispatch_event(char *event, int len) | 188 | scdrv_dispatch_event(char *event, int len) |
| 189 | { | 189 | { |
| 190 | int code, esp_code, src; | 190 | static int snsc_shutting_down = 0; |
| 191 | int code, esp_code, src, class; | ||
| 191 | char desc[CHUNKSIZE]; | 192 | char desc[CHUNKSIZE]; |
| 192 | char *severity; | 193 | char *severity; |
| 193 | 194 | ||
| @@ -199,9 +200,25 @@ scdrv_dispatch_event(char *event, int len) | |||
| 199 | /* how urgent is the message? */ | 200 | /* how urgent is the message? */ |
| 200 | severity = scdrv_event_severity(code); | 201 | severity = scdrv_event_severity(code); |
| 201 | 202 | ||
| 202 | if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) { | 203 | class = (code & EV_CLASS_MASK); |
| 204 | |||
| 205 | if (class == EV_CLASS_PWRD_NOTIFY || code == ENV_PWRDN_PEND) { | ||
| 203 | struct task_struct *p; | 206 | struct task_struct *p; |
| 204 | 207 | ||
| 208 | if (snsc_shutting_down) | ||
| 209 | return; | ||
| 210 | |||
| 211 | snsc_shutting_down = 1; | ||
| 212 | |||
| 213 | /* give a message for each type of event */ | ||
| 214 | if (class == EV_CLASS_PWRD_NOTIFY) | ||
| 215 | printk(KERN_NOTICE "Power off indication received." | ||
| 216 | " Sending SIGPWR to init...\n"); | ||
| 217 | else if (code == ENV_PWRDN_PEND) | ||
| 218 | printk(KERN_CRIT "WARNING: Shutting down the system" | ||
| 219 | " due to a critical environmental condition." | ||
| 220 | " Sending SIGPWR to init...\n"); | ||
| 221 | |||
| 205 | /* give a SIGPWR signal to init proc */ | 222 | /* give a SIGPWR signal to init proc */ |
| 206 | 223 | ||
| 207 | /* first find init's task */ | 224 | /* first find init's task */ |
| @@ -210,12 +227,11 @@ scdrv_dispatch_event(char *event, int len) | |||
| 210 | if (p->pid == 1) | 227 | if (p->pid == 1) |
| 211 | break; | 228 | break; |
| 212 | } | 229 | } |
| 213 | if (p) { /* we found init's task */ | 230 | if (p) { |
| 214 | printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n"); | ||
| 215 | force_sig(SIGPWR, p); | 231 | force_sig(SIGPWR, p); |
| 216 | } else { /* failed to find init's task - just give message(s) */ | 232 | } else { |
| 217 | printk(KERN_WARNING "Failed to find init proc to handle power off!\n"); | 233 | printk(KERN_ERR "Failed to signal init!\n"); |
| 218 | printk("%s|$(0x%x)%s\n", severity, esp_code, desc); | 234 | snsc_shutting_down = 0; /* so can try again (?) */ |
| 219 | } | 235 | } |
| 220 | read_unlock(&tasklist_lock); | 236 | read_unlock(&tasklist_lock); |
| 221 | } else { | 237 | } else { |
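The dispatch path above latches a static snsc_shutting_down flag so that a burst of power-down events signals init only once, and it now reacts to two triggers: any EV_CLASS_PWRD_NOTIFY event and the specific ENV_PWRDN_PEND environmental code. A minimal sketch of the one-shot latch plus class decoding; EV_CLASS_PWRD_NOTIFY and ENV_PWRDN_PEND come from the snsc.h hunk above, while the EV_CLASS_MASK value here is a placeholder:

	#define EV_CLASS_MASK		0xf000ul	/* placeholder */
	#define EV_CLASS_PWRD_NOTIFY	0x8000ul
	#define ENV_PWRDN_PEND		0x4101ul

	static int snsc_shutting_down;		/* one-shot latch */

	static void handle_power_event(unsigned long code)
	{
		unsigned long class = code & EV_CLASS_MASK;

		if (class != EV_CLASS_PWRD_NOTIFY && code != ENV_PWRDN_PEND)
			return;
		if (snsc_shutting_down)		/* init already signalled */
			return;
		snsc_shutting_down = 1;
		/* ...locate init and force_sig(SIGPWR, p) as in the hunk... */
	}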
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c index ac2a297ce37c..a80c83210872 100644 --- a/drivers/char/tb0219.c +++ b/drivers/char/tb0219.c | |||
| @@ -283,7 +283,7 @@ static void tb0219_pci_irq_init(void) | |||
| 283 | vr41xx_set_irq_level(TB0219_PCI_SLOT3_PIN, IRQ_LEVEL_LOW); | 283 | vr41xx_set_irq_level(TB0219_PCI_SLOT3_PIN, IRQ_LEVEL_LOW); |
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | static int tb0219_probe(struct platform_device *dev) | 286 | static int __devinit tb0219_probe(struct platform_device *dev) |
| 287 | { | 287 | { |
| 288 | int retval; | 288 | int retval; |
| 289 | 289 | ||
| @@ -319,7 +319,7 @@ static int tb0219_probe(struct platform_device *dev) | |||
| 319 | return 0; | 319 | return 0; |
| 320 | } | 320 | } |
| 321 | 321 | ||
| 322 | static int tb0219_remove(struct platform_device *dev) | 322 | static int __devexit tb0219_remove(struct platform_device *dev) |
| 323 | { | 323 | { |
| 324 | _machine_restart = old_machine_restart; | 324 | _machine_restart = old_machine_restart; |
| 325 | 325 | ||
| @@ -335,19 +335,26 @@ static struct platform_device *tb0219_platform_device; | |||
| 335 | 335 | ||
| 336 | static struct platform_driver tb0219_device_driver = { | 336 | static struct platform_driver tb0219_device_driver = { |
| 337 | .probe = tb0219_probe, | 337 | .probe = tb0219_probe, |
| 338 | .remove = tb0219_remove, | 338 | .remove = __devexit_p(tb0219_remove), |
| 339 | .driver = { | 339 | .driver = { |
| 340 | .name = "TB0219", | 340 | .name = "TB0219", |
| 341 | .owner = THIS_MODULE, | ||
| 341 | }, | 342 | }, |
| 342 | }; | 343 | }; |
| 343 | 344 | ||
| 344 | static int __devinit tanbac_tb0219_init(void) | 345 | static int __init tanbac_tb0219_init(void) |
| 345 | { | 346 | { |
| 346 | int retval; | 347 | int retval; |
| 347 | 348 | ||
| 348 | tb0219_platform_device = platform_device_register_simple("TB0219", -1, NULL, 0); | 349 | tb0219_platform_device = platform_device_alloc("TB0219", -1); |
| 349 | if (IS_ERR(tb0219_platform_device)) | 350 | if (!tb0219_platform_device) |
| 350 | return PTR_ERR(tb0219_platform_device); | 351 | return -ENOMEM; |
| 352 | |||
| 353 | retval = platform_device_add(tb0219_platform_device); | ||
| 354 | if (retval < 0) { | ||
| 355 | platform_device_put(tb0219_platform_device); | ||
| 356 | return retval; | ||
| 357 | } | ||
| 351 | 358 | ||
| 352 | retval = platform_driver_register(&tb0219_device_driver); | 359 | retval = platform_driver_register(&tb0219_device_driver); |
| 353 | if (retval < 0) | 360 | if (retval < 0) |
| @@ -356,10 +363,9 @@ static int __devinit tanbac_tb0219_init(void) | |||
| 356 | return retval; | 363 | return retval; |
| 357 | } | 364 | } |
| 358 | 365 | ||
| 359 | static void __devexit tanbac_tb0219_exit(void) | 366 | static void __exit tanbac_tb0219_exit(void) |
| 360 | { | 367 | { |
| 361 | platform_driver_unregister(&tb0219_device_driver); | 368 | platform_driver_unregister(&tb0219_device_driver); |
| 362 | |||
| 363 | platform_device_unregister(tb0219_platform_device); | 369 | platform_device_unregister(tb0219_platform_device); |
| 364 | } | 370 | } |
| 365 | 371 | ||
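
The tb0219 conversion above (and the vr41xx and mv64x60 conversions that follow) all apply the same recipe: replace platform_device_register_simple(), which returns an ERR_PTR on failure, with the two-step platform_device_alloc()/platform_device_add(), dropping the reference with platform_device_put() if the add fails and only using platform_device_unregister() once the device is actually live. A self-contained sketch of that recipe is shown below; the "example" device/driver names are invented and do not belong to any of the drivers in this patch.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static struct platform_device *example_pdev;

static int __devinit example_probe(struct platform_device *dev)
{
	return 0;		/* claim resources, map registers, ... */
}

static int __devexit example_remove(struct platform_device *dev)
{
	return 0;
}

static struct platform_driver example_driver = {
	.probe	= example_probe,
	.remove	= __devexit_p(example_remove),
	.driver	= {
		.name	= "example",
		.owner	= THIS_MODULE,
	},
};

static int __init example_init(void)
{
	int retval;

	example_pdev = platform_device_alloc("example", -1);
	if (!example_pdev)
		return -ENOMEM;

	retval = platform_device_add(example_pdev);
	if (retval < 0) {
		/* never added: drop the reference instead of unregistering */
		platform_device_put(example_pdev);
		return retval;
	}

	retval = platform_driver_register(&example_driver);
	if (retval < 0)
		/* device is live now, so a full unregister is required */
		platform_device_unregister(example_pdev);

	return retval;
}

static void __exit example_exit(void)
{
	platform_driver_unregister(&example_driver);
	platform_device_unregister(example_pdev);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
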
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c index 2267c7b81799..05e6e814d86f 100644 --- a/drivers/char/vr41xx_giu.c +++ b/drivers/char/vr41xx_giu.c | |||
| @@ -613,7 +613,7 @@ static struct file_operations gpio_fops = { | |||
| 613 | .release = gpio_release, | 613 | .release = gpio_release, |
| 614 | }; | 614 | }; |
| 615 | 615 | ||
| 616 | static int giu_probe(struct platform_device *dev) | 616 | static int __devinit giu_probe(struct platform_device *dev) |
| 617 | { | 617 | { |
| 618 | unsigned long start, size, flags = 0; | 618 | unsigned long start, size, flags = 0; |
| 619 | unsigned int nr_pins = 0; | 619 | unsigned int nr_pins = 0; |
| @@ -697,7 +697,7 @@ static int giu_probe(struct platform_device *dev) | |||
| 697 | return cascade_irq(GIUINT_IRQ, giu_get_irq); | 697 | return cascade_irq(GIUINT_IRQ, giu_get_irq); |
| 698 | } | 698 | } |
| 699 | 699 | ||
| 700 | static int giu_remove(struct platform_device *dev) | 700 | static int __devexit giu_remove(struct platform_device *dev) |
| 701 | { | 701 | { |
| 702 | iounmap(giu_base); | 702 | iounmap(giu_base); |
| 703 | 703 | ||
| @@ -712,9 +712,10 @@ static struct platform_device *giu_platform_device; | |||
| 712 | 712 | ||
| 713 | static struct platform_driver giu_device_driver = { | 713 | static struct platform_driver giu_device_driver = { |
| 714 | .probe = giu_probe, | 714 | .probe = giu_probe, |
| 715 | .remove = giu_remove, | 715 | .remove = __devexit_p(giu_remove), |
| 716 | .driver = { | 716 | .driver = { |
| 717 | .name = "GIU", | 717 | .name = "GIU", |
| 718 | .owner = THIS_MODULE, | ||
| 718 | }, | 719 | }, |
| 719 | }; | 720 | }; |
| 720 | 721 | ||
| @@ -722,9 +723,15 @@ static int __init vr41xx_giu_init(void) | |||
| 722 | { | 723 | { |
| 723 | int retval; | 724 | int retval; |
| 724 | 725 | ||
| 725 | giu_platform_device = platform_device_register_simple("GIU", -1, NULL, 0); | 726 | giu_platform_device = platform_device_alloc("GIU", -1); |
| 726 | if (IS_ERR(giu_platform_device)) | 727 | if (!giu_platform_device) |
| 727 | return PTR_ERR(giu_platform_device); | 728 | return -ENOMEM; |
| 729 | |||
| 730 | retval = platform_device_add(giu_platform_device); | ||
| 731 | if (retval < 0) { | ||
| 732 | platform_device_put(giu_platform_device); | ||
| 733 | return retval; | ||
| 734 | } | ||
| 728 | 735 | ||
| 729 | retval = platform_driver_register(&giu_device_driver); | 736 | retval = platform_driver_register(&giu_device_driver); |
| 730 | if (retval < 0) | 737 | if (retval < 0) |
diff --git a/drivers/char/vr41xx_rtc.c b/drivers/char/vr41xx_rtc.c index bc1b4a15212c..b109d9a502d6 100644 --- a/drivers/char/vr41xx_rtc.c +++ b/drivers/char/vr41xx_rtc.c | |||
| @@ -558,7 +558,7 @@ static struct miscdevice rtc_miscdevice = { | |||
| 558 | .fops = &rtc_fops, | 558 | .fops = &rtc_fops, |
| 559 | }; | 559 | }; |
| 560 | 560 | ||
| 561 | static int rtc_probe(struct platform_device *pdev) | 561 | static int __devinit rtc_probe(struct platform_device *pdev) |
| 562 | { | 562 | { |
| 563 | unsigned int irq; | 563 | unsigned int irq; |
| 564 | int retval; | 564 | int retval; |
| @@ -631,7 +631,7 @@ static int rtc_probe(struct platform_device *pdev) | |||
| 631 | return 0; | 631 | return 0; |
| 632 | } | 632 | } |
| 633 | 633 | ||
| 634 | static int rtc_remove(struct platform_device *dev) | 634 | static int __devexit rtc_remove(struct platform_device *dev) |
| 635 | { | 635 | { |
| 636 | int retval; | 636 | int retval; |
| 637 | 637 | ||
| @@ -653,13 +653,14 @@ static struct platform_device *rtc_platform_device; | |||
| 653 | 653 | ||
| 654 | static struct platform_driver rtc_device_driver = { | 654 | static struct platform_driver rtc_device_driver = { |
| 655 | .probe = rtc_probe, | 655 | .probe = rtc_probe, |
| 656 | .remove = rtc_remove, | 656 | .remove = __devexit_p(rtc_remove), |
| 657 | .driver = { | 657 | .driver = { |
| 658 | .name = rtc_name, | 658 | .name = rtc_name, |
| 659 | .owner = THIS_MODULE, | ||
| 659 | }, | 660 | }, |
| 660 | }; | 661 | }; |
| 661 | 662 | ||
| 662 | static int __devinit vr41xx_rtc_init(void) | 663 | static int __init vr41xx_rtc_init(void) |
| 663 | { | 664 | { |
| 664 | int retval; | 665 | int retval; |
| 665 | 666 | ||
| @@ -684,10 +685,20 @@ static int __devinit vr41xx_rtc_init(void) | |||
| 684 | break; | 685 | break; |
| 685 | } | 686 | } |
| 686 | 687 | ||
| 687 | rtc_platform_device = platform_device_register_simple("RTC", -1, | 688 | rtc_platform_device = platform_device_alloc("RTC", -1); |
| 688 | rtc_resource, ARRAY_SIZE(rtc_resource)); | 689 | if (!rtc_platform_device) |
| 689 | if (IS_ERR(rtc_platform_device)) | 690 | return -ENOMEM; |
| 690 | return PTR_ERR(rtc_platform_device); | 691 | |
| 692 | retval = platform_device_add_resources(rtc_platform_device, | ||
| 693 | rtc_resource, ARRAY_SIZE(rtc_resource)); | ||
| 694 | |||
| 695 | if (retval == 0) | ||
| 696 | retval = platform_device_add(rtc_platform_device); | ||
| 697 | |||
| 698 | if (retval < 0) { | ||
| 699 | platform_device_put(rtc_platform_device); | ||
| 700 | return retval; | ||
| 701 | } | ||
| 691 | 702 | ||
| 692 | retval = platform_driver_register(&rtc_device_driver); | 703 | retval = platform_driver_register(&rtc_device_driver); |
| 693 | if (retval < 0) | 704 | if (retval < 0) |
| @@ -696,10 +707,9 @@ static int __devinit vr41xx_rtc_init(void) | |||
| 696 | return retval; | 707 | return retval; |
| 697 | } | 708 | } |
| 698 | 709 | ||
| 699 | static void __devexit vr41xx_rtc_exit(void) | 710 | static void __exit vr41xx_rtc_exit(void) |
| 700 | { | 711 | { |
| 701 | platform_driver_unregister(&rtc_device_driver); | 712 | platform_driver_unregister(&rtc_device_driver); |
| 702 | |||
| 703 | platform_device_unregister(rtc_platform_device); | 713 | platform_device_unregister(rtc_platform_device); |
| 704 | } | 714 | } |
| 705 | 715 | ||
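
When the device also carries memory or IRQ resources, as in the RTC conversion above, the only addition is platform_device_add_resources() between the alloc and the add; a single platform_device_put() then covers a failure of either call. The sketch below shows that ordering with an invented "example_rtc" name and a placeholder resource table (the real rtc_resource[] uses VR41xx chip addresses chosen at runtime).

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>

/* placeholder resource table, for illustration only */
static struct resource example_rtc_resource[] = {
	{ .start = 0x1000, .end = 0x101f, .flags = IORESOURCE_MEM },
};

static struct platform_device *example_rtc_pdev;

static int __init example_rtc_init(void)
{
	int retval;

	example_rtc_pdev = platform_device_alloc("RTC", -1);
	if (!example_rtc_pdev)
		return -ENOMEM;

	retval = platform_device_add_resources(example_rtc_pdev,
					       example_rtc_resource,
					       ARRAY_SIZE(example_rtc_resource));
	if (retval == 0)
		retval = platform_device_add(example_rtc_pdev);

	if (retval < 0) {
		/* one put() covers failure of either call above */
		platform_device_put(example_rtc_pdev);
		return retval;
	}

	return 0;
}
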
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c index 00d9ef04a369..f1b9cf89f153 100644 --- a/drivers/char/watchdog/mv64x60_wdt.c +++ b/drivers/char/watchdog/mv64x60_wdt.c | |||
| @@ -228,15 +228,25 @@ static int __init mv64x60_wdt_init(void) | |||
| 228 | 228 | ||
| 229 | printk(KERN_INFO "MV64x60 watchdog driver\n"); | 229 | printk(KERN_INFO "MV64x60 watchdog driver\n"); |
| 230 | 230 | ||
| 231 | mv64x60_wdt_dev = platform_device_register_simple(MV64x60_WDT_NAME, | 231 | mv64x60_wdt_dev = platform_device_alloc(MV64x60_WDT_NAME, -1); |
| 232 | -1, NULL, 0); | 232 | if (!mv64x60_wdt_dev) { |
| 233 | if (IS_ERR(mv64x60_wdt_dev)) { | 233 | ret = -ENOMEM; |
| 234 | ret = PTR_ERR(mv64x60_wdt_dev); | 234 | goto out; |
| 235 | } | ||
| 236 | |||
| 237 | ret = platform_device_add(mv64x60_wdt_dev); | ||
| 238 | if (ret) { | ||
| 239 | platform_device_put(mv64x60_wdt_dev); | ||
| 235 | goto out; | 240 | goto out; |
| 236 | } | 241 | } |
| 237 | 242 | ||
| 238 | ret = platform_driver_register(&mv64x60_wdt_driver); | 243 | ret = platform_driver_register(&mv64x60_wdt_driver); |
| 239 | out: | 244 | if (ret) { |
| 245 | platform_device_unregister(mv64x60_wdt_dev); | ||
| 246 | goto out; | ||
| 247 | } | ||
| 248 | |||
| 249 | out: | ||
| 240 | return ret; | 250 | return ret; |
| 241 | } | 251 | } |
| 242 | 252 | ||
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 4652512f7d1a..3a4e5c5b4e1f 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c | |||
| @@ -530,30 +530,27 @@ static DCDBAS_DEV_ATTR_RW(host_control_action); | |||
| 530 | static DCDBAS_DEV_ATTR_RW(host_control_smi_type); | 530 | static DCDBAS_DEV_ATTR_RW(host_control_smi_type); |
| 531 | static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown); | 531 | static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown); |
| 532 | 532 | ||
| 533 | static struct device_attribute *dcdbas_dev_attrs[] = { | 533 | static struct attribute *dcdbas_dev_attrs[] = { |
| 534 | &dev_attr_smi_data_buf_size, | 534 | &dev_attr_smi_data_buf_size.attr, |
| 535 | &dev_attr_smi_data_buf_phys_addr, | 535 | &dev_attr_smi_data_buf_phys_addr.attr, |
| 536 | &dev_attr_smi_request, | 536 | &dev_attr_smi_request.attr, |
| 537 | &dev_attr_host_control_action, | 537 | &dev_attr_host_control_action.attr, |
| 538 | &dev_attr_host_control_smi_type, | 538 | &dev_attr_host_control_smi_type.attr, |
| 539 | &dev_attr_host_control_on_shutdown, | 539 | &dev_attr_host_control_on_shutdown.attr, |
| 540 | NULL | 540 | NULL |
| 541 | }; | 541 | }; |
| 542 | 542 | ||
| 543 | /** | 543 | static struct attribute_group dcdbas_attr_group = { |
| 544 | * dcdbas_init: initialize driver | 544 | .attrs = dcdbas_dev_attrs, |
| 545 | */ | 545 | }; |
| 546 | static int __init dcdbas_init(void) | 546 | |
| 547 | static int __devinit dcdbas_probe(struct platform_device *dev) | ||
| 547 | { | 548 | { |
| 548 | int i; | 549 | int i, error; |
| 549 | 550 | ||
| 550 | host_control_action = HC_ACTION_NONE; | 551 | host_control_action = HC_ACTION_NONE; |
| 551 | host_control_smi_type = HC_SMITYPE_NONE; | 552 | host_control_smi_type = HC_SMITYPE_NONE; |
| 552 | 553 | ||
| 553 | dcdbas_pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); | ||
| 554 | if (IS_ERR(dcdbas_pdev)) | ||
| 555 | return PTR_ERR(dcdbas_pdev); | ||
| 556 | |||
| 557 | /* | 554 | /* |
| 558 | * BIOS SMI calls require buffer addresses be in 32-bit address space. | 555 | * BIOS SMI calls require buffer addresses be in 32-bit address space. |
| 559 | * This is done by setting the DMA mask below. | 556 | * This is done by setting the DMA mask below. |
| @@ -561,19 +558,79 @@ static int __init dcdbas_init(void) | |||
| 561 | dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK; | 558 | dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK; |
| 562 | dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask; | 559 | dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask; |
| 563 | 560 | ||
| 561 | error = sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group); | ||
| 562 | if (error) | ||
| 563 | return error; | ||
| 564 | |||
| 565 | for (i = 0; dcdbas_bin_attrs[i]; i++) { | ||
| 566 | error = sysfs_create_bin_file(&dev->dev.kobj, | ||
| 567 | dcdbas_bin_attrs[i]); | ||
| 568 | if (error) { | ||
| 569 | while (--i >= 0) | ||
| 570 | sysfs_remove_bin_file(&dev->dev.kobj, | ||
| 571 | dcdbas_bin_attrs[i]); | ||
| 572 | sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group); | ||
| 573 | return error; | ||
| 574 | } | ||
| 575 | } | ||
| 576 | |||
| 564 | register_reboot_notifier(&dcdbas_reboot_nb); | 577 | register_reboot_notifier(&dcdbas_reboot_nb); |
| 565 | 578 | ||
| 579 | dev_info(&dev->dev, "%s (version %s)\n", | ||
| 580 | DRIVER_DESCRIPTION, DRIVER_VERSION); | ||
| 581 | |||
| 582 | return 0; | ||
| 583 | } | ||
| 584 | |||
| 585 | static int __devexit dcdbas_remove(struct platform_device *dev) | ||
| 586 | { | ||
| 587 | int i; | ||
| 588 | |||
| 589 | unregister_reboot_notifier(&dcdbas_reboot_nb); | ||
| 566 | for (i = 0; dcdbas_bin_attrs[i]; i++) | 590 | for (i = 0; dcdbas_bin_attrs[i]; i++) |
| 567 | sysfs_create_bin_file(&dcdbas_pdev->dev.kobj, | 591 | sysfs_remove_bin_file(&dev->dev.kobj, dcdbas_bin_attrs[i]); |
| 568 | dcdbas_bin_attrs[i]); | 592 | sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group); |
| 569 | 593 | ||
| 570 | for (i = 0; dcdbas_dev_attrs[i]; i++) | 594 | return 0; |
| 571 | device_create_file(&dcdbas_pdev->dev, dcdbas_dev_attrs[i]); | 595 | } |
| 572 | 596 | ||
| 573 | dev_info(&dcdbas_pdev->dev, "%s (version %s)\n", | 597 | static struct platform_driver dcdbas_driver = { |
| 574 | DRIVER_DESCRIPTION, DRIVER_VERSION); | 598 | .driver = { |
| 599 | .name = DRIVER_NAME, | ||
| 600 | .owner = THIS_MODULE, | ||
| 601 | }, | ||
| 602 | .probe = dcdbas_probe, | ||
| 603 | .remove = __devexit_p(dcdbas_remove), | ||
| 604 | }; | ||
| 605 | |||
| 606 | /** | ||
| 607 | * dcdbas_init: initialize driver | ||
| 608 | */ | ||
| 609 | static int __init dcdbas_init(void) | ||
| 610 | { | ||
| 611 | int error; | ||
| 612 | |||
| 613 | error = platform_driver_register(&dcdbas_driver); | ||
| 614 | if (error) | ||
| 615 | return error; | ||
| 616 | |||
| 617 | dcdbas_pdev = platform_device_alloc(DRIVER_NAME, -1); | ||
| 618 | if (!dcdbas_pdev) { | ||
| 619 | error = -ENOMEM; | ||
| 620 | goto err_unregister_driver; | ||
| 621 | } | ||
| 622 | |||
| 623 | error = platform_device_add(dcdbas_pdev); | ||
| 624 | if (error) | ||
| 625 | goto err_free_device; | ||
| 575 | 626 | ||
| 576 | return 0; | 627 | return 0; |
| 628 | |||
| 629 | err_free_device: | ||
| 630 | platform_device_put(dcdbas_pdev); | ||
| 631 | err_unregister_driver: | ||
| 632 | platform_driver_unregister(&dcdbas_driver); | ||
| 633 | return error; | ||
| 577 | } | 634 | } |
| 578 | 635 | ||
| 579 | /** | 636 | /** |
| @@ -588,6 +645,15 @@ static void __exit dcdbas_exit(void) | |||
| 588 | unregister_reboot_notifier(&dcdbas_reboot_nb); | 645 | unregister_reboot_notifier(&dcdbas_reboot_nb); |
| 589 | smi_data_buf_free(); | 646 | smi_data_buf_free(); |
| 590 | platform_device_unregister(dcdbas_pdev); | 647 | platform_device_unregister(dcdbas_pdev); |
| 648 | platform_driver_unregister(&dcdbas_driver); | ||
| 649 | |||
| 650 | /* | ||
| 651 | * We have to free the buffer here instead of dcdbas_remove | ||
| 652 | * because only in module exit function we can be sure that | ||
| 653 | * all sysfs attributes belonging to this module have been | ||
| 654 | * released. | ||
| 655 | */ | ||
| 656 | smi_data_buf_free(); | ||
| 591 | } | 657 | } |
| 592 | 658 | ||
| 593 | module_init(dcdbas_init); | 659 | module_init(dcdbas_init); |
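
Besides becoming a platform driver, dcdbas above also swaps a hand-rolled loop of device_create_file() calls for a single attribute_group, so probe and remove can create and tear down all sysfs files as one unit. A stripped-down sketch of that pattern follows; the attribute, function, and group names are invented for illustration.

#include <linux/device.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/stat.h>
#include <linux/sysfs.h>

static ssize_t example_value_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 42);	/* placeholder value */
}

static DEVICE_ATTR(example_value, S_IRUGO, example_value_show, NULL);

static struct attribute *example_attrs[] = {
	&dev_attr_example_value.attr,
	NULL
};

static struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static int __devinit example_probe(struct platform_device *dev)
{
	/* creates every file in the array, or none at all on failure */
	return sysfs_create_group(&dev->dev.kobj, &example_attr_group);
}

static int __devexit example_remove(struct platform_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &example_attr_group);
	return 0;
}
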
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 88d60202b9db..26b08ee425c7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -533,30 +533,35 @@ static void __clone_and_map(struct clone_info *ci) | |||
| 533 | 533 | ||
| 534 | } else { | 534 | } else { |
| 535 | /* | 535 | /* |
| 536 | * Create two copy bios to deal with io that has | 536 | * Handle a bvec that must be split between two or more targets. |
| 537 | * been split across a target. | ||
| 538 | */ | 537 | */ |
| 539 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; | 538 | struct bio_vec *bv = bio->bi_io_vec + ci->idx; |
| 539 | sector_t remaining = to_sector(bv->bv_len); | ||
| 540 | unsigned int offset = 0; | ||
| 540 | 541 | ||
| 541 | clone = split_bvec(bio, ci->sector, ci->idx, | 542 | do { |
| 542 | bv->bv_offset, max); | 543 | if (offset) { |
| 543 | __map_bio(ti, clone, tio); | 544 | ti = dm_table_find_target(ci->map, ci->sector); |
| 545 | max = max_io_len(ci->md, ci->sector, ti); | ||
| 544 | 546 | ||
| 545 | ci->sector += max; | 547 | tio = alloc_tio(ci->md); |
| 546 | ci->sector_count -= max; | 548 | tio->io = ci->io; |
| 547 | ti = dm_table_find_target(ci->map, ci->sector); | 549 | tio->ti = ti; |
| 548 | 550 | memset(&tio->info, 0, sizeof(tio->info)); | |
| 549 | len = to_sector(bv->bv_len) - max; | 551 | } |
| 550 | clone = split_bvec(bio, ci->sector, ci->idx, | 552 | |
| 551 | bv->bv_offset + to_bytes(max), len); | 553 | len = min(remaining, max); |
| 552 | tio = alloc_tio(ci->md); | 554 | |
| 553 | tio->io = ci->io; | 555 | clone = split_bvec(bio, ci->sector, ci->idx, |
| 554 | tio->ti = ti; | 556 | bv->bv_offset + offset, len); |
| 555 | memset(&tio->info, 0, sizeof(tio->info)); | 557 | |
| 556 | __map_bio(ti, clone, tio); | 558 | __map_bio(ti, clone, tio); |
| 559 | |||
| 560 | ci->sector += len; | ||
| 561 | ci->sector_count -= len; | ||
| 562 | offset += to_bytes(len); | ||
| 563 | } while (remaining -= len); | ||
| 557 | 564 | ||
| 558 | ci->sector += len; | ||
| 559 | ci->sector_count -= len; | ||
| 560 | ci->idx++; | 565 | ci->idx++; |
| 561 | } | 566 | } |
| 562 | } | 567 | } |
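
The new __clone_and_map() loop above replaces the old "two clones" special case with general bookkeeping: remaining counts the sectors of the bvec still to map, offset tracks how far into the bvec the next clone starts, and each iteration may land on a different target with a different max_io_len(). The tiny userspace simulation below exercises only that arithmetic; the max_io_len() values are invented purely to force a three-way split and do not model any real device-mapper table.

#include <stdio.h>

/* Pretend each target accepts at most this many sectors from the
 * current position; the boundaries are made up for the demo. */
static unsigned int max_io_len(unsigned long sector)
{
	return sector < 8 ? 8 - (unsigned int)(sector % 8)
			  : 16 - (unsigned int)(sector % 16);
}

int main(void)
{
	unsigned long sector = 5;	/* starting sector of the bvec */
	unsigned int remaining = 20;	/* bvec length in sectors */
	unsigned int offset = 0, len;

	do {
		unsigned int max = max_io_len(sector);

		len = remaining < max ? remaining : max;
		printf("clone: sector %lu, offset %u sectors, len %u\n",
		       sector, offset, len);

		sector += len;
		offset += len;
	} while (remaining -= len);	/* same loop condition as the patch */

	return 0;
}
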
diff --git a/drivers/media/dvb/bt8xx/Makefile b/drivers/media/dvb/bt8xx/Makefile index 9d197efb481d..d188e4c670b5 100644 --- a/drivers/media/dvb/bt8xx/Makefile +++ b/drivers/media/dvb/bt8xx/Makefile | |||
| @@ -1,3 +1,3 @@ | |||
| 1 | obj-$(CONFIG_DVB_BT8XX) += bt878.o dvb-bt8xx.o dst.o dst_ca.o | 1 | obj-$(CONFIG_DVB_BT8XX) += bt878.o dvb-bt8xx.o dst.o dst_ca.o |
| 2 | 2 | ||
| 3 | EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video/bt8xx -Idrivers/media/dvb/frontends | 3 | EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video -Idrivers/media/dvb/frontends |
diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h index 7754d1974b9e..4262c1da6d4a 100644 --- a/drivers/net/mv643xx_eth.h +++ b/drivers/net/mv643xx_eth.h | |||
| @@ -42,13 +42,23 @@ | |||
| 42 | #define MAX_DESCS_PER_SKB 1 | 42 | #define MAX_DESCS_PER_SKB 1 |
| 43 | #endif | 43 | #endif |
| 44 | 44 | ||
| 45 | /* | ||
| 46 | * The MV643XX HW requires 8-byte alignment. However, when I/O | ||
| 47 | * is non-cache-coherent, we need to ensure that the I/O buffers | ||
| 48 | * we use don't share cache lines with other data. | ||
| 49 | */ | ||
| 50 | #if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_NOT_COHERENT_CACHE) | ||
| 51 | #define ETH_DMA_ALIGN L1_CACHE_BYTES | ||
| 52 | #else | ||
| 53 | #define ETH_DMA_ALIGN 8 | ||
| 54 | #endif | ||
| 55 | |||
| 45 | #define ETH_VLAN_HLEN 4 | 56 | #define ETH_VLAN_HLEN 4 |
| 46 | #define ETH_FCS_LEN 4 | 57 | #define ETH_FCS_LEN 4 |
| 47 | #define ETH_DMA_ALIGN 8 /* hw requires 8-byte alignment */ | 58 | #define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ |
| 48 | #define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ | ||
| 49 | #define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \ | 59 | #define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \ |
| 50 | ETH_VLAN_HLEN + ETH_FCS_LEN) | 60 | ETH_VLAN_HLEN + ETH_FCS_LEN) |
| 51 | #define ETH_RX_SKB_SIZE ((dev->mtu + ETH_WRAPPER_LEN + 7) & ~0x7) | 61 | #define ETH_RX_SKB_SIZE (dev->mtu + ETH_WRAPPER_LEN + ETH_DMA_ALIGN) |
| 52 | 62 | ||
| 53 | #define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */ | 63 | #define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */ |
| 54 | #define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */ | 64 | #define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */ |
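
With the padding above, ETH_RX_SKB_SIZE no longer rounds the MTU itself; it simply reserves ETH_DMA_ALIGN spare bytes so the receive path can slide skb->data up to an aligned boundary on non-cache-coherent systems. The sketch below shows how an rx allocation can use that slack; it mirrors the idea rather than the driver's exact code, and the helper name rx_alloc_aligned is invented.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* assumes ETH_RX_SKB_SIZE and ETH_DMA_ALIGN from mv643xx_eth.h above */
static struct sk_buff *rx_alloc_aligned(struct net_device *dev)
{
	struct sk_buff *skb = dev_alloc_skb(ETH_RX_SKB_SIZE);
	unsigned int unaligned;

	if (!skb)
		return NULL;

	/* burn part of the extra ETH_DMA_ALIGN bytes to align skb->data */
	unaligned = (unsigned long)skb->data & (ETH_DMA_ALIGN - 1);
	if (unaligned)
		skb_reserve(skb, ETH_DMA_ALIGN - unaligned);

	return skb;
}
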
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 7e900572eaf8..9595f74da93f 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c | |||
| @@ -22,12 +22,12 @@ | |||
| 22 | *************************************************************************/ | 22 | *************************************************************************/ |
| 23 | 23 | ||
| 24 | #define DRV_NAME "pcnet32" | 24 | #define DRV_NAME "pcnet32" |
| 25 | #define DRV_VERSION "1.31c" | 25 | #define DRV_VERSION "1.32" |
| 26 | #define DRV_RELDATE "01.Nov.2005" | 26 | #define DRV_RELDATE "18.Mar.2006" |
| 27 | #define PFX DRV_NAME ": " | 27 | #define PFX DRV_NAME ": " |
| 28 | 28 | ||
| 29 | static const char * const version = | 29 | static const char *const version = |
| 30 | DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; | 30 | DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; |
| 31 | 31 | ||
| 32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
| 33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
| @@ -58,18 +58,23 @@ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; | |||
| 58 | * PCI device identifiers for "new style" Linux PCI Device Drivers | 58 | * PCI device identifiers for "new style" Linux PCI Device Drivers |
| 59 | */ | 59 | */ |
| 60 | static struct pci_device_id pcnet32_pci_tbl[] = { | 60 | static struct pci_device_id pcnet32_pci_tbl[] = { |
| 61 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, | 61 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, |
| 62 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, | 62 | PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, |
| 63 | /* | 63 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, |
| 64 | * Adapters that were sold with IBM's RS/6000 or pSeries hardware have | 64 | PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, |
| 65 | * the incorrect vendor id. | 65 | |
| 66 | */ | 66 | /* |
| 67 | { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, | 67 | * Adapters that were sold with IBM's RS/6000 or pSeries hardware have |
| 68 | PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0 }, | 68 | * the incorrect vendor id. |
| 69 | { 0, } | 69 | */ |
| 70 | { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, | ||
| 71 | PCI_ANY_ID, PCI_ANY_ID, | ||
| 72 | PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0}, | ||
| 73 | |||
| 74 | { } /* terminate list */ | ||
| 70 | }; | 75 | }; |
| 71 | 76 | ||
| 72 | MODULE_DEVICE_TABLE (pci, pcnet32_pci_tbl); | 77 | MODULE_DEVICE_TABLE(pci, pcnet32_pci_tbl); |
| 73 | 78 | ||
| 74 | static int cards_found; | 79 | static int cards_found; |
| 75 | 80 | ||
| @@ -77,13 +82,11 @@ static int cards_found; | |||
| 77 | * VLB I/O addresses | 82 | * VLB I/O addresses |
| 78 | */ | 83 | */ |
| 79 | static unsigned int pcnet32_portlist[] __initdata = | 84 | static unsigned int pcnet32_portlist[] __initdata = |
| 80 | { 0x300, 0x320, 0x340, 0x360, 0 }; | 85 | { 0x300, 0x320, 0x340, 0x360, 0 }; |
| 81 | |||
| 82 | |||
| 83 | 86 | ||
| 84 | static int pcnet32_debug = 0; | 87 | static int pcnet32_debug = 0; |
| 85 | static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ | 88 | static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ |
| 86 | static int pcnet32vlb; /* check for VLB cards ? */ | 89 | static int pcnet32vlb; /* check for VLB cards ? */ |
| 87 | 90 | ||
| 88 | static struct net_device *pcnet32_dev; | 91 | static struct net_device *pcnet32_dev; |
| 89 | 92 | ||
| @@ -110,32 +113,34 @@ static int rx_copybreak = 200; | |||
| 110 | * to internal options | 113 | * to internal options |
| 111 | */ | 114 | */ |
| 112 | static const unsigned char options_mapping[] = { | 115 | static const unsigned char options_mapping[] = { |
| 113 | PCNET32_PORT_ASEL, /* 0 Auto-select */ | 116 | PCNET32_PORT_ASEL, /* 0 Auto-select */ |
| 114 | PCNET32_PORT_AUI, /* 1 BNC/AUI */ | 117 | PCNET32_PORT_AUI, /* 1 BNC/AUI */ |
| 115 | PCNET32_PORT_AUI, /* 2 AUI/BNC */ | 118 | PCNET32_PORT_AUI, /* 2 AUI/BNC */ |
| 116 | PCNET32_PORT_ASEL, /* 3 not supported */ | 119 | PCNET32_PORT_ASEL, /* 3 not supported */ |
| 117 | PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ | 120 | PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ |
| 118 | PCNET32_PORT_ASEL, /* 5 not supported */ | 121 | PCNET32_PORT_ASEL, /* 5 not supported */ |
| 119 | PCNET32_PORT_ASEL, /* 6 not supported */ | 122 | PCNET32_PORT_ASEL, /* 6 not supported */ |
| 120 | PCNET32_PORT_ASEL, /* 7 not supported */ | 123 | PCNET32_PORT_ASEL, /* 7 not supported */ |
| 121 | PCNET32_PORT_ASEL, /* 8 not supported */ | 124 | PCNET32_PORT_ASEL, /* 8 not supported */ |
| 122 | PCNET32_PORT_MII, /* 9 MII 10baseT */ | 125 | PCNET32_PORT_MII, /* 9 MII 10baseT */ |
| 123 | PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ | 126 | PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ |
| 124 | PCNET32_PORT_MII, /* 11 MII (autosel) */ | 127 | PCNET32_PORT_MII, /* 11 MII (autosel) */ |
| 125 | PCNET32_PORT_10BT, /* 12 10BaseT */ | 128 | PCNET32_PORT_10BT, /* 12 10BaseT */ |
| 126 | PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ | 129 | PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ |
| 127 | PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD, /* 14 MII 100BaseTx-FD */ | 130 | /* 14 MII 100BaseTx-FD */ |
| 128 | PCNET32_PORT_ASEL /* 15 not supported */ | 131 | PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD, |
| 132 | PCNET32_PORT_ASEL /* 15 not supported */ | ||
| 129 | }; | 133 | }; |
| 130 | 134 | ||
| 131 | static const char pcnet32_gstrings_test[][ETH_GSTRING_LEN] = { | 135 | static const char pcnet32_gstrings_test[][ETH_GSTRING_LEN] = { |
| 132 | "Loopback test (offline)" | 136 | "Loopback test (offline)" |
| 133 | }; | 137 | }; |
| 138 | |||
| 134 | #define PCNET32_TEST_LEN (sizeof(pcnet32_gstrings_test) / ETH_GSTRING_LEN) | 139 | #define PCNET32_TEST_LEN (sizeof(pcnet32_gstrings_test) / ETH_GSTRING_LEN) |
| 135 | 140 | ||
| 136 | #define PCNET32_NUM_REGS 168 | 141 | #define PCNET32_NUM_REGS 136 |
| 137 | 142 | ||
| 138 | #define MAX_UNITS 8 /* More are supported, limit only on options */ | 143 | #define MAX_UNITS 8 /* More are supported, limit only on options */ |
| 139 | static int options[MAX_UNITS]; | 144 | static int options[MAX_UNITS]; |
| 140 | static int full_duplex[MAX_UNITS]; | 145 | static int full_duplex[MAX_UNITS]; |
| 141 | static int homepna[MAX_UNITS]; | 146 | static int homepna[MAX_UNITS]; |
| @@ -151,124 +156,6 @@ static int homepna[MAX_UNITS]; | |||
| 151 | */ | 156 | */ |
| 152 | 157 | ||
| 153 | /* | 158 | /* |
| 154 | * History: | ||
| 155 | * v0.01: Initial version | ||
| 156 | * only tested on Alpha Noname Board | ||
| 157 | * v0.02: changed IRQ handling for new interrupt scheme (dev_id) | ||
| 158 | * tested on a ASUS SP3G | ||
| 159 | * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL | ||
| 160 | * looks like the 974 doesn't like stopping and restarting in a | ||
| 161 | * short period of time; now we do a reinit of the lance; the | ||
| 162 | * bug was triggered by doing ifconfig eth0 <ip> broadcast <addr> | ||
| 163 | * and hangs the machine (thanks to Klaus Liedl for debugging) | ||
| 164 | * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32, | ||
| 165 | * made it standalone (no need for lance.c) | ||
| 166 | * v0.13: added additional PCI detecting for special PCI devices (Compaq) | ||
| 167 | * v0.14: stripped down additional PCI probe (thanks to David C Niemi | ||
| 168 | * and sveneric@xs4all.nl for testing this on their Compaq boxes) | ||
| 169 | * v0.15: added 79C965 (VLB) probe | ||
| 170 | * added interrupt sharing for PCI chips | ||
| 171 | * v0.16: fixed set_multicast_list on Alpha machines | ||
| 172 | * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c | ||
| 173 | * v0.19: changed setting of autoselect bit | ||
| 174 | * v0.20: removed additional Compaq PCI probe; there is now a working one | ||
| 175 | * in arch/i386/bios32.c | ||
| 176 | * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu | ||
| 177 | * v0.22: added printing of status to ring dump | ||
| 178 | * v0.23: changed enet_statistics to net_devive_stats | ||
| 179 | * v0.90: added multicast filter | ||
| 180 | * added module support | ||
| 181 | * changed irq probe to new style | ||
| 182 | * added PCnetFast chip id | ||
| 183 | * added fix for receive stalls with Intel saturn chipsets | ||
| 184 | * added in-place rx skbs like in the tulip driver | ||
| 185 | * minor cleanups | ||
| 186 | * v0.91: added PCnetFast+ chip id | ||
| 187 | * back port to 2.0.x | ||
| 188 | * v1.00: added some stuff from Donald Becker's 2.0.34 version | ||
| 189 | * added support for byte counters in net_dev_stats | ||
| 190 | * v1.01: do ring dumps, only when debugging the driver | ||
| 191 | * increased the transmit timeout | ||
| 192 | * v1.02: fixed memory leak in pcnet32_init_ring() | ||
| 193 | * v1.10: workaround for stopped transmitter | ||
| 194 | * added port selection for modules | ||
| 195 | * detect special T1/E1 WAN card and setup port selection | ||
| 196 | * v1.11: fixed wrong checking of Tx errors | ||
| 197 | * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu) | ||
| 198 | * added save original kmalloc addr for freeing (mcr@solidum.com) | ||
| 199 | * added support for PCnetHome chip (joe@MIT.EDU) | ||
| 200 | * rewritten PCI card detection | ||
| 201 | * added dwio mode to get driver working on some PPC machines | ||
| 202 | * v1.21: added mii selection and mii ioctl | ||
| 203 | * v1.22: changed pci scanning code to make PPC people happy | ||
| 204 | * fixed switching to 32bit mode in pcnet32_open() (thanks | ||
| 205 | * to Michael Richard <mcr@solidum.com> for noticing this one) | ||
| 206 | * added sub vendor/device id matching (thanks again to | ||
| 207 | * Michael Richard <mcr@solidum.com>) | ||
| 208 | * added chip id for 79c973/975 (thanks to Zach Brown <zab@zabbo.net>) | ||
| 209 | * v1.23 fixed small bug, when manual selecting MII speed/duplex | ||
| 210 | * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO | ||
| 211 | * underflows. Added tx_start_pt module parameter. Increased | ||
| 212 | * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO | ||
| 213 | * for FAST[+] chipsets. <kaf@fc.hp.com> | ||
| 214 | * v1.24ac Added SMP spinlocking - Alan Cox <alan@redhat.com> | ||
| 215 | * v1.25kf Added No Interrupt on successful Tx for some Tx's <kaf@fc.hp.com> | ||
| 216 | * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France | ||
| 217 | * <jamey@crl.dec.com> | ||
| 218 | * - Fixed a few bugs, related to running the controller in 32bit mode. | ||
| 219 | * 23 Oct, 2000. Carsten Langgaard, carstenl@mips.com | ||
| 220 | * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. | ||
| 221 | * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker | ||
| 222 | * v1.27 improved CSR/PROM address detection, lots of cleanups, | ||
| 223 | * new pcnet32vlb module option, HP-PARISC support, | ||
| 224 | * added module parameter descriptions, | ||
| 225 | * initial ethtool support - Helge Deller <deller@gmx.de> | ||
| 226 | * v1.27a Sun Feb 10 2002 Go Taniguchi <go@turbolinux.co.jp> | ||
| 227 | * use alloc_etherdev and register_netdev | ||
| 228 | * fix pci probe not increment cards_found | ||
| 229 | * FD auto negotiate error workaround for xSeries250 | ||
| 230 | * clean up and using new mii module | ||
| 231 | * v1.27b Sep 30 2002 Kent Yoder <yoder1@us.ibm.com> | ||
| 232 | * Added timer for cable connection state changes. | ||
| 233 | * v1.28 20 Feb 2004 Don Fry <brazilnut@us.ibm.com> | ||
| 234 | * Jon Mason <jonmason@us.ibm.com>, Chinmay Albal <albal@in.ibm.com> | ||
| 235 | * Now uses ethtool_ops, netif_msg_* and generic_mii_ioctl. | ||
| 236 | * Fixes bogus 'Bus master arbitration failure', pci_[un]map_single | ||
| 237 | * length errors, and transmit hangs. Cleans up after errors in open. | ||
| 238 | * Jim Lewis <jklewis@us.ibm.com> added ethernet loopback test. | ||
| 239 | * Thomas Munck Steenholdt <tmus@tmus.dk> non-mii ioctl corrections. | ||
| 240 | * v1.29 6 Apr 2004 Jim Lewis <jklewis@us.ibm.com> added physical | ||
| 241 | * identification code (blink led's) and register dump. | ||
| 242 | * Don Fry added timer for 971/972 so skbufs don't remain on tx ring | ||
| 243 | * forever. | ||
| 244 | * v1.30 18 May 2004 Don Fry removed timer and Last Transmit Interrupt | ||
| 245 | * (ltint) as they added complexity and didn't give good throughput. | ||
| 246 | * v1.30a 22 May 2004 Don Fry limit frames received during interrupt. | ||
| 247 | * v1.30b 24 May 2004 Don Fry fix bogus tx carrier errors with 79c973, | ||
| 248 | * assisted by Bruce Penrod <bmpenrod@endruntechnologies.com>. | ||
| 249 | * v1.30c 25 May 2004 Don Fry added netif_wake_queue after pcnet32_restart. | ||
| 250 | * v1.30d 01 Jun 2004 Don Fry discard oversize rx packets. | ||
| 251 | * v1.30e 11 Jun 2004 Don Fry recover after fifo error and rx hang. | ||
| 252 | * v1.30f 16 Jun 2004 Don Fry cleanup IRQ to allow 0 and 1 for PCI, | ||
| 253 | * expanding on suggestions from Ralf Baechle <ralf@linux-mips.org>, | ||
| 254 | * and Brian Murphy <brian@murphy.dk>. | ||
| 255 | * v1.30g 22 Jun 2004 Patrick Simmons <psimmons@flash.net> added option | ||
| 256 | * homepna for selecting HomePNA mode for PCNet/Home 79C978. | ||
| 257 | * v1.30h 24 Jun 2004 Don Fry correctly select auto, speed, duplex in bcr32. | ||
| 258 | * v1.30i 28 Jun 2004 Don Fry change to use module_param. | ||
| 259 | * v1.30j 29 Apr 2005 Don Fry fix skb/map leak with loopback test. | ||
| 260 | * v1.31 02 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> added set_ringparam(). | ||
| 261 | * v1.31a 12 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> set min ring size to 4 | ||
| 262 | * to allow loopback test to work unchanged. | ||
| 263 | * v1.31b 06 Oct 2005 Don Fry changed alloc_ring to show name of device | ||
| 264 | * if allocation fails | ||
| 265 | * v1.31c 01 Nov 2005 Don Fry Allied Telesyn 2700/2701 FX are 100Mbit only. | ||
| 266 | * Force 100Mbit FD if Auto (ASEL) is selected. | ||
| 267 | * See Bugzilla 2669 and 4551. | ||
| 268 | */ | ||
| 269 | |||
| 270 | |||
| 271 | /* | ||
| 272 | * Set the number of Tx and Rx buffers, using Log_2(# buffers). | 159 | * Set the number of Tx and Rx buffers, using Log_2(# buffers). |
| 273 | * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. | 160 | * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. |
| 274 | * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). | 161 | * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). |
| @@ -303,42 +190,42 @@ static int homepna[MAX_UNITS]; | |||
| 303 | 190 | ||
| 304 | /* The PCNET32 Rx and Tx ring descriptors. */ | 191 | /* The PCNET32 Rx and Tx ring descriptors. */ |
| 305 | struct pcnet32_rx_head { | 192 | struct pcnet32_rx_head { |
| 306 | u32 base; | 193 | u32 base; |
| 307 | s16 buf_length; | 194 | s16 buf_length; |
| 308 | s16 status; | 195 | s16 status; |
| 309 | u32 msg_length; | 196 | u32 msg_length; |
| 310 | u32 reserved; | 197 | u32 reserved; |
| 311 | }; | 198 | }; |
| 312 | 199 | ||
| 313 | struct pcnet32_tx_head { | 200 | struct pcnet32_tx_head { |
| 314 | u32 base; | 201 | u32 base; |
| 315 | s16 length; | 202 | s16 length; |
| 316 | s16 status; | 203 | s16 status; |
| 317 | u32 misc; | 204 | u32 misc; |
| 318 | u32 reserved; | 205 | u32 reserved; |
| 319 | }; | 206 | }; |
| 320 | 207 | ||
| 321 | /* The PCNET32 32-Bit initialization block, described in databook. */ | 208 | /* The PCNET32 32-Bit initialization block, described in databook. */ |
| 322 | struct pcnet32_init_block { | 209 | struct pcnet32_init_block { |
| 323 | u16 mode; | 210 | u16 mode; |
| 324 | u16 tlen_rlen; | 211 | u16 tlen_rlen; |
| 325 | u8 phys_addr[6]; | 212 | u8 phys_addr[6]; |
| 326 | u16 reserved; | 213 | u16 reserved; |
| 327 | u32 filter[2]; | 214 | u32 filter[2]; |
| 328 | /* Receive and transmit ring base, along with extra bits. */ | 215 | /* Receive and transmit ring base, along with extra bits. */ |
| 329 | u32 rx_ring; | 216 | u32 rx_ring; |
| 330 | u32 tx_ring; | 217 | u32 tx_ring; |
| 331 | }; | 218 | }; |
| 332 | 219 | ||
| 333 | /* PCnet32 access functions */ | 220 | /* PCnet32 access functions */ |
| 334 | struct pcnet32_access { | 221 | struct pcnet32_access { |
| 335 | u16 (*read_csr)(unsigned long, int); | 222 | u16 (*read_csr) (unsigned long, int); |
| 336 | void (*write_csr)(unsigned long, int, u16); | 223 | void (*write_csr) (unsigned long, int, u16); |
| 337 | u16 (*read_bcr)(unsigned long, int); | 224 | u16 (*read_bcr) (unsigned long, int); |
| 338 | void (*write_bcr)(unsigned long, int, u16); | 225 | void (*write_bcr) (unsigned long, int, u16); |
| 339 | u16 (*read_rap)(unsigned long); | 226 | u16 (*read_rap) (unsigned long); |
| 340 | void (*write_rap)(unsigned long, u16); | 227 | void (*write_rap) (unsigned long, u16); |
| 341 | void (*reset)(unsigned long); | 228 | void (*reset) (unsigned long); |
| 342 | }; | 229 | }; |
| 343 | 230 | ||
| 344 | /* | 231 | /* |
| @@ -346,760 +233,794 @@ struct pcnet32_access { | |||
| 346 | * so the structure should be allocated using pci_alloc_consistent(). | 233 | * so the structure should be allocated using pci_alloc_consistent(). |
| 347 | */ | 234 | */ |
| 348 | struct pcnet32_private { | 235 | struct pcnet32_private { |
| 349 | struct pcnet32_init_block init_block; | 236 | struct pcnet32_init_block init_block; |
| 350 | /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ | 237 | /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ |
| 351 | struct pcnet32_rx_head *rx_ring; | 238 | struct pcnet32_rx_head *rx_ring; |
| 352 | struct pcnet32_tx_head *tx_ring; | 239 | struct pcnet32_tx_head *tx_ring; |
| 353 | dma_addr_t dma_addr; /* DMA address of beginning of this | 240 | dma_addr_t dma_addr;/* DMA address of beginning of this |
| 354 | object, returned by | 241 | object, returned by pci_alloc_consistent */ |
| 355 | pci_alloc_consistent */ | 242 | struct pci_dev *pci_dev; |
| 356 | struct pci_dev *pci_dev; /* Pointer to the associated pci device | 243 | const char *name; |
| 357 | structure */ | 244 | /* The saved address of a sent-in-place packet/buffer, for skfree(). */ |
| 358 | const char *name; | 245 | struct sk_buff **tx_skbuff; |
| 359 | /* The saved address of a sent-in-place packet/buffer, for skfree(). */ | 246 | struct sk_buff **rx_skbuff; |
| 360 | struct sk_buff **tx_skbuff; | 247 | dma_addr_t *tx_dma_addr; |
| 361 | struct sk_buff **rx_skbuff; | 248 | dma_addr_t *rx_dma_addr; |
| 362 | dma_addr_t *tx_dma_addr; | 249 | struct pcnet32_access a; |
| 363 | dma_addr_t *rx_dma_addr; | 250 | spinlock_t lock; /* Guard lock */ |
| 364 | struct pcnet32_access a; | 251 | unsigned int cur_rx, cur_tx; /* The next free ring entry */ |
| 365 | spinlock_t lock; /* Guard lock */ | 252 | unsigned int rx_ring_size; /* current rx ring size */ |
| 366 | unsigned int cur_rx, cur_tx; /* The next free ring entry */ | 253 | unsigned int tx_ring_size; /* current tx ring size */ |
| 367 | unsigned int rx_ring_size; /* current rx ring size */ | 254 | unsigned int rx_mod_mask; /* rx ring modular mask */ |
| 368 | unsigned int tx_ring_size; /* current tx ring size */ | 255 | unsigned int tx_mod_mask; /* tx ring modular mask */ |
| 369 | unsigned int rx_mod_mask; /* rx ring modular mask */ | 256 | unsigned short rx_len_bits; |
| 370 | unsigned int tx_mod_mask; /* tx ring modular mask */ | 257 | unsigned short tx_len_bits; |
| 371 | unsigned short rx_len_bits; | 258 | dma_addr_t rx_ring_dma_addr; |
| 372 | unsigned short tx_len_bits; | 259 | dma_addr_t tx_ring_dma_addr; |
| 373 | dma_addr_t rx_ring_dma_addr; | 260 | unsigned int dirty_rx, /* ring entries to be freed. */ |
| 374 | dma_addr_t tx_ring_dma_addr; | 261 | dirty_tx; |
| 375 | unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ | 262 | |
| 376 | struct net_device_stats stats; | 263 | struct net_device_stats stats; |
| 377 | char tx_full; | 264 | char tx_full; |
| 378 | int options; | 265 | char phycount; /* number of phys found */ |
| 379 | unsigned int shared_irq:1, /* shared irq possible */ | 266 | int options; |
| 380 | dxsuflo:1, /* disable transmit stop on uflo */ | 267 | unsigned int shared_irq:1, /* shared irq possible */ |
| 381 | mii:1; /* mii port available */ | 268 | dxsuflo:1, /* disable transmit stop on uflo */ |
| 382 | struct net_device *next; | 269 | mii:1; /* mii port available */ |
| 383 | struct mii_if_info mii_if; | 270 | struct net_device *next; |
| 384 | struct timer_list watchdog_timer; | 271 | struct mii_if_info mii_if; |
| 385 | struct timer_list blink_timer; | 272 | struct timer_list watchdog_timer; |
| 386 | u32 msg_enable; /* debug message level */ | 273 | struct timer_list blink_timer; |
| 274 | u32 msg_enable; /* debug message level */ | ||
| 275 | |||
| 276 | /* each bit indicates an available PHY */ | ||
| 277 | u32 phymask; | ||
| 387 | }; | 278 | }; |
| 388 | 279 | ||
| 389 | static void pcnet32_probe_vlbus(void); | 280 | static void pcnet32_probe_vlbus(void); |
| 390 | static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); | 281 | static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); |
| 391 | static int pcnet32_probe1(unsigned long, int, struct pci_dev *); | 282 | static int pcnet32_probe1(unsigned long, int, struct pci_dev *); |
| 392 | static int pcnet32_open(struct net_device *); | 283 | static int pcnet32_open(struct net_device *); |
| 393 | static int pcnet32_init_ring(struct net_device *); | 284 | static int pcnet32_init_ring(struct net_device *); |
| 394 | static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); | 285 | static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); |
| 395 | static int pcnet32_rx(struct net_device *); | 286 | static int pcnet32_rx(struct net_device *); |
| 396 | static void pcnet32_tx_timeout (struct net_device *dev); | 287 | static void pcnet32_tx_timeout(struct net_device *dev); |
| 397 | static irqreturn_t pcnet32_interrupt(int, void *, struct pt_regs *); | 288 | static irqreturn_t pcnet32_interrupt(int, void *, struct pt_regs *); |
| 398 | static int pcnet32_close(struct net_device *); | 289 | static int pcnet32_close(struct net_device *); |
| 399 | static struct net_device_stats *pcnet32_get_stats(struct net_device *); | 290 | static struct net_device_stats *pcnet32_get_stats(struct net_device *); |
| 400 | static void pcnet32_load_multicast(struct net_device *dev); | 291 | static void pcnet32_load_multicast(struct net_device *dev); |
| 401 | static void pcnet32_set_multicast_list(struct net_device *); | 292 | static void pcnet32_set_multicast_list(struct net_device *); |
| 402 | static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); | 293 | static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); |
| 403 | static void pcnet32_watchdog(struct net_device *); | 294 | static void pcnet32_watchdog(struct net_device *); |
| 404 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num); | 295 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num); |
| 405 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); | 296 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, |
| 297 | int val); | ||
| 406 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits); | 298 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits); |
| 407 | static void pcnet32_ethtool_test(struct net_device *dev, | 299 | static void pcnet32_ethtool_test(struct net_device *dev, |
| 408 | struct ethtool_test *eth_test, u64 *data); | 300 | struct ethtool_test *eth_test, u64 * data); |
| 409 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1); | 301 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1); |
| 410 | static int pcnet32_phys_id(struct net_device *dev, u32 data); | 302 | static int pcnet32_phys_id(struct net_device *dev, u32 data); |
| 411 | static void pcnet32_led_blink_callback(struct net_device *dev); | 303 | static void pcnet32_led_blink_callback(struct net_device *dev); |
| 412 | static int pcnet32_get_regs_len(struct net_device *dev); | 304 | static int pcnet32_get_regs_len(struct net_device *dev); |
| 413 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, | 305 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, |
| 414 | void *ptr); | 306 | void *ptr); |
| 415 | static void pcnet32_purge_tx_ring(struct net_device *dev); | 307 | static void pcnet32_purge_tx_ring(struct net_device *dev); |
| 416 | static int pcnet32_alloc_ring(struct net_device *dev, char *name); | 308 | static int pcnet32_alloc_ring(struct net_device *dev, char *name); |
| 417 | static void pcnet32_free_ring(struct net_device *dev); | 309 | static void pcnet32_free_ring(struct net_device *dev); |
| 418 | 310 | static void pcnet32_check_media(struct net_device *dev, int verbose); | |
| 419 | 311 | ||
| 420 | enum pci_flags_bit { | 312 | enum pci_flags_bit { |
| 421 | PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, | 313 | PCI_USES_IO = 1, PCI_USES_MEM = 2, PCI_USES_MASTER = 4, |
| 422 | PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, | 314 | PCI_ADDR0 = 0x10 << 0, PCI_ADDR1 = 0x10 << 1, PCI_ADDR2 = |
| 315 | 0x10 << 2, PCI_ADDR3 = 0x10 << 3, | ||
| 423 | }; | 316 | }; |
| 424 | 317 | ||
| 425 | 318 | static u16 pcnet32_wio_read_csr(unsigned long addr, int index) | |
| 426 | static u16 pcnet32_wio_read_csr (unsigned long addr, int index) | ||
| 427 | { | 319 | { |
| 428 | outw (index, addr+PCNET32_WIO_RAP); | 320 | outw(index, addr + PCNET32_WIO_RAP); |
| 429 | return inw (addr+PCNET32_WIO_RDP); | 321 | return inw(addr + PCNET32_WIO_RDP); |
| 430 | } | 322 | } |
| 431 | 323 | ||
| 432 | static void pcnet32_wio_write_csr (unsigned long addr, int index, u16 val) | 324 | static void pcnet32_wio_write_csr(unsigned long addr, int index, u16 val) |
| 433 | { | 325 | { |
| 434 | outw (index, addr+PCNET32_WIO_RAP); | 326 | outw(index, addr + PCNET32_WIO_RAP); |
| 435 | outw (val, addr+PCNET32_WIO_RDP); | 327 | outw(val, addr + PCNET32_WIO_RDP); |
| 436 | } | 328 | } |
| 437 | 329 | ||
| 438 | static u16 pcnet32_wio_read_bcr (unsigned long addr, int index) | 330 | static u16 pcnet32_wio_read_bcr(unsigned long addr, int index) |
| 439 | { | 331 | { |
| 440 | outw (index, addr+PCNET32_WIO_RAP); | 332 | outw(index, addr + PCNET32_WIO_RAP); |
| 441 | return inw (addr+PCNET32_WIO_BDP); | 333 | return inw(addr + PCNET32_WIO_BDP); |
| 442 | } | 334 | } |
| 443 | 335 | ||
| 444 | static void pcnet32_wio_write_bcr (unsigned long addr, int index, u16 val) | 336 | static void pcnet32_wio_write_bcr(unsigned long addr, int index, u16 val) |
| 445 | { | 337 | { |
| 446 | outw (index, addr+PCNET32_WIO_RAP); | 338 | outw(index, addr + PCNET32_WIO_RAP); |
| 447 | outw (val, addr+PCNET32_WIO_BDP); | 339 | outw(val, addr + PCNET32_WIO_BDP); |
| 448 | } | 340 | } |
| 449 | 341 | ||
| 450 | static u16 pcnet32_wio_read_rap (unsigned long addr) | 342 | static u16 pcnet32_wio_read_rap(unsigned long addr) |
| 451 | { | 343 | { |
| 452 | return inw (addr+PCNET32_WIO_RAP); | 344 | return inw(addr + PCNET32_WIO_RAP); |
| 453 | } | 345 | } |
| 454 | 346 | ||
| 455 | static void pcnet32_wio_write_rap (unsigned long addr, u16 val) | 347 | static void pcnet32_wio_write_rap(unsigned long addr, u16 val) |
| 456 | { | 348 | { |
| 457 | outw (val, addr+PCNET32_WIO_RAP); | 349 | outw(val, addr + PCNET32_WIO_RAP); |
| 458 | } | 350 | } |
| 459 | 351 | ||
| 460 | static void pcnet32_wio_reset (unsigned long addr) | 352 | static void pcnet32_wio_reset(unsigned long addr) |
| 461 | { | 353 | { |
| 462 | inw (addr+PCNET32_WIO_RESET); | 354 | inw(addr + PCNET32_WIO_RESET); |
| 463 | } | 355 | } |
| 464 | 356 | ||
| 465 | static int pcnet32_wio_check (unsigned long addr) | 357 | static int pcnet32_wio_check(unsigned long addr) |
| 466 | { | 358 | { |
| 467 | outw (88, addr+PCNET32_WIO_RAP); | 359 | outw(88, addr + PCNET32_WIO_RAP); |
| 468 | return (inw (addr+PCNET32_WIO_RAP) == 88); | 360 | return (inw(addr + PCNET32_WIO_RAP) == 88); |
| 469 | } | 361 | } |
| 470 | 362 | ||
| 471 | static struct pcnet32_access pcnet32_wio = { | 363 | static struct pcnet32_access pcnet32_wio = { |
| 472 | .read_csr = pcnet32_wio_read_csr, | 364 | .read_csr = pcnet32_wio_read_csr, |
| 473 | .write_csr = pcnet32_wio_write_csr, | 365 | .write_csr = pcnet32_wio_write_csr, |
| 474 | .read_bcr = pcnet32_wio_read_bcr, | 366 | .read_bcr = pcnet32_wio_read_bcr, |
| 475 | .write_bcr = pcnet32_wio_write_bcr, | 367 | .write_bcr = pcnet32_wio_write_bcr, |
| 476 | .read_rap = pcnet32_wio_read_rap, | 368 | .read_rap = pcnet32_wio_read_rap, |
| 477 | .write_rap = pcnet32_wio_write_rap, | 369 | .write_rap = pcnet32_wio_write_rap, |
| 478 | .reset = pcnet32_wio_reset | 370 | .reset = pcnet32_wio_reset |
| 479 | }; | 371 | }; |
| 480 | 372 | ||
| 481 | static u16 pcnet32_dwio_read_csr (unsigned long addr, int index) | 373 | static u16 pcnet32_dwio_read_csr(unsigned long addr, int index) |
| 482 | { | 374 | { |
| 483 | outl (index, addr+PCNET32_DWIO_RAP); | 375 | outl(index, addr + PCNET32_DWIO_RAP); |
| 484 | return (inl (addr+PCNET32_DWIO_RDP) & 0xffff); | 376 | return (inl(addr + PCNET32_DWIO_RDP) & 0xffff); |
| 485 | } | 377 | } |
| 486 | 378 | ||
| 487 | static void pcnet32_dwio_write_csr (unsigned long addr, int index, u16 val) | 379 | static void pcnet32_dwio_write_csr(unsigned long addr, int index, u16 val) |
| 488 | { | 380 | { |
| 489 | outl (index, addr+PCNET32_DWIO_RAP); | 381 | outl(index, addr + PCNET32_DWIO_RAP); |
| 490 | outl (val, addr+PCNET32_DWIO_RDP); | 382 | outl(val, addr + PCNET32_DWIO_RDP); |
| 491 | } | 383 | } |
| 492 | 384 | ||
| 493 | static u16 pcnet32_dwio_read_bcr (unsigned long addr, int index) | 385 | static u16 pcnet32_dwio_read_bcr(unsigned long addr, int index) |
| 494 | { | 386 | { |
| 495 | outl (index, addr+PCNET32_DWIO_RAP); | 387 | outl(index, addr + PCNET32_DWIO_RAP); |
| 496 | return (inl (addr+PCNET32_DWIO_BDP) & 0xffff); | 388 | return (inl(addr + PCNET32_DWIO_BDP) & 0xffff); |
| 497 | } | 389 | } |
| 498 | 390 | ||
| 499 | static void pcnet32_dwio_write_bcr (unsigned long addr, int index, u16 val) | 391 | static void pcnet32_dwio_write_bcr(unsigned long addr, int index, u16 val) |
| 500 | { | 392 | { |
| 501 | outl (index, addr+PCNET32_DWIO_RAP); | 393 | outl(index, addr + PCNET32_DWIO_RAP); |
| 502 | outl (val, addr+PCNET32_DWIO_BDP); | 394 | outl(val, addr + PCNET32_DWIO_BDP); |
| 503 | } | 395 | } |
| 504 | 396 | ||
| 505 | static u16 pcnet32_dwio_read_rap (unsigned long addr) | 397 | static u16 pcnet32_dwio_read_rap(unsigned long addr) |
| 506 | { | 398 | { |
| 507 | return (inl (addr+PCNET32_DWIO_RAP) & 0xffff); | 399 | return (inl(addr + PCNET32_DWIO_RAP) & 0xffff); |
| 508 | } | 400 | } |
| 509 | 401 | ||
| 510 | static void pcnet32_dwio_write_rap (unsigned long addr, u16 val) | 402 | static void pcnet32_dwio_write_rap(unsigned long addr, u16 val) |
| 511 | { | 403 | { |
| 512 | outl (val, addr+PCNET32_DWIO_RAP); | 404 | outl(val, addr + PCNET32_DWIO_RAP); |
| 513 | } | 405 | } |
| 514 | 406 | ||
| 515 | static void pcnet32_dwio_reset (unsigned long addr) | 407 | static void pcnet32_dwio_reset(unsigned long addr) |
| 516 | { | 408 | { |
| 517 | inl (addr+PCNET32_DWIO_RESET); | 409 | inl(addr + PCNET32_DWIO_RESET); |
| 518 | } | 410 | } |
| 519 | 411 | ||
| 520 | static int pcnet32_dwio_check (unsigned long addr) | 412 | static int pcnet32_dwio_check(unsigned long addr) |
| 521 | { | 413 | { |
| 522 | outl (88, addr+PCNET32_DWIO_RAP); | 414 | outl(88, addr + PCNET32_DWIO_RAP); |
| 523 | return ((inl (addr+PCNET32_DWIO_RAP) & 0xffff) == 88); | 415 | return ((inl(addr + PCNET32_DWIO_RAP) & 0xffff) == 88); |
| 524 | } | 416 | } |
| 525 | 417 | ||
| 526 | static struct pcnet32_access pcnet32_dwio = { | 418 | static struct pcnet32_access pcnet32_dwio = { |
| 527 | .read_csr = pcnet32_dwio_read_csr, | 419 | .read_csr = pcnet32_dwio_read_csr, |
| 528 | .write_csr = pcnet32_dwio_write_csr, | 420 | .write_csr = pcnet32_dwio_write_csr, |
| 529 | .read_bcr = pcnet32_dwio_read_bcr, | 421 | .read_bcr = pcnet32_dwio_read_bcr, |
| 530 | .write_bcr = pcnet32_dwio_write_bcr, | 422 | .write_bcr = pcnet32_dwio_write_bcr, |
| 531 | .read_rap = pcnet32_dwio_read_rap, | 423 | .read_rap = pcnet32_dwio_read_rap, |
| 532 | .write_rap = pcnet32_dwio_write_rap, | 424 | .write_rap = pcnet32_dwio_write_rap, |
| 533 | .reset = pcnet32_dwio_reset | 425 | .reset = pcnet32_dwio_reset |
| 534 | }; | 426 | }; |
| 535 | 427 | ||
| 536 | #ifdef CONFIG_NET_POLL_CONTROLLER | 428 | #ifdef CONFIG_NET_POLL_CONTROLLER |
| 537 | static void pcnet32_poll_controller(struct net_device *dev) | 429 | static void pcnet32_poll_controller(struct net_device *dev) |
| 538 | { | 430 | { |
| 539 | disable_irq(dev->irq); | 431 | disable_irq(dev->irq); |
| 540 | pcnet32_interrupt(0, dev, NULL); | 432 | pcnet32_interrupt(0, dev, NULL); |
| 541 | enable_irq(dev->irq); | 433 | enable_irq(dev->irq); |
| 542 | } | 434 | } |
| 543 | #endif | 435 | #endif |
| 544 | 436 | ||
| 545 | |||
| 546 | static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) | 437 | static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) |
| 547 | { | 438 | { |
| 548 | struct pcnet32_private *lp = dev->priv; | 439 | struct pcnet32_private *lp = dev->priv; |
| 549 | unsigned long flags; | 440 | unsigned long flags; |
| 550 | int r = -EOPNOTSUPP; | 441 | int r = -EOPNOTSUPP; |
| 551 | 442 | ||
| 552 | if (lp->mii) { | 443 | if (lp->mii) { |
| 553 | spin_lock_irqsave(&lp->lock, flags); | 444 | spin_lock_irqsave(&lp->lock, flags); |
| 554 | mii_ethtool_gset(&lp->mii_if, cmd); | 445 | mii_ethtool_gset(&lp->mii_if, cmd); |
| 555 | spin_unlock_irqrestore(&lp->lock, flags); | 446 | spin_unlock_irqrestore(&lp->lock, flags); |
| 556 | r = 0; | 447 | r = 0; |
| 557 | } | 448 | } |
| 558 | return r; | 449 | return r; |
| 559 | } | 450 | } |
| 560 | 451 | ||
| 561 | static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) | 452 | static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) |
| 562 | { | 453 | { |
| 563 | struct pcnet32_private *lp = dev->priv; | 454 | struct pcnet32_private *lp = dev->priv; |
| 564 | unsigned long flags; | 455 | unsigned long flags; |
| 565 | int r = -EOPNOTSUPP; | 456 | int r = -EOPNOTSUPP; |
| 566 | 457 | ||
| 567 | if (lp->mii) { | 458 | if (lp->mii) { |
| 568 | spin_lock_irqsave(&lp->lock, flags); | 459 | spin_lock_irqsave(&lp->lock, flags); |
| 569 | r = mii_ethtool_sset(&lp->mii_if, cmd); | 460 | r = mii_ethtool_sset(&lp->mii_if, cmd); |
| 570 | spin_unlock_irqrestore(&lp->lock, flags); | 461 | spin_unlock_irqrestore(&lp->lock, flags); |
| 571 | } | 462 | } |
| 572 | return r; | 463 | return r; |
| 573 | } | 464 | } |
| 574 | 465 | ||
| 575 | static void pcnet32_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) | 466 | static void pcnet32_get_drvinfo(struct net_device *dev, |
| 467 | struct ethtool_drvinfo *info) | ||
| 576 | { | 468 | { |
| 577 | struct pcnet32_private *lp = dev->priv; | 469 | struct pcnet32_private *lp = dev->priv; |
| 578 | 470 | ||
| 579 | strcpy (info->driver, DRV_NAME); | 471 | strcpy(info->driver, DRV_NAME); |
| 580 | strcpy (info->version, DRV_VERSION); | 472 | strcpy(info->version, DRV_VERSION); |
| 581 | if (lp->pci_dev) | 473 | if (lp->pci_dev) |
| 582 | strcpy (info->bus_info, pci_name(lp->pci_dev)); | 474 | strcpy(info->bus_info, pci_name(lp->pci_dev)); |
| 583 | else | 475 | else |
| 584 | sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr); | 476 | sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr); |
| 585 | } | 477 | } |
| 586 | 478 | ||
| 587 | static u32 pcnet32_get_link(struct net_device *dev) | 479 | static u32 pcnet32_get_link(struct net_device *dev) |
| 588 | { | 480 | { |
| 589 | struct pcnet32_private *lp = dev->priv; | 481 | struct pcnet32_private *lp = dev->priv; |
| 590 | unsigned long flags; | 482 | unsigned long flags; |
| 591 | int r; | 483 | int r; |
| 592 | |||
| 593 | spin_lock_irqsave(&lp->lock, flags); | ||
| 594 | if (lp->mii) { | ||
| 595 | r = mii_link_ok(&lp->mii_if); | ||
| 596 | } else { | ||
| 597 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
| 598 | r = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
| 599 | } | ||
| 600 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 601 | 484 | ||
| 602 | return r; | 485 | spin_lock_irqsave(&lp->lock, flags); |
| 486 | if (lp->mii) { | ||
| 487 | r = mii_link_ok(&lp->mii_if); | ||
| 488 | } else { | ||
| 489 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
| 490 | r = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
| 491 | } | ||
| 492 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 493 | |||
| 494 | return r; | ||
| 603 | } | 495 | } |
| 604 | 496 | ||
| 605 | static u32 pcnet32_get_msglevel(struct net_device *dev) | 497 | static u32 pcnet32_get_msglevel(struct net_device *dev) |
| 606 | { | 498 | { |
| 607 | struct pcnet32_private *lp = dev->priv; | 499 | struct pcnet32_private *lp = dev->priv; |
| 608 | return lp->msg_enable; | 500 | return lp->msg_enable; |
| 609 | } | 501 | } |
| 610 | 502 | ||
| 611 | static void pcnet32_set_msglevel(struct net_device *dev, u32 value) | 503 | static void pcnet32_set_msglevel(struct net_device *dev, u32 value) |
| 612 | { | 504 | { |
| 613 | struct pcnet32_private *lp = dev->priv; | 505 | struct pcnet32_private *lp = dev->priv; |
| 614 | lp->msg_enable = value; | 506 | lp->msg_enable = value; |
| 615 | } | 507 | } |
| 616 | 508 | ||
| 617 | static int pcnet32_nway_reset(struct net_device *dev) | 509 | static int pcnet32_nway_reset(struct net_device *dev) |
| 618 | { | 510 | { |
| 619 | struct pcnet32_private *lp = dev->priv; | 511 | struct pcnet32_private *lp = dev->priv; |
| 620 | unsigned long flags; | 512 | unsigned long flags; |
| 621 | int r = -EOPNOTSUPP; | 513 | int r = -EOPNOTSUPP; |
| 622 | 514 | ||
| 623 | if (lp->mii) { | 515 | if (lp->mii) { |
| 624 | spin_lock_irqsave(&lp->lock, flags); | 516 | spin_lock_irqsave(&lp->lock, flags); |
| 625 | r = mii_nway_restart(&lp->mii_if); | 517 | r = mii_nway_restart(&lp->mii_if); |
| 626 | spin_unlock_irqrestore(&lp->lock, flags); | 518 | spin_unlock_irqrestore(&lp->lock, flags); |
| 627 | } | 519 | } |
| 628 | return r; | 520 | return r; |
| 629 | } | 521 | } |
| 630 | 522 | ||
| 631 | static void pcnet32_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) | 523 | static void pcnet32_get_ringparam(struct net_device *dev, |
| 524 | struct ethtool_ringparam *ering) | ||
| 632 | { | 525 | { |
| 633 | struct pcnet32_private *lp = dev->priv; | 526 | struct pcnet32_private *lp = dev->priv; |
| 634 | 527 | ||
| 635 | ering->tx_max_pending = TX_MAX_RING_SIZE - 1; | 528 | ering->tx_max_pending = TX_MAX_RING_SIZE - 1; |
| 636 | ering->tx_pending = lp->tx_ring_size - 1; | 529 | ering->tx_pending = lp->tx_ring_size - 1; |
| 637 | ering->rx_max_pending = RX_MAX_RING_SIZE - 1; | 530 | ering->rx_max_pending = RX_MAX_RING_SIZE - 1; |
| 638 | ering->rx_pending = lp->rx_ring_size - 1; | 531 | ering->rx_pending = lp->rx_ring_size - 1; |
| 639 | } | 532 | } |
| 640 | 533 | ||
| 641 | static int pcnet32_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) | 534 | static int pcnet32_set_ringparam(struct net_device *dev, |
| 535 | struct ethtool_ringparam *ering) | ||
| 642 | { | 536 | { |
| 643 | struct pcnet32_private *lp = dev->priv; | 537 | struct pcnet32_private *lp = dev->priv; |
| 644 | unsigned long flags; | 538 | unsigned long flags; |
| 645 | int i; | 539 | int i; |
| 646 | 540 | ||
| 647 | if (ering->rx_mini_pending || ering->rx_jumbo_pending) | 541 | if (ering->rx_mini_pending || ering->rx_jumbo_pending) |
| 648 | return -EINVAL; | 542 | return -EINVAL; |
| 649 | 543 | ||
| 650 | if (netif_running(dev)) | 544 | if (netif_running(dev)) |
| 651 | pcnet32_close(dev); | 545 | pcnet32_close(dev); |
| 652 | 546 | ||
| 653 | spin_lock_irqsave(&lp->lock, flags); | 547 | spin_lock_irqsave(&lp->lock, flags); |
| 654 | pcnet32_free_ring(dev); | ||
| 655 | lp->tx_ring_size = min(ering->tx_pending, (unsigned int) TX_MAX_RING_SIZE); | ||
| 656 | lp->rx_ring_size = min(ering->rx_pending, (unsigned int) RX_MAX_RING_SIZE); | ||
| 657 | |||
| 658 | /* set the minimum ring size to 4, to allow the loopback test to work | ||
| 659 | * unchanged. | ||
| 660 | */ | ||
| 661 | for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) { | ||
| 662 | if (lp->tx_ring_size <= (1 << i)) | ||
| 663 | break; | ||
| 664 | } | ||
| 665 | lp->tx_ring_size = (1 << i); | ||
| 666 | lp->tx_mod_mask = lp->tx_ring_size - 1; | ||
| 667 | lp->tx_len_bits = (i << 12); | ||
| 668 | |||
| 669 | for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) { | ||
| 670 | if (lp->rx_ring_size <= (1 << i)) | ||
| 671 | break; | ||
| 672 | } | ||
| 673 | lp->rx_ring_size = (1 << i); | ||
| 674 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
| 675 | lp->rx_len_bits = (i << 4); | ||
| 676 | |||
| 677 | if (pcnet32_alloc_ring(dev, dev->name)) { | ||
| 678 | pcnet32_free_ring(dev); | 548 | pcnet32_free_ring(dev); |
| 679 | spin_unlock_irqrestore(&lp->lock, flags); | 549 | lp->tx_ring_size = |
| 680 | return -ENOMEM; | 550 | min(ering->tx_pending, (unsigned int)TX_MAX_RING_SIZE); |
| 681 | } | 551 | lp->rx_ring_size = |
| 552 | min(ering->rx_pending, (unsigned int)RX_MAX_RING_SIZE); | ||
| 553 | |||
| 554 | /* set the minimum ring size to 4, to allow the loopback test to work | ||
| 555 | * unchanged. | ||
| 556 | */ | ||
| 557 | for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) { | ||
| 558 | if (lp->tx_ring_size <= (1 << i)) | ||
| 559 | break; | ||
| 560 | } | ||
| 561 | lp->tx_ring_size = (1 << i); | ||
| 562 | lp->tx_mod_mask = lp->tx_ring_size - 1; | ||
| 563 | lp->tx_len_bits = (i << 12); | ||
| 682 | 564 | ||
| 683 | spin_unlock_irqrestore(&lp->lock, flags); | 565 | for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) { |
| 566 | if (lp->rx_ring_size <= (1 << i)) | ||
| 567 | break; | ||
| 568 | } | ||
| 569 | lp->rx_ring_size = (1 << i); | ||
| 570 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
| 571 | lp->rx_len_bits = (i << 4); | ||
| 572 | |||
| 573 | if (pcnet32_alloc_ring(dev, dev->name)) { | ||
| 574 | pcnet32_free_ring(dev); | ||
| 575 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 576 | return -ENOMEM; | ||
| 577 | } | ||
| 684 | 578 | ||
| 685 | if (pcnet32_debug & NETIF_MSG_DRV) | 579 | spin_unlock_irqrestore(&lp->lock, flags); |
| 686 | printk(KERN_INFO PFX "%s: Ring Param Settings: RX: %d, TX: %d\n", | ||
| 687 | dev->name, lp->rx_ring_size, lp->tx_ring_size); | ||
| 688 | 580 | ||
| 689 | if (netif_running(dev)) | 581 | if (pcnet32_debug & NETIF_MSG_DRV) |
| 690 | pcnet32_open(dev); | 582 | printk(KERN_INFO PFX |
| 583 | "%s: Ring Param Settings: RX: %d, TX: %d\n", dev->name, | ||
| 584 | lp->rx_ring_size, lp->tx_ring_size); | ||
| 691 | 585 | ||
| 692 | return 0; | 586 | if (netif_running(dev)) |
| 587 | pcnet32_open(dev); | ||
| 588 | |||
| 589 | return 0; | ||
| 693 | } | 590 | } |
| 694 | 591 | ||
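
The set_ringparam path above clamps the requested descriptor counts to the hardware maxima and then rounds each ring up to a power of two, never below 4, because the ring length ends up encoded as a log2 value in tx_len_bits/rx_len_bits and the size minus one is reused as a wrap mask. A minimal stand-alone sketch of that rounding; the limit constants here are assumptions standing in for TX_MAX_RING_SIZE and PCNET32_LOG_MAX_TX_BUFFERS from pcnet32.c:

#include <stdio.h>

/* Assumed stand-in limits; the driver uses PCNET32_LOG_MAX_TX_BUFFERS and
 * TX_MAX_RING_SIZE (and the RX equivalents) for the real bounds. */
#define LOG_MAX_BUFFERS 9
#define MAX_RING_SIZE   (1 << LOG_MAX_BUFFERS)

/* Round a requested ring size up to a power of two, never below 4,
 * mirroring the clamp-then-loop sequence in pcnet32_set_ringparam(). */
static unsigned int round_ring_size(unsigned int requested)
{
	unsigned int i;

	if (requested > MAX_RING_SIZE)
		requested = MAX_RING_SIZE;
	for (i = 2; i <= LOG_MAX_BUFFERS; i++) {
		if (requested <= (1u << i))
			break;
	}
	return 1u << i;		/* e.g. 3 -> 4, 100 -> 128 */
}

int main(void)
{
	printf("%u %u %u\n", round_ring_size(3),
	       round_ring_size(100), round_ring_size(4096));
	return 0;
}
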
| 695 | static void pcnet32_get_strings(struct net_device *dev, u32 stringset, u8 *data) | 592 | static void pcnet32_get_strings(struct net_device *dev, u32 stringset, |
| 593 | u8 * data) | ||
| 696 | { | 594 | { |
| 697 | memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test)); | 595 | memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test)); |
| 698 | } | 596 | } |
| 699 | 597 | ||
| 700 | static int pcnet32_self_test_count(struct net_device *dev) | 598 | static int pcnet32_self_test_count(struct net_device *dev) |
| 701 | { | 599 | { |
| 702 | return PCNET32_TEST_LEN; | 600 | return PCNET32_TEST_LEN; |
| 703 | } | 601 | } |
| 704 | 602 | ||
| 705 | static void pcnet32_ethtool_test(struct net_device *dev, | 603 | static void pcnet32_ethtool_test(struct net_device *dev, |
| 706 | struct ethtool_test *test, u64 *data) | 604 | struct ethtool_test *test, u64 * data) |
| 707 | { | 605 | { |
| 708 | struct pcnet32_private *lp = dev->priv; | 606 | struct pcnet32_private *lp = dev->priv; |
| 709 | int rc; | 607 | int rc; |
| 710 | 608 | ||
| 711 | if (test->flags == ETH_TEST_FL_OFFLINE) { | 609 | if (test->flags == ETH_TEST_FL_OFFLINE) { |
| 712 | rc = pcnet32_loopback_test(dev, data); | 610 | rc = pcnet32_loopback_test(dev, data); |
| 713 | if (rc) { | 611 | if (rc) { |
| 714 | if (netif_msg_hw(lp)) | 612 | if (netif_msg_hw(lp)) |
| 715 | printk(KERN_DEBUG "%s: Loopback test failed.\n", dev->name); | 613 | printk(KERN_DEBUG "%s: Loopback test failed.\n", |
| 716 | test->flags |= ETH_TEST_FL_FAILED; | 614 | dev->name); |
| 615 | test->flags |= ETH_TEST_FL_FAILED; | ||
| 616 | } else if (netif_msg_hw(lp)) | ||
| 617 | printk(KERN_DEBUG "%s: Loopback test passed.\n", | ||
| 618 | dev->name); | ||
| 717 | } else if (netif_msg_hw(lp)) | 619 | } else if (netif_msg_hw(lp)) |
| 718 | printk(KERN_DEBUG "%s: Loopback test passed.\n", dev->name); | 620 | printk(KERN_DEBUG |
| 719 | } else if (netif_msg_hw(lp)) | 621 | "%s: No tests to run (specify 'Offline' on ethtool).", |
| 720 | printk(KERN_DEBUG "%s: No tests to run (specify 'Offline' on ethtool).", dev->name); | 622 | dev->name); |
| 721 | } /* end pcnet32_ethtool_test */ | 623 | } /* end pcnet32_ethtool_test */ |
| 722 | 624 | ||
| 723 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1) | 625 | static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1) |
| 724 | { | 626 | { |
| 725 | struct pcnet32_private *lp = dev->priv; | 627 | struct pcnet32_private *lp = dev->priv; |
| 726 | struct pcnet32_access *a = &lp->a; /* access to registers */ | 628 | struct pcnet32_access *a = &lp->a; /* access to registers */ |
| 727 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | 629 | ulong ioaddr = dev->base_addr; /* card base I/O address */ |
| 728 | struct sk_buff *skb; /* sk buff */ | 630 | struct sk_buff *skb; /* sk buff */ |
| 729 | int x, i; /* counters */ | 631 | int x, i; /* counters */ |
| 730 | int numbuffs = 4; /* number of TX/RX buffers and descs */ | 632 | int numbuffs = 4; /* number of TX/RX buffers and descs */ |
| 731 | u16 status = 0x8300; /* TX ring status */ | 633 | u16 status = 0x8300; /* TX ring status */ |
| 732 | u16 teststatus; /* test of ring status */ | 634 | u16 teststatus; /* test of ring status */ |
| 733 | int rc; /* return code */ | 635 | int rc; /* return code */ |
| 734 | int size; /* size of packets */ | 636 | int size; /* size of packets */ |
| 735 | unsigned char *packet; /* source packet data */ | 637 | unsigned char *packet; /* source packet data */ |
| 736 | static const int data_len = 60; /* length of source packets */ | 638 | static const int data_len = 60; /* length of source packets */ |
| 737 | unsigned long flags; | 639 | unsigned long flags; |
| 738 | unsigned long ticks; | 640 | unsigned long ticks; |
| 739 | 641 | ||
| 740 | *data1 = 1; /* status of test, default to fail */ | 642 | *data1 = 1; /* status of test, default to fail */ |
| 741 | rc = 1; /* default to fail */ | 643 | rc = 1; /* default to fail */ |
| 742 | 644 | ||
| 743 | if (netif_running(dev)) | 645 | if (netif_running(dev)) |
| 744 | pcnet32_close(dev); | 646 | pcnet32_close(dev); |
| 745 | 647 | ||
| 746 | spin_lock_irqsave(&lp->lock, flags); | 648 | spin_lock_irqsave(&lp->lock, flags); |
| 747 | 649 | ||
| 748 | /* Reset the PCNET32 */ | 650 | /* Reset the PCNET32 */ |
| 749 | lp->a.reset (ioaddr); | 651 | lp->a.reset(ioaddr); |
| 750 | 652 | ||
| 751 | /* switch pcnet32 to 32bit mode */ | 653 | /* switch pcnet32 to 32bit mode */ |
| 752 | lp->a.write_bcr (ioaddr, 20, 2); | 654 | lp->a.write_bcr(ioaddr, 20, 2); |
| 753 | 655 | ||
| 754 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 656 | lp->init_block.mode = |
| 755 | lp->init_block.filter[0] = 0; | 657 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
| 756 | lp->init_block.filter[1] = 0; | 658 | lp->init_block.filter[0] = 0; |
| 757 | 659 | lp->init_block.filter[1] = 0; | |
| 758 | /* purge & init rings but don't actually restart */ | 660 | |
| 759 | pcnet32_restart(dev, 0x0000); | 661 | /* purge & init rings but don't actually restart */ |
| 760 | 662 | pcnet32_restart(dev, 0x0000); | |
| 761 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | 663 | |
| 762 | 664 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | |
| 763 | /* Initialize Transmit buffers. */ | 665 | |
| 764 | size = data_len + 15; | 666 | /* Initialize Transmit buffers. */ |
| 765 | for (x=0; x<numbuffs; x++) { | 667 | size = data_len + 15; |
| 766 | if (!(skb = dev_alloc_skb(size))) { | 668 | for (x = 0; x < numbuffs; x++) { |
| 767 | if (netif_msg_hw(lp)) | 669 | if (!(skb = dev_alloc_skb(size))) { |
| 768 | printk(KERN_DEBUG "%s: Cannot allocate skb at line: %d!\n", | 670 | if (netif_msg_hw(lp)) |
| 769 | dev->name, __LINE__); | 671 | printk(KERN_DEBUG |
| 770 | goto clean_up; | 672 | "%s: Cannot allocate skb at line: %d!\n", |
| 771 | } else { | 673 | dev->name, __LINE__); |
| 772 | packet = skb->data; | 674 | goto clean_up; |
| 773 | skb_put(skb, size); /* create space for data */ | 675 | } else { |
| 774 | lp->tx_skbuff[x] = skb; | 676 | packet = skb->data; |
| 775 | lp->tx_ring[x].length = le16_to_cpu(-skb->len); | 677 | skb_put(skb, size); /* create space for data */ |
| 776 | lp->tx_ring[x].misc = 0; | 678 | lp->tx_skbuff[x] = skb; |
| 777 | 679 | lp->tx_ring[x].length = le16_to_cpu(-skb->len); | |
| 778 | /* put DA and SA into the skb */ | 680 | lp->tx_ring[x].misc = 0; |
| 779 | for (i=0; i<6; i++) | 681 | |
| 780 | *packet++ = dev->dev_addr[i]; | 682 | /* put DA and SA into the skb */ |
| 781 | for (i=0; i<6; i++) | 683 | for (i = 0; i < 6; i++) |
| 782 | *packet++ = dev->dev_addr[i]; | 684 | *packet++ = dev->dev_addr[i]; |
| 783 | /* type */ | 685 | for (i = 0; i < 6; i++) |
| 784 | *packet++ = 0x08; | 686 | *packet++ = dev->dev_addr[i]; |
| 785 | *packet++ = 0x06; | 687 | /* type */ |
| 786 | /* packet number */ | 688 | *packet++ = 0x08; |
| 787 | *packet++ = x; | 689 | *packet++ = 0x06; |
| 788 | /* fill packet with data */ | 690 | /* packet number */ |
| 789 | for (i=0; i<data_len; i++) | 691 | *packet++ = x; |
| 790 | *packet++ = i; | 692 | /* fill packet with data */ |
| 791 | 693 | for (i = 0; i < data_len; i++) | |
| 792 | lp->tx_dma_addr[x] = pci_map_single(lp->pci_dev, skb->data, | 694 | *packet++ = i; |
| 793 | skb->len, PCI_DMA_TODEVICE); | 695 | |
| 794 | lp->tx_ring[x].base = (u32)le32_to_cpu(lp->tx_dma_addr[x]); | 696 | lp->tx_dma_addr[x] = |
| 795 | wmb(); /* Make sure owner changes after all others are visible */ | 697 | pci_map_single(lp->pci_dev, skb->data, skb->len, |
| 796 | lp->tx_ring[x].status = le16_to_cpu(status); | 698 | PCI_DMA_TODEVICE); |
| 797 | } | 699 | lp->tx_ring[x].base = |
| 798 | } | 700 | (u32) le32_to_cpu(lp->tx_dma_addr[x]); |
| 799 | 701 | wmb(); /* Make sure owner changes after all others are visible */ | |
| 800 | x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */ | 702 | lp->tx_ring[x].status = le16_to_cpu(status); |
| 801 | x = x | 0x0002; | 703 | } |
| 802 | a->write_bcr(ioaddr, 32, x); | 704 | } |
| 803 | 705 | ||
| 804 | lp->a.write_csr (ioaddr, 15, 0x0044); /* set int loopback in CSR15 */ | 706 | x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */ |
| 805 | 707 | x = x | 0x0002; | |
| 806 | teststatus = le16_to_cpu(0x8000); | 708 | a->write_bcr(ioaddr, 32, x); |
| 807 | lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */ | 709 | |
| 808 | 710 | lp->a.write_csr(ioaddr, 15, 0x0044); /* set int loopback in CSR15 */ | |
| 809 | /* Check status of descriptors */ | 711 | |
| 810 | for (x=0; x<numbuffs; x++) { | 712 | teststatus = le16_to_cpu(0x8000); |
| 811 | ticks = 0; | 713 | lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */ |
| 812 | rmb(); | 714 | |
| 813 | while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) { | 715 | /* Check status of descriptors */ |
| 814 | spin_unlock_irqrestore(&lp->lock, flags); | 716 | for (x = 0; x < numbuffs; x++) { |
| 815 | mdelay(1); | 717 | ticks = 0; |
| 816 | spin_lock_irqsave(&lp->lock, flags); | 718 | rmb(); |
| 817 | rmb(); | 719 | while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) { |
| 818 | ticks++; | 720 | spin_unlock_irqrestore(&lp->lock, flags); |
| 819 | } | 721 | mdelay(1); |
| 820 | if (ticks == 200) { | 722 | spin_lock_irqsave(&lp->lock, flags); |
| 821 | if (netif_msg_hw(lp)) | 723 | rmb(); |
| 822 | printk("%s: Desc %d failed to reset!\n",dev->name,x); | 724 | ticks++; |
| 823 | break; | 725 | } |
| 824 | } | 726 | if (ticks == 200) { |
| 825 | } | 727 | if (netif_msg_hw(lp)) |
| 826 | 728 | printk("%s: Desc %d failed to reset!\n", | |
| 827 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ | 729 | dev->name, x); |
| 828 | wmb(); | 730 | break; |
| 829 | if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) { | 731 | } |
| 830 | printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name); | 732 | } |
| 831 | 733 | ||
| 832 | for (x=0; x<numbuffs; x++) { | 734 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */ |
| 833 | printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x); | 735 | wmb(); |
| 834 | skb = lp->rx_skbuff[x]; | 736 | if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) { |
| 835 | for (i=0; i<size; i++) { | 737 | printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name); |
| 836 | printk("%02x ", *(skb->data+i)); | 738 | |
| 837 | } | 739 | for (x = 0; x < numbuffs; x++) { |
| 838 | printk("\n"); | 740 | printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x); |
| 839 | } | 741 | skb = lp->rx_skbuff[x]; |
| 840 | } | 742 | for (i = 0; i < size; i++) { |
| 841 | 743 | printk("%02x ", *(skb->data + i)); | |
| 842 | x = 0; | 744 | } |
| 843 | rc = 0; | 745 | printk("\n"); |
| 844 | while (x<numbuffs && !rc) { | 746 | } |
| 845 | skb = lp->rx_skbuff[x]; | 747 | } |
| 846 | packet = lp->tx_skbuff[x]->data; | 748 | |
| 847 | for (i=0; i<size; i++) { | 749 | x = 0; |
| 848 | if (*(skb->data+i) != packet[i]) { | 750 | rc = 0; |
| 849 | if (netif_msg_hw(lp)) | 751 | while (x < numbuffs && !rc) { |
| 850 | printk(KERN_DEBUG "%s: Error in compare! %2x - %02x %02x\n", | 752 | skb = lp->rx_skbuff[x]; |
| 851 | dev->name, i, *(skb->data+i), packet[i]); | 753 | packet = lp->tx_skbuff[x]->data; |
| 852 | rc = 1; | 754 | for (i = 0; i < size; i++) { |
| 853 | break; | 755 | if (*(skb->data + i) != packet[i]) { |
| 854 | } | 756 | if (netif_msg_hw(lp)) |
| 757 | printk(KERN_DEBUG | ||
| 758 | "%s: Error in compare! %2x - %02x %02x\n", | ||
| 759 | dev->name, i, *(skb->data + i), | ||
| 760 | packet[i]); | ||
| 761 | rc = 1; | ||
| 762 | break; | ||
| 763 | } | ||
| 764 | } | ||
| 765 | x++; | ||
| 766 | } | ||
| 767 | if (!rc) { | ||
| 768 | *data1 = 0; | ||
| 855 | } | 769 | } |
| 856 | x++; | ||
| 857 | } | ||
| 858 | if (!rc) { | ||
| 859 | *data1 = 0; | ||
| 860 | } | ||
| 861 | 770 | ||
| 862 | clean_up: | 771 | clean_up: |
| 863 | pcnet32_purge_tx_ring(dev); | 772 | pcnet32_purge_tx_ring(dev); |
| 864 | x = a->read_csr(ioaddr, 15) & 0xFFFF; | 773 | x = a->read_csr(ioaddr, 15) & 0xFFFF; |
| 865 | a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */ | 774 | a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */ |
| 866 | 775 | ||
| 867 | x = a->read_bcr(ioaddr, 32); /* reset internal loopback */ | 776 | x = a->read_bcr(ioaddr, 32); /* reset internal loopback */ |
| 868 | x = x & ~0x0002; | 777 | x = x & ~0x0002; |
| 869 | a->write_bcr(ioaddr, 32, x); | 778 | a->write_bcr(ioaddr, 32, x); |
| 870 | 779 | ||
| 871 | spin_unlock_irqrestore(&lp->lock, flags); | 780 | spin_unlock_irqrestore(&lp->lock, flags); |
| 872 | 781 | ||
| 873 | if (netif_running(dev)) { | 782 | if (netif_running(dev)) { |
| 874 | pcnet32_open(dev); | 783 | pcnet32_open(dev); |
| 875 | } else { | 784 | } else { |
| 876 | lp->a.write_bcr (ioaddr, 20, 4); /* return to 16bit mode */ | 785 | lp->a.write_bcr(ioaddr, 20, 4); /* return to 16bit mode */ |
| 877 | } | 786 | } |
| 878 | 787 | ||
| 879 | return(rc); | 788 | return (rc); |
| 880 | } /* end pcnet32_loopback_test */ | 789 | } /* end pcnet32_loopback_test */ |
| 881 | 790 | ||
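
Each loopback frame built above is 75 bytes: the adapter's own MAC as both destination and source, the two type bytes 0x08 0x06, a one-byte frame number, and 60 bytes of incrementing payload, which is exactly what the hex dump printed under msg_pktdata should echo back. A self-contained sketch of that frame construction; the buffer handling is simplified and only the byte layout is taken from the driver:

#include <stdio.h>
#include <string.h>

#define TEST_DATA_LEN 60	/* matches data_len in pcnet32_loopback_test() */

/* Fill buf with the pattern the loopback test transmits for frame 'index'. */
static size_t build_test_frame(unsigned char *buf, const unsigned char mac[6],
			       unsigned char index)
{
	unsigned char *p = buf;
	int i;

	memcpy(p, mac, 6);	/* destination = own MAC */
	p += 6;
	memcpy(p, mac, 6);	/* source = own MAC */
	p += 6;
	*p++ = 0x08;		/* type bytes, as in the driver */
	*p++ = 0x06;
	*p++ = index;		/* frame number */
	for (i = 0; i < TEST_DATA_LEN; i++)	/* incrementing payload */
		*p++ = (unsigned char)i;

	return (size_t)(p - buf);	/* 6 + 6 + 2 + 1 + 60 = 75 bytes */
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	unsigned char frame[128];
	size_t len = build_test_frame(frame, mac, 0);

	printf("frame length %zu, frame number byte 0x%02x\n", len, frame[14]);
	return 0;
}
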
| 882 | static void pcnet32_led_blink_callback(struct net_device *dev) | 791 | static void pcnet32_led_blink_callback(struct net_device *dev) |
| 883 | { | 792 | { |
| 884 | struct pcnet32_private *lp = dev->priv; | 793 | struct pcnet32_private *lp = dev->priv; |
| 885 | struct pcnet32_access *a = &lp->a; | 794 | struct pcnet32_access *a = &lp->a; |
| 886 | ulong ioaddr = dev->base_addr; | 795 | ulong ioaddr = dev->base_addr; |
| 887 | unsigned long flags; | 796 | unsigned long flags; |
| 888 | int i; | 797 | int i; |
| 889 | 798 | ||
| 890 | spin_lock_irqsave(&lp->lock, flags); | 799 | spin_lock_irqsave(&lp->lock, flags); |
| 891 | for (i=4; i<8; i++) { | 800 | for (i = 4; i < 8; i++) { |
| 892 | a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000); | 801 | a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000); |
| 893 | } | 802 | } |
| 894 | spin_unlock_irqrestore(&lp->lock, flags); | 803 | spin_unlock_irqrestore(&lp->lock, flags); |
| 895 | 804 | ||
| 896 | mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT); | 805 | mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT); |
| 897 | } | 806 | } |
| 898 | 807 | ||
| 899 | static int pcnet32_phys_id(struct net_device *dev, u32 data) | 808 | static int pcnet32_phys_id(struct net_device *dev, u32 data) |
| 900 | { | 809 | { |
| 901 | struct pcnet32_private *lp = dev->priv; | 810 | struct pcnet32_private *lp = dev->priv; |
| 902 | struct pcnet32_access *a = &lp->a; | 811 | struct pcnet32_access *a = &lp->a; |
| 903 | ulong ioaddr = dev->base_addr; | 812 | ulong ioaddr = dev->base_addr; |
| 904 | unsigned long flags; | 813 | unsigned long flags; |
| 905 | int i, regs[4]; | 814 | int i, regs[4]; |
| 906 | 815 | ||
| 907 | if (!lp->blink_timer.function) { | 816 | if (!lp->blink_timer.function) { |
| 908 | init_timer(&lp->blink_timer); | 817 | init_timer(&lp->blink_timer); |
| 909 | lp->blink_timer.function = (void *) pcnet32_led_blink_callback; | 818 | lp->blink_timer.function = (void *)pcnet32_led_blink_callback; |
| 910 | lp->blink_timer.data = (unsigned long) dev; | 819 | lp->blink_timer.data = (unsigned long)dev; |
| 911 | } | 820 | } |
| 912 | 821 | ||
| 913 | /* Save the current value of the bcrs */ | 822 | /* Save the current value of the bcrs */ |
| 914 | spin_lock_irqsave(&lp->lock, flags); | 823 | spin_lock_irqsave(&lp->lock, flags); |
| 915 | for (i=4; i<8; i++) { | 824 | for (i = 4; i < 8; i++) { |
| 916 | regs[i-4] = a->read_bcr(ioaddr, i); | 825 | regs[i - 4] = a->read_bcr(ioaddr, i); |
| 917 | } | 826 | } |
| 918 | spin_unlock_irqrestore(&lp->lock, flags); | 827 | spin_unlock_irqrestore(&lp->lock, flags); |
| 919 | 828 | ||
| 920 | mod_timer(&lp->blink_timer, jiffies); | 829 | mod_timer(&lp->blink_timer, jiffies); |
| 921 | set_current_state(TASK_INTERRUPTIBLE); | 830 | set_current_state(TASK_INTERRUPTIBLE); |
| 922 | 831 | ||
| 923 | if ((!data) || (data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))) | 832 | if ((!data) || (data > (u32) (MAX_SCHEDULE_TIMEOUT / HZ))) |
| 924 | data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ); | 833 | data = (u32) (MAX_SCHEDULE_TIMEOUT / HZ); |
| 925 | 834 | ||
| 926 | msleep_interruptible(data * 1000); | 835 | msleep_interruptible(data * 1000); |
| 927 | del_timer_sync(&lp->blink_timer); | 836 | del_timer_sync(&lp->blink_timer); |
| 928 | 837 | ||
| 929 | /* Restore the original value of the bcrs */ | 838 | /* Restore the original value of the bcrs */ |
| 930 | spin_lock_irqsave(&lp->lock, flags); | 839 | spin_lock_irqsave(&lp->lock, flags); |
| 931 | for (i=4; i<8; i++) { | 840 | for (i = 4; i < 8; i++) { |
| 932 | a->write_bcr(ioaddr, i, regs[i-4]); | 841 | a->write_bcr(ioaddr, i, regs[i - 4]); |
| 933 | } | 842 | } |
| 934 | spin_unlock_irqrestore(&lp->lock, flags); | 843 | spin_unlock_irqrestore(&lp->lock, flags); |
| 935 | 844 | ||
| 936 | return 0; | 845 | return 0; |
| 937 | } | 846 | } |
| 938 | 847 | ||
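
phys_id above implements ethtool's LED-identify operation: it saves BCR4 through BCR7, arms a timer that toggles bit 0x4000 in each of those registers every PCNET32_BLINK_TIMEOUT, sleeps for the requested number of seconds (bounded by MAX_SCHEDULE_TIMEOUT / HZ), and finally restores the saved values. A tiny sketch of the toggle step applied to a saved register image, pure bit manipulation with no hardware access:

#include <stdio.h>

#define PCNET32_LED_BIT 0x4000	/* bit toggled in BCR4..BCR7 by the driver */

/* One blink-timer tick: flip the LED control bit in each of the four BCRs. */
static void blink_step(unsigned short bcr[4])
{
	int i;

	for (i = 0; i < 4; i++)
		bcr[i] ^= PCNET32_LED_BIT;
}

int main(void)
{
	unsigned short bcr[4] = { 0x0000, 0x0080, 0x0000, 0x0080 };

	blink_step(bcr);
	printf("BCR4 after toggle: 0x%04x\n", bcr[0]);	/* prints 0x4000 */
	return 0;
}
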
| 848 | #define PCNET32_REGS_PER_PHY 32 | ||
| 849 | #define PCNET32_MAX_PHYS 32 | ||
| 939 | static int pcnet32_get_regs_len(struct net_device *dev) | 850 | static int pcnet32_get_regs_len(struct net_device *dev) |
| 940 | { | 851 | { |
| 941 | return(PCNET32_NUM_REGS * sizeof(u16)); | 852 | struct pcnet32_private *lp = dev->priv; |
| 853 | int j = lp->phycount * PCNET32_REGS_PER_PHY; | ||
| 854 | |||
| 855 | return ((PCNET32_NUM_REGS + j) * sizeof(u16)); | ||
| 942 | } | 856 | } |
| 943 | 857 | ||
| 944 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, | 858 | static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, |
| 945 | void *ptr) | 859 | void *ptr) |
| 946 | { | 860 | { |
| 947 | int i, csr0; | 861 | int i, csr0; |
| 948 | u16 *buff = ptr; | 862 | u16 *buff = ptr; |
| 949 | struct pcnet32_private *lp = dev->priv; | 863 | struct pcnet32_private *lp = dev->priv; |
| 950 | struct pcnet32_access *a = &lp->a; | 864 | struct pcnet32_access *a = &lp->a; |
| 951 | ulong ioaddr = dev->base_addr; | 865 | ulong ioaddr = dev->base_addr; |
| 952 | int ticks; | 866 | int ticks; |
| 953 | unsigned long flags; | 867 | unsigned long flags; |
| 954 | |||
| 955 | spin_lock_irqsave(&lp->lock, flags); | ||
| 956 | |||
| 957 | csr0 = a->read_csr(ioaddr, 0); | ||
| 958 | if (!(csr0 & 0x0004)) { /* If not stopped */ | ||
| 959 | /* set SUSPEND (SPND) - CSR5 bit 0 */ | ||
| 960 | a->write_csr(ioaddr, 5, 0x0001); | ||
| 961 | |||
| 962 | /* poll waiting for bit to be set */ | ||
| 963 | ticks = 0; | ||
| 964 | while (!(a->read_csr(ioaddr, 5) & 0x0001)) { | ||
| 965 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 966 | mdelay(1); | ||
| 967 | spin_lock_irqsave(&lp->lock, flags); | ||
| 968 | ticks++; | ||
| 969 | if (ticks > 200) { | ||
| 970 | if (netif_msg_hw(lp)) | ||
| 971 | printk(KERN_DEBUG "%s: Error getting into suspend!\n", | ||
| 972 | dev->name); | ||
| 973 | break; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | } | ||
| 977 | 868 | ||
| 978 | /* read address PROM */ | 869 | spin_lock_irqsave(&lp->lock, flags); |
| 979 | for (i=0; i<16; i += 2) | ||
| 980 | *buff++ = inw(ioaddr + i); | ||
| 981 | 870 | ||
| 982 | /* read control and status registers */ | 871 | csr0 = a->read_csr(ioaddr, 0); |
| 983 | for (i=0; i<90; i++) { | 872 | if (!(csr0 & 0x0004)) { /* If not stopped */ |
| 984 | *buff++ = a->read_csr(ioaddr, i); | 873 | /* set SUSPEND (SPND) - CSR5 bit 0 */ |
| 985 | } | 874 | a->write_csr(ioaddr, 5, 0x0001); |
| 875 | |||
| 876 | /* poll waiting for bit to be set */ | ||
| 877 | ticks = 0; | ||
| 878 | while (!(a->read_csr(ioaddr, 5) & 0x0001)) { | ||
| 879 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 880 | mdelay(1); | ||
| 881 | spin_lock_irqsave(&lp->lock, flags); | ||
| 882 | ticks++; | ||
| 883 | if (ticks > 200) { | ||
| 884 | if (netif_msg_hw(lp)) | ||
| 885 | printk(KERN_DEBUG | ||
| 886 | "%s: Error getting into suspend!\n", | ||
| 887 | dev->name); | ||
| 888 | break; | ||
| 889 | } | ||
| 890 | } | ||
| 891 | } | ||
| 986 | 892 | ||
| 987 | *buff++ = a->read_csr(ioaddr, 112); | 893 | /* read address PROM */ |
| 988 | *buff++ = a->read_csr(ioaddr, 114); | 894 | for (i = 0; i < 16; i += 2) |
| 895 | *buff++ = inw(ioaddr + i); | ||
| 989 | 896 | ||
| 990 | /* read bus configuration registers */ | 897 | /* read control and status registers */ |
| 991 | for (i=0; i<30; i++) { | 898 | for (i = 0; i < 90; i++) { |
| 992 | *buff++ = a->read_bcr(ioaddr, i); | 899 | *buff++ = a->read_csr(ioaddr, i); |
| 993 | } | 900 | } |
| 994 | *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */ | 901 | |
| 995 | for (i=31; i<36; i++) { | 902 | *buff++ = a->read_csr(ioaddr, 112); |
| 996 | *buff++ = a->read_bcr(ioaddr, i); | 903 | *buff++ = a->read_csr(ioaddr, 114); |
| 997 | } | ||
| 998 | 904 | ||
| 999 | /* read mii phy registers */ | 905 | /* read bus configuration registers */ |
| 1000 | if (lp->mii) { | 906 | for (i = 0; i < 30; i++) { |
| 1001 | for (i=0; i<32; i++) { | 907 | *buff++ = a->read_bcr(ioaddr, i); |
| 1002 | lp->a.write_bcr(ioaddr, 33, ((lp->mii_if.phy_id) << 5) | i); | 908 | } |
| 1003 | *buff++ = lp->a.read_bcr(ioaddr, 34); | 909 | *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */ |
| 910 | for (i = 31; i < 36; i++) { | ||
| 911 | *buff++ = a->read_bcr(ioaddr, i); | ||
| 1004 | } | 912 | } |
| 1005 | } | ||
| 1006 | 913 | ||
| 1007 | if (!(csr0 & 0x0004)) { /* If not stopped */ | 914 | /* read mii phy registers */ |
| 1008 | /* clear SUSPEND (SPND) - CSR5 bit 0 */ | 915 | if (lp->mii) { |
| 1009 | a->write_csr(ioaddr, 5, 0x0000); | 916 | int j; |
| 1010 | } | 917 | for (j = 0; j < PCNET32_MAX_PHYS; j++) { |
| 918 | if (lp->phymask & (1 << j)) { | ||
| 919 | for (i = 0; i < PCNET32_REGS_PER_PHY; i++) { | ||
| 920 | lp->a.write_bcr(ioaddr, 33, | ||
| 921 | (j << 5) | i); | ||
| 922 | *buff++ = lp->a.read_bcr(ioaddr, 34); | ||
| 923 | } | ||
| 924 | } | ||
| 925 | } | ||
| 926 | } | ||
| 1011 | 927 | ||
| 1012 | i = buff - (u16 *)ptr; | 928 | if (!(csr0 & 0x0004)) { /* If not stopped */ |
| 1013 | for (; i < PCNET32_NUM_REGS; i++) | 929 | /* clear SUSPEND (SPND) - CSR5 bit 0 */ |
| 1014 | *buff++ = 0; | 930 | a->write_csr(ioaddr, 5, 0x0000); |
| 931 | } | ||
| 1015 | 932 | ||
| 1016 | spin_unlock_irqrestore(&lp->lock, flags); | 933 | spin_unlock_irqrestore(&lp->lock, flags); |
| 1017 | } | 934 | } |
| 1018 | 935 | ||
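
The register dump written by get_regs above is laid out as 8 words of address PROM, CSR0-89, CSR112 and CSR114, BCR0-29, a zero placeholder in place of BCR30 (reading it can hang a 79C976), BCR31-35, and then 32 MII registers for every PHY found at probe time; get_regs_len sizes this as (PCNET32_NUM_REGS + phycount * PCNET32_REGS_PER_PHY) 16-bit words. A short worked computation of the buffer size, assuming a single PHY and assuming PCNET32_NUM_REGS matches the 136 words actually written above:

#include <stdio.h>

/* Word counts taken from the layout in pcnet32_get_regs() above. */
#define PROM_WORDS   8		  /* 16 bytes of address PROM, read as words  */
#define CSR_WORDS    (90 + 2)	  /* CSR0-89 plus CSR112 and CSR114           */
#define BCR_WORDS    (30 + 1 + 5) /* BCR0-29, placeholder for BCR30, BCR31-35 */
#define REGS_PER_PHY 32

int main(void)
{
	int base = PROM_WORDS + CSR_WORDS + BCR_WORDS;	/* 136 words */
	int phycount = 1;				/* assumed: one PHY */

	printf("dump size: %zu bytes\n",
	       (base + phycount * REGS_PER_PHY) * sizeof(unsigned short));
	return 0;
}
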
| 1019 | static struct ethtool_ops pcnet32_ethtool_ops = { | 936 | static struct ethtool_ops pcnet32_ethtool_ops = { |
| 1020 | .get_settings = pcnet32_get_settings, | 937 | .get_settings = pcnet32_get_settings, |
| 1021 | .set_settings = pcnet32_set_settings, | 938 | .set_settings = pcnet32_set_settings, |
| 1022 | .get_drvinfo = pcnet32_get_drvinfo, | 939 | .get_drvinfo = pcnet32_get_drvinfo, |
| 1023 | .get_msglevel = pcnet32_get_msglevel, | 940 | .get_msglevel = pcnet32_get_msglevel, |
| 1024 | .set_msglevel = pcnet32_set_msglevel, | 941 | .set_msglevel = pcnet32_set_msglevel, |
| 1025 | .nway_reset = pcnet32_nway_reset, | 942 | .nway_reset = pcnet32_nway_reset, |
| 1026 | .get_link = pcnet32_get_link, | 943 | .get_link = pcnet32_get_link, |
| 1027 | .get_ringparam = pcnet32_get_ringparam, | 944 | .get_ringparam = pcnet32_get_ringparam, |
| 1028 | .set_ringparam = pcnet32_set_ringparam, | 945 | .set_ringparam = pcnet32_set_ringparam, |
| 1029 | .get_tx_csum = ethtool_op_get_tx_csum, | 946 | .get_tx_csum = ethtool_op_get_tx_csum, |
| 1030 | .get_sg = ethtool_op_get_sg, | 947 | .get_sg = ethtool_op_get_sg, |
| 1031 | .get_tso = ethtool_op_get_tso, | 948 | .get_tso = ethtool_op_get_tso, |
| 1032 | .get_strings = pcnet32_get_strings, | 949 | .get_strings = pcnet32_get_strings, |
| 1033 | .self_test_count = pcnet32_self_test_count, | 950 | .self_test_count = pcnet32_self_test_count, |
| 1034 | .self_test = pcnet32_ethtool_test, | 951 | .self_test = pcnet32_ethtool_test, |
| 1035 | .phys_id = pcnet32_phys_id, | 952 | .phys_id = pcnet32_phys_id, |
| 1036 | .get_regs_len = pcnet32_get_regs_len, | 953 | .get_regs_len = pcnet32_get_regs_len, |
| 1037 | .get_regs = pcnet32_get_regs, | 954 | .get_regs = pcnet32_get_regs, |
| 1038 | .get_perm_addr = ethtool_op_get_perm_addr, | 955 | .get_perm_addr = ethtool_op_get_perm_addr, |
| 1039 | }; | 956 | }; |
| 1040 | 957 | ||
| 1041 | /* only probes for non-PCI devices, the rest are handled by | 958 | /* only probes for non-PCI devices, the rest are handled by |
| 1042 | * pci_register_driver via pcnet32_probe_pci */ | 959 | * pci_register_driver via pcnet32_probe_pci */ |
| 1043 | 960 | ||
| 1044 | static void __devinit | 961 | static void __devinit pcnet32_probe_vlbus(void) |
| 1045 | pcnet32_probe_vlbus(void) | ||
| 1046 | { | 962 | { |
| 1047 | unsigned int *port, ioaddr; | 963 | unsigned int *port, ioaddr; |
| 1048 | 964 | ||
| 1049 | /* search for PCnet32 VLB cards at known addresses */ | 965 | /* search for PCnet32 VLB cards at known addresses */ |
| 1050 | for (port = pcnet32_portlist; (ioaddr = *port); port++) { | 966 | for (port = pcnet32_portlist; (ioaddr = *port); port++) { |
| 1051 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) { | 967 | if (request_region |
| 1052 | /* check if there is really a pcnet chip on that ioaddr */ | 968 | (ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) { |
| 1053 | if ((inb(ioaddr + 14) == 0x57) && (inb(ioaddr + 15) == 0x57)) { | 969 | /* check if there is really a pcnet chip on that ioaddr */ |
| 1054 | pcnet32_probe1(ioaddr, 0, NULL); | 970 | if ((inb(ioaddr + 14) == 0x57) |
| 1055 | } else { | 971 | && (inb(ioaddr + 15) == 0x57)) { |
| 1056 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | 972 | pcnet32_probe1(ioaddr, 0, NULL); |
| 1057 | } | 973 | } else { |
| 1058 | } | 974 | release_region(ioaddr, PCNET32_TOTAL_SIZE); |
| 1059 | } | 975 | } |
| 976 | } | ||
| 977 | } | ||
| 1060 | } | 978 | } |
| 1061 | 979 | ||
| 1062 | |||
| 1063 | static int __devinit | 980 | static int __devinit |
| 1064 | pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) | 981 | pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) |
| 1065 | { | 982 | { |
| 1066 | unsigned long ioaddr; | 983 | unsigned long ioaddr; |
| 1067 | int err; | 984 | int err; |
| 1068 | 985 | ||
| 1069 | err = pci_enable_device(pdev); | 986 | err = pci_enable_device(pdev); |
| 1070 | if (err < 0) { | 987 | if (err < 0) { |
| 1071 | if (pcnet32_debug & NETIF_MSG_PROBE) | 988 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1072 | printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); | 989 | printk(KERN_ERR PFX |
| 1073 | return err; | 990 | "failed to enable device -- err=%d\n", err); |
| 1074 | } | 991 | return err; |
| 1075 | pci_set_master(pdev); | 992 | } |
| 993 | pci_set_master(pdev); | ||
| 994 | |||
| 995 | ioaddr = pci_resource_start(pdev, 0); | ||
| 996 | if (!ioaddr) { | ||
| 997 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 998 | printk(KERN_ERR PFX | ||
| 999 | "card has no PCI IO resources, aborting\n"); | ||
| 1000 | return -ENODEV; | ||
| 1001 | } | ||
| 1076 | 1002 | ||
| 1077 | ioaddr = pci_resource_start (pdev, 0); | 1003 | if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { |
| 1078 | if (!ioaddr) { | 1004 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1079 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1005 | printk(KERN_ERR PFX |
| 1080 | printk (KERN_ERR PFX "card has no PCI IO resources, aborting\n"); | 1006 | "architecture does not support 32bit PCI busmaster DMA\n"); |
| 1081 | return -ENODEV; | 1007 | return -ENODEV; |
| 1082 | } | 1008 | } |
| 1009 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") == | ||
| 1010 | NULL) { | ||
| 1011 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1012 | printk(KERN_ERR PFX | ||
| 1013 | "io address range already allocated\n"); | ||
| 1014 | return -EBUSY; | ||
| 1015 | } | ||
| 1083 | 1016 | ||
| 1084 | if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { | 1017 | err = pcnet32_probe1(ioaddr, 1, pdev); |
| 1085 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1018 | if (err < 0) { |
| 1086 | printk(KERN_ERR PFX "architecture does not support 32bit PCI busmaster DMA\n"); | 1019 | pci_disable_device(pdev); |
| 1087 | return -ENODEV; | 1020 | } |
| 1088 | } | 1021 | return err; |
| 1089 | if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") == NULL) { | ||
| 1090 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1091 | printk(KERN_ERR PFX "io address range already allocated\n"); | ||
| 1092 | return -EBUSY; | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | err = pcnet32_probe1(ioaddr, 1, pdev); | ||
| 1096 | if (err < 0) { | ||
| 1097 | pci_disable_device(pdev); | ||
| 1098 | } | ||
| 1099 | return err; | ||
| 1100 | } | 1022 | } |
| 1101 | 1023 | ||
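
pcnet32_probe_pci above brings the device up in a fixed order: enable the PCI device, enable bus mastering, check that BAR0 supplies an I/O range, verify 32-bit DMA support, reserve the I/O region, and only then call pcnet32_probe1(), disabling the device again if that last step fails. A condensed user-space sketch of that ordered check-and-undo flow; the step functions are placeholders, not the real PCI calls:

#include <stdio.h>

/* Placeholder steps; the driver calls pci_enable_device(), pci_set_master(),
 * pci_resource_start(), pci_dma_supported(), request_region() and
 * pcnet32_probe1() in this order. */
static int enable_device(void)   { return 0; }
static int has_io_resource(void) { return 1; }
static int dma32_supported(void) { return 1; }
static int reserve_io(void)      { return 0; }
static int probe1(void)          { return -1; }	/* pretend probe1 fails */
static void disable_device(void) { puts("device disabled again"); }

static int probe_pci(void)
{
	int err = enable_device();

	if (err < 0)
		return err;
	if (!has_io_resource())
		return -19;	/* -ENODEV */
	if (!dma32_supported())
		return -19;	/* -ENODEV */
	if (reserve_io() != 0)
		return -16;	/* -EBUSY */
	err = probe1();
	if (err < 0)
		disable_device();	/* undo the enable on failure */
	return err;
}

int main(void)
{
	printf("probe result %d\n", probe_pci());
	return 0;
}
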
| 1102 | |||
| 1103 | /* pcnet32_probe1 | 1024 | /* pcnet32_probe1 |
| 1104 | * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. | 1025 | * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. |
| 1105 | * pdev will be NULL when called from pcnet32_probe_vlbus. | 1026 | * pdev will be NULL when called from pcnet32_probe_vlbus. |
| @@ -1107,630 +1028,764 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
| 1107 | static int __devinit | 1028 | static int __devinit |
| 1108 | pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | 1029 | pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) |
| 1109 | { | 1030 | { |
| 1110 | struct pcnet32_private *lp; | 1031 | struct pcnet32_private *lp; |
| 1111 | dma_addr_t lp_dma_addr; | 1032 | dma_addr_t lp_dma_addr; |
| 1112 | int i, media; | 1033 | int i, media; |
| 1113 | int fdx, mii, fset, dxsuflo; | 1034 | int fdx, mii, fset, dxsuflo; |
| 1114 | int chip_version; | 1035 | int chip_version; |
| 1115 | char *chipname; | 1036 | char *chipname; |
| 1116 | struct net_device *dev; | 1037 | struct net_device *dev; |
| 1117 | struct pcnet32_access *a = NULL; | 1038 | struct pcnet32_access *a = NULL; |
| 1118 | u8 promaddr[6]; | 1039 | u8 promaddr[6]; |
| 1119 | int ret = -ENODEV; | 1040 | int ret = -ENODEV; |
| 1120 | 1041 | ||
| 1121 | /* reset the chip */ | 1042 | /* reset the chip */ |
| 1122 | pcnet32_wio_reset(ioaddr); | 1043 | pcnet32_wio_reset(ioaddr); |
| 1123 | 1044 | ||
| 1124 | /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ | 1045 | /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ |
| 1125 | if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { | 1046 | if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { |
| 1126 | a = &pcnet32_wio; | 1047 | a = &pcnet32_wio; |
| 1127 | } else { | 1048 | } else { |
| 1128 | pcnet32_dwio_reset(ioaddr); | 1049 | pcnet32_dwio_reset(ioaddr); |
| 1129 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { | 1050 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 |
| 1130 | a = &pcnet32_dwio; | 1051 | && pcnet32_dwio_check(ioaddr)) { |
| 1131 | } else | 1052 | a = &pcnet32_dwio; |
| 1132 | goto err_release_region; | 1053 | } else |
| 1133 | } | 1054 | goto err_release_region; |
| 1134 | 1055 | } | |
| 1135 | chip_version = a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr,89) << 16); | 1056 | |
| 1136 | if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW)) | 1057 | chip_version = |
| 1137 | printk(KERN_INFO " PCnet chip version is %#x.\n", chip_version); | 1058 | a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16); |
| 1138 | if ((chip_version & 0xfff) != 0x003) { | 1059 | if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW)) |
| 1139 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1060 | printk(KERN_INFO " PCnet chip version is %#x.\n", |
| 1140 | printk(KERN_INFO PFX "Unsupported chip version.\n"); | 1061 | chip_version); |
| 1141 | goto err_release_region; | 1062 | if ((chip_version & 0xfff) != 0x003) { |
| 1142 | } | 1063 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1143 | 1064 | printk(KERN_INFO PFX "Unsupported chip version.\n"); | |
| 1144 | /* initialize variables */ | 1065 | goto err_release_region; |
| 1145 | fdx = mii = fset = dxsuflo = 0; | 1066 | } |
| 1146 | chip_version = (chip_version >> 12) & 0xffff; | 1067 | |
| 1147 | 1068 | /* initialize variables */ | |
| 1148 | switch (chip_version) { | 1069 | fdx = mii = fset = dxsuflo = 0; |
| 1149 | case 0x2420: | 1070 | chip_version = (chip_version >> 12) & 0xffff; |
| 1150 | chipname = "PCnet/PCI 79C970"; /* PCI */ | 1071 | |
| 1151 | break; | 1072 | switch (chip_version) { |
| 1152 | case 0x2430: | 1073 | case 0x2420: |
| 1153 | if (shared) | 1074 | chipname = "PCnet/PCI 79C970"; /* PCI */ |
| 1154 | chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ | 1075 | break; |
| 1155 | else | 1076 | case 0x2430: |
| 1156 | chipname = "PCnet/32 79C965"; /* 486/VL bus */ | 1077 | if (shared) |
| 1157 | break; | 1078 | chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ |
| 1158 | case 0x2621: | 1079 | else |
| 1159 | chipname = "PCnet/PCI II 79C970A"; /* PCI */ | 1080 | chipname = "PCnet/32 79C965"; /* 486/VL bus */ |
| 1160 | fdx = 1; | 1081 | break; |
| 1161 | break; | 1082 | case 0x2621: |
| 1162 | case 0x2623: | 1083 | chipname = "PCnet/PCI II 79C970A"; /* PCI */ |
| 1163 | chipname = "PCnet/FAST 79C971"; /* PCI */ | 1084 | fdx = 1; |
| 1164 | fdx = 1; mii = 1; fset = 1; | 1085 | break; |
| 1165 | break; | 1086 | case 0x2623: |
| 1166 | case 0x2624: | 1087 | chipname = "PCnet/FAST 79C971"; /* PCI */ |
| 1167 | chipname = "PCnet/FAST+ 79C972"; /* PCI */ | 1088 | fdx = 1; |
| 1168 | fdx = 1; mii = 1; fset = 1; | 1089 | mii = 1; |
| 1169 | break; | 1090 | fset = 1; |
| 1170 | case 0x2625: | 1091 | break; |
| 1171 | chipname = "PCnet/FAST III 79C973"; /* PCI */ | 1092 | case 0x2624: |
| 1172 | fdx = 1; mii = 1; | 1093 | chipname = "PCnet/FAST+ 79C972"; /* PCI */ |
| 1173 | break; | 1094 | fdx = 1; |
| 1174 | case 0x2626: | 1095 | mii = 1; |
| 1175 | chipname = "PCnet/Home 79C978"; /* PCI */ | 1096 | fset = 1; |
| 1176 | fdx = 1; | 1097 | break; |
| 1098 | case 0x2625: | ||
| 1099 | chipname = "PCnet/FAST III 79C973"; /* PCI */ | ||
| 1100 | fdx = 1; | ||
| 1101 | mii = 1; | ||
| 1102 | break; | ||
| 1103 | case 0x2626: | ||
| 1104 | chipname = "PCnet/Home 79C978"; /* PCI */ | ||
| 1105 | fdx = 1; | ||
| 1106 | /* | ||
| 1107 | * This is based on specs published at www.amd.com. This section | ||
| 1108 | * assumes that a card with a 79C978 wants to go into standard | ||
| 1109 | * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode, | ||
| 1110 | * and the module option homepna=1 can select this instead. | ||
| 1111 | */ | ||
| 1112 | media = a->read_bcr(ioaddr, 49); | ||
| 1113 | media &= ~3; /* default to 10Mb ethernet */ | ||
| 1114 | if (cards_found < MAX_UNITS && homepna[cards_found]) | ||
| 1115 | media |= 1; /* switch to home wiring mode */ | ||
| 1116 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1117 | printk(KERN_DEBUG PFX "media set to %sMbit mode.\n", | ||
| 1118 | (media & 1) ? "1" : "10"); | ||
| 1119 | a->write_bcr(ioaddr, 49, media); | ||
| 1120 | break; | ||
| 1121 | case 0x2627: | ||
| 1122 | chipname = "PCnet/FAST III 79C975"; /* PCI */ | ||
| 1123 | fdx = 1; | ||
| 1124 | mii = 1; | ||
| 1125 | break; | ||
| 1126 | case 0x2628: | ||
| 1127 | chipname = "PCnet/PRO 79C976"; | ||
| 1128 | fdx = 1; | ||
| 1129 | mii = 1; | ||
| 1130 | break; | ||
| 1131 | default: | ||
| 1132 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1133 | printk(KERN_INFO PFX | ||
| 1134 | "PCnet version %#x, no PCnet32 chip.\n", | ||
| 1135 | chip_version); | ||
| 1136 | goto err_release_region; | ||
| 1137 | } | ||
| 1138 | |||
| 1177 | /* | 1139 | /* |
| 1178 | * This is based on specs published at www.amd.com. This section | 1140 | * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit |
| 1179 | * assumes that a card with a 79C978 wants to go into standard | 1141 | * starting until the packet is loaded. Strike one for reliability, lose |
| 1180 | * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode, | 1142 | * one for latency - although on PCI this isnt a big loss. Older chips |
| 1181 | * and the module option homepna=1 can select this instead. | 1143 | * have FIFO's smaller than a packet, so you can't do this. |
| 1144 | * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn. | ||
| 1182 | */ | 1145 | */ |
| 1183 | media = a->read_bcr(ioaddr, 49); | 1146 | |
| 1184 | media &= ~3; /* default to 10Mb ethernet */ | 1147 | if (fset) { |
| 1185 | if (cards_found < MAX_UNITS && homepna[cards_found]) | 1148 | a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860)); |
| 1186 | media |= 1; /* switch to home wiring mode */ | 1149 | a->write_csr(ioaddr, 80, |
| 1187 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1150 | (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); |
| 1188 | printk(KERN_DEBUG PFX "media set to %sMbit mode.\n", | 1151 | dxsuflo = 1; |
| 1189 | (media & 1) ? "1" : "10"); | 1152 | } |
| 1190 | a->write_bcr(ioaddr, 49, media); | 1153 | |
| 1191 | break; | 1154 | dev = alloc_etherdev(0); |
| 1192 | case 0x2627: | 1155 | if (!dev) { |
| 1193 | chipname = "PCnet/FAST III 79C975"; /* PCI */ | 1156 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1194 | fdx = 1; mii = 1; | 1157 | printk(KERN_ERR PFX "Memory allocation failed.\n"); |
| 1195 | break; | 1158 | ret = -ENOMEM; |
| 1196 | case 0x2628: | 1159 | goto err_release_region; |
| 1197 | chipname = "PCnet/PRO 79C976"; | 1160 | } |
| 1198 | fdx = 1; mii = 1; | 1161 | SET_NETDEV_DEV(dev, &pdev->dev); |
| 1199 | break; | 1162 | |
| 1200 | default: | ||
| 1201 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1202 | printk(KERN_INFO PFX "PCnet version %#x, no PCnet32 chip.\n", | ||
| 1203 | chip_version); | ||
| 1204 | goto err_release_region; | ||
| 1205 | } | ||
| 1206 | |||
| 1207 | /* | ||
| 1208 | * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit | ||
| 1209 | * starting until the packet is loaded. Strike one for reliability, lose | ||
| 1210 | * one for latency - although on PCI this isnt a big loss. Older chips | ||
| 1211 | * have FIFO's smaller than a packet, so you can't do this. | ||
| 1212 | * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn. | ||
| 1213 | */ | ||
| 1214 | |||
| 1215 | if (fset) { | ||
| 1216 | a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860)); | ||
| 1217 | a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); | ||
| 1218 | dxsuflo = 1; | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | dev = alloc_etherdev(0); | ||
| 1222 | if (!dev) { | ||
| 1223 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1163 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1224 | printk(KERN_ERR PFX "Memory allocation failed.\n"); | 1164 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); |
| 1225 | ret = -ENOMEM; | 1165 | |
| 1226 | goto err_release_region; | 1166 | /* In most chips, after a chip reset, the ethernet address is read from the |
| 1227 | } | 1167 | * station address PROM at the base address and programmed into the |
| 1228 | SET_NETDEV_DEV(dev, &pdev->dev); | 1168 | * "Physical Address Registers" CSR12-14. |
| 1229 | 1169 | * As a precautionary measure, we read the PROM values and complain if | |
| 1230 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1170 | * they disagree with the CSRs. Either way, we use the CSR values, and |
| 1231 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); | 1171 | * double check that they are valid. |
| 1232 | 1172 | */ | |
| 1233 | /* In most chips, after a chip reset, the ethernet address is read from the | 1173 | for (i = 0; i < 3; i++) { |
| 1234 | * station address PROM at the base address and programmed into the | 1174 | unsigned int val; |
| 1235 | * "Physical Address Registers" CSR12-14. | 1175 | val = a->read_csr(ioaddr, i + 12) & 0x0ffff; |
| 1236 | * As a precautionary measure, we read the PROM values and complain if | 1176 | /* There may be endianness issues here. */ |
| 1237 | * they disagree with the CSRs. Either way, we use the CSR values, and | 1177 | dev->dev_addr[2 * i] = val & 0x0ff; |
| 1238 | * double check that they are valid. | 1178 | dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff; |
| 1239 | */ | 1179 | } |
| 1240 | for (i = 0; i < 3; i++) { | 1180 | |
| 1241 | unsigned int val; | 1181 | /* read PROM address and compare with CSR address */ |
| 1242 | val = a->read_csr(ioaddr, i+12) & 0x0ffff; | ||
| 1243 | /* There may be endianness issues here. */ | ||
| 1244 | dev->dev_addr[2*i] = val & 0x0ff; | ||
| 1245 | dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* read PROM address and compare with CSR address */ | ||
| 1249 | for (i = 0; i < 6; i++) | ||
| 1250 | promaddr[i] = inb(ioaddr + i); | ||
| 1251 | |||
| 1252 | if (memcmp(promaddr, dev->dev_addr, 6) | ||
| 1253 | || !is_valid_ether_addr(dev->dev_addr)) { | ||
| 1254 | if (is_valid_ether_addr(promaddr)) { | ||
| 1255 | if (pcnet32_debug & NETIF_MSG_PROBE) { | ||
| 1256 | printk(" warning: CSR address invalid,\n"); | ||
| 1257 | printk(KERN_INFO " using instead PROM address of"); | ||
| 1258 | } | ||
| 1259 | memcpy(dev->dev_addr, promaddr, 6); | ||
| 1260 | } | ||
| 1261 | } | ||
| 1262 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); | ||
| 1263 | |||
| 1264 | /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ | ||
| 1265 | if (!is_valid_ether_addr(dev->perm_addr)) | ||
| 1266 | memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); | ||
| 1267 | |||
| 1268 | if (pcnet32_debug & NETIF_MSG_PROBE) { | ||
| 1269 | for (i = 0; i < 6; i++) | 1182 | for (i = 0; i < 6; i++) |
| 1270 | printk(" %2.2x", dev->dev_addr[i]); | 1183 | promaddr[i] = inb(ioaddr + i); |
| 1271 | 1184 | ||
| 1272 | /* Version 0x2623 and 0x2624 */ | 1185 | if (memcmp(promaddr, dev->dev_addr, 6) |
| 1273 | if (((chip_version + 1) & 0xfffe) == 0x2624) { | 1186 | || !is_valid_ether_addr(dev->dev_addr)) { |
| 1274 | i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ | 1187 | if (is_valid_ether_addr(promaddr)) { |
| 1275 | printk("\n" KERN_INFO " tx_start_pt(0x%04x):",i); | 1188 | if (pcnet32_debug & NETIF_MSG_PROBE) { |
| 1276 | switch(i>>10) { | 1189 | printk(" warning: CSR address invalid,\n"); |
| 1277 | case 0: printk(" 20 bytes,"); break; | 1190 | printk(KERN_INFO |
| 1278 | case 1: printk(" 64 bytes,"); break; | 1191 | " using instead PROM address of"); |
| 1279 | case 2: printk(" 128 bytes,"); break; | 1192 | } |
| 1280 | case 3: printk("~220 bytes,"); break; | 1193 | memcpy(dev->dev_addr, promaddr, 6); |
| 1281 | } | 1194 | } |
| 1282 | i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ | 1195 | } |
| 1283 | printk(" BCR18(%x):",i&0xffff); | 1196 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); |
| 1284 | if (i & (1<<5)) printk("BurstWrEn "); | 1197 | |
| 1285 | if (i & (1<<6)) printk("BurstRdEn "); | 1198 | /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ |
| 1286 | if (i & (1<<7)) printk("DWordIO "); | 1199 | if (!is_valid_ether_addr(dev->perm_addr)) |
| 1287 | if (i & (1<<11)) printk("NoUFlow "); | 1200 | memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); |
| 1288 | i = a->read_bcr(ioaddr, 25); | 1201 | |
| 1289 | printk("\n" KERN_INFO " SRAMSIZE=0x%04x,",i<<8); | 1202 | if (pcnet32_debug & NETIF_MSG_PROBE) { |
| 1290 | i = a->read_bcr(ioaddr, 26); | 1203 | for (i = 0; i < 6; i++) |
| 1291 | printk(" SRAM_BND=0x%04x,",i<<8); | 1204 | printk(" %2.2x", dev->dev_addr[i]); |
| 1292 | i = a->read_bcr(ioaddr, 27); | 1205 | |
| 1293 | if (i & (1<<14)) printk("LowLatRx"); | 1206 | /* Version 0x2623 and 0x2624 */ |
| 1294 | } | 1207 | if (((chip_version + 1) & 0xfffe) == 0x2624) { |
| 1295 | } | 1208 | i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ |
| 1296 | 1209 | printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i); | |
| 1297 | dev->base_addr = ioaddr; | 1210 | switch (i >> 10) { |
| 1298 | /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ | 1211 | case 0: |
| 1299 | if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) { | 1212 | printk(" 20 bytes,"); |
| 1300 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1213 | break; |
| 1301 | printk(KERN_ERR PFX "Consistent memory allocation failed.\n"); | 1214 | case 1: |
| 1302 | ret = -ENOMEM; | 1215 | printk(" 64 bytes,"); |
| 1303 | goto err_free_netdev; | 1216 | break; |
| 1304 | } | 1217 | case 2: |
| 1305 | 1218 | printk(" 128 bytes,"); | |
| 1306 | memset(lp, 0, sizeof(*lp)); | 1219 | break; |
| 1307 | lp->dma_addr = lp_dma_addr; | 1220 | case 3: |
| 1308 | lp->pci_dev = pdev; | 1221 | printk("~220 bytes,"); |
| 1309 | 1222 | break; | |
| 1310 | spin_lock_init(&lp->lock); | 1223 | } |
| 1311 | 1224 | i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ | |
| 1312 | SET_MODULE_OWNER(dev); | 1225 | printk(" BCR18(%x):", i & 0xffff); |
| 1313 | SET_NETDEV_DEV(dev, &pdev->dev); | 1226 | if (i & (1 << 5)) |
| 1314 | dev->priv = lp; | 1227 | printk("BurstWrEn "); |
| 1315 | lp->name = chipname; | 1228 | if (i & (1 << 6)) |
| 1316 | lp->shared_irq = shared; | 1229 | printk("BurstRdEn "); |
| 1317 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ | 1230 | if (i & (1 << 7)) |
| 1318 | lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */ | 1231 | printk("DWordIO "); |
| 1319 | lp->tx_mod_mask = lp->tx_ring_size - 1; | 1232 | if (i & (1 << 11)) |
| 1320 | lp->rx_mod_mask = lp->rx_ring_size - 1; | 1233 | printk("NoUFlow "); |
| 1321 | lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12); | 1234 | i = a->read_bcr(ioaddr, 25); |
| 1322 | lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4); | 1235 | printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8); |
| 1323 | lp->mii_if.full_duplex = fdx; | 1236 | i = a->read_bcr(ioaddr, 26); |
| 1324 | lp->mii_if.phy_id_mask = 0x1f; | 1237 | printk(" SRAM_BND=0x%04x,", i << 8); |
| 1325 | lp->mii_if.reg_num_mask = 0x1f; | 1238 | i = a->read_bcr(ioaddr, 27); |
| 1326 | lp->dxsuflo = dxsuflo; | 1239 | if (i & (1 << 14)) |
| 1327 | lp->mii = mii; | 1240 | printk("LowLatRx"); |
| 1328 | lp->msg_enable = pcnet32_debug; | 1241 | } |
| 1329 | if ((cards_found >= MAX_UNITS) || (options[cards_found] > sizeof(options_mapping))) | 1242 | } |
| 1330 | lp->options = PCNET32_PORT_ASEL; | 1243 | |
| 1331 | else | 1244 | dev->base_addr = ioaddr; |
| 1332 | lp->options = options_mapping[options[cards_found]]; | 1245 | /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ |
| 1333 | lp->mii_if.dev = dev; | 1246 | if ((lp = |
| 1334 | lp->mii_if.mdio_read = mdio_read; | 1247 | pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) { |
| 1335 | lp->mii_if.mdio_write = mdio_write; | 1248 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1336 | 1249 | printk(KERN_ERR PFX | |
| 1337 | if (fdx && !(lp->options & PCNET32_PORT_ASEL) && | 1250 | "Consistent memory allocation failed.\n"); |
| 1338 | ((cards_found>=MAX_UNITS) || full_duplex[cards_found])) | 1251 | ret = -ENOMEM; |
| 1339 | lp->options |= PCNET32_PORT_FD; | 1252 | goto err_free_netdev; |
| 1340 | 1253 | } | |
| 1341 | if (!a) { | 1254 | |
| 1342 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1255 | memset(lp, 0, sizeof(*lp)); |
| 1343 | printk(KERN_ERR PFX "No access methods\n"); | 1256 | lp->dma_addr = lp_dma_addr; |
| 1344 | ret = -ENODEV; | 1257 | lp->pci_dev = pdev; |
| 1345 | goto err_free_consistent; | 1258 | |
| 1346 | } | 1259 | spin_lock_init(&lp->lock); |
| 1347 | lp->a = *a; | 1260 | |
| 1348 | 1261 | SET_MODULE_OWNER(dev); | |
| 1349 | /* prior to register_netdev, dev->name is not yet correct */ | 1262 | SET_NETDEV_DEV(dev, &pdev->dev); |
| 1350 | if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) { | 1263 | dev->priv = lp; |
| 1351 | ret = -ENOMEM; | 1264 | lp->name = chipname; |
| 1352 | goto err_free_ring; | 1265 | lp->shared_irq = shared; |
| 1353 | } | 1266 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ |
| 1354 | /* detect special T1/E1 WAN card by checking for MAC address */ | 1267 | lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */ |
| 1355 | if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 | 1268 | lp->tx_mod_mask = lp->tx_ring_size - 1; |
| 1269 | lp->rx_mod_mask = lp->rx_ring_size - 1; | ||
| 1270 | lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12); | ||
| 1271 | lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4); | ||
| 1272 | lp->mii_if.full_duplex = fdx; | ||
| 1273 | lp->mii_if.phy_id_mask = 0x1f; | ||
| 1274 | lp->mii_if.reg_num_mask = 0x1f; | ||
| 1275 | lp->dxsuflo = dxsuflo; | ||
| 1276 | lp->mii = mii; | ||
| 1277 | lp->msg_enable = pcnet32_debug; | ||
| 1278 | if ((cards_found >= MAX_UNITS) | ||
| 1279 | || (options[cards_found] > sizeof(options_mapping))) | ||
| 1280 | lp->options = PCNET32_PORT_ASEL; | ||
| 1281 | else | ||
| 1282 | lp->options = options_mapping[options[cards_found]]; | ||
| 1283 | lp->mii_if.dev = dev; | ||
| 1284 | lp->mii_if.mdio_read = mdio_read; | ||
| 1285 | lp->mii_if.mdio_write = mdio_write; | ||
| 1286 | |||
| 1287 | if (fdx && !(lp->options & PCNET32_PORT_ASEL) && | ||
| 1288 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) | ||
| 1289 | lp->options |= PCNET32_PORT_FD; | ||
| 1290 | |||
| 1291 | if (!a) { | ||
| 1292 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1293 | printk(KERN_ERR PFX "No access methods\n"); | ||
| 1294 | ret = -ENODEV; | ||
| 1295 | goto err_free_consistent; | ||
| 1296 | } | ||
| 1297 | lp->a = *a; | ||
| 1298 | |||
| 1299 | /* prior to register_netdev, dev->name is not yet correct */ | ||
| 1300 | if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) { | ||
| 1301 | ret = -ENOMEM; | ||
| 1302 | goto err_free_ring; | ||
| 1303 | } | ||
| 1304 | /* detect special T1/E1 WAN card by checking for MAC address */ | ||
| 1305 | if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 | ||
| 1356 | && dev->dev_addr[2] == 0x75) | 1306 | && dev->dev_addr[2] == 0x75) |
| 1357 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; | 1307 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; |
| 1358 | |||
| 1359 | lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ | ||
| 1360 | lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); | ||
| 1361 | for (i = 0; i < 6; i++) | ||
| 1362 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | ||
| 1363 | lp->init_block.filter[0] = 0x00000000; | ||
| 1364 | lp->init_block.filter[1] = 0x00000000; | ||
| 1365 | lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr); | ||
| 1366 | lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr); | ||
| 1367 | |||
| 1368 | /* switch pcnet32 to 32bit mode */ | ||
| 1369 | a->write_bcr(ioaddr, 20, 2); | ||
| 1370 | |||
| 1371 | a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
| 1372 | init_block)) & 0xffff); | ||
| 1373 | a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
| 1374 | init_block)) >> 16); | ||
| 1375 | |||
| 1376 | if (pdev) { /* use the IRQ provided by PCI */ | ||
| 1377 | dev->irq = pdev->irq; | ||
| 1378 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1379 | printk(" assigned IRQ %d.\n", dev->irq); | ||
| 1380 | } else { | ||
| 1381 | unsigned long irq_mask = probe_irq_on(); | ||
| 1382 | 1308 | ||
| 1383 | /* | 1309 | lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ |
| 1384 | * To auto-IRQ we enable the initialization-done and DMA error | 1310 | lp->init_block.tlen_rlen = |
| 1385 | * interrupts. For ISA boards we get a DMA error, but VLB and PCI | 1311 | le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); |
| 1386 | * boards will work. | 1312 | for (i = 0; i < 6; i++) |
| 1387 | */ | 1313 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; |
| 1388 | /* Trigger an initialization just for the interrupt. */ | 1314 | lp->init_block.filter[0] = 0x00000000; |
| 1389 | a->write_csr (ioaddr, 0, 0x41); | 1315 | lp->init_block.filter[1] = 0x00000000; |
| 1390 | mdelay (1); | 1316 | lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr); |
| 1317 | lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr); | ||
| 1318 | |||
| 1319 | /* switch pcnet32 to 32bit mode */ | ||
| 1320 | a->write_bcr(ioaddr, 20, 2); | ||
| 1321 | |||
| 1322 | a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
| 1323 | init_block)) & 0xffff); | ||
| 1324 | a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, | ||
| 1325 | init_block)) >> 16); | ||
| 1326 | |||
| 1327 | if (pdev) { /* use the IRQ provided by PCI */ | ||
| 1328 | dev->irq = pdev->irq; | ||
| 1329 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1330 | printk(" assigned IRQ %d.\n", dev->irq); | ||
| 1331 | } else { | ||
| 1332 | unsigned long irq_mask = probe_irq_on(); | ||
| 1333 | |||
| 1334 | /* | ||
| 1335 | * To auto-IRQ we enable the initialization-done and DMA error | ||
| 1336 | * interrupts. For ISA boards we get a DMA error, but VLB and PCI | ||
| 1337 | * boards will work. | ||
| 1338 | */ | ||
| 1339 | /* Trigger an initialization just for the interrupt. */ | ||
| 1340 | a->write_csr(ioaddr, 0, 0x41); | ||
| 1341 | mdelay(1); | ||
| 1342 | |||
| 1343 | dev->irq = probe_irq_off(irq_mask); | ||
| 1344 | if (!dev->irq) { | ||
| 1345 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1346 | printk(", failed to detect IRQ line.\n"); | ||
| 1347 | ret = -ENODEV; | ||
| 1348 | goto err_free_ring; | ||
| 1349 | } | ||
| 1350 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1351 | printk(", probed IRQ %d.\n", dev->irq); | ||
| 1352 | } | ||
| 1391 | 1353 | ||
| 1392 | dev->irq = probe_irq_off (irq_mask); | 1354 | /* Set the mii phy_id so that we can query the link state */ |
| 1393 | if (!dev->irq) { | 1355 | if (lp->mii) { |
| 1394 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1356 | /* lp->phycount and lp->phymask are set to 0 by memset above */ |
| 1395 | printk(", failed to detect IRQ line.\n"); | 1357 | |
| 1396 | ret = -ENODEV; | 1358 | lp->mii_if.phy_id = ((lp->a.read_bcr(ioaddr, 33)) >> 5) & 0x1f; |
| 1397 | goto err_free_ring; | 1359 | /* scan for PHYs */ |
| 1360 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { | ||
| 1361 | unsigned short id1, id2; | ||
| 1362 | |||
| 1363 | id1 = mdio_read(dev, i, MII_PHYSID1); | ||
| 1364 | if (id1 == 0xffff) | ||
| 1365 | continue; | ||
| 1366 | id2 = mdio_read(dev, i, MII_PHYSID2); | ||
| 1367 | if (id2 == 0xffff) | ||
| 1368 | continue; | ||
| 1369 | if (i == 31 && ((chip_version + 1) & 0xfffe) == 0x2624) | ||
| 1370 | continue; /* 79C971 & 79C972 have phantom phy at id 31 */ | ||
| 1371 | lp->phycount++; | ||
| 1372 | lp->phymask |= (1 << i); | ||
| 1373 | lp->mii_if.phy_id = i; | ||
| 1374 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1375 | printk(KERN_INFO PFX | ||
| 1376 | "Found PHY %04x:%04x at address %d.\n", | ||
| 1377 | id1, id2, i); | ||
| 1378 | } | ||
| 1379 | lp->a.write_bcr(ioaddr, 33, (lp->mii_if.phy_id) << 5); | ||
| 1380 | if (lp->phycount > 1) { | ||
| 1381 | lp->options |= PCNET32_PORT_MII; | ||
| 1382 | } | ||
| 1398 | } | 1383 | } |
| 1399 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1384 | |
| 1400 | printk(", probed IRQ %d.\n", dev->irq); | 1385 | init_timer(&lp->watchdog_timer); |
| 1401 | } | 1386 | lp->watchdog_timer.data = (unsigned long)dev; |
| 1402 | 1387 | lp->watchdog_timer.function = (void *)&pcnet32_watchdog; | |
| 1403 | /* Set the mii phy_id so that we can query the link state */ | 1388 | |
| 1404 | if (lp->mii) | 1389 | /* The PCNET32-specific entries in the device structure. */ |
| 1405 | lp->mii_if.phy_id = ((lp->a.read_bcr (ioaddr, 33)) >> 5) & 0x1f; | 1390 | dev->open = &pcnet32_open; |
| 1406 | 1391 | dev->hard_start_xmit = &pcnet32_start_xmit; | |
| 1407 | init_timer (&lp->watchdog_timer); | 1392 | dev->stop = &pcnet32_close; |
| 1408 | lp->watchdog_timer.data = (unsigned long) dev; | 1393 | dev->get_stats = &pcnet32_get_stats; |
| 1409 | lp->watchdog_timer.function = (void *) &pcnet32_watchdog; | 1394 | dev->set_multicast_list = &pcnet32_set_multicast_list; |
| 1410 | 1395 | dev->do_ioctl = &pcnet32_ioctl; | |
| 1411 | /* The PCNET32-specific entries in the device structure. */ | 1396 | dev->ethtool_ops = &pcnet32_ethtool_ops; |
| 1412 | dev->open = &pcnet32_open; | 1397 | dev->tx_timeout = pcnet32_tx_timeout; |
| 1413 | dev->hard_start_xmit = &pcnet32_start_xmit; | 1398 | dev->watchdog_timeo = (5 * HZ); |
| 1414 | dev->stop = &pcnet32_close; | ||
| 1415 | dev->get_stats = &pcnet32_get_stats; | ||
| 1416 | dev->set_multicast_list = &pcnet32_set_multicast_list; | ||
| 1417 | dev->do_ioctl = &pcnet32_ioctl; | ||
| 1418 | dev->ethtool_ops = &pcnet32_ethtool_ops; | ||
| 1419 | dev->tx_timeout = pcnet32_tx_timeout; | ||
| 1420 | dev->watchdog_timeo = (5*HZ); | ||
| 1421 | 1399 | ||
| 1422 | #ifdef CONFIG_NET_POLL_CONTROLLER | 1400 | #ifdef CONFIG_NET_POLL_CONTROLLER |
| 1423 | dev->poll_controller = pcnet32_poll_controller; | 1401 | dev->poll_controller = pcnet32_poll_controller; |
| 1424 | #endif | 1402 | #endif |
| 1425 | 1403 | ||
| 1426 | /* Fill in the generic fields of the device structure. */ | 1404 | /* Fill in the generic fields of the device structure. */ |
| 1427 | if (register_netdev(dev)) | 1405 | if (register_netdev(dev)) |
| 1428 | goto err_free_ring; | 1406 | goto err_free_ring; |
| 1429 | 1407 | ||
| 1430 | if (pdev) { | 1408 | if (pdev) { |
| 1431 | pci_set_drvdata(pdev, dev); | 1409 | pci_set_drvdata(pdev, dev); |
| 1432 | } else { | 1410 | } else { |
| 1433 | lp->next = pcnet32_dev; | 1411 | lp->next = pcnet32_dev; |
| 1434 | pcnet32_dev = dev; | 1412 | pcnet32_dev = dev; |
| 1435 | } | 1413 | } |
| 1436 | |||
| 1437 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1438 | printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); | ||
| 1439 | cards_found++; | ||
| 1440 | |||
| 1441 | /* enable LED writes */ | ||
| 1442 | a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000); | ||
| 1443 | |||
| 1444 | return 0; | ||
| 1445 | |||
| 1446 | err_free_ring: | ||
| 1447 | pcnet32_free_ring(dev); | ||
| 1448 | err_free_consistent: | ||
| 1449 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | ||
| 1450 | err_free_netdev: | ||
| 1451 | free_netdev(dev); | ||
| 1452 | err_release_region: | ||
| 1453 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | ||
| 1454 | return ret; | ||
| 1455 | } | ||
| 1456 | 1414 | ||
| 1415 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1416 | printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); | ||
| 1417 | cards_found++; | ||
| 1418 | |||
| 1419 | /* enable LED writes */ | ||
| 1420 | a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000); | ||
| 1421 | |||
| 1422 | return 0; | ||
| 1423 | |||
| 1424 | err_free_ring: | ||
| 1425 | pcnet32_free_ring(dev); | ||
| 1426 | err_free_consistent: | ||
| 1427 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | ||
| 1428 | err_free_netdev: | ||
| 1429 | free_netdev(dev); | ||
| 1430 | err_release_region: | ||
| 1431 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | ||
| 1432 | return ret; | ||
| 1433 | } | ||
| 1457 | 1434 | ||
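Note: both the probe path above and pcnet32_open further down program the 32-bit bus address of the init block into two 16-bit registers, CSR1 (low half) and CSR2 (high half), via write_csr(ioaddr, 1, addr & 0xffff) and write_csr(ioaddr, 2, addr >> 16). A minimal userspace sketch of that split; the address value here is purely illustrative, standing in for lp->dma_addr + offsetof(struct pcnet32_private, init_block):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Illustrative stand-in for the init block's bus address. */
            uint32_t init_block_bus_addr = 0x12345678u;

            uint16_t csr1 = init_block_bus_addr & 0xffff;   /* low 16 bits  -> CSR1 */
            uint16_t csr2 = init_block_bus_addr >> 16;      /* high 16 bits -> CSR2 */

            printf("CSR1=%#06x CSR2=%#06x\n", csr1, csr2);
            return 0;
    }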
| 1458 | /* if any allocation fails, caller must also call pcnet32_free_ring */ | 1435 | /* if any allocation fails, caller must also call pcnet32_free_ring */ |
| 1459 | static int pcnet32_alloc_ring(struct net_device *dev, char *name) | 1436 | static int pcnet32_alloc_ring(struct net_device *dev, char *name) |
| 1460 | { | 1437 | { |
| 1461 | struct pcnet32_private *lp = dev->priv; | 1438 | struct pcnet32_private *lp = dev->priv; |
| 1462 | 1439 | ||
| 1463 | lp->tx_ring = pci_alloc_consistent(lp->pci_dev, | 1440 | lp->tx_ring = pci_alloc_consistent(lp->pci_dev, |
| 1464 | sizeof(struct pcnet32_tx_head) * lp->tx_ring_size, | 1441 | sizeof(struct pcnet32_tx_head) * |
| 1465 | &lp->tx_ring_dma_addr); | 1442 | lp->tx_ring_size, |
| 1466 | if (lp->tx_ring == NULL) { | 1443 | &lp->tx_ring_dma_addr); |
| 1467 | if (pcnet32_debug & NETIF_MSG_DRV) | 1444 | if (lp->tx_ring == NULL) { |
| 1468 | printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n", | 1445 | if (pcnet32_debug & NETIF_MSG_DRV) |
| 1469 | name); | 1446 | printk("\n" KERN_ERR PFX |
| 1470 | return -ENOMEM; | 1447 | "%s: Consistent memory allocation failed.\n", |
| 1471 | } | 1448 | name); |
| 1472 | 1449 | return -ENOMEM; | |
| 1473 | lp->rx_ring = pci_alloc_consistent(lp->pci_dev, | 1450 | } |
| 1474 | sizeof(struct pcnet32_rx_head) * lp->rx_ring_size, | ||
| 1475 | &lp->rx_ring_dma_addr); | ||
| 1476 | if (lp->rx_ring == NULL) { | ||
| 1477 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1478 | printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n", | ||
| 1479 | name); | ||
| 1480 | return -ENOMEM; | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size, | ||
| 1484 | GFP_ATOMIC); | ||
| 1485 | if (!lp->tx_dma_addr) { | ||
| 1486 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1487 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
| 1488 | return -ENOMEM; | ||
| 1489 | } | ||
| 1490 | memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size); | ||
| 1491 | |||
| 1492 | lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size, | ||
| 1493 | GFP_ATOMIC); | ||
| 1494 | if (!lp->rx_dma_addr) { | ||
| 1495 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1496 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
| 1497 | return -ENOMEM; | ||
| 1498 | } | ||
| 1499 | memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size); | ||
| 1500 | |||
| 1501 | lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size, | ||
| 1502 | GFP_ATOMIC); | ||
| 1503 | if (!lp->tx_skbuff) { | ||
| 1504 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1505 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
| 1506 | return -ENOMEM; | ||
| 1507 | } | ||
| 1508 | memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size); | ||
| 1509 | |||
| 1510 | lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size, | ||
| 1511 | GFP_ATOMIC); | ||
| 1512 | if (!lp->rx_skbuff) { | ||
| 1513 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1514 | printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name); | ||
| 1515 | return -ENOMEM; | ||
| 1516 | } | ||
| 1517 | memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size); | ||
| 1518 | 1451 | ||
| 1519 | return 0; | 1452 | lp->rx_ring = pci_alloc_consistent(lp->pci_dev, |
| 1520 | } | 1453 | sizeof(struct pcnet32_rx_head) * |
| 1454 | lp->rx_ring_size, | ||
| 1455 | &lp->rx_ring_dma_addr); | ||
| 1456 | if (lp->rx_ring == NULL) { | ||
| 1457 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1458 | printk("\n" KERN_ERR PFX | ||
| 1459 | "%s: Consistent memory allocation failed.\n", | ||
| 1460 | name); | ||
| 1461 | return -ENOMEM; | ||
| 1462 | } | ||
| 1521 | 1463 | ||
| 1464 | lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size, | ||
| 1465 | GFP_ATOMIC); | ||
| 1466 | if (!lp->tx_dma_addr) { | ||
| 1467 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1468 | printk("\n" KERN_ERR PFX | ||
| 1469 | "%s: Memory allocation failed.\n", name); | ||
| 1470 | return -ENOMEM; | ||
| 1471 | } | ||
| 1472 | memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size); | ||
| 1473 | |||
| 1474 | lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size, | ||
| 1475 | GFP_ATOMIC); | ||
| 1476 | if (!lp->rx_dma_addr) { | ||
| 1477 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1478 | printk("\n" KERN_ERR PFX | ||
| 1479 | "%s: Memory allocation failed.\n", name); | ||
| 1480 | return -ENOMEM; | ||
| 1481 | } | ||
| 1482 | memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size); | ||
| 1483 | |||
| 1484 | lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size, | ||
| 1485 | GFP_ATOMIC); | ||
| 1486 | if (!lp->tx_skbuff) { | ||
| 1487 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1488 | printk("\n" KERN_ERR PFX | ||
| 1489 | "%s: Memory allocation failed.\n", name); | ||
| 1490 | return -ENOMEM; | ||
| 1491 | } | ||
| 1492 | memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size); | ||
| 1493 | |||
| 1494 | lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size, | ||
| 1495 | GFP_ATOMIC); | ||
| 1496 | if (!lp->rx_skbuff) { | ||
| 1497 | if (pcnet32_debug & NETIF_MSG_DRV) | ||
| 1498 | printk("\n" KERN_ERR PFX | ||
| 1499 | "%s: Memory allocation failed.\n", name); | ||
| 1500 | return -ENOMEM; | ||
| 1501 | } | ||
| 1502 | memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size); | ||
| 1503 | |||
| 1504 | return 0; | ||
| 1505 | } | ||
| 1522 | 1506 | ||
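Note: pcnet32_alloc_ring pairs the DMA-consistent descriptor rings with ordinary kernel memory for the per-descriptor bookkeeping (dma_addr_t and sk_buff pointer arrays), all sized by the current ring sizes and zeroed. A rough userspace analogue of the sizing and zeroing, with calloc standing in for kmalloc+memset and simplified structs standing in for the real descriptor types; ring sizes of 16 and 32 are illustrative defaults, not quoted from the header:

    #include <stdlib.h>

    struct tx_head { unsigned int base; short length, status; unsigned int misc; };
    struct rx_head { unsigned int base; short buf_length, status; unsigned int msg_length; };

    struct ring_bookkeeping {
            struct tx_head *tx_ring;
            struct rx_head *rx_ring;
            void **tx_skbuff;               /* stands in for struct sk_buff *[] */
            void **rx_skbuff;
            unsigned long *tx_dma_addr;     /* stands in for dma_addr_t[] */
            unsigned long *rx_dma_addr;
    };

    /* 0 on success, -1 if anything failed; caller frees whatever was set. */
    static int alloc_ring(struct ring_bookkeeping *rb, size_t tx_size, size_t rx_size)
    {
            rb->tx_ring = calloc(tx_size, sizeof(*rb->tx_ring));
            rb->rx_ring = calloc(rx_size, sizeof(*rb->rx_ring));
            rb->tx_skbuff = calloc(tx_size, sizeof(*rb->tx_skbuff));
            rb->rx_skbuff = calloc(rx_size, sizeof(*rb->rx_skbuff));
            rb->tx_dma_addr = calloc(tx_size, sizeof(*rb->tx_dma_addr));
            rb->rx_dma_addr = calloc(rx_size, sizeof(*rb->rx_dma_addr));
            if (!rb->tx_ring || !rb->rx_ring || !rb->tx_skbuff ||
                !rb->rx_skbuff || !rb->tx_dma_addr || !rb->rx_dma_addr)
                    return -1;
            return 0;
    }

    int main(void)
    {
            struct ring_bookkeeping rb = { 0 };

            return alloc_ring(&rb, 16, 32) ? 1 : 0;
    }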
| 1523 | static void pcnet32_free_ring(struct net_device *dev) | 1507 | static void pcnet32_free_ring(struct net_device *dev) |
| 1524 | { | 1508 | { |
| 1525 | struct pcnet32_private *lp = dev->priv; | 1509 | struct pcnet32_private *lp = dev->priv; |
| 1526 | 1510 | ||
| 1527 | kfree(lp->tx_skbuff); | 1511 | kfree(lp->tx_skbuff); |
| 1528 | lp->tx_skbuff = NULL; | 1512 | lp->tx_skbuff = NULL; |
| 1529 | 1513 | ||
| 1530 | kfree(lp->rx_skbuff); | 1514 | kfree(lp->rx_skbuff); |
| 1531 | lp->rx_skbuff = NULL; | 1515 | lp->rx_skbuff = NULL; |
| 1532 | 1516 | ||
| 1533 | kfree(lp->tx_dma_addr); | 1517 | kfree(lp->tx_dma_addr); |
| 1534 | lp->tx_dma_addr = NULL; | 1518 | lp->tx_dma_addr = NULL; |
| 1535 | 1519 | ||
| 1536 | kfree(lp->rx_dma_addr); | 1520 | kfree(lp->rx_dma_addr); |
| 1537 | lp->rx_dma_addr = NULL; | 1521 | lp->rx_dma_addr = NULL; |
| 1538 | 1522 | ||
| 1539 | if (lp->tx_ring) { | 1523 | if (lp->tx_ring) { |
| 1540 | pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_tx_head) * lp->tx_ring_size, | 1524 | pci_free_consistent(lp->pci_dev, |
| 1541 | lp->tx_ring, lp->tx_ring_dma_addr); | 1525 | sizeof(struct pcnet32_tx_head) * |
| 1542 | lp->tx_ring = NULL; | 1526 | lp->tx_ring_size, lp->tx_ring, |
| 1543 | } | 1527 | lp->tx_ring_dma_addr); |
| 1528 | lp->tx_ring = NULL; | ||
| 1529 | } | ||
| 1544 | 1530 | ||
| 1545 | if (lp->rx_ring) { | 1531 | if (lp->rx_ring) { |
| 1546 | pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_rx_head) * lp->rx_ring_size, | 1532 | pci_free_consistent(lp->pci_dev, |
| 1547 | lp->rx_ring, lp->rx_ring_dma_addr); | 1533 | sizeof(struct pcnet32_rx_head) * |
| 1548 | lp->rx_ring = NULL; | 1534 | lp->rx_ring_size, lp->rx_ring, |
| 1549 | } | 1535 | lp->rx_ring_dma_addr); |
| 1536 | lp->rx_ring = NULL; | ||
| 1537 | } | ||
| 1550 | } | 1538 | } |
| 1551 | 1539 | ||
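Note: pcnet32_free_ring frees each piece and immediately NULLs the pointer, which is what lets it double as the error-path cleanup for a partially successful pcnet32_alloc_ring: kfree(NULL) is a no-op, and the NULL stores make a second call harmless. The same idiom in plain C (free() also accepts NULL); the struct and names here are hypothetical:

    #include <stdlib.h>

    struct buffers { void *a, *b; };

    static void free_buffers(struct buffers *bufs)
    {
            free(bufs->a);          /* free(NULL) is defined as a no-op */
            bufs->a = NULL;
            free(bufs->b);
            bufs->b = NULL;
    }

    int main(void)
    {
            struct buffers bufs = { malloc(64), NULL };  /* second allocation "failed" */

            free_buffers(&bufs);    /* safe on the partial set */
            free_buffers(&bufs);    /* and safe to call again   */
            return 0;
    }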
| 1552 | 1540 | static int pcnet32_open(struct net_device *dev) | |
| 1553 | static int | ||
| 1554 | pcnet32_open(struct net_device *dev) | ||
| 1555 | { | 1541 | { |
| 1556 | struct pcnet32_private *lp = dev->priv; | 1542 | struct pcnet32_private *lp = dev->priv; |
| 1557 | unsigned long ioaddr = dev->base_addr; | 1543 | unsigned long ioaddr = dev->base_addr; |
| 1558 | u16 val; | 1544 | u16 val; |
| 1559 | int i; | 1545 | int i; |
| 1560 | int rc; | 1546 | int rc; |
| 1561 | unsigned long flags; | 1547 | unsigned long flags; |
| 1562 | 1548 | ||
| 1563 | if (request_irq(dev->irq, &pcnet32_interrupt, | 1549 | if (request_irq(dev->irq, &pcnet32_interrupt, |
| 1564 | lp->shared_irq ? SA_SHIRQ : 0, dev->name, (void *)dev)) { | 1550 | lp->shared_irq ? SA_SHIRQ : 0, dev->name, |
| 1565 | return -EAGAIN; | 1551 | (void *)dev)) { |
| 1566 | } | 1552 | return -EAGAIN; |
| 1567 | 1553 | } | |
| 1568 | spin_lock_irqsave(&lp->lock, flags); | 1554 | |
| 1569 | /* Check for a valid station address */ | 1555 | spin_lock_irqsave(&lp->lock, flags); |
| 1570 | if (!is_valid_ether_addr(dev->dev_addr)) { | 1556 | /* Check for a valid station address */ |
| 1571 | rc = -EINVAL; | 1557 | if (!is_valid_ether_addr(dev->dev_addr)) { |
| 1572 | goto err_free_irq; | 1558 | rc = -EINVAL; |
| 1573 | } | 1559 | goto err_free_irq; |
| 1574 | 1560 | } | |
| 1575 | /* Reset the PCNET32 */ | 1561 | |
| 1576 | lp->a.reset (ioaddr); | 1562 | /* Reset the PCNET32 */ |
| 1577 | 1563 | lp->a.reset(ioaddr); | |
| 1578 | /* switch pcnet32 to 32bit mode */ | 1564 | |
| 1579 | lp->a.write_bcr (ioaddr, 20, 2); | 1565 | /* switch pcnet32 to 32bit mode */ |
| 1580 | 1566 | lp->a.write_bcr(ioaddr, 20, 2); | |
| 1581 | if (netif_msg_ifup(lp)) | 1567 | |
| 1582 | printk(KERN_DEBUG "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", | 1568 | if (netif_msg_ifup(lp)) |
| 1583 | dev->name, dev->irq, | 1569 | printk(KERN_DEBUG |
| 1584 | (u32) (lp->tx_ring_dma_addr), | 1570 | "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", |
| 1585 | (u32) (lp->rx_ring_dma_addr), | 1571 | dev->name, dev->irq, (u32) (lp->tx_ring_dma_addr), |
| 1586 | (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block))); | 1572 | (u32) (lp->rx_ring_dma_addr), |
| 1587 | 1573 | (u32) (lp->dma_addr + | |
| 1588 | /* set/reset autoselect bit */ | 1574 | offsetof(struct pcnet32_private, init_block))); |
| 1589 | val = lp->a.read_bcr (ioaddr, 2) & ~2; | 1575 | |
| 1590 | if (lp->options & PCNET32_PORT_ASEL) | 1576 | /* set/reset autoselect bit */ |
| 1591 | val |= 2; | 1577 | val = lp->a.read_bcr(ioaddr, 2) & ~2; |
| 1592 | lp->a.write_bcr (ioaddr, 2, val); | 1578 | if (lp->options & PCNET32_PORT_ASEL) |
| 1593 | |||
| 1594 | /* handle full duplex setting */ | ||
| 1595 | if (lp->mii_if.full_duplex) { | ||
| 1596 | val = lp->a.read_bcr (ioaddr, 9) & ~3; | ||
| 1597 | if (lp->options & PCNET32_PORT_FD) { | ||
| 1598 | val |= 1; | ||
| 1599 | if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) | ||
| 1600 | val |= 2; | 1579 | val |= 2; |
| 1601 | } else if (lp->options & PCNET32_PORT_ASEL) { | 1580 | lp->a.write_bcr(ioaddr, 2, val); |
| 1602 | /* workaround of xSeries250, turn on for 79C975 only */ | 1581 | |
| 1603 | i = ((lp->a.read_csr(ioaddr, 88) | | 1582 | /* handle full duplex setting */ |
| 1604 | (lp->a.read_csr(ioaddr,89) << 16)) >> 12) & 0xffff; | 1583 | if (lp->mii_if.full_duplex) { |
| 1605 | if (i == 0x2627) | 1584 | val = lp->a.read_bcr(ioaddr, 9) & ~3; |
| 1606 | val |= 3; | 1585 | if (lp->options & PCNET32_PORT_FD) { |
| 1607 | } | 1586 | val |= 1; |
| 1608 | lp->a.write_bcr (ioaddr, 9, val); | 1587 | if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) |
| 1609 | } | 1588 | val |= 2; |
| 1610 | 1589 | } else if (lp->options & PCNET32_PORT_ASEL) { | |
| 1611 | /* set/reset GPSI bit in test register */ | 1590 | /* workaround of xSeries250, turn on for 79C975 only */ |
| 1612 | val = lp->a.read_csr (ioaddr, 124) & ~0x10; | 1591 | i = ((lp->a.read_csr(ioaddr, 88) | |
| 1613 | if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) | 1592 | (lp->a. |
| 1614 | val |= 0x10; | 1593 | read_csr(ioaddr, 89) << 16)) >> 12) & 0xffff; |
| 1615 | lp->a.write_csr (ioaddr, 124, val); | 1594 | if (i == 0x2627) |
| 1616 | 1595 | val |= 3; | |
| 1617 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | 1596 | } |
| 1618 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | 1597 | lp->a.write_bcr(ioaddr, 9, val); |
| 1598 | } | ||
| 1599 | |||
| 1600 | /* set/reset GPSI bit in test register */ | ||
| 1601 | val = lp->a.read_csr(ioaddr, 124) & ~0x10; | ||
| 1602 | if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) | ||
| 1603 | val |= 0x10; | ||
| 1604 | lp->a.write_csr(ioaddr, 124, val); | ||
| 1605 | |||
| 1606 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | ||
| 1607 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | ||
| 1619 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || | 1608 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || |
| 1620 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { | 1609 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { |
| 1621 | if (lp->options & PCNET32_PORT_ASEL) { | 1610 | if (lp->options & PCNET32_PORT_ASEL) { |
| 1622 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; | 1611 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; |
| 1623 | if (netif_msg_link(lp)) | 1612 | if (netif_msg_link(lp)) |
| 1624 | printk(KERN_DEBUG "%s: Setting 100Mb-Full Duplex.\n", | 1613 | printk(KERN_DEBUG |
| 1625 | dev->name); | 1614 | "%s: Setting 100Mb-Full Duplex.\n", |
| 1626 | } | 1615 | dev->name); |
| 1627 | } | 1616 | } |
| 1628 | { | 1617 | } |
| 1629 | /* | 1618 | if (lp->phycount < 2) { |
| 1630 | * 24 Jun 2004 according AMD, in order to change the PHY, | 1619 | /* |
| 1631 | * DANAS (or DISPM for 79C976) must be set; then select the speed, | 1620 | * 24 Jun 2004 according AMD, in order to change the PHY, |
| 1632 | * duplex, and/or enable auto negotiation, and clear DANAS | 1621 | * DANAS (or DISPM for 79C976) must be set; then select the speed, |
| 1633 | */ | 1622 | * duplex, and/or enable auto negotiation, and clear DANAS |
| 1634 | if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { | 1623 | */ |
| 1635 | lp->a.write_bcr(ioaddr, 32, | 1624 | if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { |
| 1636 | lp->a.read_bcr(ioaddr, 32) | 0x0080); | 1625 | lp->a.write_bcr(ioaddr, 32, |
| 1637 | /* disable Auto Negotiation, set 10Mpbs, HD */ | 1626 | lp->a.read_bcr(ioaddr, 32) | 0x0080); |
| 1638 | val = lp->a.read_bcr(ioaddr, 32) & ~0xb8; | 1627 | /* disable Auto Negotiation, set 10Mpbs, HD */ |
| 1639 | if (lp->options & PCNET32_PORT_FD) | 1628 | val = lp->a.read_bcr(ioaddr, 32) & ~0xb8; |
| 1640 | val |= 0x10; | 1629 | if (lp->options & PCNET32_PORT_FD) |
| 1641 | if (lp->options & PCNET32_PORT_100) | 1630 | val |= 0x10; |
| 1642 | val |= 0x08; | 1631 | if (lp->options & PCNET32_PORT_100) |
| 1643 | lp->a.write_bcr (ioaddr, 32, val); | 1632 | val |= 0x08; |
| 1633 | lp->a.write_bcr(ioaddr, 32, val); | ||
| 1634 | } else { | ||
| 1635 | if (lp->options & PCNET32_PORT_ASEL) { | ||
| 1636 | lp->a.write_bcr(ioaddr, 32, | ||
| 1637 | lp->a.read_bcr(ioaddr, | ||
| 1638 | 32) | 0x0080); | ||
| 1639 | /* enable auto negotiate, setup, disable fd */ | ||
| 1640 | val = lp->a.read_bcr(ioaddr, 32) & ~0x98; | ||
| 1641 | val |= 0x20; | ||
| 1642 | lp->a.write_bcr(ioaddr, 32, val); | ||
| 1643 | } | ||
| 1644 | } | ||
| 1644 | } else { | 1645 | } else { |
| 1645 | if (lp->options & PCNET32_PORT_ASEL) { | 1646 | int first_phy = -1; |
| 1646 | lp->a.write_bcr(ioaddr, 32, | 1647 | u16 bmcr; |
| 1647 | lp->a.read_bcr(ioaddr, 32) | 0x0080); | 1648 | u32 bcr9; |
| 1648 | /* enable auto negotiate, setup, disable fd */ | 1649 | struct ethtool_cmd ecmd; |
| 1649 | val = lp->a.read_bcr(ioaddr, 32) & ~0x98; | 1650 | |
| 1650 | val |= 0x20; | 1651 | /* |
| 1651 | lp->a.write_bcr(ioaddr, 32, val); | 1652 | * There is really no good other way to handle multiple PHYs |
| 1652 | } | 1653 | * other than turning off all automatics |
| 1654 | */ | ||
| 1655 | val = lp->a.read_bcr(ioaddr, 2); | ||
| 1656 | lp->a.write_bcr(ioaddr, 2, val & ~2); | ||
| 1657 | val = lp->a.read_bcr(ioaddr, 32); | ||
| 1658 | lp->a.write_bcr(ioaddr, 32, val & ~(1 << 7)); /* stop MII manager */ | ||
| 1659 | |||
| 1660 | if (!(lp->options & PCNET32_PORT_ASEL)) { | ||
| 1661 | /* setup ecmd */ | ||
| 1662 | ecmd.port = PORT_MII; | ||
| 1663 | ecmd.transceiver = XCVR_INTERNAL; | ||
| 1664 | ecmd.autoneg = AUTONEG_DISABLE; | ||
| 1665 | ecmd.speed = | ||
| 1666 | lp-> | ||
| 1667 | options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10; | ||
| 1668 | bcr9 = lp->a.read_bcr(ioaddr, 9); | ||
| 1669 | |||
| 1670 | if (lp->options & PCNET32_PORT_FD) { | ||
| 1671 | ecmd.duplex = DUPLEX_FULL; | ||
| 1672 | bcr9 |= (1 << 0); | ||
| 1673 | } else { | ||
| 1674 | ecmd.duplex = DUPLEX_HALF; | ||
| 1675 | bcr9 |= ~(1 << 0); | ||
| 1676 | } | ||
| 1677 | lp->a.write_bcr(ioaddr, 9, bcr9); | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { | ||
| 1681 | if (lp->phymask & (1 << i)) { | ||
| 1682 | /* isolate all but the first PHY */ | ||
| 1683 | bmcr = mdio_read(dev, i, MII_BMCR); | ||
| 1684 | if (first_phy == -1) { | ||
| 1685 | first_phy = i; | ||
| 1686 | mdio_write(dev, i, MII_BMCR, | ||
| 1687 | bmcr & ~BMCR_ISOLATE); | ||
| 1688 | } else { | ||
| 1689 | mdio_write(dev, i, MII_BMCR, | ||
| 1690 | bmcr | BMCR_ISOLATE); | ||
| 1691 | } | ||
| 1692 | /* use mii_ethtool_sset to setup PHY */ | ||
| 1693 | lp->mii_if.phy_id = i; | ||
| 1694 | ecmd.phy_address = i; | ||
| 1695 | if (lp->options & PCNET32_PORT_ASEL) { | ||
| 1696 | mii_ethtool_gset(&lp->mii_if, &ecmd); | ||
| 1697 | ecmd.autoneg = AUTONEG_ENABLE; | ||
| 1698 | } | ||
| 1699 | mii_ethtool_sset(&lp->mii_if, &ecmd); | ||
| 1700 | } | ||
| 1701 | } | ||
| 1702 | lp->mii_if.phy_id = first_phy; | ||
| 1703 | if (netif_msg_link(lp)) | ||
| 1704 | printk(KERN_INFO "%s: Using PHY number %d.\n", | ||
| 1705 | dev->name, first_phy); | ||
| 1653 | } | 1706 | } |
| 1654 | } | ||
| 1655 | 1707 | ||
| 1656 | #ifdef DO_DXSUFLO | 1708 | #ifdef DO_DXSUFLO |
| 1657 | if (lp->dxsuflo) { /* Disable transmit stop on underflow */ | 1709 | if (lp->dxsuflo) { /* Disable transmit stop on underflow */ |
| 1658 | val = lp->a.read_csr (ioaddr, 3); | 1710 | val = lp->a.read_csr(ioaddr, 3); |
| 1659 | val |= 0x40; | 1711 | val |= 0x40; |
| 1660 | lp->a.write_csr (ioaddr, 3, val); | 1712 | lp->a.write_csr(ioaddr, 3, val); |
| 1661 | } | 1713 | } |
| 1662 | #endif | 1714 | #endif |
| 1663 | 1715 | ||
| 1664 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 1716 | lp->init_block.mode = |
| 1665 | pcnet32_load_multicast(dev); | 1717 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
| 1666 | 1718 | pcnet32_load_multicast(dev); | |
| 1667 | if (pcnet32_init_ring(dev)) { | 1719 | |
| 1668 | rc = -ENOMEM; | 1720 | if (pcnet32_init_ring(dev)) { |
| 1669 | goto err_free_ring; | 1721 | rc = -ENOMEM; |
| 1670 | } | 1722 | goto err_free_ring; |
| 1671 | 1723 | } | |
| 1672 | /* Re-initialize the PCNET32, and start it when done. */ | 1724 | |
| 1673 | lp->a.write_csr (ioaddr, 1, (lp->dma_addr + | 1725 | /* Re-initialize the PCNET32, and start it when done. */ |
| 1674 | offsetof(struct pcnet32_private, init_block)) & 0xffff); | 1726 | lp->a.write_csr(ioaddr, 1, (lp->dma_addr + |
| 1675 | lp->a.write_csr (ioaddr, 2, (lp->dma_addr + | 1727 | offsetof(struct pcnet32_private, |
| 1676 | offsetof(struct pcnet32_private, init_block)) >> 16); | 1728 | init_block)) & 0xffff); |
| 1677 | 1729 | lp->a.write_csr(ioaddr, 2, | |
| 1678 | lp->a.write_csr (ioaddr, 4, 0x0915); | 1730 | (lp->dma_addr + |
| 1679 | lp->a.write_csr (ioaddr, 0, 0x0001); | 1731 | offsetof(struct pcnet32_private, init_block)) >> 16); |
| 1680 | 1732 | ||
| 1681 | netif_start_queue(dev); | 1733 | lp->a.write_csr(ioaddr, 4, 0x0915); |
| 1682 | 1734 | lp->a.write_csr(ioaddr, 0, 0x0001); | |
| 1683 | /* If we have mii, print the link status and start the watchdog */ | 1735 | |
| 1684 | if (lp->mii) { | 1736 | netif_start_queue(dev); |
| 1685 | mii_check_media (&lp->mii_if, netif_msg_link(lp), 1); | 1737 | |
| 1686 | mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | 1738 | /* Print the link status and start the watchdog */ |
| 1687 | } | 1739 | pcnet32_check_media(dev, 1); |
| 1688 | 1740 | mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | |
| 1689 | i = 0; | 1741 | |
| 1690 | while (i++ < 100) | 1742 | i = 0; |
| 1691 | if (lp->a.read_csr (ioaddr, 0) & 0x0100) | 1743 | while (i++ < 100) |
| 1692 | break; | 1744 | if (lp->a.read_csr(ioaddr, 0) & 0x0100) |
| 1693 | /* | 1745 | break; |
| 1694 | * We used to clear the InitDone bit, 0x0100, here but Mark Stockton | 1746 | /* |
| 1695 | * reports that doing so triggers a bug in the '974. | 1747 | * We used to clear the InitDone bit, 0x0100, here but Mark Stockton |
| 1696 | */ | 1748 | * reports that doing so triggers a bug in the '974. |
| 1697 | lp->a.write_csr (ioaddr, 0, 0x0042); | 1749 | */ |
| 1698 | 1750 | lp->a.write_csr(ioaddr, 0, 0x0042); | |
| 1699 | if (netif_msg_ifup(lp)) | 1751 | |
| 1700 | printk(KERN_DEBUG "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", | 1752 | if (netif_msg_ifup(lp)) |
| 1701 | dev->name, i, (u32) (lp->dma_addr + | 1753 | printk(KERN_DEBUG |
| 1702 | offsetof(struct pcnet32_private, init_block)), | 1754 | "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", |
| 1703 | lp->a.read_csr(ioaddr, 0)); | 1755 | dev->name, i, |
| 1704 | 1756 | (u32) (lp->dma_addr + | |
| 1705 | spin_unlock_irqrestore(&lp->lock, flags); | 1757 | offsetof(struct pcnet32_private, init_block)), |
| 1706 | 1758 | lp->a.read_csr(ioaddr, 0)); | |
| 1707 | return 0; /* Always succeed */ | 1759 | |
| 1708 | 1760 | spin_unlock_irqrestore(&lp->lock, flags); | |
| 1709 | err_free_ring: | 1761 | |
| 1710 | /* free any allocated skbuffs */ | 1762 | return 0; /* Always succeed */ |
| 1711 | for (i = 0; i < lp->rx_ring_size; i++) { | 1763 | |
| 1712 | lp->rx_ring[i].status = 0; | 1764 | err_free_ring: |
| 1713 | if (lp->rx_skbuff[i]) { | 1765 | /* free any allocated skbuffs */ |
| 1714 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, | 1766 | for (i = 0; i < lp->rx_ring_size; i++) { |
| 1715 | PCI_DMA_FROMDEVICE); | 1767 | lp->rx_ring[i].status = 0; |
| 1716 | dev_kfree_skb(lp->rx_skbuff[i]); | 1768 | if (lp->rx_skbuff[i]) { |
| 1717 | } | 1769 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], |
| 1718 | lp->rx_skbuff[i] = NULL; | 1770 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
| 1719 | lp->rx_dma_addr[i] = 0; | 1771 | dev_kfree_skb(lp->rx_skbuff[i]); |
| 1720 | } | 1772 | } |
| 1721 | 1773 | lp->rx_skbuff[i] = NULL; | |
| 1722 | pcnet32_free_ring(dev); | 1774 | lp->rx_dma_addr[i] = 0; |
| 1723 | 1775 | } | |
| 1724 | /* | 1776 | |
| 1725 | * Switch back to 16bit mode to avoid problems with dumb | 1777 | pcnet32_free_ring(dev); |
| 1726 | * DOS packet driver after a warm reboot | 1778 | |
| 1727 | */ | 1779 | /* |
| 1728 | lp->a.write_bcr (ioaddr, 20, 4); | 1780 | * Switch back to 16bit mode to avoid problems with dumb |
| 1729 | 1781 | * DOS packet driver after a warm reboot | |
| 1730 | err_free_irq: | 1782 | */ |
| 1731 | spin_unlock_irqrestore(&lp->lock, flags); | 1783 | lp->a.write_bcr(ioaddr, 20, 4); |
| 1732 | free_irq(dev->irq, dev); | 1784 | |
| 1733 | return rc; | 1785 | err_free_irq: |
| 1786 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 1787 | free_irq(dev->irq, dev); | ||
| 1788 | return rc; | ||
| 1734 | } | 1789 | } |
| 1735 | 1790 | ||
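Note: pcnet32_open configures port selection and duplex almost entirely by read-modify-write of individual BCR bits: read the register, clear the relevant field, OR in the new setting, write it back (BCR2 bit 1 for auto-select, BCR9 bit 0 for full duplex, BCR32 for the MII speed/duplex/autoneg bits). One line in the new multi-PHY branch looks suspicious by that pattern: bcr9 |= ~(1 << 0) ORs in every bit other than bit 0 instead of clearing bit 0; presumably bcr9 &= ~(1 << 0) was intended. A minimal sketch of the intended read-modify-write, with hypothetical stand-ins for lp->a.read_bcr()/write_bcr():

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical register storage standing in for BCR9 behind read_bcr/write_bcr. */
    static uint16_t bcr9;
    static uint16_t read_bcr9(void)            { return bcr9; }
    static void     write_bcr9(uint16_t v)     { bcr9 = v; }

    static void set_full_duplex(int fd)
    {
            uint16_t val = read_bcr9();

            if (fd)
                    val |= (1 << 0);        /* set the full-duplex bit */
            else
                    val &= ~(1 << 0);       /* clear it -- not val |= ~(1 << 0) */
            write_bcr9(val);
    }

    int main(void)
    {
            set_full_duplex(1);
            printf("BCR9 after FD on:  %#06x\n", read_bcr9());
            set_full_duplex(0);
            printf("BCR9 after FD off: %#06x\n", read_bcr9());
            return 0;
    }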
| 1736 | /* | 1791 | /* |
| @@ -1746,727 +1801,893 @@ err_free_irq: | |||
| 1746 | * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com | 1801 | * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com |
| 1747 | */ | 1802 | */ |
| 1748 | 1803 | ||
| 1749 | static void | 1804 | static void pcnet32_purge_tx_ring(struct net_device *dev) |
| 1750 | pcnet32_purge_tx_ring(struct net_device *dev) | ||
| 1751 | { | 1805 | { |
| 1752 | struct pcnet32_private *lp = dev->priv; | 1806 | struct pcnet32_private *lp = dev->priv; |
| 1753 | int i; | 1807 | int i; |
| 1754 | |||
| 1755 | for (i = 0; i < lp->tx_ring_size; i++) { | ||
| 1756 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | ||
| 1757 | wmb(); /* Make sure adapter sees owner change */ | ||
| 1758 | if (lp->tx_skbuff[i]) { | ||
| 1759 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | ||
| 1760 | lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); | ||
| 1761 | dev_kfree_skb_any(lp->tx_skbuff[i]); | ||
| 1762 | } | ||
| 1763 | lp->tx_skbuff[i] = NULL; | ||
| 1764 | lp->tx_dma_addr[i] = 0; | ||
| 1765 | } | ||
| 1766 | } | ||
| 1767 | 1808 | ||
| 1809 | for (i = 0; i < lp->tx_ring_size; i++) { | ||
| 1810 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | ||
| 1811 | wmb(); /* Make sure adapter sees owner change */ | ||
| 1812 | if (lp->tx_skbuff[i]) { | ||
| 1813 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | ||
| 1814 | lp->tx_skbuff[i]->len, | ||
| 1815 | PCI_DMA_TODEVICE); | ||
| 1816 | dev_kfree_skb_any(lp->tx_skbuff[i]); | ||
| 1817 | } | ||
| 1818 | lp->tx_skbuff[i] = NULL; | ||
| 1819 | lp->tx_dma_addr[i] = 0; | ||
| 1820 | } | ||
| 1821 | } | ||
| 1768 | 1822 | ||
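Note: the purge loop above clears each descriptor's status word before freeing the skb. Bit 15 of the status (0x8000) is the ownership bit, so status = 0 hands the descriptor back to the CPU, and the wmb() makes sure the adapter sees that before the buffer behind it goes away. Elsewhere the driver tests ownership by casting the status to a signed short, so "chip owns it" shows up as a negative value. A tiny illustration of that test:

    #include <stdint.h>
    #include <stdio.h>

    /* Ownership bit as used by the pcnet32 descriptors (bit 15 of the status word). */
    #define DESC_OWN 0x8000

    static int host_owns(uint16_t status)
    {
            /* Same trick as the driver: negative when cast signed means the chip owns it. */
            return (short)status >= 0;
    }

    int main(void)
    {
            printf("status 0x8000 -> host owns? %d\n", host_owns(DESC_OWN)); /* 0: chip owns */
            printf("status 0x0310 -> host owns? %d\n", host_owns(0x0310));   /* 1: host owns */
            return 0;
    }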
| 1769 | /* Initialize the PCNET32 Rx and Tx rings. */ | 1823 | /* Initialize the PCNET32 Rx and Tx rings. */ |
| 1770 | static int | 1824 | static int pcnet32_init_ring(struct net_device *dev) |
| 1771 | pcnet32_init_ring(struct net_device *dev) | ||
| 1772 | { | 1825 | { |
| 1773 | struct pcnet32_private *lp = dev->priv; | 1826 | struct pcnet32_private *lp = dev->priv; |
| 1774 | int i; | 1827 | int i; |
| 1775 | 1828 | ||
| 1776 | lp->tx_full = 0; | 1829 | lp->tx_full = 0; |
| 1777 | lp->cur_rx = lp->cur_tx = 0; | 1830 | lp->cur_rx = lp->cur_tx = 0; |
| 1778 | lp->dirty_rx = lp->dirty_tx = 0; | 1831 | lp->dirty_rx = lp->dirty_tx = 0; |
| 1779 | 1832 | ||
| 1780 | for (i = 0; i < lp->rx_ring_size; i++) { | 1833 | for (i = 0; i < lp->rx_ring_size; i++) { |
| 1781 | struct sk_buff *rx_skbuff = lp->rx_skbuff[i]; | 1834 | struct sk_buff *rx_skbuff = lp->rx_skbuff[i]; |
| 1782 | if (rx_skbuff == NULL) { | 1835 | if (rx_skbuff == NULL) { |
| 1783 | if (!(rx_skbuff = lp->rx_skbuff[i] = dev_alloc_skb (PKT_BUF_SZ))) { | 1836 | if (! |
| 1784 | /* there is not much, we can do at this point */ | 1837 | (rx_skbuff = lp->rx_skbuff[i] = |
| 1785 | if (pcnet32_debug & NETIF_MSG_DRV) | 1838 | dev_alloc_skb(PKT_BUF_SZ))) { |
| 1786 | printk(KERN_ERR "%s: pcnet32_init_ring dev_alloc_skb failed.\n", | 1839 | /* there is not much, we can do at this point */ |
| 1787 | dev->name); | 1840 | if (pcnet32_debug & NETIF_MSG_DRV) |
| 1788 | return -1; | 1841 | printk(KERN_ERR |
| 1789 | } | 1842 | "%s: pcnet32_init_ring dev_alloc_skb failed.\n", |
| 1790 | skb_reserve (rx_skbuff, 2); | 1843 | dev->name); |
| 1791 | } | 1844 | return -1; |
| 1792 | 1845 | } | |
| 1793 | rmb(); | 1846 | skb_reserve(rx_skbuff, 2); |
| 1794 | if (lp->rx_dma_addr[i] == 0) | 1847 | } |
| 1795 | lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->data, | 1848 | |
| 1796 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | 1849 | rmb(); |
| 1797 | lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); | 1850 | if (lp->rx_dma_addr[i] == 0) |
| 1798 | lp->rx_ring[i].buf_length = le16_to_cpu(2-PKT_BUF_SZ); | 1851 | lp->rx_dma_addr[i] = |
| 1799 | wmb(); /* Make sure owner changes after all others are visible */ | 1852 | pci_map_single(lp->pci_dev, rx_skbuff->data, |
| 1800 | lp->rx_ring[i].status = le16_to_cpu(0x8000); | 1853 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
| 1801 | } | 1854 | lp->rx_ring[i].base = (u32) le32_to_cpu(lp->rx_dma_addr[i]); |
| 1802 | /* The Tx buffer address is filled in as needed, but we do need to clear | 1855 | lp->rx_ring[i].buf_length = le16_to_cpu(2 - PKT_BUF_SZ); |
| 1803 | * the upper ownership bit. */ | 1856 | wmb(); /* Make sure owner changes after all others are visible */ |
| 1804 | for (i = 0; i < lp->tx_ring_size; i++) { | 1857 | lp->rx_ring[i].status = le16_to_cpu(0x8000); |
| 1805 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | 1858 | } |
| 1806 | wmb(); /* Make sure adapter sees owner change */ | 1859 | /* The Tx buffer address is filled in as needed, but we do need to clear |
| 1807 | lp->tx_ring[i].base = 0; | 1860 | * the upper ownership bit. */ |
| 1808 | lp->tx_dma_addr[i] = 0; | 1861 | for (i = 0; i < lp->tx_ring_size; i++) { |
| 1809 | } | 1862 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ |
| 1810 | 1863 | wmb(); /* Make sure adapter sees owner change */ | |
| 1811 | lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); | 1864 | lp->tx_ring[i].base = 0; |
| 1812 | for (i = 0; i < 6; i++) | 1865 | lp->tx_dma_addr[i] = 0; |
| 1813 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | 1866 | } |
| 1814 | lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr); | 1867 | |
| 1815 | lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr); | 1868 | lp->init_block.tlen_rlen = |
| 1816 | wmb(); /* Make sure all changes are visible */ | 1869 | le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits); |
| 1817 | return 0; | 1870 | for (i = 0; i < 6; i++) |
| 1871 | lp->init_block.phys_addr[i] = dev->dev_addr[i]; | ||
| 1872 | lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr); | ||
| 1873 | lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr); | ||
| 1874 | wmb(); /* Make sure all changes are visible */ | ||
| 1875 | return 0; | ||
| 1818 | } | 1876 | } |
| 1819 | 1877 | ||
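Note: pcnet32_init_ring stores the receive buffer length as a two's-complement negative value, 2 - PKT_BUF_SZ (the 2 accounting for the skb_reserve), which is how these LANCE-style descriptors expect it; the ring dump in pcnet32_tx_timeout undoes it with (-le16_to_cpu(buf_length)) & 0xffff. A small sketch of that round trip; the PKT_BUF_SZ value below is illustrative, the real constant is defined in the driver source:

    #include <stdint.h>
    #include <stdio.h>

    #define PKT_BUF_SZ 1544   /* illustrative value only */

    int main(void)
    {
            /* Encoded the way the driver fills the descriptor's buf_length field. */
            uint16_t buf_length = (uint16_t)(2 - PKT_BUF_SZ);

            /* Decoded the way the tx_timeout ring dump prints it. */
            unsigned int decoded = (-(int16_t)buf_length) & 0xffff;

            printf("encoded %#06x -> decoded %u bytes\n", buf_length, decoded);
            return 0;
    }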
| 1820 | /* the pcnet32 has been issued a stop or reset. Wait for the stop bit | 1878 | /* the pcnet32 has been issued a stop or reset. Wait for the stop bit |
| 1821 | * then flush the pending transmit operations, re-initialize the ring, | 1879 | * then flush the pending transmit operations, re-initialize the ring, |
| 1822 | * and tell the chip to initialize. | 1880 | * and tell the chip to initialize. |
| 1823 | */ | 1881 | */ |
| 1824 | static void | 1882 | static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) |
| 1825 | pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) | ||
| 1826 | { | 1883 | { |
| 1827 | struct pcnet32_private *lp = dev->priv; | 1884 | struct pcnet32_private *lp = dev->priv; |
| 1828 | unsigned long ioaddr = dev->base_addr; | 1885 | unsigned long ioaddr = dev->base_addr; |
| 1829 | int i; | 1886 | int i; |
| 1830 | 1887 | ||
| 1831 | /* wait for stop */ | 1888 | /* wait for stop */ |
| 1832 | for (i=0; i<100; i++) | 1889 | for (i = 0; i < 100; i++) |
| 1833 | if (lp->a.read_csr(ioaddr, 0) & 0x0004) | 1890 | if (lp->a.read_csr(ioaddr, 0) & 0x0004) |
| 1834 | break; | 1891 | break; |
| 1835 | 1892 | ||
| 1836 | if (i >= 100 && netif_msg_drv(lp)) | 1893 | if (i >= 100 && netif_msg_drv(lp)) |
| 1837 | printk(KERN_ERR "%s: pcnet32_restart timed out waiting for stop.\n", | 1894 | printk(KERN_ERR |
| 1838 | dev->name); | 1895 | "%s: pcnet32_restart timed out waiting for stop.\n", |
| 1896 | dev->name); | ||
| 1839 | 1897 | ||
| 1840 | pcnet32_purge_tx_ring(dev); | 1898 | pcnet32_purge_tx_ring(dev); |
| 1841 | if (pcnet32_init_ring(dev)) | 1899 | if (pcnet32_init_ring(dev)) |
| 1842 | return; | 1900 | return; |
| 1843 | 1901 | ||
| 1844 | /* ReInit Ring */ | 1902 | /* ReInit Ring */ |
| 1845 | lp->a.write_csr (ioaddr, 0, 1); | 1903 | lp->a.write_csr(ioaddr, 0, 1); |
| 1846 | i = 0; | 1904 | i = 0; |
| 1847 | while (i++ < 1000) | 1905 | while (i++ < 1000) |
| 1848 | if (lp->a.read_csr (ioaddr, 0) & 0x0100) | 1906 | if (lp->a.read_csr(ioaddr, 0) & 0x0100) |
| 1849 | break; | 1907 | break; |
| 1850 | 1908 | ||
| 1851 | lp->a.write_csr (ioaddr, 0, csr0_bits); | 1909 | lp->a.write_csr(ioaddr, 0, csr0_bits); |
| 1852 | } | 1910 | } |
| 1853 | 1911 | ||
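Note: pcnet32_restart is built around two bounded polling loops: up to 100 reads of CSR0 waiting for the stop bit (0x0004), then after writing INIT up to 1000 reads waiting for IDON (0x0100). A generic version of that bounded-poll idiom, with the CSR0 read stubbed out (fake_read_csr0 is a hypothetical stand-in for lp->a.read_csr(ioaddr, 0)):

    #include <stdio.h>

    /* Stub register read: pretends the awaited bit appears on the 5th read. */
    static unsigned int fake_read_csr0(void)
    {
            static int reads;
            return (++reads >= 5) ? 0x0100 : 0x0000;
    }

    /* Poll until (csr0 & mask) is set, giving up after max_reads attempts. */
    static int poll_csr0_bit(unsigned int mask, int max_reads)
    {
            int i;

            for (i = 0; i < max_reads; i++)
                    if (fake_read_csr0() & mask)
                            return i;       /* number of reads it took */
            return -1;                      /* timed out, as the driver then logs */
    }

    int main(void)
    {
            printf("IDON seen after %d reads\n", poll_csr0_bit(0x0100, 1000));
            return 0;
    }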
| 1854 | 1912 | static void pcnet32_tx_timeout(struct net_device *dev) | |
| 1855 | static void | ||
| 1856 | pcnet32_tx_timeout (struct net_device *dev) | ||
| 1857 | { | 1913 | { |
| 1858 | struct pcnet32_private *lp = dev->priv; | 1914 | struct pcnet32_private *lp = dev->priv; |
| 1859 | unsigned long ioaddr = dev->base_addr, flags; | 1915 | unsigned long ioaddr = dev->base_addr, flags; |
| 1860 | 1916 | ||
| 1861 | spin_lock_irqsave(&lp->lock, flags); | 1917 | spin_lock_irqsave(&lp->lock, flags); |
| 1862 | /* Transmitter timeout, serious problems. */ | 1918 | /* Transmitter timeout, serious problems. */ |
| 1863 | if (pcnet32_debug & NETIF_MSG_DRV) | 1919 | if (pcnet32_debug & NETIF_MSG_DRV) |
| 1864 | printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n", | 1920 | printk(KERN_ERR |
| 1865 | dev->name, lp->a.read_csr(ioaddr, 0)); | 1921 | "%s: transmit timed out, status %4.4x, resetting.\n", |
| 1866 | lp->a.write_csr (ioaddr, 0, 0x0004); | 1922 | dev->name, lp->a.read_csr(ioaddr, 0)); |
| 1867 | lp->stats.tx_errors++; | 1923 | lp->a.write_csr(ioaddr, 0, 0x0004); |
| 1868 | if (netif_msg_tx_err(lp)) { | 1924 | lp->stats.tx_errors++; |
| 1869 | int i; | 1925 | if (netif_msg_tx_err(lp)) { |
| 1870 | printk(KERN_DEBUG " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.", | 1926 | int i; |
| 1871 | lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "", | 1927 | printk(KERN_DEBUG |
| 1872 | lp->cur_rx); | 1928 | " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.", |
| 1873 | for (i = 0 ; i < lp->rx_ring_size; i++) | 1929 | lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "", |
| 1874 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", | 1930 | lp->cur_rx); |
| 1875 | le32_to_cpu(lp->rx_ring[i].base), | 1931 | for (i = 0; i < lp->rx_ring_size; i++) |
| 1876 | (-le16_to_cpu(lp->rx_ring[i].buf_length)) & 0xffff, | 1932 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", |
| 1877 | le32_to_cpu(lp->rx_ring[i].msg_length), | 1933 | le32_to_cpu(lp->rx_ring[i].base), |
| 1878 | le16_to_cpu(lp->rx_ring[i].status)); | 1934 | (-le16_to_cpu(lp->rx_ring[i].buf_length)) & |
| 1879 | for (i = 0 ; i < lp->tx_ring_size; i++) | 1935 | 0xffff, le32_to_cpu(lp->rx_ring[i].msg_length), |
| 1880 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", | 1936 | le16_to_cpu(lp->rx_ring[i].status)); |
| 1881 | le32_to_cpu(lp->tx_ring[i].base), | 1937 | for (i = 0; i < lp->tx_ring_size; i++) |
| 1882 | (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff, | 1938 | printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", |
| 1883 | le32_to_cpu(lp->tx_ring[i].misc), | 1939 | le32_to_cpu(lp->tx_ring[i].base), |
| 1884 | le16_to_cpu(lp->tx_ring[i].status)); | 1940 | (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff, |
| 1885 | printk("\n"); | 1941 | le32_to_cpu(lp->tx_ring[i].misc), |
| 1886 | } | 1942 | le16_to_cpu(lp->tx_ring[i].status)); |
| 1887 | pcnet32_restart(dev, 0x0042); | 1943 | printk("\n"); |
| 1888 | 1944 | } | |
| 1889 | dev->trans_start = jiffies; | 1945 | pcnet32_restart(dev, 0x0042); |
| 1890 | netif_wake_queue(dev); | 1946 | |
| 1891 | 1947 | dev->trans_start = jiffies; | |
| 1892 | spin_unlock_irqrestore(&lp->lock, flags); | 1948 | netif_wake_queue(dev); |
| 1893 | } | ||
| 1894 | 1949 | ||
| 1950 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 1951 | } | ||
| 1895 | 1952 | ||
| 1896 | static int | 1953 | static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) |
| 1897 | pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
| 1898 | { | 1954 | { |
| 1899 | struct pcnet32_private *lp = dev->priv; | 1955 | struct pcnet32_private *lp = dev->priv; |
| 1900 | unsigned long ioaddr = dev->base_addr; | 1956 | unsigned long ioaddr = dev->base_addr; |
| 1901 | u16 status; | 1957 | u16 status; |
| 1902 | int entry; | 1958 | int entry; |
| 1903 | unsigned long flags; | 1959 | unsigned long flags; |
| 1904 | 1960 | ||
| 1905 | spin_lock_irqsave(&lp->lock, flags); | 1961 | spin_lock_irqsave(&lp->lock, flags); |
| 1906 | 1962 | ||
| 1907 | if (netif_msg_tx_queued(lp)) { | 1963 | if (netif_msg_tx_queued(lp)) { |
| 1908 | printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", | 1964 | printk(KERN_DEBUG |
| 1909 | dev->name, lp->a.read_csr(ioaddr, 0)); | 1965 | "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", |
| 1910 | } | 1966 | dev->name, lp->a.read_csr(ioaddr, 0)); |
| 1967 | } | ||
| 1911 | 1968 | ||
| 1912 | /* Default status -- will not enable Successful-TxDone | 1969 | /* Default status -- will not enable Successful-TxDone |
| 1913 | * interrupt when that option is available to us. | 1970 | * interrupt when that option is available to us. |
| 1914 | */ | 1971 | */ |
| 1915 | status = 0x8300; | 1972 | status = 0x8300; |
| 1916 | 1973 | ||
| 1917 | /* Fill in a Tx ring entry */ | 1974 | /* Fill in a Tx ring entry */ |
| 1918 | 1975 | ||
| 1919 | /* Mask to ring buffer boundary. */ | 1976 | /* Mask to ring buffer boundary. */ |
| 1920 | entry = lp->cur_tx & lp->tx_mod_mask; | 1977 | entry = lp->cur_tx & lp->tx_mod_mask; |
| 1921 | 1978 | ||
| 1922 | /* Caution: the write order is important here, set the status | 1979 | /* Caution: the write order is important here, set the status |
| 1923 | * with the "ownership" bits last. */ | 1980 | * with the "ownership" bits last. */ |
| 1924 | 1981 | ||
| 1925 | lp->tx_ring[entry].length = le16_to_cpu(-skb->len); | 1982 | lp->tx_ring[entry].length = le16_to_cpu(-skb->len); |
| 1926 | 1983 | ||
| 1927 | lp->tx_ring[entry].misc = 0x00000000; | 1984 | lp->tx_ring[entry].misc = 0x00000000; |
| 1928 | 1985 | ||
| 1929 | lp->tx_skbuff[entry] = skb; | 1986 | lp->tx_skbuff[entry] = skb; |
| 1930 | lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len, | 1987 | lp->tx_dma_addr[entry] = |
| 1931 | PCI_DMA_TODEVICE); | 1988 | pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); |
| 1932 | lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); | 1989 | lp->tx_ring[entry].base = (u32) le32_to_cpu(lp->tx_dma_addr[entry]); |
| 1933 | wmb(); /* Make sure owner changes after all others are visible */ | 1990 | wmb(); /* Make sure owner changes after all others are visible */ |
| 1934 | lp->tx_ring[entry].status = le16_to_cpu(status); | 1991 | lp->tx_ring[entry].status = le16_to_cpu(status); |
| 1935 | 1992 | ||
| 1936 | lp->cur_tx++; | 1993 | lp->cur_tx++; |
| 1937 | lp->stats.tx_bytes += skb->len; | 1994 | lp->stats.tx_bytes += skb->len; |
| 1938 | 1995 | ||
| 1939 | /* Trigger an immediate send poll. */ | 1996 | /* Trigger an immediate send poll. */ |
| 1940 | lp->a.write_csr (ioaddr, 0, 0x0048); | 1997 | lp->a.write_csr(ioaddr, 0, 0x0048); |
| 1941 | 1998 | ||
| 1942 | dev->trans_start = jiffies; | 1999 | dev->trans_start = jiffies; |
| 1943 | 2000 | ||
| 1944 | if (lp->tx_ring[(entry+1) & lp->tx_mod_mask].base != 0) { | 2001 | if (lp->tx_ring[(entry + 1) & lp->tx_mod_mask].base != 0) { |
| 1945 | lp->tx_full = 1; | 2002 | lp->tx_full = 1; |
| 1946 | netif_stop_queue(dev); | 2003 | netif_stop_queue(dev); |
| 1947 | } | 2004 | } |
| 1948 | spin_unlock_irqrestore(&lp->lock, flags); | 2005 | spin_unlock_irqrestore(&lp->lock, flags); |
| 1949 | return 0; | 2006 | return 0; |
| 1950 | } | 2007 | } |
| 1951 | 2008 | ||
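Note: pcnet32_start_xmit never takes a modulo: because the ring sizes are powers of two, cur_tx just counts up forever and the slot index is cur_tx & tx_mod_mask (ring_size - 1). The queue is declared full when the descriptor one past the slot just filled still has a non-zero base, i.e. it has not been reclaimed yet. The indexing part as a standalone sketch (the ring size of 16 is illustrative):

    #include <stdio.h>

    #define TX_RING_SIZE 16                 /* must be a power of two */
    #define TX_MOD_MASK  (TX_RING_SIZE - 1)

    int main(void)
    {
            unsigned int cur_tx;

            /* cur_tx keeps counting up; the mask folds it back into 0..TX_RING_SIZE-1. */
            for (cur_tx = 14; cur_tx < 19; cur_tx++)
                    printf("cur_tx=%u -> entry=%u, next entry=%u\n",
                           cur_tx, cur_tx & TX_MOD_MASK, (cur_tx + 1) & TX_MOD_MASK);
            return 0;
    }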
| 1952 | /* The PCNET32 interrupt handler. */ | 2009 | /* The PCNET32 interrupt handler. */ |
| 1953 | static irqreturn_t | 2010 | static irqreturn_t |
| 1954 | pcnet32_interrupt(int irq, void *dev_id, struct pt_regs * regs) | 2011 | pcnet32_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
| 1955 | { | 2012 | { |
| 1956 | struct net_device *dev = dev_id; | 2013 | struct net_device *dev = dev_id; |
| 1957 | struct pcnet32_private *lp; | 2014 | struct pcnet32_private *lp; |
| 1958 | unsigned long ioaddr; | 2015 | unsigned long ioaddr; |
| 1959 | u16 csr0,rap; | 2016 | u16 csr0, rap; |
| 1960 | int boguscnt = max_interrupt_work; | 2017 | int boguscnt = max_interrupt_work; |
| 1961 | int must_restart; | 2018 | int must_restart; |
| 1962 | 2019 | ||
| 1963 | if (!dev) { | 2020 | if (!dev) { |
| 1964 | if (pcnet32_debug & NETIF_MSG_INTR) | 2021 | if (pcnet32_debug & NETIF_MSG_INTR) |
| 1965 | printk (KERN_DEBUG "%s(): irq %d for unknown device\n", | 2022 | printk(KERN_DEBUG "%s(): irq %d for unknown device\n", |
| 1966 | __FUNCTION__, irq); | 2023 | __FUNCTION__, irq); |
| 1967 | return IRQ_NONE; | 2024 | return IRQ_NONE; |
| 1968 | } | ||
| 1969 | |||
| 1970 | ioaddr = dev->base_addr; | ||
| 1971 | lp = dev->priv; | ||
| 1972 | |||
| 1973 | spin_lock(&lp->lock); | ||
| 1974 | |||
| 1975 | rap = lp->a.read_rap(ioaddr); | ||
| 1976 | while ((csr0 = lp->a.read_csr (ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) { | ||
| 1977 | if (csr0 == 0xffff) { | ||
| 1978 | break; /* PCMCIA remove happened */ | ||
| 1979 | } | 2025 | } |
| 1980 | /* Acknowledge all of the current interrupt sources ASAP. */ | ||
| 1981 | lp->a.write_csr (ioaddr, 0, csr0 & ~0x004f); | ||
| 1982 | 2026 | ||
| 1983 | must_restart = 0; | 2027 | ioaddr = dev->base_addr; |
| 2028 | lp = dev->priv; | ||
| 1984 | 2029 | ||
| 1985 | if (netif_msg_intr(lp)) | 2030 | spin_lock(&lp->lock); |
| 1986 | printk(KERN_DEBUG "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", | 2031 | |
| 1987 | dev->name, csr0, lp->a.read_csr (ioaddr, 0)); | 2032 | rap = lp->a.read_rap(ioaddr); |
| 1988 | 2033 | while ((csr0 = lp->a.read_csr(ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) { | |
| 1989 | if (csr0 & 0x0400) /* Rx interrupt */ | 2034 | if (csr0 == 0xffff) { |
| 1990 | pcnet32_rx(dev); | 2035 | break; /* PCMCIA remove happened */ |
| 1991 | 2036 | } | |
| 1992 | if (csr0 & 0x0200) { /* Tx-done interrupt */ | 2037 | /* Acknowledge all of the current interrupt sources ASAP. */ |
| 1993 | unsigned int dirty_tx = lp->dirty_tx; | 2038 | lp->a.write_csr(ioaddr, 0, csr0 & ~0x004f); |
| 1994 | int delta; | 2039 | |
| 1995 | 2040 | must_restart = 0; | |
| 1996 | while (dirty_tx != lp->cur_tx) { | 2041 | |
| 1997 | int entry = dirty_tx & lp->tx_mod_mask; | 2042 | if (netif_msg_intr(lp)) |
| 1998 | int status = (short)le16_to_cpu(lp->tx_ring[entry].status); | 2043 | printk(KERN_DEBUG |
| 1999 | 2044 | "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", | |
| 2000 | if (status < 0) | 2045 | dev->name, csr0, lp->a.read_csr(ioaddr, 0)); |
| 2001 | break; /* It still hasn't been Txed */ | 2046 | |
| 2002 | 2047 | if (csr0 & 0x0400) /* Rx interrupt */ | |
| 2003 | lp->tx_ring[entry].base = 0; | 2048 | pcnet32_rx(dev); |
| 2004 | 2049 | ||
| 2005 | if (status & 0x4000) { | 2050 | if (csr0 & 0x0200) { /* Tx-done interrupt */ |
| 2006 | /* There was an major error, log it. */ | 2051 | unsigned int dirty_tx = lp->dirty_tx; |
| 2007 | int err_status = le32_to_cpu(lp->tx_ring[entry].misc); | 2052 | int delta; |
| 2008 | lp->stats.tx_errors++; | 2053 | |
| 2009 | if (netif_msg_tx_err(lp)) | 2054 | while (dirty_tx != lp->cur_tx) { |
| 2010 | printk(KERN_ERR "%s: Tx error status=%04x err_status=%08x\n", | 2055 | int entry = dirty_tx & lp->tx_mod_mask; |
| 2011 | dev->name, status, err_status); | 2056 | int status = |
| 2012 | if (err_status & 0x04000000) lp->stats.tx_aborted_errors++; | 2057 | (short)le16_to_cpu(lp->tx_ring[entry]. |
| 2013 | if (err_status & 0x08000000) lp->stats.tx_carrier_errors++; | 2058 | status); |
| 2014 | if (err_status & 0x10000000) lp->stats.tx_window_errors++; | 2059 | |
| 2060 | if (status < 0) | ||
| 2061 | break; /* It still hasn't been Txed */ | ||
| 2062 | |||
| 2063 | lp->tx_ring[entry].base = 0; | ||
| 2064 | |||
| 2065 | if (status & 0x4000) { | ||
| 2066 | /* There was an major error, log it. */ | ||
| 2067 | int err_status = | ||
| 2068 | le32_to_cpu(lp->tx_ring[entry]. | ||
| 2069 | misc); | ||
| 2070 | lp->stats.tx_errors++; | ||
| 2071 | if (netif_msg_tx_err(lp)) | ||
| 2072 | printk(KERN_ERR | ||
| 2073 | "%s: Tx error status=%04x err_status=%08x\n", | ||
| 2074 | dev->name, status, | ||
| 2075 | err_status); | ||
| 2076 | if (err_status & 0x04000000) | ||
| 2077 | lp->stats.tx_aborted_errors++; | ||
| 2078 | if (err_status & 0x08000000) | ||
| 2079 | lp->stats.tx_carrier_errors++; | ||
| 2080 | if (err_status & 0x10000000) | ||
| 2081 | lp->stats.tx_window_errors++; | ||
| 2015 | #ifndef DO_DXSUFLO | 2082 | #ifndef DO_DXSUFLO |
| 2016 | if (err_status & 0x40000000) { | 2083 | if (err_status & 0x40000000) { |
| 2017 | lp->stats.tx_fifo_errors++; | 2084 | lp->stats.tx_fifo_errors++; |
| 2018 | /* Ackk! On FIFO errors the Tx unit is turned off! */ | 2085 | /* Ackk! On FIFO errors the Tx unit is turned off! */ |
| 2019 | /* Remove this verbosity later! */ | 2086 | /* Remove this verbosity later! */ |
| 2020 | if (netif_msg_tx_err(lp)) | 2087 | if (netif_msg_tx_err(lp)) |
| 2021 | printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", | 2088 | printk(KERN_ERR |
| 2022 | dev->name, csr0); | 2089 | "%s: Tx FIFO error! CSR0=%4.4x\n", |
| 2023 | must_restart = 1; | 2090 | dev->name, csr0); |
| 2024 | } | 2091 | must_restart = 1; |
| 2092 | } | ||
| 2025 | #else | 2093 | #else |
| 2026 | if (err_status & 0x40000000) { | 2094 | if (err_status & 0x40000000) { |
| 2027 | lp->stats.tx_fifo_errors++; | 2095 | lp->stats.tx_fifo_errors++; |
| 2028 | if (! lp->dxsuflo) { /* If controller doesn't recover ... */ | 2096 | if (!lp->dxsuflo) { /* If controller doesn't recover ... */ |
| 2029 | /* Ackk! On FIFO errors the Tx unit is turned off! */ | 2097 | /* Ackk! On FIFO errors the Tx unit is turned off! */ |
| 2030 | /* Remove this verbosity later! */ | 2098 | /* Remove this verbosity later! */ |
| 2031 | if (netif_msg_tx_err(lp)) | 2099 | if (netif_msg_tx_err |
| 2032 | printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", | 2100 | (lp)) |
| 2033 | dev->name, csr0); | 2101 | printk(KERN_ERR |
| 2034 | must_restart = 1; | 2102 | "%s: Tx FIFO error! CSR0=%4.4x\n", |
| 2035 | } | 2103 | dev-> |
| 2036 | } | 2104 | name, |
| 2105 | csr0); | ||
| 2106 | must_restart = 1; | ||
| 2107 | } | ||
| 2108 | } | ||
| 2037 | #endif | 2109 | #endif |
| 2038 | } else { | 2110 | } else { |
| 2039 | if (status & 0x1800) | 2111 | if (status & 0x1800) |
| 2040 | lp->stats.collisions++; | 2112 | lp->stats.collisions++; |
| 2041 | lp->stats.tx_packets++; | 2113 | lp->stats.tx_packets++; |
| 2114 | } | ||
| 2115 | |||
| 2116 | /* We must free the original skb */ | ||
| 2117 | if (lp->tx_skbuff[entry]) { | ||
| 2118 | pci_unmap_single(lp->pci_dev, | ||
| 2119 | lp->tx_dma_addr[entry], | ||
| 2120 | lp->tx_skbuff[entry]-> | ||
| 2121 | len, PCI_DMA_TODEVICE); | ||
| 2122 | dev_kfree_skb_irq(lp->tx_skbuff[entry]); | ||
| 2123 | lp->tx_skbuff[entry] = NULL; | ||
| 2124 | lp->tx_dma_addr[entry] = 0; | ||
| 2125 | } | ||
| 2126 | dirty_tx++; | ||
| 2127 | } | ||
| 2128 | |||
| 2129 | delta = | ||
| 2130 | (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask + | ||
| 2131 | lp->tx_ring_size); | ||
| 2132 | if (delta > lp->tx_ring_size) { | ||
| 2133 | if (netif_msg_drv(lp)) | ||
| 2134 | printk(KERN_ERR | ||
| 2135 | "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", | ||
| 2136 | dev->name, dirty_tx, lp->cur_tx, | ||
| 2137 | lp->tx_full); | ||
| 2138 | dirty_tx += lp->tx_ring_size; | ||
| 2139 | delta -= lp->tx_ring_size; | ||
| 2140 | } | ||
| 2141 | |||
| 2142 | if (lp->tx_full && | ||
| 2143 | netif_queue_stopped(dev) && | ||
| 2144 | delta < lp->tx_ring_size - 2) { | ||
| 2145 | /* The ring is no longer full, clear tbusy. */ | ||
| 2146 | lp->tx_full = 0; | ||
| 2147 | netif_wake_queue(dev); | ||
| 2148 | } | ||
| 2149 | lp->dirty_tx = dirty_tx; | ||
| 2150 | } | ||
| 2151 | |||
| 2152 | /* Log misc errors. */ | ||
| 2153 | if (csr0 & 0x4000) | ||
| 2154 | lp->stats.tx_errors++; /* Tx babble. */ | ||
| 2155 | if (csr0 & 0x1000) { | ||
| 2156 | /* | ||
| 2157 | * this happens when our receive ring is full. This shouldn't | ||
| 2158 | * be a problem as we will see normal rx interrupts for the frames | ||
| 2159 | * in the receive ring. But there are some PCI chipsets (I can | ||
| 2160 | * reproduce this on SP3G with Intel saturn chipset) which have | ||
| 2161 | * sometimes problems and will fill up the receive ring with | ||
| 2162 | * error descriptors. In this situation we don't get a rx | ||
| 2163 | * interrupt, but a missed frame interrupt sooner or later. | ||
| 2164 | * So we try to clean up our receive ring here. | ||
| 2165 | */ | ||
| 2166 | pcnet32_rx(dev); | ||
| 2167 | lp->stats.rx_errors++; /* Missed a Rx frame. */ | ||
| 2168 | } | ||
| 2169 | if (csr0 & 0x0800) { | ||
| 2170 | if (netif_msg_drv(lp)) | ||
| 2171 | printk(KERN_ERR | ||
| 2172 | "%s: Bus master arbitration failure, status %4.4x.\n", | ||
| 2173 | dev->name, csr0); | ||
| 2174 | /* unlike for the lance, there is no restart needed */ | ||
| 2042 | } | 2175 | } |
| 2043 | 2176 | ||
| 2044 | /* We must free the original skb */ | 2177 | if (must_restart) { |
| 2045 | if (lp->tx_skbuff[entry]) { | 2178 | /* reset the chip to clear the error condition, then restart */ |
| 2046 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[entry], | 2179 | lp->a.reset(ioaddr); |
| 2047 | lp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); | 2180 | lp->a.write_csr(ioaddr, 4, 0x0915); |
| 2048 | dev_kfree_skb_irq(lp->tx_skbuff[entry]); | 2181 | pcnet32_restart(dev, 0x0002); |
| 2049 | lp->tx_skbuff[entry] = NULL; | 2182 | netif_wake_queue(dev); |
| 2050 | lp->tx_dma_addr[entry] = 0; | ||
| 2051 | } | 2183 | } |
| 2052 | dirty_tx++; | 2184 | } |
| 2053 | } | 2185 | |
| 2054 | 2186 | /* Set interrupt enable. */ | |
| 2055 | delta = (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask + lp->tx_ring_size); | 2187 | lp->a.write_csr(ioaddr, 0, 0x0040); |
| 2056 | if (delta > lp->tx_ring_size) { | 2188 | lp->a.write_rap(ioaddr, rap); |
| 2057 | if (netif_msg_drv(lp)) | 2189 | |
| 2058 | printk(KERN_ERR "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", | 2190 | if (netif_msg_intr(lp)) |
| 2059 | dev->name, dirty_tx, lp->cur_tx, lp->tx_full); | 2191 | printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", |
| 2060 | dirty_tx += lp->tx_ring_size; | 2192 | dev->name, lp->a.read_csr(ioaddr, 0)); |
| 2061 | delta -= lp->tx_ring_size; | 2193 | |
| 2062 | } | 2194 | spin_unlock(&lp->lock); |
| 2063 | 2195 | ||
| 2064 | if (lp->tx_full && | 2196 | return IRQ_HANDLED; |
| 2065 | netif_queue_stopped(dev) && | ||
| 2066 | delta < lp->tx_ring_size - 2) { | ||
| 2067 | /* The ring is no longer full, clear tbusy. */ | ||
| 2068 | lp->tx_full = 0; | ||
| 2069 | netif_wake_queue (dev); | ||
| 2070 | } | ||
| 2071 | lp->dirty_tx = dirty_tx; | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | /* Log misc errors. */ | ||
| 2075 | if (csr0 & 0x4000) lp->stats.tx_errors++; /* Tx babble. */ | ||
| 2076 | if (csr0 & 0x1000) { | ||
| 2077 | /* | ||
| 2078 | * this happens when our receive ring is full. This shouldn't | ||
| 2079 | * be a problem as we will see normal rx interrupts for the frames | ||
| 2080 | * in the receive ring. But there are some PCI chipsets (I can | ||
| 2081 | * reproduce this on SP3G with Intel saturn chipset) which have | ||
| 2082 | * sometimes problems and will fill up the receive ring with | ||
| 2083 | * error descriptors. In this situation we don't get a rx | ||
| 2084 | * interrupt, but a missed frame interrupt sooner or later. | ||
| 2085 | * So we try to clean up our receive ring here. | ||
| 2086 | */ | ||
| 2087 | pcnet32_rx(dev); | ||
| 2088 | lp->stats.rx_errors++; /* Missed a Rx frame. */ | ||
| 2089 | } | ||
| 2090 | if (csr0 & 0x0800) { | ||
| 2091 | if (netif_msg_drv(lp)) | ||
| 2092 | printk(KERN_ERR "%s: Bus master arbitration failure, status %4.4x.\n", | ||
| 2093 | dev->name, csr0); | ||
| 2094 | /* unlike for the lance, there is no restart needed */ | ||
| 2095 | } | ||
| 2096 | |||
| 2097 | if (must_restart) { | ||
| 2098 | /* reset the chip to clear the error condition, then restart */ | ||
| 2099 | lp->a.reset(ioaddr); | ||
| 2100 | lp->a.write_csr(ioaddr, 4, 0x0915); | ||
| 2101 | pcnet32_restart(dev, 0x0002); | ||
| 2102 | netif_wake_queue(dev); | ||
| 2103 | } | ||
| 2104 | } | ||
| 2105 | |||
| 2106 | /* Set interrupt enable. */ | ||
| 2107 | lp->a.write_csr (ioaddr, 0, 0x0040); | ||
| 2108 | lp->a.write_rap (ioaddr,rap); | ||
| 2109 | |||
| 2110 | if (netif_msg_intr(lp)) | ||
| 2111 | printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", | ||
| 2112 | dev->name, lp->a.read_csr (ioaddr, 0)); | ||
| 2113 | |||
| 2114 | spin_unlock(&lp->lock); | ||
| 2115 | |||
| 2116 | return IRQ_HANDLED; | ||
| 2117 | } | 2197 | } |
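The Tx-completion path in the handler above works with free-running cur_tx/dirty_tx counters and masks their difference with tx_mod_mask + tx_ring_size, i.e. twice a power-of-two ring size minus one; that is what lets it spot the "out-of-sync dirty pointer" case it logs. A small stand-alone sketch of that arithmetic, assuming a power-of-two ring; the names are simplified stand-ins for the driver's fields:

    #include <stdio.h>

    #define RING_SIZE 16u                  /* must be a power of two */
    #define MOD_MASK  (RING_SIZE - 1)      /* plays the role of lp->tx_mod_mask */

    /*
     * cur and dirty are free-running counters.  Masking their difference
     * into [0, 2*RING_SIZE-1] can represent up to one full ring of
     * outstanding work, so any delta larger than RING_SIZE means the
     * cleanup pointer has fallen out of step with the submit pointer.
     */
    static unsigned int tx_delta(unsigned int cur, unsigned int dirty,
                                 int *out_of_sync)
    {
        unsigned int delta = (cur - dirty) & (MOD_MASK + RING_SIZE);

        *out_of_sync = delta > RING_SIZE;
        return delta;
    }

    int main(void)
    {
        int bad;
        unsigned int d = tx_delta(40, 30, &bad);   /* 10 in flight: fine */

        printf("delta=%u out_of_sync=%d\n", d, bad);
        d = tx_delta(30, 40, &bad);                /* dirty ran past cur: 22 > 16 */
        printf("delta=%u out_of_sync=%d\n", d, bad);
        return 0;
    }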
| 2118 | 2198 | ||
| 2119 | static int | 2199 | static int pcnet32_rx(struct net_device *dev) |
| 2120 | pcnet32_rx(struct net_device *dev) | ||
| 2121 | { | 2200 | { |
| 2122 | struct pcnet32_private *lp = dev->priv; | 2201 | struct pcnet32_private *lp = dev->priv; |
| 2123 | int entry = lp->cur_rx & lp->rx_mod_mask; | 2202 | int entry = lp->cur_rx & lp->rx_mod_mask; |
| 2124 | int boguscnt = lp->rx_ring_size / 2; | 2203 | int boguscnt = lp->rx_ring_size / 2; |
| 2125 | 2204 | ||
| 2126 | /* If we own the next entry, it's a new packet. Send it up. */ | 2205 | /* If we own the next entry, it's a new packet. Send it up. */ |
| 2127 | while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { | 2206 | while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { |
| 2128 | int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; | 2207 | int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; |
| 2129 | 2208 | ||
| 2130 | if (status != 0x03) { /* There was an error. */ | 2209 | if (status != 0x03) { /* There was an error. */ |
| 2131 | /* | 2210 | /* |
| 2132 | * There is a tricky error noted by John Murphy, | 2211 | * There is a tricky error noted by John Murphy, |
| 2133 | * <murf@perftech.com> to Russ Nelson: Even with full-sized | 2212 | * <murf@perftech.com> to Russ Nelson: Even with full-sized |
| 2134 | * buffers it's possible for a jabber packet to use two | 2213 | * buffers it's possible for a jabber packet to use two |
| 2135 | * buffers, with only the last correctly noting the error. | 2214 | * buffers, with only the last correctly noting the error. |
| 2136 | */ | 2215 | */ |
| 2137 | if (status & 0x01) /* Only count a general error at the */ | 2216 | if (status & 0x01) /* Only count a general error at the */ |
| 2138 | lp->stats.rx_errors++; /* end of a packet.*/ | 2217 | lp->stats.rx_errors++; /* end of a packet. */ |
| 2139 | if (status & 0x20) lp->stats.rx_frame_errors++; | 2218 | if (status & 0x20) |
| 2140 | if (status & 0x10) lp->stats.rx_over_errors++; | 2219 | lp->stats.rx_frame_errors++; |
| 2141 | if (status & 0x08) lp->stats.rx_crc_errors++; | 2220 | if (status & 0x10) |
| 2142 | if (status & 0x04) lp->stats.rx_fifo_errors++; | 2221 | lp->stats.rx_over_errors++; |
| 2143 | lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); | 2222 | if (status & 0x08) |
| 2144 | } else { | 2223 | lp->stats.rx_crc_errors++; |
| 2145 | /* Malloc up new buffer, compatible with net-2e. */ | 2224 | if (status & 0x04) |
| 2146 | short pkt_len = (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)-4; | 2225 | lp->stats.rx_fifo_errors++; |
| 2147 | struct sk_buff *skb; | 2226 | lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); |
| 2148 | |||
| 2149 | /* Discard oversize frames. */ | ||
| 2150 | if (unlikely(pkt_len > PKT_BUF_SZ - 2)) { | ||
| 2151 | if (netif_msg_drv(lp)) | ||
| 2152 | printk(KERN_ERR "%s: Impossible packet size %d!\n", | ||
| 2153 | dev->name, pkt_len); | ||
| 2154 | lp->stats.rx_errors++; | ||
| 2155 | } else if (pkt_len < 60) { | ||
| 2156 | if (netif_msg_rx_err(lp)) | ||
| 2157 | printk(KERN_ERR "%s: Runt packet!\n", dev->name); | ||
| 2158 | lp->stats.rx_errors++; | ||
| 2159 | } else { | ||
| 2160 | int rx_in_place = 0; | ||
| 2161 | |||
| 2162 | if (pkt_len > rx_copybreak) { | ||
| 2163 | struct sk_buff *newskb; | ||
| 2164 | |||
| 2165 | if ((newskb = dev_alloc_skb(PKT_BUF_SZ))) { | ||
| 2166 | skb_reserve (newskb, 2); | ||
| 2167 | skb = lp->rx_skbuff[entry]; | ||
| 2168 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry], | ||
| 2169 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | ||
| 2170 | skb_put (skb, pkt_len); | ||
| 2171 | lp->rx_skbuff[entry] = newskb; | ||
| 2172 | newskb->dev = dev; | ||
| 2173 | lp->rx_dma_addr[entry] = | ||
| 2174 | pci_map_single(lp->pci_dev, newskb->data, | ||
| 2175 | PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); | ||
| 2176 | lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]); | ||
| 2177 | rx_in_place = 1; | ||
| 2178 | } else | ||
| 2179 | skb = NULL; | ||
| 2180 | } else { | 2227 | } else { |
| 2181 | skb = dev_alloc_skb(pkt_len+2); | 2228 | /* Malloc up new buffer, compatible with net-2e. */ |
| 2182 | } | 2229 | short pkt_len = |
| 2183 | 2230 | (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff) | |
| 2184 | if (skb == NULL) { | 2231 | - 4; |
| 2185 | int i; | 2232 | struct sk_buff *skb; |
| 2186 | if (netif_msg_drv(lp)) | 2233 | |
| 2187 | printk(KERN_ERR "%s: Memory squeeze, deferring packet.\n", | 2234 | /* Discard oversize frames. */ |
| 2188 | dev->name); | 2235 | if (unlikely(pkt_len > PKT_BUF_SZ - 2)) { |
| 2189 | for (i = 0; i < lp->rx_ring_size; i++) | 2236 | if (netif_msg_drv(lp)) |
| 2190 | if ((short)le16_to_cpu(lp->rx_ring[(entry+i) | 2237 | printk(KERN_ERR |
| 2191 | & lp->rx_mod_mask].status) < 0) | 2238 | "%s: Impossible packet size %d!\n", |
| 2192 | break; | 2239 | dev->name, pkt_len); |
| 2193 | 2240 | lp->stats.rx_errors++; | |
| 2194 | if (i > lp->rx_ring_size -2) { | 2241 | } else if (pkt_len < 60) { |
| 2195 | lp->stats.rx_dropped++; | 2242 | if (netif_msg_rx_err(lp)) |
| 2196 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | 2243 | printk(KERN_ERR "%s: Runt packet!\n", |
| 2197 | wmb(); /* Make sure adapter sees owner change */ | 2244 | dev->name); |
| 2198 | lp->cur_rx++; | 2245 | lp->stats.rx_errors++; |
| 2199 | } | 2246 | } else { |
| 2200 | break; | 2247 | int rx_in_place = 0; |
| 2201 | } | 2248 | |
| 2202 | skb->dev = dev; | 2249 | if (pkt_len > rx_copybreak) { |
| 2203 | if (!rx_in_place) { | 2250 | struct sk_buff *newskb; |
| 2204 | skb_reserve(skb,2); /* 16 byte align */ | 2251 | |
| 2205 | skb_put(skb,pkt_len); /* Make room */ | 2252 | if ((newskb = |
| 2206 | pci_dma_sync_single_for_cpu(lp->pci_dev, | 2253 | dev_alloc_skb(PKT_BUF_SZ))) { |
| 2207 | lp->rx_dma_addr[entry], | 2254 | skb_reserve(newskb, 2); |
| 2208 | PKT_BUF_SZ-2, | 2255 | skb = lp->rx_skbuff[entry]; |
| 2209 | PCI_DMA_FROMDEVICE); | 2256 | pci_unmap_single(lp->pci_dev, |
| 2210 | eth_copy_and_sum(skb, | 2257 | lp-> |
| 2211 | (unsigned char *)(lp->rx_skbuff[entry]->data), | 2258 | rx_dma_addr |
| 2212 | pkt_len,0); | 2259 | [entry], |
| 2213 | pci_dma_sync_single_for_device(lp->pci_dev, | 2260 | PKT_BUF_SZ - 2, |
| 2214 | lp->rx_dma_addr[entry], | 2261 | PCI_DMA_FROMDEVICE); |
| 2215 | PKT_BUF_SZ-2, | 2262 | skb_put(skb, pkt_len); |
| 2216 | PCI_DMA_FROMDEVICE); | 2263 | lp->rx_skbuff[entry] = newskb; |
| 2264 | newskb->dev = dev; | ||
| 2265 | lp->rx_dma_addr[entry] = | ||
| 2266 | pci_map_single(lp->pci_dev, | ||
| 2267 | newskb->data, | ||
| 2268 | PKT_BUF_SZ - | ||
| 2269 | 2, | ||
| 2270 | PCI_DMA_FROMDEVICE); | ||
| 2271 | lp->rx_ring[entry].base = | ||
| 2272 | le32_to_cpu(lp-> | ||
| 2273 | rx_dma_addr | ||
| 2274 | [entry]); | ||
| 2275 | rx_in_place = 1; | ||
| 2276 | } else | ||
| 2277 | skb = NULL; | ||
| 2278 | } else { | ||
| 2279 | skb = dev_alloc_skb(pkt_len + 2); | ||
| 2280 | } | ||
| 2281 | |||
| 2282 | if (skb == NULL) { | ||
| 2283 | int i; | ||
| 2284 | if (netif_msg_drv(lp)) | ||
| 2285 | printk(KERN_ERR | ||
| 2286 | "%s: Memory squeeze, deferring packet.\n", | ||
| 2287 | dev->name); | ||
| 2288 | for (i = 0; i < lp->rx_ring_size; i++) | ||
| 2289 | if ((short) | ||
| 2290 | le16_to_cpu(lp-> | ||
| 2291 | rx_ring[(entry + | ||
| 2292 | i) | ||
| 2293 | & lp-> | ||
| 2294 | rx_mod_mask]. | ||
| 2295 | status) < 0) | ||
| 2296 | break; | ||
| 2297 | |||
| 2298 | if (i > lp->rx_ring_size - 2) { | ||
| 2299 | lp->stats.rx_dropped++; | ||
| 2300 | lp->rx_ring[entry].status |= | ||
| 2301 | le16_to_cpu(0x8000); | ||
| 2302 | wmb(); /* Make sure adapter sees owner change */ | ||
| 2303 | lp->cur_rx++; | ||
| 2304 | } | ||
| 2305 | break; | ||
| 2306 | } | ||
| 2307 | skb->dev = dev; | ||
| 2308 | if (!rx_in_place) { | ||
| 2309 | skb_reserve(skb, 2); /* 16 byte align */ | ||
| 2310 | skb_put(skb, pkt_len); /* Make room */ | ||
| 2311 | pci_dma_sync_single_for_cpu(lp->pci_dev, | ||
| 2312 | lp-> | ||
| 2313 | rx_dma_addr | ||
| 2314 | [entry], | ||
| 2315 | PKT_BUF_SZ - | ||
| 2316 | 2, | ||
| 2317 | PCI_DMA_FROMDEVICE); | ||
| 2318 | eth_copy_and_sum(skb, | ||
| 2319 | (unsigned char *)(lp-> | ||
| 2320 | rx_skbuff | ||
| 2321 | [entry]-> | ||
| 2322 | data), | ||
| 2323 | pkt_len, 0); | ||
| 2324 | pci_dma_sync_single_for_device(lp-> | ||
| 2325 | pci_dev, | ||
| 2326 | lp-> | ||
| 2327 | rx_dma_addr | ||
| 2328 | [entry], | ||
| 2329 | PKT_BUF_SZ | ||
| 2330 | - 2, | ||
| 2331 | PCI_DMA_FROMDEVICE); | ||
| 2332 | } | ||
| 2333 | lp->stats.rx_bytes += skb->len; | ||
| 2334 | skb->protocol = eth_type_trans(skb, dev); | ||
| 2335 | netif_rx(skb); | ||
| 2336 | dev->last_rx = jiffies; | ||
| 2337 | lp->stats.rx_packets++; | ||
| 2338 | } | ||
| 2217 | } | 2339 | } |
| 2218 | lp->stats.rx_bytes += skb->len; | 2340 | /* |
| 2219 | skb->protocol=eth_type_trans(skb,dev); | 2341 | * The docs say that the buffer length isn't touched, but Andrew Boyd |
| 2220 | netif_rx(skb); | 2342 | * of QNX reports that some revs of the 79C965 clear it. |
| 2221 | dev->last_rx = jiffies; | 2343 | */ |
| 2222 | lp->stats.rx_packets++; | 2344 | lp->rx_ring[entry].buf_length = le16_to_cpu(2 - PKT_BUF_SZ); |
| 2223 | } | 2345 | wmb(); /* Make sure owner changes after all others are visible */ |
| 2346 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | ||
| 2347 | entry = (++lp->cur_rx) & lp->rx_mod_mask; | ||
| 2348 | if (--boguscnt <= 0) | ||
| 2349 | break; /* don't stay in loop forever */ | ||
| 2224 | } | 2350 | } |
| 2225 | /* | 2351 | |
| 2226 | * The docs say that the buffer length isn't touched, but Andrew Boyd | 2352 | return 0; |
| 2227 | * of QNX reports that some revs of the 79C965 clear it. | ||
| 2228 | */ | ||
| 2229 | lp->rx_ring[entry].buf_length = le16_to_cpu(2-PKT_BUF_SZ); | ||
| 2230 | wmb(); /* Make sure owner changes after all others are visible */ | ||
| 2231 | lp->rx_ring[entry].status |= le16_to_cpu(0x8000); | ||
| 2232 | entry = (++lp->cur_rx) & lp->rx_mod_mask; | ||
| 2233 | if (--boguscnt <= 0) break; /* don't stay in loop forever */ | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | return 0; | ||
| 2237 | } | 2353 | } |
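The receive path above implements the usual rx_copybreak trade-off: frames at or below the threshold are copied into a tightly sized buffer so the original full-size ring buffer stays mapped in place, while larger frames are handed up whole and a freshly allocated buffer takes their slot. A minimal user-space sketch of that decision, with plain malloc/memcpy standing in for the skb and DMA handling:

    #include <stdlib.h>
    #include <string.h>

    static int rx_copybreak = 200;   /* copy frames up to this many bytes */

    /*
     * Returns a buffer holding the received frame (owned by the caller),
     * or NULL if the frame had to be dropped.  *ring_buf always remains
     * a valid, full-sized buffer for the ring slot.
     */
    void *receive_frame(void **ring_buf, size_t buf_size, size_t frame_len)
    {
        if (frame_len <= (size_t)rx_copybreak) {
            /* Copy path: small frame, the ring buffer stays in place. */
            void *copy = malloc(frame_len);

            if (copy)
                memcpy(copy, *ring_buf, frame_len);
            return copy;
        }

        /* Flip path: allocate a replacement, hand the full buffer up. */
        void *newbuf = malloc(buf_size);

        if (!newbuf)
            return NULL;    /* drop the frame, keep the old buffer */
        void *frame = *ring_buf;
        *ring_buf = newbuf;
        return frame;
    }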
| 2238 | 2354 | ||
| 2239 | static int | 2355 | static int pcnet32_close(struct net_device *dev) |
| 2240 | pcnet32_close(struct net_device *dev) | ||
| 2241 | { | 2356 | { |
| 2242 | unsigned long ioaddr = dev->base_addr; | 2357 | unsigned long ioaddr = dev->base_addr; |
| 2243 | struct pcnet32_private *lp = dev->priv; | 2358 | struct pcnet32_private *lp = dev->priv; |
| 2244 | int i; | 2359 | int i; |
| 2245 | unsigned long flags; | 2360 | unsigned long flags; |
| 2246 | 2361 | ||
| 2247 | del_timer_sync(&lp->watchdog_timer); | 2362 | del_timer_sync(&lp->watchdog_timer); |
| 2248 | 2363 | ||
| 2249 | netif_stop_queue(dev); | 2364 | netif_stop_queue(dev); |
| 2250 | 2365 | ||
| 2251 | spin_lock_irqsave(&lp->lock, flags); | 2366 | spin_lock_irqsave(&lp->lock, flags); |
| 2252 | 2367 | ||
| 2253 | lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); | 2368 | lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); |
| 2254 | 2369 | ||
| 2255 | if (netif_msg_ifdown(lp)) | 2370 | if (netif_msg_ifdown(lp)) |
| 2256 | printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", | 2371 | printk(KERN_DEBUG |
| 2257 | dev->name, lp->a.read_csr (ioaddr, 0)); | 2372 | "%s: Shutting down ethercard, status was %2.2x.\n", |
| 2373 | dev->name, lp->a.read_csr(ioaddr, 0)); | ||
| 2258 | 2374 | ||
| 2259 | /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */ | 2375 | /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */ |
| 2260 | lp->a.write_csr (ioaddr, 0, 0x0004); | 2376 | lp->a.write_csr(ioaddr, 0, 0x0004); |
| 2261 | 2377 | ||
| 2262 | /* | 2378 | /* |
| 2263 | * Switch back to 16bit mode to avoid problems with dumb | 2379 | * Switch back to 16bit mode to avoid problems with dumb |
| 2264 | * DOS packet driver after a warm reboot | 2380 | * DOS packet driver after a warm reboot |
| 2265 | */ | 2381 | */ |
| 2266 | lp->a.write_bcr (ioaddr, 20, 4); | 2382 | lp->a.write_bcr(ioaddr, 20, 4); |
| 2267 | 2383 | ||
| 2268 | spin_unlock_irqrestore(&lp->lock, flags); | 2384 | spin_unlock_irqrestore(&lp->lock, flags); |
| 2269 | 2385 | ||
| 2270 | free_irq(dev->irq, dev); | 2386 | free_irq(dev->irq, dev); |
| 2271 | 2387 | ||
| 2272 | spin_lock_irqsave(&lp->lock, flags); | 2388 | spin_lock_irqsave(&lp->lock, flags); |
| 2273 | 2389 | ||
| 2274 | /* free all allocated skbuffs */ | 2390 | /* free all allocated skbuffs */ |
| 2275 | for (i = 0; i < lp->rx_ring_size; i++) { | 2391 | for (i = 0; i < lp->rx_ring_size; i++) { |
| 2276 | lp->rx_ring[i].status = 0; | 2392 | lp->rx_ring[i].status = 0; |
| 2277 | wmb(); /* Make sure adapter sees owner change */ | 2393 | wmb(); /* Make sure adapter sees owner change */ |
| 2278 | if (lp->rx_skbuff[i]) { | 2394 | if (lp->rx_skbuff[i]) { |
| 2279 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, | 2395 | pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], |
| 2280 | PCI_DMA_FROMDEVICE); | 2396 | PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE); |
| 2281 | dev_kfree_skb(lp->rx_skbuff[i]); | 2397 | dev_kfree_skb(lp->rx_skbuff[i]); |
| 2398 | } | ||
| 2399 | lp->rx_skbuff[i] = NULL; | ||
| 2400 | lp->rx_dma_addr[i] = 0; | ||
| 2282 | } | 2401 | } |
| 2283 | lp->rx_skbuff[i] = NULL; | ||
| 2284 | lp->rx_dma_addr[i] = 0; | ||
| 2285 | } | ||
| 2286 | 2402 | ||
| 2287 | for (i = 0; i < lp->tx_ring_size; i++) { | 2403 | for (i = 0; i < lp->tx_ring_size; i++) { |
| 2288 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ | 2404 | lp->tx_ring[i].status = 0; /* CPU owns buffer */ |
| 2289 | wmb(); /* Make sure adapter sees owner change */ | 2405 | wmb(); /* Make sure adapter sees owner change */ |
| 2290 | if (lp->tx_skbuff[i]) { | 2406 | if (lp->tx_skbuff[i]) { |
| 2291 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], | 2407 | pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], |
| 2292 | lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); | 2408 | lp->tx_skbuff[i]->len, |
| 2293 | dev_kfree_skb(lp->tx_skbuff[i]); | 2409 | PCI_DMA_TODEVICE); |
| 2410 | dev_kfree_skb(lp->tx_skbuff[i]); | ||
| 2411 | } | ||
| 2412 | lp->tx_skbuff[i] = NULL; | ||
| 2413 | lp->tx_dma_addr[i] = 0; | ||
| 2294 | } | 2414 | } |
| 2295 | lp->tx_skbuff[i] = NULL; | ||
| 2296 | lp->tx_dma_addr[i] = 0; | ||
| 2297 | } | ||
| 2298 | 2415 | ||
| 2299 | spin_unlock_irqrestore(&lp->lock, flags); | 2416 | spin_unlock_irqrestore(&lp->lock, flags); |
| 2300 | 2417 | ||
| 2301 | return 0; | 2418 | return 0; |
| 2302 | } | 2419 | } |
| 2303 | 2420 | ||
| 2304 | static struct net_device_stats * | 2421 | static struct net_device_stats *pcnet32_get_stats(struct net_device *dev) |
| 2305 | pcnet32_get_stats(struct net_device *dev) | ||
| 2306 | { | 2422 | { |
| 2307 | struct pcnet32_private *lp = dev->priv; | 2423 | struct pcnet32_private *lp = dev->priv; |
| 2308 | unsigned long ioaddr = dev->base_addr; | 2424 | unsigned long ioaddr = dev->base_addr; |
| 2309 | u16 saved_addr; | 2425 | u16 saved_addr; |
| 2310 | unsigned long flags; | 2426 | unsigned long flags; |
| 2311 | 2427 | ||
| 2312 | spin_lock_irqsave(&lp->lock, flags); | 2428 | spin_lock_irqsave(&lp->lock, flags); |
| 2313 | saved_addr = lp->a.read_rap(ioaddr); | 2429 | saved_addr = lp->a.read_rap(ioaddr); |
| 2314 | lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); | 2430 | lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); |
| 2315 | lp->a.write_rap(ioaddr, saved_addr); | 2431 | lp->a.write_rap(ioaddr, saved_addr); |
| 2316 | spin_unlock_irqrestore(&lp->lock, flags); | 2432 | spin_unlock_irqrestore(&lp->lock, flags); |
| 2317 | 2433 | ||
| 2318 | return &lp->stats; | 2434 | return &lp->stats; |
| 2319 | } | 2435 | } |
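pcnet32_get_stats() above reads the missed-frame counter (CSR112) through the chip's indirect register interface and brackets the access with read_rap()/write_rap() so the caller's register selection is put back afterwards. A toy model of that two-port, select-then-access pattern; the struct and helper names here are invented for illustration only:

    #include <stdint.h>
    #include <stdio.h>

    /* Indirect register file: one address port (RAP), one data port. */
    struct regfile {
        uint16_t rap;        /* currently selected register */
        uint16_t csr[128];   /* register contents */
    };

    static uint16_t read_csr(struct regfile *r, uint16_t index)
    {
        r->rap = index;          /* select the register ... */
        return r->csr[r->rap];   /* ... then read the data port */
    }

    static uint16_t read_csr_preserving_rap(struct regfile *r, uint16_t index)
    {
        uint16_t saved = r->rap;          /* remember the caller's selection */
        uint16_t val = read_csr(r, index);

        r->rap = saved;                   /* restore it before returning */
        return val;
    }

    int main(void)
    {
        struct regfile r = { .rap = 0 };

        r.csr[112] = 7;   /* pretend 7 frames were missed */
        printf("missed=%d rap=%d\n", read_csr_preserving_rap(&r, 112), r.rap);
        return 0;
    }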
| 2320 | 2436 | ||
| 2321 | /* taken from the sunlance driver, which it took from the depca driver */ | 2437 | /* taken from the sunlance driver, which it took from the depca driver */ |
| 2322 | static void pcnet32_load_multicast (struct net_device *dev) | 2438 | static void pcnet32_load_multicast(struct net_device *dev) |
| 2323 | { | 2439 | { |
| 2324 | struct pcnet32_private *lp = dev->priv; | 2440 | struct pcnet32_private *lp = dev->priv; |
| 2325 | volatile struct pcnet32_init_block *ib = &lp->init_block; | 2441 | volatile struct pcnet32_init_block *ib = &lp->init_block; |
| 2326 | volatile u16 *mcast_table = (u16 *)&ib->filter; | 2442 | volatile u16 *mcast_table = (u16 *) & ib->filter; |
| 2327 | struct dev_mc_list *dmi=dev->mc_list; | 2443 | struct dev_mc_list *dmi = dev->mc_list; |
| 2328 | char *addrs; | 2444 | char *addrs; |
| 2329 | int i; | 2445 | int i; |
| 2330 | u32 crc; | 2446 | u32 crc; |
| 2331 | 2447 | ||
| 2332 | /* set all multicast bits */ | 2448 | /* set all multicast bits */ |
| 2333 | if (dev->flags & IFF_ALLMULTI) { | 2449 | if (dev->flags & IFF_ALLMULTI) { |
| 2334 | ib->filter[0] = 0xffffffff; | 2450 | ib->filter[0] = 0xffffffff; |
| 2335 | ib->filter[1] = 0xffffffff; | 2451 | ib->filter[1] = 0xffffffff; |
| 2452 | return; | ||
| 2453 | } | ||
| 2454 | /* clear the multicast filter */ | ||
| 2455 | ib->filter[0] = 0; | ||
| 2456 | ib->filter[1] = 0; | ||
| 2457 | |||
| 2458 | /* Add addresses */ | ||
| 2459 | for (i = 0; i < dev->mc_count; i++) { | ||
| 2460 | addrs = dmi->dmi_addr; | ||
| 2461 | dmi = dmi->next; | ||
| 2462 | |||
| 2463 | /* multicast address? */ | ||
| 2464 | if (!(*addrs & 1)) | ||
| 2465 | continue; | ||
| 2466 | |||
| 2467 | crc = ether_crc_le(6, addrs); | ||
| 2468 | crc = crc >> 26; | ||
| 2469 | mcast_table[crc >> 4] = | ||
| 2470 | le16_to_cpu(le16_to_cpu(mcast_table[crc >> 4]) | | ||
| 2471 | (1 << (crc & 0xf))); | ||
| 2472 | } | ||
| 2336 | return; | 2473 | return; |
| 2337 | } | ||
| 2338 | /* clear the multicast filter */ | ||
| 2339 | ib->filter[0] = 0; | ||
| 2340 | ib->filter[1] = 0; | ||
| 2341 | |||
| 2342 | /* Add addresses */ | ||
| 2343 | for (i = 0; i < dev->mc_count; i++) { | ||
| 2344 | addrs = dmi->dmi_addr; | ||
| 2345 | dmi = dmi->next; | ||
| 2346 | |||
| 2347 | /* multicast address? */ | ||
| 2348 | if (!(*addrs & 1)) | ||
| 2349 | continue; | ||
| 2350 | |||
| 2351 | crc = ether_crc_le(6, addrs); | ||
| 2352 | crc = crc >> 26; | ||
| 2353 | mcast_table [crc >> 4] = le16_to_cpu( | ||
| 2354 | le16_to_cpu(mcast_table [crc >> 4]) | (1 << (crc & 0xf))); | ||
| 2355 | } | ||
| 2356 | return; | ||
| 2357 | } | 2474 | } |
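The filter load above hashes each multicast address with the little-endian Ethernet CRC and uses the top six bits of the result to pick one of the 64 bits in the logical-address filter, which the init block stores as four 16-bit words. A user-space sketch of the same computation; crc32_le_bitwise() is a local stand-in for the kernel's ether_crc_le(), the rest mirrors the hunk:

    #include <stdint.h>
    #include <stdio.h>

    /* Bitwise little-endian Ethernet CRC (reflected polynomial). */
    static uint32_t crc32_le_bitwise(int len, const unsigned char *data)
    {
        uint32_t crc = 0xffffffff;

        while (--len >= 0) {
            unsigned char octet = *data++;
            int bit;

            for (bit = 8; --bit >= 0; octet >>= 1) {
                if ((crc ^ octet) & 1)
                    crc = (crc >> 1) ^ 0xedb88320;
                else
                    crc >>= 1;
            }
        }
        return crc;
    }

    int main(void)
    {
        const unsigned char mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
        uint16_t filter[4] = { 0, 0, 0, 0 };  /* 64-bit logical address filter */
        unsigned int hash = crc32_le_bitwise(6, mcast) >> 26;  /* top 6 bits */

        /* hash selects one of 64 bits: word = hash >> 4, bit = hash & 0xf */
        filter[hash >> 4] |= 1u << (hash & 0xf);

        printf("hash=%u -> filter word %u, bit %u\n",
               hash, hash >> 4, hash & 0xf);
        return 0;
    }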
| 2358 | 2475 | ||
| 2359 | |||
| 2360 | /* | 2476 | /* |
| 2361 | * Set or clear the multicast filter for this adaptor. | 2477 | * Set or clear the multicast filter for this adaptor. |
| 2362 | */ | 2478 | */ |
| 2363 | static void pcnet32_set_multicast_list(struct net_device *dev) | 2479 | static void pcnet32_set_multicast_list(struct net_device *dev) |
| 2364 | { | 2480 | { |
| 2365 | unsigned long ioaddr = dev->base_addr, flags; | 2481 | unsigned long ioaddr = dev->base_addr, flags; |
| 2366 | struct pcnet32_private *lp = dev->priv; | 2482 | struct pcnet32_private *lp = dev->priv; |
| 2367 | 2483 | ||
| 2368 | spin_lock_irqsave(&lp->lock, flags); | 2484 | spin_lock_irqsave(&lp->lock, flags); |
| 2369 | if (dev->flags&IFF_PROMISC) { | 2485 | if (dev->flags & IFF_PROMISC) { |
| 2370 | /* Log any net taps. */ | 2486 | /* Log any net taps. */ |
| 2371 | if (netif_msg_hw(lp)) | 2487 | if (netif_msg_hw(lp)) |
| 2372 | printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name); | 2488 | printk(KERN_INFO "%s: Promiscuous mode enabled.\n", |
| 2373 | lp->init_block.mode = le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << 7); | 2489 | dev->name); |
| 2374 | } else { | 2490 | lp->init_block.mode = |
| 2375 | lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); | 2491 | le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << |
| 2376 | pcnet32_load_multicast (dev); | 2492 | 7); |
| 2377 | } | 2493 | } else { |
| 2378 | 2494 | lp->init_block.mode = | |
| 2379 | lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ | 2495 | le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); |
| 2380 | pcnet32_restart(dev, 0x0042); /* Resume normal operation */ | 2496 | pcnet32_load_multicast(dev); |
| 2381 | netif_wake_queue(dev); | 2497 | } |
| 2382 | 2498 | ||
| 2383 | spin_unlock_irqrestore(&lp->lock, flags); | 2499 | lp->a.write_csr(ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ |
| 2500 | pcnet32_restart(dev, 0x0042); /* Resume normal operation */ | ||
| 2501 | netif_wake_queue(dev); | ||
| 2502 | |||
| 2503 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 2384 | } | 2504 | } |
| 2385 | 2505 | ||
| 2386 | /* This routine assumes that the lp->lock is held */ | 2506 | /* This routine assumes that the lp->lock is held */ |
| 2387 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num) | 2507 | static int mdio_read(struct net_device *dev, int phy_id, int reg_num) |
| 2388 | { | 2508 | { |
| 2389 | struct pcnet32_private *lp = dev->priv; | 2509 | struct pcnet32_private *lp = dev->priv; |
| 2390 | unsigned long ioaddr = dev->base_addr; | 2510 | unsigned long ioaddr = dev->base_addr; |
| 2391 | u16 val_out; | 2511 | u16 val_out; |
| 2392 | 2512 | ||
| 2393 | if (!lp->mii) | 2513 | if (!lp->mii) |
| 2394 | return 0; | 2514 | return 0; |
| 2395 | 2515 | ||
| 2396 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); | 2516 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); |
| 2397 | val_out = lp->a.read_bcr(ioaddr, 34); | 2517 | val_out = lp->a.read_bcr(ioaddr, 34); |
| 2398 | 2518 | ||
| 2399 | return val_out; | 2519 | return val_out; |
| 2400 | } | 2520 | } |
| 2401 | 2521 | ||
| 2402 | /* This routine assumes that the lp->lock is held */ | 2522 | /* This routine assumes that the lp->lock is held */ |
| 2403 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val) | 2523 | static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val) |
| 2404 | { | 2524 | { |
| 2405 | struct pcnet32_private *lp = dev->priv; | 2525 | struct pcnet32_private *lp = dev->priv; |
| 2406 | unsigned long ioaddr = dev->base_addr; | 2526 | unsigned long ioaddr = dev->base_addr; |
| 2407 | 2527 | ||
| 2408 | if (!lp->mii) | 2528 | if (!lp->mii) |
| 2409 | return; | 2529 | return; |
| 2410 | 2530 | ||
| 2411 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); | 2531 | lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); |
| 2412 | lp->a.write_bcr(ioaddr, 34, val); | 2532 | lp->a.write_bcr(ioaddr, 34, val); |
| 2413 | } | 2533 | } |
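mdio_read() and mdio_write() above reach the PHY through an indirect pair of bus configuration registers: BCR33 selects the PHY address (bits 9:5) and MII register number (bits 4:0), and BCR34 carries the data. A tiny sketch of the address packing; the helper name is invented here:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack a PHY address and MII register number the way BCR33 expects. */
    static uint16_t pack_bcr33(unsigned int phy_id, unsigned int reg_num)
    {
        return (uint16_t)(((phy_id & 0x1f) << 5) | (reg_num & 0x1f));
    }

    int main(void)
    {
        /* PHY 1, register 0 (the MII control register) -> 0x0020 */
        printf("BCR33 = 0x%04x\n", (unsigned)pack_bcr33(1, 0));
        return 0;
    }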
| 2414 | 2534 | ||
| 2415 | static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | 2535 | static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) |
| 2416 | { | 2536 | { |
| 2417 | struct pcnet32_private *lp = dev->priv; | 2537 | struct pcnet32_private *lp = dev->priv; |
| 2418 | int rc; | 2538 | int rc; |
| 2419 | unsigned long flags; | 2539 | unsigned long flags; |
| 2540 | |||
| 2541 | /* SIOC[GS]MIIxxx ioctls */ | ||
| 2542 | if (lp->mii) { | ||
| 2543 | spin_lock_irqsave(&lp->lock, flags); | ||
| 2544 | rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL); | ||
| 2545 | spin_unlock_irqrestore(&lp->lock, flags); | ||
| 2546 | } else { | ||
| 2547 | rc = -EOPNOTSUPP; | ||
| 2548 | } | ||
| 2549 | |||
| 2550 | return rc; | ||
| 2551 | } | ||
| 2552 | |||
| 2553 | static int pcnet32_check_otherphy(struct net_device *dev) | ||
| 2554 | { | ||
| 2555 | struct pcnet32_private *lp = dev->priv; | ||
| 2556 | struct mii_if_info mii = lp->mii_if; | ||
| 2557 | u16 bmcr; | ||
| 2558 | int i; | ||
| 2420 | 2559 | ||
| 2421 | /* SIOC[GS]MIIxxx ioctls */ | 2560 | for (i = 0; i < PCNET32_MAX_PHYS; i++) { |
| 2422 | if (lp->mii) { | 2561 | if (i == lp->mii_if.phy_id) |
| 2423 | spin_lock_irqsave(&lp->lock, flags); | 2562 | continue; /* skip active phy */ |
| 2424 | rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL); | 2563 | if (lp->phymask & (1 << i)) { |
| 2425 | spin_unlock_irqrestore(&lp->lock, flags); | 2564 | mii.phy_id = i; |
| 2426 | } else { | 2565 | if (mii_link_ok(&mii)) { |
| 2427 | rc = -EOPNOTSUPP; | 2566 | /* found PHY with active link */ |
| 2428 | } | 2567 | if (netif_msg_link(lp)) |
| 2568 | printk(KERN_INFO | ||
| 2569 | "%s: Using PHY number %d.\n", | ||
| 2570 | dev->name, i); | ||
| 2571 | |||
| 2572 | /* isolate inactive phy */ | ||
| 2573 | bmcr = | ||
| 2574 | mdio_read(dev, lp->mii_if.phy_id, MII_BMCR); | ||
| 2575 | mdio_write(dev, lp->mii_if.phy_id, MII_BMCR, | ||
| 2576 | bmcr | BMCR_ISOLATE); | ||
| 2577 | |||
| 2578 | /* de-isolate new phy */ | ||
| 2579 | bmcr = mdio_read(dev, i, MII_BMCR); | ||
| 2580 | mdio_write(dev, i, MII_BMCR, | ||
| 2581 | bmcr & ~BMCR_ISOLATE); | ||
| 2582 | |||
| 2583 | /* set new phy address */ | ||
| 2584 | lp->mii_if.phy_id = i; | ||
| 2585 | return 1; | ||
| 2586 | } | ||
| 2587 | } | ||
| 2588 | } | ||
| 2589 | return 0; | ||
| 2590 | } | ||
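The PHY failover in pcnet32_check_otherphy() boils down to two read-modify-write cycles on the MII control register: set the ISOLATE bit on the PHY being abandoned, clear it on the one being adopted. A stand-alone sketch with the register bank modelled as a plain array; 0x0400 is the standard BMCR_ISOLATE value:

    #include <stdint.h>
    #include <stdio.h>

    #define BMCR_ISOLATE 0x0400   /* electrically isolate the PHY from the MII */

    static void switch_phy(uint16_t bmcr[], int old_phy, int new_phy)
    {
        bmcr[old_phy] |= BMCR_ISOLATE;                 /* park the old PHY */
        bmcr[new_phy] &= (uint16_t)~BMCR_ISOLATE;      /* activate the new one */
    }

    int main(void)
    {
        /* PHY 0 active, PHY 1 currently isolated */
        uint16_t bmcr[2] = { 0x1000, 0x1000 | BMCR_ISOLATE };

        switch_phy(bmcr, 0, 1);
        printf("phy0=0x%04x phy1=0x%04x\n",
               (unsigned)bmcr[0], (unsigned)bmcr[1]);
        return 0;
    }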
| 2591 | |||
| 2592 | /* | ||
| 2593 | * Show the status of the media. Similar to mii_check_media however it | ||
| 2594 | * correctly shows the link speed for all (tested) pcnet32 variants. | ||
| 2595 | * Devices with no mii just report link state without speed. | ||
| 2596 | * | ||
| 2597 | * Caller is assumed to hold and release the lp->lock. | ||
| 2598 | */ | ||
| 2429 | 2599 | ||
| 2430 | return rc; | 2600 | static void pcnet32_check_media(struct net_device *dev, int verbose) |
| 2601 | { | ||
| 2602 | struct pcnet32_private *lp = dev->priv; | ||
| 2603 | int curr_link; | ||
| 2604 | int prev_link = netif_carrier_ok(dev) ? 1 : 0; | ||
| 2605 | u32 bcr9; | ||
| 2606 | |||
| 2607 | if (lp->mii) { | ||
| 2608 | curr_link = mii_link_ok(&lp->mii_if); | ||
| 2609 | } else { | ||
| 2610 | ulong ioaddr = dev->base_addr; /* card base I/O address */ | ||
| 2611 | curr_link = (lp->a.read_bcr(ioaddr, 4) != 0xc0); | ||
| 2612 | } | ||
| 2613 | if (!curr_link) { | ||
| 2614 | if (prev_link || verbose) { | ||
| 2615 | netif_carrier_off(dev); | ||
| 2616 | if (netif_msg_link(lp)) | ||
| 2617 | printk(KERN_INFO "%s: link down\n", dev->name); | ||
| 2618 | } | ||
| 2619 | if (lp->phycount > 1) { | ||
| 2620 | curr_link = pcnet32_check_otherphy(dev); | ||
| 2621 | prev_link = 0; | ||
| 2622 | } | ||
| 2623 | } else if (verbose || !prev_link) { | ||
| 2624 | netif_carrier_on(dev); | ||
| 2625 | if (lp->mii) { | ||
| 2626 | if (netif_msg_link(lp)) { | ||
| 2627 | struct ethtool_cmd ecmd; | ||
| 2628 | mii_ethtool_gset(&lp->mii_if, &ecmd); | ||
| 2629 | printk(KERN_INFO | ||
| 2630 | "%s: link up, %sMbps, %s-duplex\n", | ||
| 2631 | dev->name, | ||
| 2632 | (ecmd.speed == SPEED_100) ? "100" : "10", | ||
| 2633 | (ecmd.duplex == | ||
| 2634 | DUPLEX_FULL) ? "full" : "half"); | ||
| 2635 | } | ||
| 2636 | bcr9 = lp->a.read_bcr(dev->base_addr, 9); | ||
| 2637 | if ((bcr9 & (1 << 0)) != lp->mii_if.full_duplex) { | ||
| 2638 | if (lp->mii_if.full_duplex) | ||
| 2639 | bcr9 |= (1 << 0); | ||
| 2640 | else | ||
| 2641 | bcr9 &= ~(1 << 0); | ||
| 2642 | lp->a.write_bcr(dev->base_addr, 9, bcr9); | ||
| 2643 | } | ||
| 2644 | } else { | ||
| 2645 | if (netif_msg_link(lp)) | ||
| 2646 | printk(KERN_INFO "%s: link up\n", dev->name); | ||
| 2647 | } | ||
| 2648 | } | ||
| 2431 | } | 2649 | } |
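The tail of pcnet32_check_media() keeps the MAC's duplex setting in step with whatever the PHY negotiated: a plain read-modify-write of bit 0 in BCR9, done only when the bit disagrees with the MII state. A sketch with the register modelled as a value, assuming (as the hunk does) that bit 0 is the full-duplex enable:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Return the BCR9 value with bit 0 matching the negotiated duplex. */
    static uint32_t sync_duplex(uint32_t bcr9, bool full_duplex)
    {
        if ((bcr9 & 1u) != (full_duplex ? 1u : 0u)) {
            if (full_duplex)
                bcr9 |= 1u;     /* switch the MAC to full duplex */
            else
                bcr9 &= ~1u;    /* fall back to half duplex */
        }
        return bcr9;
    }

    int main(void)
    {
        printf("0x%02x\n", sync_duplex(0x00, true));   /* -> 0x01 */
        printf("0x%02x\n", sync_duplex(0x05, false));  /* -> 0x04 */
        return 0;
    }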
| 2432 | 2650 | ||
| 2651 | /* | ||
| 2652 | * Check for loss of link and link establishment. | ||
| 2653 | * Can not use mii_check_media because it does nothing if mode is forced. | ||
| 2654 | */ | ||
| 2655 | |||
| 2433 | static void pcnet32_watchdog(struct net_device *dev) | 2656 | static void pcnet32_watchdog(struct net_device *dev) |
| 2434 | { | 2657 | { |
| 2435 | struct pcnet32_private *lp = dev->priv; | 2658 | struct pcnet32_private *lp = dev->priv; |
| 2436 | unsigned long flags; | 2659 | unsigned long flags; |
| 2437 | 2660 | ||
| 2438 | /* Print the link status if it has changed */ | 2661 | /* Print the link status if it has changed */ |
| 2439 | if (lp->mii) { | ||
| 2440 | spin_lock_irqsave(&lp->lock, flags); | 2662 | spin_lock_irqsave(&lp->lock, flags); |
| 2441 | mii_check_media (&lp->mii_if, netif_msg_link(lp), 0); | 2663 | pcnet32_check_media(dev, 0); |
| 2442 | spin_unlock_irqrestore(&lp->lock, flags); | 2664 | spin_unlock_irqrestore(&lp->lock, flags); |
| 2443 | } | ||
| 2444 | 2665 | ||
| 2445 | mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); | 2666 | mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT); |
| 2446 | } | 2667 | } |
| 2447 | 2668 | ||
| 2448 | static void __devexit pcnet32_remove_one(struct pci_dev *pdev) | 2669 | static void __devexit pcnet32_remove_one(struct pci_dev *pdev) |
| 2449 | { | 2670 | { |
| 2450 | struct net_device *dev = pci_get_drvdata(pdev); | 2671 | struct net_device *dev = pci_get_drvdata(pdev); |
| 2451 | 2672 | ||
| 2452 | if (dev) { | 2673 | if (dev) { |
| 2453 | struct pcnet32_private *lp = dev->priv; | 2674 | struct pcnet32_private *lp = dev->priv; |
| 2454 | 2675 | ||
| 2455 | unregister_netdev(dev); | 2676 | unregister_netdev(dev); |
| 2456 | pcnet32_free_ring(dev); | 2677 | pcnet32_free_ring(dev); |
| 2457 | release_region(dev->base_addr, PCNET32_TOTAL_SIZE); | 2678 | release_region(dev->base_addr, PCNET32_TOTAL_SIZE); |
| 2458 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | 2679 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); |
| 2459 | free_netdev(dev); | 2680 | free_netdev(dev); |
| 2460 | pci_disable_device(pdev); | 2681 | pci_disable_device(pdev); |
| 2461 | pci_set_drvdata(pdev, NULL); | 2682 | pci_set_drvdata(pdev, NULL); |
| 2462 | } | 2683 | } |
| 2463 | } | 2684 | } |
| 2464 | 2685 | ||
| 2465 | static struct pci_driver pcnet32_driver = { | 2686 | static struct pci_driver pcnet32_driver = { |
| 2466 | .name = DRV_NAME, | 2687 | .name = DRV_NAME, |
| 2467 | .probe = pcnet32_probe_pci, | 2688 | .probe = pcnet32_probe_pci, |
| 2468 | .remove = __devexit_p(pcnet32_remove_one), | 2689 | .remove = __devexit_p(pcnet32_remove_one), |
| 2469 | .id_table = pcnet32_pci_tbl, | 2690 | .id_table = pcnet32_pci_tbl, |
| 2470 | }; | 2691 | }; |
| 2471 | 2692 | ||
| 2472 | /* An additional parameter that may be passed in... */ | 2693 | /* An additional parameter that may be passed in... */ |
| @@ -2477,9 +2698,11 @@ static int pcnet32_have_pci; | |||
| 2477 | module_param(debug, int, 0); | 2698 | module_param(debug, int, 0); |
| 2478 | MODULE_PARM_DESC(debug, DRV_NAME " debug level"); | 2699 | MODULE_PARM_DESC(debug, DRV_NAME " debug level"); |
| 2479 | module_param(max_interrupt_work, int, 0); | 2700 | module_param(max_interrupt_work, int, 0); |
| 2480 | MODULE_PARM_DESC(max_interrupt_work, DRV_NAME " maximum events handled per interrupt"); | 2701 | MODULE_PARM_DESC(max_interrupt_work, |
| 2702 | DRV_NAME " maximum events handled per interrupt"); | ||
| 2481 | module_param(rx_copybreak, int, 0); | 2703 | module_param(rx_copybreak, int, 0); |
| 2482 | MODULE_PARM_DESC(rx_copybreak, DRV_NAME " copy breakpoint for copy-only-tiny-frames"); | 2704 | MODULE_PARM_DESC(rx_copybreak, |
| 2705 | DRV_NAME " copy breakpoint for copy-only-tiny-frames"); | ||
| 2483 | module_param(tx_start_pt, int, 0); | 2706 | module_param(tx_start_pt, int, 0); |
| 2484 | MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); | 2707 | MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); |
| 2485 | module_param(pcnet32vlb, int, 0); | 2708 | module_param(pcnet32vlb, int, 0); |
| @@ -2490,7 +2713,9 @@ module_param_array(full_duplex, int, NULL, 0); | |||
| 2490 | MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); | 2713 | MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); |
| 2491 | /* Module Parameter for HomePNA cards added by Patrick Simmons, 2004 */ | 2714 | /* Module Parameter for HomePNA cards added by Patrick Simmons, 2004 */ |
| 2492 | module_param_array(homepna, int, NULL, 0); | 2715 | module_param_array(homepna, int, NULL, 0); |
| 2493 | MODULE_PARM_DESC(homepna, DRV_NAME " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet"); | 2716 | MODULE_PARM_DESC(homepna, |
| 2717 | DRV_NAME | ||
| 2718 | " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet"); | ||
| 2494 | 2719 | ||
| 2495 | MODULE_AUTHOR("Thomas Bogendoerfer"); | 2720 | MODULE_AUTHOR("Thomas Bogendoerfer"); |
| 2496 | MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards"); | 2721 | MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards"); |
| @@ -2500,44 +2725,44 @@ MODULE_LICENSE("GPL"); | |||
| 2500 | 2725 | ||
| 2501 | static int __init pcnet32_init_module(void) | 2726 | static int __init pcnet32_init_module(void) |
| 2502 | { | 2727 | { |
| 2503 | printk(KERN_INFO "%s", version); | 2728 | printk(KERN_INFO "%s", version); |
| 2504 | 2729 | ||
| 2505 | pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT); | 2730 | pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT); |
| 2506 | 2731 | ||
| 2507 | if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) | 2732 | if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) |
| 2508 | tx_start = tx_start_pt; | 2733 | tx_start = tx_start_pt; |
| 2509 | 2734 | ||
| 2510 | /* find the PCI devices */ | 2735 | /* find the PCI devices */ |
| 2511 | if (!pci_module_init(&pcnet32_driver)) | 2736 | if (!pci_module_init(&pcnet32_driver)) |
| 2512 | pcnet32_have_pci = 1; | 2737 | pcnet32_have_pci = 1; |
| 2513 | 2738 | ||
| 2514 | /* should we find any remaining VLbus devices ? */ | 2739 | /* should we find any remaining VLbus devices ? */ |
| 2515 | if (pcnet32vlb) | 2740 | if (pcnet32vlb) |
| 2516 | pcnet32_probe_vlbus(); | 2741 | pcnet32_probe_vlbus(); |
| 2517 | 2742 | ||
| 2518 | if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE)) | 2743 | if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE)) |
| 2519 | printk(KERN_INFO PFX "%d cards_found.\n", cards_found); | 2744 | printk(KERN_INFO PFX "%d cards_found.\n", cards_found); |
| 2520 | 2745 | ||
| 2521 | return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV; | 2746 | return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV; |
| 2522 | } | 2747 | } |
| 2523 | 2748 | ||
| 2524 | static void __exit pcnet32_cleanup_module(void) | 2749 | static void __exit pcnet32_cleanup_module(void) |
| 2525 | { | 2750 | { |
| 2526 | struct net_device *next_dev; | 2751 | struct net_device *next_dev; |
| 2527 | 2752 | ||
| 2528 | while (pcnet32_dev) { | 2753 | while (pcnet32_dev) { |
| 2529 | struct pcnet32_private *lp = pcnet32_dev->priv; | 2754 | struct pcnet32_private *lp = pcnet32_dev->priv; |
| 2530 | next_dev = lp->next; | 2755 | next_dev = lp->next; |
| 2531 | unregister_netdev(pcnet32_dev); | 2756 | unregister_netdev(pcnet32_dev); |
| 2532 | pcnet32_free_ring(pcnet32_dev); | 2757 | pcnet32_free_ring(pcnet32_dev); |
| 2533 | release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); | 2758 | release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); |
| 2534 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); | 2759 | pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); |
| 2535 | free_netdev(pcnet32_dev); | 2760 | free_netdev(pcnet32_dev); |
| 2536 | pcnet32_dev = next_dev; | 2761 | pcnet32_dev = next_dev; |
| 2537 | } | 2762 | } |
| 2538 | 2763 | ||
| 2539 | if (pcnet32_have_pci) | 2764 | if (pcnet32_have_pci) |
| 2540 | pci_unregister_driver(&pcnet32_driver); | 2765 | pci_unregister_driver(&pcnet32_driver); |
| 2541 | } | 2766 | } |
| 2542 | 2767 | ||
| 2543 | module_init(pcnet32_init_module); | 2768 | module_init(pcnet32_init_module); |
diff --git a/drivers/net/skfp/fplustm.c b/drivers/net/skfp/fplustm.c index a4b2b6975d6c..0784f558ca9a 100644 --- a/drivers/net/skfp/fplustm.c +++ b/drivers/net/skfp/fplustm.c | |||
| @@ -549,12 +549,12 @@ void formac_tx_restart(struct s_smc *smc) | |||
| 549 | static void enable_formac(struct s_smc *smc) | 549 | static void enable_formac(struct s_smc *smc) |
| 550 | { | 550 | { |
| 551 | /* set formac IMSK : 0 enables irq */ | 551 | /* set formac IMSK : 0 enables irq */ |
| 552 | outpw(FM_A(FM_IMSK1U),~mac_imsk1u) ; | 552 | outpw(FM_A(FM_IMSK1U),(unsigned short)~mac_imsk1u); |
| 553 | outpw(FM_A(FM_IMSK1L),~mac_imsk1l) ; | 553 | outpw(FM_A(FM_IMSK1L),(unsigned short)~mac_imsk1l); |
| 554 | outpw(FM_A(FM_IMSK2U),~mac_imsk2u) ; | 554 | outpw(FM_A(FM_IMSK2U),(unsigned short)~mac_imsk2u); |
| 555 | outpw(FM_A(FM_IMSK2L),~mac_imsk2l) ; | 555 | outpw(FM_A(FM_IMSK2L),(unsigned short)~mac_imsk2l); |
| 556 | outpw(FM_A(FM_IMSK3U),~mac_imsk3u) ; | 556 | outpw(FM_A(FM_IMSK3U),(unsigned short)~mac_imsk3u); |
| 557 | outpw(FM_A(FM_IMSK3L),~mac_imsk3l) ; | 557 | outpw(FM_A(FM_IMSK3L),(unsigned short)~mac_imsk3l); |
| 558 | } | 558 | } |
| 559 | 559 | ||
| 560 | #if 0 /* Removed because the driver should use the ASICs TX complete IRQ. */ | 560 | #if 0 /* Removed because the driver should use the ASICs TX complete IRQ. */ |
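The only change in this hunk is the (unsigned short) cast on each inverted interrupt mask. Applying '~' to a 16-bit value promotes it to int first, so the result carries extra set high bits that are silently truncated again when handed to a 16-bit register write; the cast makes that truncation explicit and keeps compilers quiet. A two-line demonstration of the promotion (the mask value is made up, and the first result assumes 32-bit int):

    #include <stdio.h>

    int main(void)
    {
        unsigned short mask = 0x0003;

        /* ~mask is evaluated as int, so the upper bits come out set too */
        printf("~mask                 = 0x%08x\n", (unsigned)~mask);
        /* the explicit cast keeps only the 16 bits the register can hold */
        printf("(unsigned short)~mask = 0x%04x\n",
               (unsigned)(unsigned short)~mask);
        return 0;
    }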
diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 25e028b7ce48..4eda81d41b10 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c | |||
| @@ -44,7 +44,7 @@ | |||
| 44 | #include "skge.h" | 44 | #include "skge.h" |
| 45 | 45 | ||
| 46 | #define DRV_NAME "skge" | 46 | #define DRV_NAME "skge" |
| 47 | #define DRV_VERSION "1.3" | 47 | #define DRV_VERSION "1.4" |
| 48 | #define PFX DRV_NAME " " | 48 | #define PFX DRV_NAME " " |
| 49 | 49 | ||
| 50 | #define DEFAULT_TX_RING_SIZE 128 | 50 | #define DEFAULT_TX_RING_SIZE 128 |
| @@ -104,7 +104,6 @@ static const int txqaddr[] = { Q_XA1, Q_XA2 }; | |||
| 104 | static const int rxqaddr[] = { Q_R1, Q_R2 }; | 104 | static const int rxqaddr[] = { Q_R1, Q_R2 }; |
| 105 | static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F }; | 105 | static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F }; |
| 106 | static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F }; | 106 | static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F }; |
| 107 | static const u32 portirqmask[] = { IS_PORT_1, IS_PORT_2 }; | ||
| 108 | 107 | ||
| 109 | static int skge_get_regs_len(struct net_device *dev) | 108 | static int skge_get_regs_len(struct net_device *dev) |
| 110 | { | 109 | { |
| @@ -728,19 +727,18 @@ static struct ethtool_ops skge_ethtool_ops = { | |||
| 728 | * Allocate ring elements and chain them together | 727 | * Allocate ring elements and chain them together |
| 729 | * One-to-one association of board descriptors with ring elements | 728 | * One-to-one association of board descriptors with ring elements |
| 730 | */ | 729 | */ |
| 731 | static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u64 base) | 730 | static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u32 base) |
| 732 | { | 731 | { |
| 733 | struct skge_tx_desc *d; | 732 | struct skge_tx_desc *d; |
| 734 | struct skge_element *e; | 733 | struct skge_element *e; |
| 735 | int i; | 734 | int i; |
| 736 | 735 | ||
| 737 | ring->start = kmalloc(sizeof(*e)*ring->count, GFP_KERNEL); | 736 | ring->start = kcalloc(sizeof(*e), ring->count, GFP_KERNEL); |
| 738 | if (!ring->start) | 737 | if (!ring->start) |
| 739 | return -ENOMEM; | 738 | return -ENOMEM; |
| 740 | 739 | ||
| 741 | for (i = 0, e = ring->start, d = vaddr; i < ring->count; i++, e++, d++) { | 740 | for (i = 0, e = ring->start, d = vaddr; i < ring->count; i++, e++, d++) { |
| 742 | e->desc = d; | 741 | e->desc = d; |
| 743 | e->skb = NULL; | ||
| 744 | if (i == ring->count - 1) { | 742 | if (i == ring->count - 1) { |
| 745 | e->next = ring->start; | 743 | e->next = ring->start; |
| 746 | d->next_offset = base; | 744 | d->next_offset = base; |
| @@ -2169,27 +2167,31 @@ static int skge_up(struct net_device *dev) | |||
| 2169 | if (!skge->mem) | 2167 | if (!skge->mem) |
| 2170 | return -ENOMEM; | 2168 | return -ENOMEM; |
| 2171 | 2169 | ||
| 2170 | BUG_ON(skge->dma & 7); | ||
| 2171 | |||
| 2172 | if ((u64)skge->dma >> 32 != ((u64) skge->dma + skge->mem_size) >> 32) { | ||
| 2173 | printk(KERN_ERR PFX "pci_alloc_consistent region crosses 4G boundary\n"); | ||
| 2174 | err = -EINVAL; | ||
| 2175 | goto free_pci_mem; | ||
| 2176 | } | ||
| 2177 | |||
| 2172 | memset(skge->mem, 0, skge->mem_size); | 2178 | memset(skge->mem, 0, skge->mem_size); |
| 2173 | 2179 | ||
| 2174 | if ((err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma))) | 2180 | err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma); |
| 2181 | if (err) | ||
| 2175 | goto free_pci_mem; | 2182 | goto free_pci_mem; |
| 2176 | 2183 | ||
| 2177 | err = skge_rx_fill(skge); | 2184 | err = skge_rx_fill(skge); |
| 2178 | if (err) | 2185 | if (err) |
| 2179 | goto free_rx_ring; | 2186 | goto free_rx_ring; |
| 2180 | 2187 | ||
| 2181 | if ((err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size, | 2188 | err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size, |
| 2182 | skge->dma + rx_size))) | 2189 | skge->dma + rx_size); |
| 2190 | if (err) | ||
| 2183 | goto free_rx_ring; | 2191 | goto free_rx_ring; |
| 2184 | 2192 | ||
| 2185 | skge->tx_avail = skge->tx_ring.count - 1; | 2193 | skge->tx_avail = skge->tx_ring.count - 1; |
| 2186 | 2194 | ||
| 2187 | /* Enable IRQ from port */ | ||
| 2188 | spin_lock_irq(&hw->hw_lock); | ||
| 2189 | hw->intr_mask |= portirqmask[port]; | ||
| 2190 | skge_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 2191 | spin_unlock_irq(&hw->hw_lock); | ||
| 2192 | |||
| 2193 | /* Initialize MAC */ | 2195 | /* Initialize MAC */ |
| 2194 | spin_lock_bh(&hw->phy_lock); | 2196 | spin_lock_bh(&hw->phy_lock); |
| 2195 | if (hw->chip_id == CHIP_ID_GENESIS) | 2197 | if (hw->chip_id == CHIP_ID_GENESIS) |
| @@ -2246,11 +2248,6 @@ static int skge_down(struct net_device *dev) | |||
| 2246 | else | 2248 | else |
| 2247 | yukon_stop(skge); | 2249 | yukon_stop(skge); |
| 2248 | 2250 | ||
| 2249 | spin_lock_irq(&hw->hw_lock); | ||
| 2250 | hw->intr_mask &= ~portirqmask[skge->port]; | ||
| 2251 | skge_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 2252 | spin_unlock_irq(&hw->hw_lock); | ||
| 2253 | |||
| 2254 | /* Stop transmitter */ | 2251 | /* Stop transmitter */ |
| 2255 | skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP); | 2252 | skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP); |
| 2256 | skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), | 2253 | skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), |
| @@ -2307,18 +2304,15 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
| 2307 | int i; | 2304 | int i; |
| 2308 | u32 control, len; | 2305 | u32 control, len; |
| 2309 | u64 map; | 2306 | u64 map; |
| 2310 | unsigned long flags; | ||
| 2311 | 2307 | ||
| 2312 | skb = skb_padto(skb, ETH_ZLEN); | 2308 | skb = skb_padto(skb, ETH_ZLEN); |
| 2313 | if (!skb) | 2309 | if (!skb) |
| 2314 | return NETDEV_TX_OK; | 2310 | return NETDEV_TX_OK; |
| 2315 | 2311 | ||
| 2316 | local_irq_save(flags); | ||
| 2317 | if (!spin_trylock(&skge->tx_lock)) { | 2312 | if (!spin_trylock(&skge->tx_lock)) { |
| 2318 | /* Collision - tell upper layer to requeue */ | 2313 | /* Collision - tell upper layer to requeue */ |
| 2319 | local_irq_restore(flags); | 2314 | return NETDEV_TX_LOCKED; |
| 2320 | return NETDEV_TX_LOCKED; | 2315 | } |
| 2321 | } | ||
| 2322 | 2316 | ||
| 2323 | if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) { | 2317 | if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) { |
| 2324 | if (!netif_queue_stopped(dev)) { | 2318 | if (!netif_queue_stopped(dev)) { |
| @@ -2327,7 +2321,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
| 2327 | printk(KERN_WARNING PFX "%s: ring full when queue awake!\n", | 2321 | printk(KERN_WARNING PFX "%s: ring full when queue awake!\n", |
| 2328 | dev->name); | 2322 | dev->name); |
| 2329 | } | 2323 | } |
| 2330 | spin_unlock_irqrestore(&skge->tx_lock, flags); | 2324 | spin_unlock(&skge->tx_lock); |
| 2331 | return NETDEV_TX_BUSY; | 2325 | return NETDEV_TX_BUSY; |
| 2332 | } | 2326 | } |
| 2333 | 2327 | ||
| @@ -2402,8 +2396,10 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
| 2402 | netif_stop_queue(dev); | 2396 | netif_stop_queue(dev); |
| 2403 | } | 2397 | } |
| 2404 | 2398 | ||
| 2399 | mmiowb(); | ||
| 2400 | spin_unlock(&skge->tx_lock); | ||
| 2401 | |||
| 2405 | dev->trans_start = jiffies; | 2402 | dev->trans_start = jiffies; |
| 2406 | spin_unlock_irqrestore(&skge->tx_lock, flags); | ||
| 2407 | 2403 | ||
| 2408 | return NETDEV_TX_OK; | 2404 | return NETDEV_TX_OK; |
| 2409 | } | 2405 | } |
| @@ -2416,7 +2412,7 @@ static inline void skge_tx_free(struct skge_hw *hw, struct skge_element *e) | |||
| 2416 | pci_unmap_addr(e, mapaddr), | 2412 | pci_unmap_addr(e, mapaddr), |
| 2417 | pci_unmap_len(e, maplen), | 2413 | pci_unmap_len(e, maplen), |
| 2418 | PCI_DMA_TODEVICE); | 2414 | PCI_DMA_TODEVICE); |
| 2419 | dev_kfree_skb_any(e->skb); | 2415 | dev_kfree_skb(e->skb); |
| 2420 | e->skb = NULL; | 2416 | e->skb = NULL; |
| 2421 | } else { | 2417 | } else { |
| 2422 | pci_unmap_page(hw->pdev, | 2418 | pci_unmap_page(hw->pdev, |
| @@ -2430,15 +2426,14 @@ static void skge_tx_clean(struct skge_port *skge) | |||
| 2430 | { | 2426 | { |
| 2431 | struct skge_ring *ring = &skge->tx_ring; | 2427 | struct skge_ring *ring = &skge->tx_ring; |
| 2432 | struct skge_element *e; | 2428 | struct skge_element *e; |
| 2433 | unsigned long flags; | ||
| 2434 | 2429 | ||
| 2435 | spin_lock_irqsave(&skge->tx_lock, flags); | 2430 | spin_lock_bh(&skge->tx_lock); |
| 2436 | for (e = ring->to_clean; e != ring->to_use; e = e->next) { | 2431 | for (e = ring->to_clean; e != ring->to_use; e = e->next) { |
| 2437 | ++skge->tx_avail; | 2432 | ++skge->tx_avail; |
| 2438 | skge_tx_free(skge->hw, e); | 2433 | skge_tx_free(skge->hw, e); |
| 2439 | } | 2434 | } |
| 2440 | ring->to_clean = e; | 2435 | ring->to_clean = e; |
| 2441 | spin_unlock_irqrestore(&skge->tx_lock, flags); | 2436 | spin_unlock_bh(&skge->tx_lock); |
| 2442 | } | 2437 | } |
| 2443 | 2438 | ||
| 2444 | static void skge_tx_timeout(struct net_device *dev) | 2439 | static void skge_tx_timeout(struct net_device *dev) |
| @@ -2663,6 +2658,37 @@ resubmit: | |||
| 2663 | return NULL; | 2658 | return NULL; |
| 2664 | } | 2659 | } |
| 2665 | 2660 | ||
| 2661 | static void skge_tx_done(struct skge_port *skge) | ||
| 2662 | { | ||
| 2663 | struct skge_ring *ring = &skge->tx_ring; | ||
| 2664 | struct skge_element *e; | ||
| 2665 | |||
| 2666 | spin_lock(&skge->tx_lock); | ||
| 2667 | for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) { | ||
| 2668 | struct skge_tx_desc *td = e->desc; | ||
| 2669 | u32 control; | ||
| 2670 | |||
| 2671 | rmb(); | ||
| 2672 | control = td->control; | ||
| 2673 | if (control & BMU_OWN) | ||
| 2674 | break; | ||
| 2675 | |||
| 2676 | if (unlikely(netif_msg_tx_done(skge))) | ||
| 2677 | printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n", | ||
| 2678 | skge->netdev->name, e - ring->start, td->status); | ||
| 2679 | |||
| 2680 | skge_tx_free(skge->hw, e); | ||
| 2681 | e->skb = NULL; | ||
| 2682 | ++skge->tx_avail; | ||
| 2683 | } | ||
| 2684 | ring->to_clean = e; | ||
| 2685 | skge_write8(skge->hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F); | ||
| 2686 | |||
| 2687 | if (skge->tx_avail > MAX_SKB_FRAGS + 1) | ||
| 2688 | netif_wake_queue(skge->netdev); | ||
| 2689 | |||
| 2690 | spin_unlock(&skge->tx_lock); | ||
| 2691 | } | ||
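skge_tx_done() above walks the transmit ring from the last cleaned slot and stops at the first descriptor whose control word still carries the BMU_OWN bit, i.e. one the hardware has not finished with; the rmb() ensures the rest of the descriptor is only read after that ownership check. A simplified, single-threaded sketch of the scan; the descriptor layout and OWN bit value are stand-ins, not the real skge definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define OWN_BIT 0x80000000u   /* stand-in for BMU_OWN */

    struct desc {
        uint32_t control;         /* OWN bit plus length, written by hardware */
    };

    /* Reclaim completed slots starting at *to_clean; returns how many. */
    static unsigned int reclaim(struct desc *ring, unsigned int count,
                                unsigned int *to_clean)
    {
        unsigned int done = 0;

        while (done < count) {
            struct desc *d = &ring[*to_clean];

            if (d->control & OWN_BIT)
                break;   /* hardware still owns this one */
            /* the real driver unmaps DMA and frees the skb here */
            *to_clean = (*to_clean + 1) % count;
            done++;
        }
        return done;
    }

    int main(void)
    {
        struct desc ring[4] = {
            { 0x40 }, { 0x40 }, { OWN_BIT | 0x40 }, { OWN_BIT | 0x40 }
        };
        unsigned int to_clean = 0;

        printf("reclaimed %u descriptors\n", reclaim(ring, 4, &to_clean));
        return 0;
    }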
| 2666 | 2692 | ||
| 2667 | static int skge_poll(struct net_device *dev, int *budget) | 2693 | static int skge_poll(struct net_device *dev, int *budget) |
| 2668 | { | 2694 | { |
| @@ -2670,8 +2696,10 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
| 2670 | struct skge_hw *hw = skge->hw; | 2696 | struct skge_hw *hw = skge->hw; |
| 2671 | struct skge_ring *ring = &skge->rx_ring; | 2697 | struct skge_ring *ring = &skge->rx_ring; |
| 2672 | struct skge_element *e; | 2698 | struct skge_element *e; |
| 2673 | unsigned int to_do = min(dev->quota, *budget); | 2699 | int to_do = min(dev->quota, *budget); |
| 2674 | unsigned int work_done = 0; | 2700 | int work_done = 0; |
| 2701 | |||
| 2702 | skge_tx_done(skge); | ||
| 2675 | 2703 | ||
| 2676 | for (e = ring->to_clean; prefetch(e->next), work_done < to_do; e = e->next) { | 2704 | for (e = ring->to_clean; prefetch(e->next), work_done < to_do; e = e->next) { |
| 2677 | struct skge_rx_desc *rd = e->desc; | 2705 | struct skge_rx_desc *rd = e->desc; |
| @@ -2683,8 +2711,8 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
| 2683 | if (control & BMU_OWN) | 2711 | if (control & BMU_OWN) |
| 2684 | break; | 2712 | break; |
| 2685 | 2713 | ||
| 2686 | skb = skge_rx_get(skge, e, control, rd->status, | 2714 | skb = skge_rx_get(skge, e, control, rd->status, |
| 2687 | le16_to_cpu(rd->csum2)); | 2715 | le16_to_cpu(rd->csum2)); |
| 2688 | if (likely(skb)) { | 2716 | if (likely(skb)) { |
| 2689 | dev->last_rx = jiffies; | 2717 | dev->last_rx = jiffies; |
| 2690 | netif_receive_skb(skb); | 2718 | netif_receive_skb(skb); |
| @@ -2705,49 +2733,15 @@ static int skge_poll(struct net_device *dev, int *budget) | |||
| 2705 | if (work_done >= to_do) | 2733 | if (work_done >= to_do) |
| 2706 | return 1; /* not done */ | 2734 | return 1; /* not done */ |
| 2707 | 2735 | ||
| 2708 | spin_lock_irq(&hw->hw_lock); | 2736 | netif_rx_complete(dev); |
| 2709 | __netif_rx_complete(dev); | 2737 | mmiowb(); |
| 2710 | hw->intr_mask |= portirqmask[skge->port]; | 2738 | |
| 2739 | hw->intr_mask |= skge->port == 0 ? (IS_R1_F|IS_XA1_F) : (IS_R2_F|IS_XA2_F); | ||
| 2711 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2740 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
| 2712 | spin_unlock_irq(&hw->hw_lock); | ||
| 2713 | 2741 | ||
| 2714 | return 0; | 2742 | return 0; |
| 2715 | } | 2743 | } |
| 2716 | 2744 | ||
| 2717 | static inline void skge_tx_intr(struct net_device *dev) | ||
| 2718 | { | ||
| 2719 | struct skge_port *skge = netdev_priv(dev); | ||
| 2720 | struct skge_hw *hw = skge->hw; | ||
| 2721 | struct skge_ring *ring = &skge->tx_ring; | ||
| 2722 | struct skge_element *e; | ||
| 2723 | |||
| 2724 | spin_lock(&skge->tx_lock); | ||
| 2725 | for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) { | ||
| 2726 | struct skge_tx_desc *td = e->desc; | ||
| 2727 | u32 control; | ||
| 2728 | |||
| 2729 | rmb(); | ||
| 2730 | control = td->control; | ||
| 2731 | if (control & BMU_OWN) | ||
| 2732 | break; | ||
| 2733 | |||
| 2734 | if (unlikely(netif_msg_tx_done(skge))) | ||
| 2735 | printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n", | ||
| 2736 | dev->name, e - ring->start, td->status); | ||
| 2737 | |||
| 2738 | skge_tx_free(hw, e); | ||
| 2739 | e->skb = NULL; | ||
| 2740 | ++skge->tx_avail; | ||
| 2741 | } | ||
| 2742 | ring->to_clean = e; | ||
| 2743 | skge_write8(hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F); | ||
| 2744 | |||
| 2745 | if (skge->tx_avail > MAX_SKB_FRAGS + 1) | ||
| 2746 | netif_wake_queue(dev); | ||
| 2747 | |||
| 2748 | spin_unlock(&skge->tx_lock); | ||
| 2749 | } | ||
| 2750 | |||
| 2751 | /* Parity errors seem to happen when Genesis is connected to a switch | 2745 | /* Parity errors seem to happen when Genesis is connected to a switch |
| 2752 | * with no other ports present. Heartbeat error?? | 2746 | * with no other ports present. Heartbeat error?? |
| 2753 | */ | 2747 | */ |
| @@ -2770,17 +2764,6 @@ static void skge_mac_parity(struct skge_hw *hw, int port) | |||
| 2770 | ? GMF_CLI_TX_FC : GMF_CLI_TX_PE); | 2764 | ? GMF_CLI_TX_FC : GMF_CLI_TX_PE); |
| 2771 | } | 2765 | } |
| 2772 | 2766 | ||
| 2773 | static void skge_pci_clear(struct skge_hw *hw) | ||
| 2774 | { | ||
| 2775 | u16 status; | ||
| 2776 | |||
| 2777 | pci_read_config_word(hw->pdev, PCI_STATUS, &status); | ||
| 2778 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); | ||
| 2779 | pci_write_config_word(hw->pdev, PCI_STATUS, | ||
| 2780 | status | PCI_STATUS_ERROR_BITS); | ||
| 2781 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
| 2782 | } | ||
| 2783 | |||
| 2784 | static void skge_mac_intr(struct skge_hw *hw, int port) | 2767 | static void skge_mac_intr(struct skge_hw *hw, int port) |
| 2785 | { | 2768 | { |
| 2786 | if (hw->chip_id == CHIP_ID_GENESIS) | 2769 | if (hw->chip_id == CHIP_ID_GENESIS) |
| @@ -2822,23 +2805,39 @@ static void skge_error_irq(struct skge_hw *hw) | |||
| 2822 | if (hwstatus & IS_M2_PAR_ERR) | 2805 | if (hwstatus & IS_M2_PAR_ERR) |
| 2823 | skge_mac_parity(hw, 1); | 2806 | skge_mac_parity(hw, 1); |
| 2824 | 2807 | ||
| 2825 | if (hwstatus & IS_R1_PAR_ERR) | 2808 | if (hwstatus & IS_R1_PAR_ERR) { |
| 2809 | printk(KERN_ERR PFX "%s: receive queue parity error\n", | ||
| 2810 | hw->dev[0]->name); | ||
| 2826 | skge_write32(hw, B0_R1_CSR, CSR_IRQ_CL_P); | 2811 | skge_write32(hw, B0_R1_CSR, CSR_IRQ_CL_P); |
| 2812 | } | ||
| 2827 | 2813 | ||
| 2828 | if (hwstatus & IS_R2_PAR_ERR) | 2814 | if (hwstatus & IS_R2_PAR_ERR) { |
| 2815 | printk(KERN_ERR PFX "%s: receive queue parity error\n", | ||
| 2816 | hw->dev[1]->name); | ||
| 2829 | skge_write32(hw, B0_R2_CSR, CSR_IRQ_CL_P); | 2817 | skge_write32(hw, B0_R2_CSR, CSR_IRQ_CL_P); |
| 2818 | } | ||
| 2830 | 2819 | ||
| 2831 | if (hwstatus & (IS_IRQ_MST_ERR|IS_IRQ_STAT)) { | 2820 | if (hwstatus & (IS_IRQ_MST_ERR|IS_IRQ_STAT)) { |
| 2832 | printk(KERN_ERR PFX "hardware error detected (status 0x%x)\n", | 2821 | u16 pci_status, pci_cmd; |
| 2833 | hwstatus); | 2822 | |
| 2823 | pci_read_config_word(hw->pdev, PCI_COMMAND, &pci_cmd); | ||
| 2824 | pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status); | ||
| 2834 | 2825 | ||
| 2835 | skge_pci_clear(hw); | 2826 | printk(KERN_ERR PFX "%s: PCI error cmd=%#x status=%#x\n", |
| 2827 | pci_name(hw->pdev), pci_cmd, pci_status); | ||
| 2828 | |||
| 2829 | /* Write the error bits back to clear them. */ | ||
| 2830 | pci_status &= PCI_STATUS_ERROR_BITS; | ||
| 2831 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); | ||
| 2832 | pci_write_config_word(hw->pdev, PCI_COMMAND, | ||
| 2833 | pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); | ||
| 2834 | pci_write_config_word(hw->pdev, PCI_STATUS, pci_status); | ||
| 2835 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
| 2836 | 2836 | ||
| 2837 | /* if error still set then just ignore it */ | 2837 | /* if error still set then just ignore it */ |
| 2838 | hwstatus = skge_read32(hw, B0_HWE_ISRC); | 2838 | hwstatus = skge_read32(hw, B0_HWE_ISRC); |
| 2839 | if (hwstatus & IS_IRQ_STAT) { | 2839 | if (hwstatus & IS_IRQ_STAT) { |
| 2840 | pr_debug("IRQ status %x: still set ignoring hardware errors\n", | 2840 | printk(KERN_INFO PFX "unable to clear errors (so ignoring them)\n");
| 2841 | hwstatus); | ||
| 2842 | hw->intr_mask &= ~IS_HW_ERR; | 2841 | hw->intr_mask &= ~IS_HW_ERR; |
| 2843 | } | 2842 | } |
| 2844 | } | 2843 | } |
| @@ -2855,12 +2854,11 @@ static void skge_extirq(unsigned long data) | |||
| 2855 | int port; | 2854 | int port; |
| 2856 | 2855 | ||
| 2857 | spin_lock(&hw->phy_lock); | 2856 | spin_lock(&hw->phy_lock); |
| 2858 | for (port = 0; port < 2; port++) { | 2857 | for (port = 0; port < hw->ports; port++) { |
| 2859 | struct net_device *dev = hw->dev[port]; | 2858 | struct net_device *dev = hw->dev[port]; |
| 2859 | struct skge_port *skge = netdev_priv(dev); | ||
| 2860 | 2860 | ||
| 2861 | if (dev && netif_running(dev)) { | 2861 | if (netif_running(dev)) { |
| 2862 | struct skge_port *skge = netdev_priv(dev); | ||
| 2863 | |||
| 2864 | if (hw->chip_id != CHIP_ID_GENESIS) | 2862 | if (hw->chip_id != CHIP_ID_GENESIS) |
| 2865 | yukon_phy_intr(skge); | 2863 | yukon_phy_intr(skge); |
| 2866 | else | 2864 | else |
| @@ -2869,38 +2867,39 @@ static void skge_extirq(unsigned long data) | |||
| 2869 | } | 2867 | } |
| 2870 | spin_unlock(&hw->phy_lock); | 2868 | spin_unlock(&hw->phy_lock); |
| 2871 | 2869 | ||
| 2872 | spin_lock_irq(&hw->hw_lock); | ||
| 2873 | hw->intr_mask |= IS_EXT_REG; | 2870 | hw->intr_mask |= IS_EXT_REG; |
| 2874 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2871 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
| 2875 | spin_unlock_irq(&hw->hw_lock); | ||
| 2876 | } | 2872 | } |
| 2877 | 2873 | ||
| 2878 | static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) | 2874 | static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) |
| 2879 | { | 2875 | { |
| 2880 | struct skge_hw *hw = dev_id; | 2876 | struct skge_hw *hw = dev_id; |
| 2881 | u32 status = skge_read32(hw, B0_SP_ISRC); | 2877 | u32 status; |
| 2882 | 2878 | ||
| 2883 | if (status == 0 || status == ~0) /* hotplug or shared irq */ | 2879 | /* Reading this register masks IRQ */ |
| 2880 | status = skge_read32(hw, B0_SP_ISRC); | ||
| 2881 | if (status == 0) | ||
| 2884 | return IRQ_NONE; | 2882 | return IRQ_NONE; |
| 2885 | 2883 | ||
| 2886 | spin_lock(&hw->hw_lock); | 2884 | if (status & IS_EXT_REG) { |
| 2887 | if (status & IS_R1_F) { | 2885 | hw->intr_mask &= ~IS_EXT_REG; |
| 2886 | tasklet_schedule(&hw->ext_tasklet); | ||
| 2887 | } | ||
| 2888 | |||
| 2889 | if (status & (IS_R1_F|IS_XA1_F)) { | ||
| 2888 | skge_write8(hw, Q_ADDR(Q_R1, Q_CSR), CSR_IRQ_CL_F); | 2890 | skge_write8(hw, Q_ADDR(Q_R1, Q_CSR), CSR_IRQ_CL_F); |
| 2889 | hw->intr_mask &= ~IS_R1_F; | 2891 | hw->intr_mask &= ~(IS_R1_F|IS_XA1_F); |
| 2890 | netif_rx_schedule(hw->dev[0]); | 2892 | netif_rx_schedule(hw->dev[0]); |
| 2891 | } | 2893 | } |
| 2892 | 2894 | ||
| 2893 | if (status & IS_R2_F) { | 2895 | if (status & (IS_R2_F|IS_XA2_F)) { |
| 2894 | skge_write8(hw, Q_ADDR(Q_R2, Q_CSR), CSR_IRQ_CL_F); | 2896 | skge_write8(hw, Q_ADDR(Q_R2, Q_CSR), CSR_IRQ_CL_F); |
| 2895 | hw->intr_mask &= ~IS_R2_F; | 2897 | hw->intr_mask &= ~(IS_R2_F|IS_XA2_F); |
| 2896 | netif_rx_schedule(hw->dev[1]); | 2898 | netif_rx_schedule(hw->dev[1]); |
| 2897 | } | 2899 | } |
| 2898 | 2900 | ||
| 2899 | if (status & IS_XA1_F) | 2901 | if (likely((status & hw->intr_mask) == 0)) |
| 2900 | skge_tx_intr(hw->dev[0]); | 2902 | return IRQ_HANDLED; |
| 2901 | |||
| 2902 | if (status & IS_XA2_F) | ||
| 2903 | skge_tx_intr(hw->dev[1]); | ||
| 2904 | 2903 | ||
| 2905 | if (status & IS_PA_TO_RX1) { | 2904 | if (status & IS_PA_TO_RX1) { |
| 2906 | struct skge_port *skge = netdev_priv(hw->dev[0]); | 2905 | struct skge_port *skge = netdev_priv(hw->dev[0]); |
| @@ -2929,13 +2928,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs) | |||
| 2929 | if (status & IS_HW_ERR) | 2928 | if (status & IS_HW_ERR) |
| 2930 | skge_error_irq(hw); | 2929 | skge_error_irq(hw); |
| 2931 | 2930 | ||
| 2932 | if (status & IS_EXT_REG) { | ||
| 2933 | hw->intr_mask &= ~IS_EXT_REG; | ||
| 2934 | tasklet_schedule(&hw->ext_tasklet); | ||
| 2935 | } | ||
| 2936 | |||
| 2937 | skge_write32(hw, B0_IMSK, hw->intr_mask); | 2931 | skge_write32(hw, B0_IMSK, hw->intr_mask); |
| 2938 | spin_unlock(&hw->hw_lock); | ||
| 2939 | 2932 | ||
| 2940 | return IRQ_HANDLED; | 2933 | return IRQ_HANDLED; |
| 2941 | } | 2934 | } |
| @@ -3010,7 +3003,7 @@ static const char *skge_board_name(const struct skge_hw *hw) | |||
| 3010 | static int skge_reset(struct skge_hw *hw) | 3003 | static int skge_reset(struct skge_hw *hw) |
| 3011 | { | 3004 | { |
| 3012 | u32 reg; | 3005 | u32 reg; |
| 3013 | u16 ctst; | 3006 | u16 ctst, pci_status; |
| 3014 | u8 t8, mac_cfg, pmd_type, phy_type; | 3007 | u8 t8, mac_cfg, pmd_type, phy_type; |
| 3015 | int i; | 3008 | int i; |
| 3016 | 3009 | ||
| @@ -3021,8 +3014,13 @@ static int skge_reset(struct skge_hw *hw) | |||
| 3021 | skge_write8(hw, B0_CTST, CS_RST_CLR); | 3014 | skge_write8(hw, B0_CTST, CS_RST_CLR); |
| 3022 | 3015 | ||
| 3023 | /* clear PCI errors, if any */ | 3016 | /* clear PCI errors, if any */ |
| 3024 | skge_pci_clear(hw); | 3017 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); |
| 3018 | skge_write8(hw, B2_TST_CTRL2, 0); | ||
| 3025 | 3019 | ||
| 3020 | pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status); | ||
| 3021 | pci_write_config_word(hw->pdev, PCI_STATUS, | ||
| 3022 | pci_status | PCI_STATUS_ERROR_BITS); | ||
| 3023 | skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); | ||
| 3026 | skge_write8(hw, B0_CTST, CS_MRST_CLR); | 3024 | skge_write8(hw, B0_CTST, CS_MRST_CLR); |
| 3027 | 3025 | ||
| 3028 | /* restore CLK_RUN bits (for Yukon-Lite) */ | 3026 | /* restore CLK_RUN bits (for Yukon-Lite) */ |
| @@ -3081,7 +3079,10 @@ static int skge_reset(struct skge_hw *hw) | |||
| 3081 | else | 3079 | else |
| 3082 | hw->ram_size = t8 * 4096; | 3080 | hw->ram_size = t8 * 4096; |
| 3083 | 3081 | ||
| 3084 | hw->intr_mask = IS_HW_ERR | IS_EXT_REG; | 3082 | hw->intr_mask = IS_HW_ERR | IS_EXT_REG | IS_PORT_1; |
| 3083 | if (hw->ports > 1) | ||
| 3084 | hw->intr_mask |= IS_PORT_2; | ||
| 3085 | |||
| 3085 | if (hw->chip_id == CHIP_ID_GENESIS) | 3086 | if (hw->chip_id == CHIP_ID_GENESIS) |
| 3086 | genesis_init(hw); | 3087 | genesis_init(hw); |
| 3087 | else { | 3088 | else { |
| @@ -3251,13 +3252,15 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
| 3251 | struct skge_hw *hw; | 3252 | struct skge_hw *hw; |
| 3252 | int err, using_dac = 0; | 3253 | int err, using_dac = 0; |
| 3253 | 3254 | ||
| 3254 | if ((err = pci_enable_device(pdev))) { | 3255 | err = pci_enable_device(pdev); |
| 3256 | if (err) { | ||
| 3255 | printk(KERN_ERR PFX "%s cannot enable PCI device\n", | 3257 | printk(KERN_ERR PFX "%s cannot enable PCI device\n", |
| 3256 | pci_name(pdev)); | 3258 | pci_name(pdev)); |
| 3257 | goto err_out; | 3259 | goto err_out; |
| 3258 | } | 3260 | } |
| 3259 | 3261 | ||
| 3260 | if ((err = pci_request_regions(pdev, DRV_NAME))) { | 3262 | err = pci_request_regions(pdev, DRV_NAME); |
| 3263 | if (err) { | ||
| 3261 | printk(KERN_ERR PFX "%s cannot obtain PCI resources\n", | 3264 | printk(KERN_ERR PFX "%s cannot obtain PCI resources\n", |
| 3262 | pci_name(pdev)); | 3265 | pci_name(pdev)); |
| 3263 | goto err_out_disable_pdev; | 3266 | goto err_out_disable_pdev; |
| @@ -3265,22 +3268,18 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
| 3265 | 3268 | ||
| 3266 | pci_set_master(pdev); | 3269 | pci_set_master(pdev); |
| 3267 | 3270 | ||
| 3268 | if (sizeof(dma_addr_t) > sizeof(u32) && | 3271 | if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { |
| 3269 | !(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { | ||
| 3270 | using_dac = 1; | 3272 | using_dac = 1; |
| 3271 | err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); | 3273 | err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); |
| 3272 | if (err < 0) { | 3274 | } else if (!(err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { |
| 3273 | printk(KERN_ERR PFX "%s unable to obtain 64 bit DMA " | 3275 | using_dac = 0; |
| 3274 | "for consistent allocations\n", pci_name(pdev)); | 3276 | err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); |
| 3275 | goto err_out_free_regions; | 3277 | } |
| 3276 | } | 3278 | |
| 3277 | } else { | 3279 | if (err) { |
| 3278 | err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); | 3280 | printk(KERN_ERR PFX "%s no usable DMA configuration\n", |
| 3279 | if (err) { | 3281 | pci_name(pdev)); |
| 3280 | printk(KERN_ERR PFX "%s no usable DMA configuration\n", | 3282 | goto err_out_free_regions; |
| 3281 | pci_name(pdev)); | ||
| 3282 | goto err_out_free_regions; | ||
| 3283 | } | ||
| 3284 | } | 3283 | } |
| 3285 | 3284 | ||
| 3286 | #ifdef __BIG_ENDIAN | 3285 | #ifdef __BIG_ENDIAN |
| @@ -3304,7 +3303,6 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
| 3304 | 3303 | ||
| 3305 | hw->pdev = pdev; | 3304 | hw->pdev = pdev; |
| 3306 | spin_lock_init(&hw->phy_lock); | 3305 | spin_lock_init(&hw->phy_lock); |
| 3307 | spin_lock_init(&hw->hw_lock); | ||
| 3308 | tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw); | 3306 | tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw); |
| 3309 | 3307 | ||
| 3310 | hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); | 3308 | hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); |
| @@ -3314,7 +3312,8 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
| 3314 | goto err_out_free_hw; | 3312 | goto err_out_free_hw; |
| 3315 | } | 3313 | } |
| 3316 | 3314 | ||
| 3317 | if ((err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw))) { | 3315 | err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw); |
| 3316 | if (err) { | ||
| 3318 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | 3317 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", |
| 3319 | pci_name(pdev), pdev->irq); | 3318 | pci_name(pdev), pdev->irq); |
| 3320 | goto err_out_iounmap; | 3319 | goto err_out_iounmap; |
| @@ -3332,7 +3331,8 @@ static int __devinit skge_probe(struct pci_dev *pdev, | |||
| 3332 | if ((dev = skge_devinit(hw, 0, using_dac)) == NULL) | 3331 | if ((dev = skge_devinit(hw, 0, using_dac)) == NULL) |
| 3333 | goto err_out_led_off; | 3332 | goto err_out_led_off; |
| 3334 | 3333 | ||
| 3335 | if ((err = register_netdev(dev))) { | 3334 | err = register_netdev(dev); |
| 3335 | if (err) { | ||
| 3336 | printk(KERN_ERR PFX "%s: cannot register net device\n", | 3336 | printk(KERN_ERR PFX "%s: cannot register net device\n", |
| 3337 | pci_name(pdev)); | 3337 | pci_name(pdev)); |
| 3338 | goto err_out_free_netdev; | 3338 | goto err_out_free_netdev; |
| @@ -3387,7 +3387,6 @@ static void __devexit skge_remove(struct pci_dev *pdev) | |||
| 3387 | 3387 | ||
| 3388 | skge_write32(hw, B0_IMSK, 0); | 3388 | skge_write32(hw, B0_IMSK, 0); |
| 3389 | skge_write16(hw, B0_LED, LED_STAT_OFF); | 3389 | skge_write16(hw, B0_LED, LED_STAT_OFF); |
| 3390 | skge_pci_clear(hw); | ||
| 3391 | skge_write8(hw, B0_CTST, CS_RST_SET); | 3390 | skge_write8(hw, B0_CTST, CS_RST_SET); |
| 3392 | 3391 | ||
| 3393 | tasklet_kill(&hw->ext_tasklet); | 3392 | tasklet_kill(&hw->ext_tasklet); |
diff --git a/drivers/net/skge.h b/drivers/net/skge.h index 941f12a333b6..2efdacc290e5 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h | |||
| @@ -2402,7 +2402,6 @@ struct skge_hw { | |||
| 2402 | 2402 | ||
| 2403 | struct tasklet_struct ext_tasklet; | 2403 | struct tasklet_struct ext_tasklet; |
| 2404 | spinlock_t phy_lock; | 2404 | spinlock_t phy_lock; |
| 2405 | spinlock_t hw_lock; | ||
| 2406 | }; | 2405 | }; |
| 2407 | 2406 | ||
| 2408 | enum { | 2407 | enum { |
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 73260364cba3..f08fe6c884b2 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c | |||
| @@ -51,7 +51,7 @@ | |||
| 51 | #include "sky2.h" | 51 | #include "sky2.h" |
| 52 | 52 | ||
| 53 | #define DRV_NAME "sky2" | 53 | #define DRV_NAME "sky2" |
| 54 | #define DRV_VERSION "0.15" | 54 | #define DRV_VERSION "1.1" |
| 55 | #define PFX DRV_NAME " " | 55 | #define PFX DRV_NAME " " |
| 56 | 56 | ||
| 57 | /* | 57 | /* |
| @@ -61,10 +61,6 @@ | |||
| 61 | * a receive requires one (or two if using 64 bit dma). | 61 | * a receive requires one (or two if using 64 bit dma). |
| 62 | */ | 62 | */ |
| 63 | 63 | ||
| 64 | #define is_ec_a1(hw) \ | ||
| 65 | unlikely((hw)->chip_id == CHIP_ID_YUKON_EC && \ | ||
| 66 | (hw)->chip_rev == CHIP_REV_YU_EC_A1) | ||
| 67 | |||
| 68 | #define RX_LE_SIZE 512 | 64 | #define RX_LE_SIZE 512 |
| 69 | #define RX_LE_BYTES (RX_LE_SIZE*sizeof(struct sky2_rx_le)) | 65 | #define RX_LE_BYTES (RX_LE_SIZE*sizeof(struct sky2_rx_le)) |
| 70 | #define RX_MAX_PENDING (RX_LE_SIZE/2 - 2) | 66 | #define RX_MAX_PENDING (RX_LE_SIZE/2 - 2) |
| @@ -96,6 +92,10 @@ static int copybreak __read_mostly = 256; | |||
| 96 | module_param(copybreak, int, 0); | 92 | module_param(copybreak, int, 0); |
| 97 | MODULE_PARM_DESC(copybreak, "Receive copy threshold"); | 93 | MODULE_PARM_DESC(copybreak, "Receive copy threshold"); |
| 98 | 94 | ||
| 95 | static int disable_msi = 0; | ||
| 96 | module_param(disable_msi, int, 0); | ||
| 97 | MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); | ||
| 98 | |||
| 99 | static const struct pci_device_id sky2_id_table[] = { | 99 | static const struct pci_device_id sky2_id_table[] = { |
| 100 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, | 100 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, |
| 101 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, | 101 | { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, |
| @@ -504,9 +504,9 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) | |||
| 504 | /* Force a renegotiation */ | 504 | /* Force a renegotiation */ |
| 505 | static void sky2_phy_reinit(struct sky2_port *sky2) | 505 | static void sky2_phy_reinit(struct sky2_port *sky2) |
| 506 | { | 506 | { |
| 507 | down(&sky2->phy_sema); | 507 | spin_lock_bh(&sky2->phy_lock); |
| 508 | sky2_phy_init(sky2->hw, sky2->port); | 508 | sky2_phy_init(sky2->hw, sky2->port); |
| 509 | up(&sky2->phy_sema); | 509 | spin_unlock_bh(&sky2->phy_lock); |
| 510 | } | 510 | } |
| 511 | 511 | ||
| 512 | static void sky2_mac_init(struct sky2_hw *hw, unsigned port) | 512 | static void sky2_mac_init(struct sky2_hw *hw, unsigned port) |
| @@ -571,9 +571,9 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) | |||
| 571 | 571 | ||
| 572 | sky2_read16(hw, SK_REG(port, GMAC_IRQ_SRC)); | 572 | sky2_read16(hw, SK_REG(port, GMAC_IRQ_SRC)); |
| 573 | 573 | ||
| 574 | down(&sky2->phy_sema); | 574 | spin_lock_bh(&sky2->phy_lock); |
| 575 | sky2_phy_init(hw, port); | 575 | sky2_phy_init(hw, port); |
| 576 | up(&sky2->phy_sema); | 576 | spin_unlock_bh(&sky2->phy_lock); |
| 577 | 577 | ||
| 578 | /* MIB clear */ | 578 | /* MIB clear */ |
| 579 | reg = gma_read16(hw, port, GM_PHY_ADDR); | 579 | reg = gma_read16(hw, port, GM_PHY_ADDR); |
| @@ -725,37 +725,11 @@ static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2) | |||
| 725 | return le; | 725 | return le; |
| 726 | } | 726 | } |
| 727 | 727 | ||
| 728 | /* | 728 | /* Update chip's next pointer */ |
| 729 | * This is a workaround code taken from SysKonnect sk98lin driver | 729 | static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx) |
| 730 | * to deal with chip bug on Yukon EC rev 0 in the wraparound case. | ||
| 731 | */ | ||
| 732 | static void sky2_put_idx(struct sky2_hw *hw, unsigned q, | ||
| 733 | u16 idx, u16 *last, u16 size) | ||
| 734 | { | 730 | { |
| 735 | wmb(); | 731 | wmb(); |
| 736 | if (is_ec_a1(hw) && idx < *last) { | 732 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); |
| 737 | u16 hwget = sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_GET_IDX)); | ||
| 738 | |||
| 739 | if (hwget == 0) { | ||
| 740 | /* Start prefetching again */ | ||
| 741 | sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 0xe0); | ||
| 742 | goto setnew; | ||
| 743 | } | ||
| 744 | |||
| 745 | if (hwget == size - 1) { | ||
| 746 | /* set watermark to one list element */ | ||
| 747 | sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 8); | ||
| 748 | |||
| 749 | /* set put index to first list element */ | ||
| 750 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), 0); | ||
| 751 | } else /* have hardware go to end of list */ | ||
| 752 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), | ||
| 753 | size - 1); | ||
| 754 | } else { | ||
| 755 | setnew: | ||
| 756 | sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); | ||
| 757 | } | ||
| 758 | *last = idx; | ||
| 759 | mmiowb(); | 733 | mmiowb(); |
| 760 | } | 734 | } |
| 761 | 735 | ||
| @@ -878,7 +852,7 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
| 878 | if (!netif_running(dev)) | 852 | if (!netif_running(dev)) |
| 879 | return -ENODEV; /* Phy still in reset */ | 853 | return -ENODEV; /* Phy still in reset */ |
| 880 | 854 | ||
| 881 | switch(cmd) { | 855 | switch (cmd) { |
| 882 | case SIOCGMIIPHY: | 856 | case SIOCGMIIPHY: |
| 883 | data->phy_id = PHY_ADDR_MARV; | 857 | data->phy_id = PHY_ADDR_MARV; |
| 884 | 858 | ||
| @@ -886,9 +860,9 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
| 886 | case SIOCGMIIREG: { | 860 | case SIOCGMIIREG: { |
| 887 | u16 val = 0; | 861 | u16 val = 0; |
| 888 | 862 | ||
| 889 | down(&sky2->phy_sema); | 863 | spin_lock_bh(&sky2->phy_lock); |
| 890 | err = __gm_phy_read(hw, sky2->port, data->reg_num & 0x1f, &val); | 864 | err = __gm_phy_read(hw, sky2->port, data->reg_num & 0x1f, &val); |
| 891 | up(&sky2->phy_sema); | 865 | spin_unlock_bh(&sky2->phy_lock); |
| 892 | 866 | ||
| 893 | data->val_out = val; | 867 | data->val_out = val; |
| 894 | break; | 868 | break; |
| @@ -898,10 +872,10 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
| 898 | if (!capable(CAP_NET_ADMIN)) | 872 | if (!capable(CAP_NET_ADMIN)) |
| 899 | return -EPERM; | 873 | return -EPERM; |
| 900 | 874 | ||
| 901 | down(&sky2->phy_sema); | 875 | spin_lock_bh(&sky2->phy_lock); |
| 902 | err = gm_phy_write(hw, sky2->port, data->reg_num & 0x1f, | 876 | err = gm_phy_write(hw, sky2->port, data->reg_num & 0x1f, |
| 903 | data->val_in); | 877 | data->val_in); |
| 904 | up(&sky2->phy_sema); | 878 | spin_unlock_bh(&sky2->phy_lock); |
| 905 | break; | 879 | break; |
| 906 | } | 880 | } |
| 907 | return err; | 881 | return err; |
| @@ -1001,7 +975,6 @@ static int sky2_rx_start(struct sky2_port *sky2) | |||
| 1001 | 975 | ||
| 1002 | /* Tell chip about available buffers */ | 976 | /* Tell chip about available buffers */ |
| 1003 | sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2->rx_put); | 977 | sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2->rx_put); |
| 1004 | sky2->rx_last_put = sky2_read16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX)); | ||
| 1005 | return 0; | 978 | return 0; |
| 1006 | nomem: | 979 | nomem: |
| 1007 | sky2_rx_clean(sky2); | 980 | sky2_rx_clean(sky2); |
| @@ -1014,7 +987,7 @@ static int sky2_up(struct net_device *dev) | |||
| 1014 | struct sky2_port *sky2 = netdev_priv(dev); | 987 | struct sky2_port *sky2 = netdev_priv(dev); |
| 1015 | struct sky2_hw *hw = sky2->hw; | 988 | struct sky2_hw *hw = sky2->hw; |
| 1016 | unsigned port = sky2->port; | 989 | unsigned port = sky2->port; |
| 1017 | u32 ramsize, rxspace; | 990 | u32 ramsize, rxspace, imask; |
| 1018 | int err = -ENOMEM; | 991 | int err = -ENOMEM; |
| 1019 | 992 | ||
| 1020 | if (netif_msg_ifup(sky2)) | 993 | if (netif_msg_ifup(sky2)) |
| @@ -1079,10 +1052,10 @@ static int sky2_up(struct net_device *dev) | |||
| 1079 | goto err_out; | 1052 | goto err_out; |
| 1080 | 1053 | ||
| 1081 | /* Enable interrupts from phy/mac for port */ | 1054 | /* Enable interrupts from phy/mac for port */ |
| 1082 | spin_lock_irq(&hw->hw_lock); | 1055 | imask = sky2_read32(hw, B0_IMSK); |
| 1083 | hw->intr_mask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2; | 1056 | imask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2; |
| 1084 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 1057 | sky2_write32(hw, B0_IMSK, imask); |
| 1085 | spin_unlock_irq(&hw->hw_lock); | 1058 | |
| 1086 | return 0; | 1059 | return 0; |
| 1087 | 1060 | ||
| 1088 | err_out: | 1061 | err_out: |
| @@ -1299,8 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) | |||
| 1299 | netif_stop_queue(dev); | 1272 | netif_stop_queue(dev); |
| 1300 | } | 1273 | } |
| 1301 | 1274 | ||
| 1302 | sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod, | 1275 | sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod); |
| 1303 | &sky2->tx_last_put, TX_RING_SIZE); | ||
| 1304 | 1276 | ||
| 1305 | out_unlock: | 1277 | out_unlock: |
| 1306 | spin_unlock(&sky2->tx_lock); | 1278 | spin_unlock(&sky2->tx_lock); |
| @@ -1332,7 +1304,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
| 1332 | struct tx_ring_info *re = sky2->tx_ring + put; | 1304 | struct tx_ring_info *re = sky2->tx_ring + put; |
| 1333 | struct sk_buff *skb = re->skb; | 1305 | struct sk_buff *skb = re->skb; |
| 1334 | 1306 | ||
| 1335 | nxt = re->idx; | 1307 | nxt = re->idx; |
| 1336 | BUG_ON(nxt >= TX_RING_SIZE); | 1308 | BUG_ON(nxt >= TX_RING_SIZE); |
| 1337 | prefetch(sky2->tx_ring + nxt); | 1309 | prefetch(sky2->tx_ring + nxt); |
| 1338 | 1310 | ||
| @@ -1348,7 +1320,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
| 1348 | struct tx_ring_info *fre; | 1320 | struct tx_ring_info *fre; |
| 1349 | fre = sky2->tx_ring + (put + i + 1) % TX_RING_SIZE; | 1321 | fre = sky2->tx_ring + (put + i + 1) % TX_RING_SIZE; |
| 1350 | pci_unmap_page(pdev, pci_unmap_addr(fre, mapaddr), | 1322 | pci_unmap_page(pdev, pci_unmap_addr(fre, mapaddr), |
| 1351 | skb_shinfo(skb)->frags[i].size, | 1323 | skb_shinfo(skb)->frags[i].size, |
| 1352 | PCI_DMA_TODEVICE); | 1324 | PCI_DMA_TODEVICE); |
| 1353 | } | 1325 | } |
| 1354 | 1326 | ||
| @@ -1356,7 +1328,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) | |||
| 1356 | } | 1328 | } |
| 1357 | 1329 | ||
| 1358 | sky2->tx_cons = put; | 1330 | sky2->tx_cons = put; |
| 1359 | if (netif_queue_stopped(dev) && tx_avail(sky2) > MAX_SKB_TX_LE) | 1331 | if (tx_avail(sky2) > MAX_SKB_TX_LE) |
| 1360 | netif_wake_queue(dev); | 1332 | netif_wake_queue(dev); |
| 1361 | } | 1333 | } |
| 1362 | 1334 | ||
| @@ -1375,6 +1347,7 @@ static int sky2_down(struct net_device *dev) | |||
| 1375 | struct sky2_hw *hw = sky2->hw; | 1347 | struct sky2_hw *hw = sky2->hw; |
| 1376 | unsigned port = sky2->port; | 1348 | unsigned port = sky2->port; |
| 1377 | u16 ctrl; | 1349 | u16 ctrl; |
| 1350 | u32 imask; | ||
| 1378 | 1351 | ||
| 1379 | /* Never really got started! */ | 1352 | /* Never really got started! */ |
| 1380 | if (!sky2->tx_le) | 1353 | if (!sky2->tx_le) |
| @@ -1386,14 +1359,6 @@ static int sky2_down(struct net_device *dev) | |||
| 1386 | /* Stop more packets from being queued */ | 1359 | /* Stop more packets from being queued */ |
| 1387 | netif_stop_queue(dev); | 1360 | netif_stop_queue(dev); |
| 1388 | 1361 | ||
| 1389 | /* Disable port IRQ */ | ||
| 1390 | spin_lock_irq(&hw->hw_lock); | ||
| 1391 | hw->intr_mask &= ~((sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2); | ||
| 1392 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 1393 | spin_unlock_irq(&hw->hw_lock); | ||
| 1394 | |||
| 1395 | flush_scheduled_work(); | ||
| 1396 | |||
| 1397 | sky2_phy_reset(hw, port); | 1362 | sky2_phy_reset(hw, port); |
| 1398 | 1363 | ||
| 1399 | /* Stop transmitter */ | 1364 | /* Stop transmitter */ |
| @@ -1437,6 +1402,11 @@ static int sky2_down(struct net_device *dev) | |||
| 1437 | sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); | 1402 | sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); |
| 1438 | sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET); | 1403 | sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET); |
| 1439 | 1404 | ||
| 1405 | /* Disable port IRQ */ | ||
| 1406 | imask = sky2_read32(hw, B0_IMSK); | ||
| 1407 | imask &= ~((sky2->port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2); | ||
| 1408 | sky2_write32(hw, B0_IMSK, imask); | ||
| 1409 | |||
| 1440 | /* turn off LED's */ | 1410 | /* turn off LED's */ |
| 1441 | sky2_write16(hw, B0_Y2LED, LED_STAT_OFF); | 1411 | sky2_write16(hw, B0_Y2LED, LED_STAT_OFF); |
| 1442 | 1412 | ||
| @@ -1631,20 +1601,19 @@ static int sky2_autoneg_done(struct sky2_port *sky2, u16 aux) | |||
| 1631 | return 0; | 1601 | return 0; |
| 1632 | } | 1602 | } |
| 1633 | 1603 | ||
| 1634 | /* | 1604 | /* Interrupt from PHY */ |
| 1635 | * Interrupt from PHY are handled outside of interrupt context | 1605 | static void sky2_phy_intr(struct sky2_hw *hw, unsigned port) |
| 1636 | * because accessing phy registers requires spin wait which might | ||
| 1637 | * cause excess interrupt latency. | ||
| 1638 | */ | ||
| 1639 | static void sky2_phy_task(void *arg) | ||
| 1640 | { | 1606 | { |
| 1641 | struct sky2_port *sky2 = arg; | 1607 | struct net_device *dev = hw->dev[port]; |
| 1642 | struct sky2_hw *hw = sky2->hw; | 1608 | struct sky2_port *sky2 = netdev_priv(dev); |
| 1643 | u16 istatus, phystat; | 1609 | u16 istatus, phystat; |
| 1644 | 1610 | ||
| 1645 | down(&sky2->phy_sema); | 1611 | spin_lock(&sky2->phy_lock); |
| 1646 | istatus = gm_phy_read(hw, sky2->port, PHY_MARV_INT_STAT); | 1612 | istatus = gm_phy_read(hw, port, PHY_MARV_INT_STAT); |
| 1647 | phystat = gm_phy_read(hw, sky2->port, PHY_MARV_PHY_STAT); | 1613 | phystat = gm_phy_read(hw, port, PHY_MARV_PHY_STAT); |
| 1614 | |||
| 1615 | if (!netif_running(dev)) | ||
| 1616 | goto out; | ||
| 1648 | 1617 | ||
| 1649 | if (netif_msg_intr(sky2)) | 1618 | if (netif_msg_intr(sky2)) |
| 1650 | printk(KERN_INFO PFX "%s: phy interrupt status 0x%x 0x%x\n", | 1619 | printk(KERN_INFO PFX "%s: phy interrupt status 0x%x 0x%x\n", |
| @@ -1670,12 +1639,7 @@ static void sky2_phy_task(void *arg) | |||
| 1670 | sky2_link_down(sky2); | 1639 | sky2_link_down(sky2); |
| 1671 | } | 1640 | } |
| 1672 | out: | 1641 | out: |
| 1673 | up(&sky2->phy_sema); | 1642 | spin_unlock(&sky2->phy_lock); |
| 1674 | |||
| 1675 | spin_lock_irq(&hw->hw_lock); | ||
| 1676 | hw->intr_mask |= (sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2; | ||
| 1677 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 1678 | spin_unlock_irq(&hw->hw_lock); | ||
| 1679 | } | 1643 | } |
| 1680 | 1644 | ||
| 1681 | 1645 | ||
| @@ -1687,31 +1651,40 @@ static void sky2_tx_timeout(struct net_device *dev) | |||
| 1687 | struct sky2_port *sky2 = netdev_priv(dev); | 1651 | struct sky2_port *sky2 = netdev_priv(dev); |
| 1688 | struct sky2_hw *hw = sky2->hw; | 1652 | struct sky2_hw *hw = sky2->hw; |
| 1689 | unsigned txq = txqaddr[sky2->port]; | 1653 | unsigned txq = txqaddr[sky2->port]; |
| 1690 | u16 ridx; | 1654 | u16 report, done; |
| 1691 | |||
| 1692 | /* Maybe we just missed an status interrupt */ | ||
| 1693 | spin_lock(&sky2->tx_lock); | ||
| 1694 | ridx = sky2_read16(hw, | ||
| 1695 | sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX); | ||
| 1696 | sky2_tx_complete(sky2, ridx); | ||
| 1697 | spin_unlock(&sky2->tx_lock); | ||
| 1698 | |||
| 1699 | if (!netif_queue_stopped(dev)) { | ||
| 1700 | if (net_ratelimit()) | ||
| 1701 | pr_info(PFX "transmit interrupt missed? recovered\n"); | ||
| 1702 | return; | ||
| 1703 | } | ||
| 1704 | 1655 | ||
| 1705 | if (netif_msg_timer(sky2)) | 1656 | if (netif_msg_timer(sky2)) |
| 1706 | printk(KERN_ERR PFX "%s: tx timeout\n", dev->name); | 1657 | printk(KERN_ERR PFX "%s: tx timeout\n", dev->name); |
| 1707 | 1658 | ||
| 1708 | sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP); | 1659 | report = sky2_read16(hw, sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX); |
| 1709 | sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); | 1660 | done = sky2_read16(hw, Q_ADDR(txq, Q_DONE)); |
| 1710 | 1661 | ||
| 1711 | sky2_tx_clean(sky2); | 1662 | printk(KERN_DEBUG PFX "%s: transmit ring %u .. %u report=%u done=%u\n", |
| 1663 | dev->name, | ||
| 1664 | sky2->tx_cons, sky2->tx_prod, report, done); | ||
| 1712 | 1665 | ||
| 1713 | sky2_qset(hw, txq); | 1666 | if (report != done) { |
| 1714 | sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1); | 1667 | printk(KERN_INFO PFX "status burst pending (irq moderation?)\n"); |
| 1668 | |||
| 1669 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); | ||
| 1670 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); | ||
| 1671 | } else if (report != sky2->tx_cons) { | ||
| 1672 | printk(KERN_INFO PFX "status report lost?\n"); | ||
| 1673 | |||
| 1674 | spin_lock_bh(&sky2->tx_lock); | ||
| 1675 | sky2_tx_complete(sky2, report); | ||
| 1676 | spin_unlock_bh(&sky2->tx_lock); | ||
| 1677 | } else { | ||
| 1678 | printk(KERN_INFO PFX "hardware hung? flushing\n"); | ||
| 1679 | |||
| 1680 | sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP); | ||
| 1681 | sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); | ||
| 1682 | |||
| 1683 | sky2_tx_clean(sky2); | ||
| 1684 | |||
| 1685 | sky2_qset(hw, txq); | ||
| 1686 | sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1); | ||
| 1687 | } | ||
| 1715 | } | 1688 | } |
| 1716 | 1689 | ||
| 1717 | 1690 | ||
| @@ -1730,6 +1703,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
| 1730 | struct sky2_hw *hw = sky2->hw; | 1703 | struct sky2_hw *hw = sky2->hw; |
| 1731 | int err; | 1704 | int err; |
| 1732 | u16 ctl, mode; | 1705 | u16 ctl, mode; |
| 1706 | u32 imask; | ||
| 1733 | 1707 | ||
| 1734 | if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) | 1708 | if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) |
| 1735 | return -EINVAL; | 1709 | return -EINVAL; |
| @@ -1742,12 +1716,15 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
| 1742 | return 0; | 1716 | return 0; |
| 1743 | } | 1717 | } |
| 1744 | 1718 | ||
| 1719 | imask = sky2_read32(hw, B0_IMSK); | ||
| 1745 | sky2_write32(hw, B0_IMSK, 0); | 1720 | sky2_write32(hw, B0_IMSK, 0); |
| 1746 | 1721 | ||
| 1747 | dev->trans_start = jiffies; /* prevent tx timeout */ | 1722 | dev->trans_start = jiffies; /* prevent tx timeout */ |
| 1748 | netif_stop_queue(dev); | 1723 | netif_stop_queue(dev); |
| 1749 | netif_poll_disable(hw->dev[0]); | 1724 | netif_poll_disable(hw->dev[0]); |
| 1750 | 1725 | ||
| 1726 | synchronize_irq(hw->pdev->irq); | ||
| 1727 | |||
| 1751 | ctl = gma_read16(hw, sky2->port, GM_GP_CTRL); | 1728 | ctl = gma_read16(hw, sky2->port, GM_GP_CTRL); |
| 1752 | gma_write16(hw, sky2->port, GM_GP_CTRL, ctl & ~GM_GPCR_RX_ENA); | 1729 | gma_write16(hw, sky2->port, GM_GP_CTRL, ctl & ~GM_GPCR_RX_ENA); |
| 1753 | sky2_rx_stop(sky2); | 1730 | sky2_rx_stop(sky2); |
| @@ -1766,7 +1743,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) | |||
| 1766 | sky2_write8(hw, RB_ADDR(rxqaddr[sky2->port], RB_CTRL), RB_ENA_OP_MD); | 1743 | sky2_write8(hw, RB_ADDR(rxqaddr[sky2->port], RB_CTRL), RB_ENA_OP_MD); |
| 1767 | 1744 | ||
| 1768 | err = sky2_rx_start(sky2); | 1745 | err = sky2_rx_start(sky2); |
| 1769 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 1746 | sky2_write32(hw, B0_IMSK, imask); |
| 1770 | 1747 | ||
| 1771 | if (err) | 1748 | if (err) |
| 1772 | dev_close(dev); | 1749 | dev_close(dev); |
| @@ -1843,8 +1820,7 @@ resubmit: | |||
| 1843 | sky2_rx_add(sky2, re->mapaddr); | 1820 | sky2_rx_add(sky2, re->mapaddr); |
| 1844 | 1821 | ||
| 1845 | /* Tell receiver about new buffers. */ | 1822 | /* Tell receiver about new buffers. */ |
| 1846 | sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put, | 1823 | sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put); |
| 1847 | &sky2->rx_last_put, RX_LE_SIZE); | ||
| 1848 | 1824 | ||
| 1849 | return skb; | 1825 | return skb; |
| 1850 | 1826 | ||
| @@ -1871,76 +1847,51 @@ error: | |||
| 1871 | goto resubmit; | 1847 | goto resubmit; |
| 1872 | } | 1848 | } |
| 1873 | 1849 | ||
| 1874 | /* | 1850 | /* Transmit complete */ |
| 1875 | * Check for transmit complete | 1851 | static inline void sky2_tx_done(struct net_device *dev, u16 last) |
| 1876 | */ | ||
| 1877 | #define TX_NO_STATUS 0xffff | ||
| 1878 | |||
| 1879 | static void sky2_tx_check(struct sky2_hw *hw, int port, u16 last) | ||
| 1880 | { | 1852 | { |
| 1881 | if (last != TX_NO_STATUS) { | 1853 | struct sky2_port *sky2 = netdev_priv(dev); |
| 1882 | struct net_device *dev = hw->dev[port]; | ||
| 1883 | if (dev && netif_running(dev)) { | ||
| 1884 | struct sky2_port *sky2 = netdev_priv(dev); | ||
| 1885 | 1854 | ||
| 1886 | spin_lock(&sky2->tx_lock); | 1855 | if (netif_running(dev)) { |
| 1887 | sky2_tx_complete(sky2, last); | 1856 | spin_lock(&sky2->tx_lock); |
| 1888 | spin_unlock(&sky2->tx_lock); | 1857 | sky2_tx_complete(sky2, last); |
| 1889 | } | 1858 | spin_unlock(&sky2->tx_lock); |
| 1890 | } | 1859 | } |
| 1891 | } | 1860 | } |
| 1892 | 1861 | ||
| 1893 | /* | 1862 | /* Process status response ring */ |
| 1894 | * Both ports share the same status interrupt, therefore there is only | 1863 | static int sky2_status_intr(struct sky2_hw *hw, int to_do) |
| 1895 | * one poll routine. | ||
| 1896 | */ | ||
| 1897 | static int sky2_poll(struct net_device *dev0, int *budget) | ||
| 1898 | { | 1864 | { |
| 1899 | struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw; | 1865 | int work_done = 0; |
| 1900 | unsigned int to_do = min(dev0->quota, *budget); | ||
| 1901 | unsigned int work_done = 0; | ||
| 1902 | u16 hwidx; | ||
| 1903 | u16 tx_done[2] = { TX_NO_STATUS, TX_NO_STATUS }; | ||
| 1904 | |||
| 1905 | sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ); | ||
| 1906 | |||
| 1907 | /* | ||
| 1908 | * Kick the STAT_LEV_TIMER_CTRL timer. | ||
| 1909 | * This fixes my hangs on Yukon-EC (0xb6) rev 1. | ||
| 1910 | * The if clause is there to start the timer only if it has been | ||
| 1911 | * configured correctly and not been disabled via ethtool. | ||
| 1912 | */ | ||
| 1913 | if (sky2_read8(hw, STAT_LEV_TIMER_CTRL) == TIM_START) { | ||
| 1914 | sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_STOP); | ||
| 1915 | sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_START); | ||
| 1916 | } | ||
| 1917 | 1866 | ||
| 1918 | hwidx = sky2_read16(hw, STAT_PUT_IDX); | ||
| 1919 | BUG_ON(hwidx >= STATUS_RING_SIZE); | ||
| 1920 | rmb(); | 1867 | rmb(); |
| 1921 | 1868 | ||
| 1922 | while (hwidx != hw->st_idx) { | 1869 | for (;;) {
| 1923 | struct sky2_status_le *le = hw->st_le + hw->st_idx; | 1870 | struct sky2_status_le *le = hw->st_le + hw->st_idx; |
| 1924 | struct net_device *dev; | 1871 | struct net_device *dev; |
| 1925 | struct sky2_port *sky2; | 1872 | struct sky2_port *sky2; |
| 1926 | struct sk_buff *skb; | 1873 | struct sk_buff *skb; |
| 1927 | u32 status; | 1874 | u32 status; |
| 1928 | u16 length; | 1875 | u16 length; |
| 1876 | u8 link, opcode; | ||
| 1877 | |||
| 1878 | opcode = le->opcode; | ||
| 1879 | if (!opcode) | ||
| 1880 | break; | ||
| 1881 | opcode &= ~HW_OWNER; | ||
| 1929 | 1882 | ||
| 1930 | le = hw->st_le + hw->st_idx; | ||
| 1931 | hw->st_idx = (hw->st_idx + 1) % STATUS_RING_SIZE; | 1883 | hw->st_idx = (hw->st_idx + 1) % STATUS_RING_SIZE; |
| 1932 | prefetch(hw->st_le + hw->st_idx); | 1884 | le->opcode = 0; |
| 1933 | 1885 | ||
| 1934 | BUG_ON(le->link >= 2); | 1886 | link = le->link; |
| 1935 | dev = hw->dev[le->link]; | 1887 | BUG_ON(link >= 2); |
| 1936 | if (dev == NULL || !netif_running(dev)) | 1888 | dev = hw->dev[link]; |
| 1937 | continue; | ||
| 1938 | 1889 | ||
| 1939 | sky2 = netdev_priv(dev); | 1890 | sky2 = netdev_priv(dev); |
| 1940 | status = le32_to_cpu(le->status); | 1891 | length = le->length; |
| 1941 | length = le16_to_cpu(le->length); | 1892 | status = le->status; |
| 1942 | 1893 | ||
| 1943 | switch (le->opcode & ~HW_OWNER) { | 1894 | switch (opcode) { |
| 1944 | case OP_RXSTAT: | 1895 | case OP_RXSTAT: |
| 1945 | skb = sky2_receive(sky2, length, status); | 1896 | skb = sky2_receive(sky2, length, status); |
| 1946 | if (!skb) | 1897 | if (!skb) |
| @@ -1980,42 +1931,23 @@ static int sky2_poll(struct net_device *dev0, int *budget) | |||
| 1980 | 1931 | ||
| 1981 | case OP_TXINDEXLE: | 1932 | case OP_TXINDEXLE: |
| 1982 | /* TX index reports status for both ports */ | 1933 | /* TX index reports status for both ports */ |
| 1983 | tx_done[0] = status & 0xffff; | 1934 | sky2_tx_done(hw->dev[0], status & 0xffff); |
| 1984 | tx_done[1] = ((status >> 24) & 0xff) | 1935 | if (hw->dev[1]) |
| 1985 | | (u16)(length & 0xf) << 8; | 1936 | sky2_tx_done(hw->dev[1], |
| 1937 | ((status >> 24) & 0xff) | ||
| 1938 | | (u16)(length & 0xf) << 8); | ||
| 1986 | break; | 1939 | break; |
| 1987 | 1940 | ||
| 1988 | default: | 1941 | default: |
| 1989 | if (net_ratelimit()) | 1942 | if (net_ratelimit()) |
| 1990 | printk(KERN_WARNING PFX | 1943 | printk(KERN_WARNING PFX |
| 1991 | "unknown status opcode 0x%x\n", le->opcode); | 1944 | "unknown status opcode 0x%x\n", opcode); |
| 1992 | break; | 1945 | break; |
| 1993 | } | 1946 | } |
| 1994 | } | 1947 | } |
| 1995 | 1948 | ||
| 1996 | exit_loop: | 1949 | exit_loop: |
| 1997 | sky2_tx_check(hw, 0, tx_done[0]); | 1950 | return work_done; |
| 1998 | sky2_tx_check(hw, 1, tx_done[1]); | ||
| 1999 | |||
| 2000 | if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) { | ||
| 2001 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); | ||
| 2002 | sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); | ||
| 2003 | } | ||
| 2004 | |||
| 2005 | if (likely(work_done < to_do)) { | ||
| 2006 | spin_lock_irq(&hw->hw_lock); | ||
| 2007 | __netif_rx_complete(dev0); | ||
| 2008 | |||
| 2009 | hw->intr_mask |= Y2_IS_STAT_BMU; | ||
| 2010 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 2011 | spin_unlock_irq(&hw->hw_lock); | ||
| 2012 | |||
| 2013 | return 0; | ||
| 2014 | } else { | ||
| 2015 | *budget -= work_done; | ||
| 2016 | dev0->quota -= work_done; | ||
| 2017 | return 1; | ||
| 2018 | } | ||
| 2019 | } | 1951 | } |
| 2020 | 1952 | ||
| 2021 | static void sky2_hw_error(struct sky2_hw *hw, unsigned port, u32 status) | 1953 | static void sky2_hw_error(struct sky2_hw *hw, unsigned port, u32 status) |
| @@ -2134,57 +2066,97 @@ static void sky2_mac_intr(struct sky2_hw *hw, unsigned port) | |||
| 2134 | } | 2066 | } |
| 2135 | } | 2067 | } |
| 2136 | 2068 | ||
| 2137 | static void sky2_phy_intr(struct sky2_hw *hw, unsigned port) | 2069 | /* This should never happen; it is a fatal situation */
| 2070 | static void sky2_descriptor_error(struct sky2_hw *hw, unsigned port, | ||
| 2071 | const char *rxtx, u32 mask) | ||
| 2138 | { | 2072 | { |
| 2139 | struct net_device *dev = hw->dev[port]; | 2073 | struct net_device *dev = hw->dev[port]; |
| 2140 | struct sky2_port *sky2 = netdev_priv(dev); | 2074 | struct sky2_port *sky2 = netdev_priv(dev); |
| 2075 | u32 imask; | ||
| 2076 | |||
| 2077 | printk(KERN_ERR PFX "%s: %s descriptor error (hardware problem)\n", | ||
| 2078 | dev ? dev->name : "<not registered>", rxtx); | ||
| 2141 | 2079 | ||
| 2142 | hw->intr_mask &= ~(port == 0 ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2); | 2080 | imask = sky2_read32(hw, B0_IMSK); |
| 2143 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | 2081 | imask &= ~mask; |
| 2082 | sky2_write32(hw, B0_IMSK, imask); | ||
| 2144 | 2083 | ||
| 2145 | schedule_work(&sky2->phy_task); | 2084 | if (dev) { |
| 2085 | spin_lock(&sky2->phy_lock); | ||
| 2086 | sky2_link_down(sky2); | ||
| 2087 | spin_unlock(&sky2->phy_lock); | ||
| 2088 | } | ||
| 2146 | } | 2089 | } |
| 2147 | 2090 | ||
| 2148 | static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) | 2091 | static int sky2_poll(struct net_device *dev0, int *budget) |
| 2149 | { | 2092 | { |
| 2150 | struct sky2_hw *hw = dev_id; | 2093 | struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw; |
| 2151 | struct net_device *dev0 = hw->dev[0]; | 2094 | int work_limit = min(dev0->quota, *budget); |
| 2152 | u32 status; | 2095 | int work_done = 0; |
| 2096 | u32 status = sky2_read32(hw, B0_Y2_SP_EISR); | ||
| 2153 | 2097 | ||
| 2154 | status = sky2_read32(hw, B0_Y2_SP_ISRC2); | 2098 | if (unlikely(status & ~Y2_IS_STAT_BMU)) { |
| 2155 | if (status == 0 || status == ~0) | 2099 | if (status & Y2_IS_HW_ERR) |
| 2156 | return IRQ_NONE; | 2100 | sky2_hw_intr(hw); |
| 2157 | 2101 | ||
| 2158 | spin_lock(&hw->hw_lock); | 2102 | if (status & Y2_IS_IRQ_PHY1) |
| 2159 | if (status & Y2_IS_HW_ERR) | 2103 | sky2_phy_intr(hw, 0); |
| 2160 | sky2_hw_intr(hw); | ||
| 2161 | 2104 | ||
| 2162 | /* Do NAPI for Rx and Tx status */ | 2105 | if (status & Y2_IS_IRQ_PHY2) |
| 2163 | if (status & Y2_IS_STAT_BMU) { | 2106 | sky2_phy_intr(hw, 1); |
| 2164 | hw->intr_mask &= ~Y2_IS_STAT_BMU; | ||
| 2165 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 2166 | 2107 | ||
| 2167 | if (likely(__netif_rx_schedule_prep(dev0))) { | 2108 | if (status & Y2_IS_IRQ_MAC1) |
| 2168 | prefetch(&hw->st_le[hw->st_idx]); | 2109 | sky2_mac_intr(hw, 0); |
| 2169 | __netif_rx_schedule(dev0); | 2110 | |
| 2170 | } | 2111 | if (status & Y2_IS_IRQ_MAC2) |
| 2112 | sky2_mac_intr(hw, 1); | ||
| 2113 | |||
| 2114 | if (status & Y2_IS_CHK_RX1) | ||
| 2115 | sky2_descriptor_error(hw, 0, "receive", Y2_IS_CHK_RX1); | ||
| 2116 | |||
| 2117 | if (status & Y2_IS_CHK_RX2) | ||
| 2118 | sky2_descriptor_error(hw, 1, "receive", Y2_IS_CHK_RX2); | ||
| 2119 | |||
| 2120 | if (status & Y2_IS_CHK_TXA1) | ||
| 2121 | sky2_descriptor_error(hw, 0, "transmit", Y2_IS_CHK_TXA1); | ||
| 2122 | |||
| 2123 | if (status & Y2_IS_CHK_TXA2) | ||
| 2124 | sky2_descriptor_error(hw, 1, "transmit", Y2_IS_CHK_TXA2); | ||
| 2171 | } | 2125 | } |
| 2172 | 2126 | ||
| 2173 | if (status & Y2_IS_IRQ_PHY1) | 2127 | if (status & Y2_IS_STAT_BMU) { |
| 2174 | sky2_phy_intr(hw, 0); | 2128 | work_done = sky2_status_intr(hw, work_limit); |
| 2129 | *budget -= work_done; | ||
| 2130 | dev0->quota -= work_done; | ||
| 2131 | |||
| 2132 | if (work_done >= work_limit) | ||
| 2133 | return 1; | ||
| 2175 | 2134 | ||
| 2176 | if (status & Y2_IS_IRQ_PHY2) | 2135 | sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ); |
| 2177 | sky2_phy_intr(hw, 1); | 2136 | } |
| 2178 | 2137 | ||
| 2179 | if (status & Y2_IS_IRQ_MAC1) | 2138 | netif_rx_complete(dev0); |
| 2180 | sky2_mac_intr(hw, 0); | ||
| 2181 | 2139 | ||
| 2182 | if (status & Y2_IS_IRQ_MAC2) | 2140 | status = sky2_read32(hw, B0_Y2_SP_LISR); |
| 2183 | sky2_mac_intr(hw, 1); | 2141 | return 0; |
| 2142 | } | ||
| 2184 | 2143 | ||
| 2185 | sky2_write32(hw, B0_Y2_SP_ICR, 2); | 2144 | static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) |
| 2145 | { | ||
| 2146 | struct sky2_hw *hw = dev_id; | ||
| 2147 | struct net_device *dev0 = hw->dev[0]; | ||
| 2148 | u32 status; | ||
| 2186 | 2149 | ||
| 2187 | spin_unlock(&hw->hw_lock); | 2150 | /* Reading this register masks interrupts as a side effect */
| 2151 | status = sky2_read32(hw, B0_Y2_SP_ISRC2); | ||
| 2152 | if (status == 0 || status == ~0) | ||
| 2153 | return IRQ_NONE; | ||
| 2154 | |||
| 2155 | prefetch(&hw->st_le[hw->st_idx]); | ||
| 2156 | if (likely(__netif_rx_schedule_prep(dev0))) | ||
| 2157 | __netif_rx_schedule(dev0); | ||
| 2158 | else | ||
| 2159 | printk(KERN_DEBUG PFX "irq race detected\n"); | ||
| 2188 | 2160 | ||
| 2189 | return IRQ_HANDLED; | 2161 | return IRQ_HANDLED; |
| 2190 | } | 2162 | } |
| @@ -2238,6 +2210,23 @@ static int sky2_reset(struct sky2_hw *hw) | |||
| 2238 | return -EOPNOTSUPP; | 2210 | return -EOPNOTSUPP; |
| 2239 | } | 2211 | } |
| 2240 | 2212 | ||
| 2213 | hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; | ||
| 2214 | |||
| 2215 | /* This rev is really old, and requires untested workarounds */ | ||
| 2216 | if (hw->chip_id == CHIP_ID_YUKON_EC && hw->chip_rev == CHIP_REV_YU_EC_A1) { | ||
| 2217 | printk(KERN_ERR PFX "%s: unsupported revision Yukon-%s (0x%x) rev %d\n", | ||
| 2218 | pci_name(hw->pdev), yukon2_name[hw->chip_id - CHIP_ID_YUKON_XL], | ||
| 2219 | hw->chip_id, hw->chip_rev); | ||
| 2220 | return -EOPNOTSUPP; | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | /* This chip is new and not tested yet */ | ||
| 2224 | if (hw->chip_id == CHIP_ID_YUKON_EC_U) { | ||
| 2225 | pr_info(PFX "%s: this version of the Yukon 2 chipset has not been tested yet.\n", | ||
| 2226 | pci_name(hw->pdev)); | ||
| 2227 | pr_info("Please report success/failure to maintainer <shemminger@osdl.org>\n"); | ||
| 2228 | } | ||
| 2229 | |||
| 2241 | /* disable ASF */ | 2230 | /* disable ASF */ |
| 2242 | if (hw->chip_id <= CHIP_ID_YUKON_EC) { | 2231 | if (hw->chip_id <= CHIP_ID_YUKON_EC) { |
| 2243 | sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); | 2232 | sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); |
| @@ -2258,7 +2247,7 @@ static int sky2_reset(struct sky2_hw *hw) | |||
| 2258 | sky2_write8(hw, B0_CTST, CS_MRST_CLR); | 2247 | sky2_write8(hw, B0_CTST, CS_MRST_CLR); |
| 2259 | 2248 | ||
| 2260 | /* clear any PEX errors */ | 2249 | /* clear any PEX errors */ |
| 2261 | if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) | 2250 | if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) |
| 2262 | sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); | 2251 | sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); |
| 2263 | 2252 | ||
| 2264 | 2253 | ||
| @@ -2271,7 +2260,6 @@ static int sky2_reset(struct sky2_hw *hw) | |||
| 2271 | if (!(sky2_read8(hw, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) | 2260 | if (!(sky2_read8(hw, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) |
| 2272 | ++hw->ports; | 2261 | ++hw->ports; |
| 2273 | } | 2262 | } |
| 2274 | hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4; | ||
| 2275 | 2263 | ||
| 2276 | sky2_set_power_state(hw, PCI_D0); | 2264 | sky2_set_power_state(hw, PCI_D0); |
| 2277 | 2265 | ||
| @@ -2337,30 +2325,18 @@ static int sky2_reset(struct sky2_hw *hw) | |||
| 2337 | /* Set the list last index */ | 2325 | /* Set the list last index */ |
| 2338 | sky2_write16(hw, STAT_LAST_IDX, STATUS_RING_SIZE - 1); | 2326 | sky2_write16(hw, STAT_LAST_IDX, STATUS_RING_SIZE - 1); |
| 2339 | 2327 | ||
| 2340 | /* These status setup values are copied from SysKonnect's driver */ | 2328 | sky2_write16(hw, STAT_TX_IDX_TH, 10); |
| 2341 | if (is_ec_a1(hw)) { | 2329 | sky2_write8(hw, STAT_FIFO_WM, 16); |
| 2342 | /* WA for dev. #4.3 */ | ||
| 2343 | sky2_write16(hw, STAT_TX_IDX_TH, 0xfff); /* Tx Threshold */ | ||
| 2344 | |||
| 2345 | /* set Status-FIFO watermark */ | ||
| 2346 | sky2_write8(hw, STAT_FIFO_WM, 0x21); /* WA for dev. #4.18 */ | ||
| 2347 | 2330 | ||
| 2348 | /* set Status-FIFO ISR watermark */ | 2331 | /* set Status-FIFO ISR watermark */ |
| 2349 | sky2_write8(hw, STAT_FIFO_ISR_WM, 0x07); /* WA for dev. #4.18 */ | 2332 | if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0) |
| 2350 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 10000)); | 2333 | sky2_write8(hw, STAT_FIFO_ISR_WM, 4); |
| 2351 | } else { | 2334 | else |
| 2352 | sky2_write16(hw, STAT_TX_IDX_TH, 10); | 2335 | sky2_write8(hw, STAT_FIFO_ISR_WM, 16); |
| 2353 | sky2_write8(hw, STAT_FIFO_WM, 16); | ||
| 2354 | |||
| 2355 | /* set Status-FIFO ISR watermark */ | ||
| 2356 | if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0) | ||
| 2357 | sky2_write8(hw, STAT_FIFO_ISR_WM, 4); | ||
| 2358 | else | ||
| 2359 | sky2_write8(hw, STAT_FIFO_ISR_WM, 16); | ||
| 2360 | 2336 | ||
| 2361 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000)); | 2337 | sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000)); |
| 2362 | sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 7)); | 2338 | sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 20)); |
| 2363 | } | 2339 | sky2_write32(hw, STAT_LEV_TIMER_INI, sky2_us2clk(hw, 100)); |
| 2364 | 2340 | ||
| 2365 | /* enable status unit */ | 2341 | /* enable status unit */ |
| 2366 | sky2_write32(hw, STAT_CTRL, SC_STAT_OP_ON); | 2342 | sky2_write32(hw, STAT_CTRL, SC_STAT_OP_ON); |
| @@ -2743,7 +2719,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
| 2743 | ms = data * 1000; | 2719 | ms = data * 1000; |
| 2744 | 2720 | ||
| 2745 | /* save initial values */ | 2721 | /* save initial values */ |
| 2746 | down(&sky2->phy_sema); | 2722 | spin_lock_bh(&sky2->phy_lock); |
| 2747 | if (hw->chip_id == CHIP_ID_YUKON_XL) { | 2723 | if (hw->chip_id == CHIP_ID_YUKON_XL) { |
| 2748 | u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); | 2724 | u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR); |
| 2749 | gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 3); | 2725 | gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 3); |
| @@ -2759,9 +2735,9 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
| 2759 | sky2_led(hw, port, onoff); | 2735 | sky2_led(hw, port, onoff); |
| 2760 | onoff = !onoff; | 2736 | onoff = !onoff; |
| 2761 | 2737 | ||
| 2762 | up(&sky2->phy_sema); | 2738 | spin_unlock_bh(&sky2->phy_lock); |
| 2763 | interrupted = msleep_interruptible(250); | 2739 | interrupted = msleep_interruptible(250); |
| 2764 | down(&sky2->phy_sema); | 2740 | spin_lock_bh(&sky2->phy_lock); |
| 2765 | 2741 | ||
| 2766 | ms -= 250; | 2742 | ms -= 250; |
| 2767 | } | 2743 | } |
| @@ -2776,7 +2752,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data) | |||
| 2776 | gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl); | 2752 | gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl); |
| 2777 | gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover); | 2753 | gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover); |
| 2778 | } | 2754 | } |
| 2779 | up(&sky2->phy_sema); | 2755 | spin_unlock_bh(&sky2->phy_lock); |
| 2780 | 2756 | ||
| 2781 | return 0; | 2757 | return 0; |
| 2782 | } | 2758 | } |
| @@ -2806,38 +2782,6 @@ static int sky2_set_pauseparam(struct net_device *dev, | |||
| 2806 | return err; | 2782 | return err; |
| 2807 | } | 2783 | } |
| 2808 | 2784 | ||
| 2809 | #ifdef CONFIG_PM | ||
| 2810 | static void sky2_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) | ||
| 2811 | { | ||
| 2812 | struct sky2_port *sky2 = netdev_priv(dev); | ||
| 2813 | |||
| 2814 | wol->supported = WAKE_MAGIC; | ||
| 2815 | wol->wolopts = sky2->wol ? WAKE_MAGIC : 0; | ||
| 2816 | } | ||
| 2817 | |||
| 2818 | static int sky2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) | ||
| 2819 | { | ||
| 2820 | struct sky2_port *sky2 = netdev_priv(dev); | ||
| 2821 | struct sky2_hw *hw = sky2->hw; | ||
| 2822 | |||
| 2823 | if (wol->wolopts != WAKE_MAGIC && wol->wolopts != 0) | ||
| 2824 | return -EOPNOTSUPP; | ||
| 2825 | |||
| 2826 | sky2->wol = wol->wolopts == WAKE_MAGIC; | ||
| 2827 | |||
| 2828 | if (sky2->wol) { | ||
| 2829 | memcpy_toio(hw->regs + WOL_MAC_ADDR, dev->dev_addr, ETH_ALEN); | ||
| 2830 | |||
| 2831 | sky2_write16(hw, WOL_CTRL_STAT, | ||
| 2832 | WOL_CTL_ENA_PME_ON_MAGIC_PKT | | ||
| 2833 | WOL_CTL_ENA_MAGIC_PKT_UNIT); | ||
| 2834 | } else | ||
| 2835 | sky2_write16(hw, WOL_CTRL_STAT, WOL_CTL_DEFAULT); | ||
| 2836 | |||
| 2837 | return 0; | ||
| 2838 | } | ||
| 2839 | #endif | ||
| 2840 | |||
| 2841 | static int sky2_get_coalesce(struct net_device *dev, | 2785 | static int sky2_get_coalesce(struct net_device *dev, |
| 2842 | struct ethtool_coalesce *ecmd) | 2786 | struct ethtool_coalesce *ecmd) |
| 2843 | { | 2787 | { |
| @@ -2878,19 +2822,11 @@ static int sky2_set_coalesce(struct net_device *dev, | |||
| 2878 | { | 2822 | { |
| 2879 | struct sky2_port *sky2 = netdev_priv(dev); | 2823 | struct sky2_port *sky2 = netdev_priv(dev); |
| 2880 | struct sky2_hw *hw = sky2->hw; | 2824 | struct sky2_hw *hw = sky2->hw; |
| 2881 | const u32 tmin = sky2_clk2us(hw, 1); | 2825 | const u32 tmax = sky2_clk2us(hw, 0x0ffffff); |
| 2882 | const u32 tmax = 5000; | ||
| 2883 | |||
| 2884 | if (ecmd->tx_coalesce_usecs != 0 && | ||
| 2885 | (ecmd->tx_coalesce_usecs < tmin || ecmd->tx_coalesce_usecs > tmax)) | ||
| 2886 | return -EINVAL; | ||
| 2887 | |||
| 2888 | if (ecmd->rx_coalesce_usecs != 0 && | ||
| 2889 | (ecmd->rx_coalesce_usecs < tmin || ecmd->rx_coalesce_usecs > tmax)) | ||
| 2890 | return -EINVAL; | ||
| 2891 | 2826 | ||
| 2892 | if (ecmd->rx_coalesce_usecs_irq != 0 && | 2827 | if (ecmd->tx_coalesce_usecs > tmax || |
| 2893 | (ecmd->rx_coalesce_usecs_irq < tmin || ecmd->rx_coalesce_usecs_irq > tmax)) | 2828 | ecmd->rx_coalesce_usecs > tmax || |
| 2829 | ecmd->rx_coalesce_usecs_irq > tmax) | ||
| 2894 | return -EINVAL; | 2830 | return -EINVAL; |
| 2895 | 2831 | ||
| 2896 | if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1) | 2832 | if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1) |
| @@ -3025,10 +2961,6 @@ static struct ethtool_ops sky2_ethtool_ops = { | |||
| 3025 | .set_ringparam = sky2_set_ringparam, | 2961 | .set_ringparam = sky2_set_ringparam, |
| 3026 | .get_pauseparam = sky2_get_pauseparam, | 2962 | .get_pauseparam = sky2_get_pauseparam, |
| 3027 | .set_pauseparam = sky2_set_pauseparam, | 2963 | .set_pauseparam = sky2_set_pauseparam, |
| 3028 | #ifdef CONFIG_PM | ||
| 3029 | .get_wol = sky2_get_wol, | ||
| 3030 | .set_wol = sky2_set_wol, | ||
| 3031 | #endif | ||
| 3032 | .phys_id = sky2_phys_id, | 2964 | .phys_id = sky2_phys_id, |
| 3033 | .get_stats_count = sky2_get_stats_count, | 2965 | .get_stats_count = sky2_get_stats_count, |
| 3034 | .get_ethtool_stats = sky2_get_ethtool_stats, | 2966 | .get_ethtool_stats = sky2_get_ethtool_stats, |
| @@ -3082,16 +3014,15 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, | |||
| 3082 | sky2->speed = -1; | 3014 | sky2->speed = -1; |
| 3083 | sky2->advertising = sky2_supported_modes(hw); | 3015 | sky2->advertising = sky2_supported_modes(hw); |
| 3084 | 3016 | ||
| 3085 | /* Receive checksum disabled for Yukon XL | 3017 | /* Receive checksum disabled for Yukon XL |
| 3086 | * because of observed problems with incorrect | 3018 | * because of observed problems with incorrect |
| 3087 | * values when multiple packets are received in one interrupt | 3019 | * values when multiple packets are received in one interrupt |
| 3088 | */ | 3020 | */ |
| 3089 | sky2->rx_csum = (hw->chip_id != CHIP_ID_YUKON_XL); | 3021 | sky2->rx_csum = (hw->chip_id != CHIP_ID_YUKON_XL); |
| 3090 | 3022 | ||
| 3091 | INIT_WORK(&sky2->phy_task, sky2_phy_task, sky2); | 3023 | spin_lock_init(&sky2->phy_lock); |
| 3092 | init_MUTEX(&sky2->phy_sema); | ||
| 3093 | sky2->tx_pending = TX_DEF_PENDING; | 3024 | sky2->tx_pending = TX_DEF_PENDING; |
| 3094 | sky2->rx_pending = is_ec_a1(hw) ? 8 : RX_DEF_PENDING; | 3025 | sky2->rx_pending = RX_DEF_PENDING; |
| 3095 | sky2->rx_bufsize = sky2_buf_size(ETH_DATA_LEN); | 3026 | sky2->rx_bufsize = sky2_buf_size(ETH_DATA_LEN); |
| 3096 | 3027 | ||
| 3097 | hw->dev[port] = dev; | 3028 | hw->dev[port] = dev; |
| @@ -3133,6 +3064,66 @@ static void __devinit sky2_show_addr(struct net_device *dev) | |||
| 3133 | dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); | 3064 | dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); |
| 3134 | } | 3065 | } |
| 3135 | 3066 | ||
| 3067 | /* Handle software interrupt used during MSI test */ | ||
| 3068 | static irqreturn_t __devinit sky2_test_intr(int irq, void *dev_id, | ||
| 3069 | struct pt_regs *regs) | ||
| 3070 | { | ||
| 3071 | struct sky2_hw *hw = dev_id; | ||
| 3072 | u32 status = sky2_read32(hw, B0_Y2_SP_ISRC2); | ||
| 3073 | |||
| 3074 | if (status == 0) | ||
| 3075 | return IRQ_NONE; | ||
| 3076 | |||
| 3077 | if (status & Y2_IS_IRQ_SW) { | ||
| 3078 | hw->msi_detected = 1; | ||
| 3079 | wake_up(&hw->msi_wait); | ||
| 3080 | sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ); | ||
| 3081 | } | ||
| 3082 | sky2_write32(hw, B0_Y2_SP_ICR, 2); | ||
| 3083 | |||
| 3084 | return IRQ_HANDLED; | ||
| 3085 | } | ||
| 3086 | |||
| 3087 | /* Test interrupt path by forcing a software IRQ */ | ||
| 3088 | static int __devinit sky2_test_msi(struct sky2_hw *hw) | ||
| 3089 | { | ||
| 3090 | struct pci_dev *pdev = hw->pdev; | ||
| 3091 | int err; | ||
| 3092 | |||
| 3093 | sky2_write32(hw, B0_IMSK, Y2_IS_IRQ_SW); | ||
| 3094 | |||
| 3095 | err = request_irq(pdev->irq, sky2_test_intr, SA_SHIRQ, DRV_NAME, hw); | ||
| 3096 | if (err) { | ||
| 3097 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | ||
| 3098 | pci_name(pdev), pdev->irq); | ||
| 3099 | return err; | ||
| 3100 | } | ||
| 3101 | |||
| 3102 | init_waitqueue_head (&hw->msi_wait); | ||
| 3103 | |||
| 3104 | sky2_write8(hw, B0_CTST, CS_ST_SW_IRQ); | ||
| 3105 | wmb(); | ||
| 3106 | |||
| 3107 | wait_event_timeout(hw->msi_wait, hw->msi_detected, HZ/10); | ||
| 3108 | |||
| 3109 | if (!hw->msi_detected) { | ||
| 3110 | /* MSI test failed, go back to INTx mode */ | ||
| 3111 | printk(KERN_WARNING PFX "%s: No interrupt was generated using MSI, " | ||
| 3112 | "switching to INTx mode. Please report this failure to " | ||
| 3113 | "the PCI maintainer and include system chipset information.\n", | ||
| 3114 | pci_name(pdev)); | ||
| 3115 | |||
| 3116 | err = -EOPNOTSUPP; | ||
| 3117 | sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ); | ||
| 3118 | } | ||
| 3119 | |||
| 3120 | sky2_write32(hw, B0_IMSK, 0); | ||
| 3121 | |||
| 3122 | free_irq(pdev->irq, hw); | ||
| 3123 | |||
| 3124 | return err; | ||
| 3125 | } | ||
| 3126 | |||
| 3136 | static int __devinit sky2_probe(struct pci_dev *pdev, | 3127 | static int __devinit sky2_probe(struct pci_dev *pdev, |
| 3137 | const struct pci_device_id *ent) | 3128 | const struct pci_device_id *ent) |
| 3138 | { | 3129 | { |
| @@ -3201,7 +3192,6 @@ static int __devinit sky2_probe(struct pci_dev *pdev, | |||
| 3201 | goto err_out_free_hw; | 3192 | goto err_out_free_hw; |
| 3202 | } | 3193 | } |
| 3203 | hw->pm_cap = pm_cap; | 3194 | hw->pm_cap = pm_cap; |
| 3204 | spin_lock_init(&hw->hw_lock); | ||
| 3205 | 3195 | ||
| 3206 | #ifdef __BIG_ENDIAN | 3196 | #ifdef __BIG_ENDIAN |
| 3207 | /* byte swap descriptors in hardware */ | 3197 | /* byte swap descriptors in hardware */ |
| @@ -3254,21 +3244,29 @@ static int __devinit sky2_probe(struct pci_dev *pdev, | |||
| 3254 | } | 3244 | } |
| 3255 | } | 3245 | } |
| 3256 | 3246 | ||
| 3257 | err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw); | 3247 | if (!disable_msi && pci_enable_msi(pdev) == 0) { |
| 3248 | err = sky2_test_msi(hw); | ||
| 3249 | if (err == -EOPNOTSUPP) | ||
| 3250 | pci_disable_msi(pdev); | ||
| 3251 | else if (err) | ||
| 3252 | goto err_out_unregister; | ||
| 3253 | } | ||
| 3254 | |||
| 3255 | err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw); | ||
| 3258 | if (err) { | 3256 | if (err) { |
| 3259 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", | 3257 | printk(KERN_ERR PFX "%s: cannot assign irq %d\n", |
| 3260 | pci_name(pdev), pdev->irq); | 3258 | pci_name(pdev), pdev->irq); |
| 3261 | goto err_out_unregister; | 3259 | goto err_out_unregister; |
| 3262 | } | 3260 | } |
| 3263 | 3261 | ||
| 3264 | hw->intr_mask = Y2_IS_BASE; | 3262 | sky2_write32(hw, B0_IMSK, Y2_IS_BASE); |
| 3265 | sky2_write32(hw, B0_IMSK, hw->intr_mask); | ||
| 3266 | 3263 | ||
| 3267 | pci_set_drvdata(pdev, hw); | 3264 | pci_set_drvdata(pdev, hw); |
| 3268 | 3265 | ||
| 3269 | return 0; | 3266 | return 0; |
| 3270 | 3267 | ||
| 3271 | err_out_unregister: | 3268 | err_out_unregister: |
| 3269 | pci_disable_msi(pdev); | ||
| 3272 | if (dev1) { | 3270 | if (dev1) { |
| 3273 | unregister_netdev(dev1); | 3271 | unregister_netdev(dev1); |
| 3274 | free_netdev(dev1); | 3272 | free_netdev(dev1); |
| @@ -3311,6 +3309,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev) | |||
| 3311 | sky2_read8(hw, B0_CTST); | 3309 | sky2_read8(hw, B0_CTST); |
| 3312 | 3310 | ||
| 3313 | free_irq(pdev->irq, hw); | 3311 | free_irq(pdev->irq, hw); |
| 3312 | pci_disable_msi(pdev); | ||
| 3314 | pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma); | 3313 | pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma); |
| 3315 | pci_release_regions(pdev); | 3314 | pci_release_regions(pdev); |
| 3316 | pci_disable_device(pdev); | 3315 | pci_disable_device(pdev); |
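The MSI changes to sky2.c follow a common probe-time pattern: enable MSI, install a throw-away handler, raise a software interrupt, wait briefly for the handler to confirm delivery, and fall back to legacy INTx if nothing arrives. A minimal sketch of that flow, hedged: my_hw, my_trigger_sw_irq and the other "my_" names are placeholders, not the driver's real symbols.

    /* Sketch only: assumes a private struct with a waitqueue and a flag,
     * mirroring the msi_wait/msi_detected fields added to sky2_hw. */
    static irqreturn_t my_test_intr(int irq, void *dev_id, struct pt_regs *regs)
    {
            struct my_hw *hw = dev_id;

            hw->msi_detected = 1;                   /* the interrupt really arrived */
            wake_up(&hw->msi_wait);
            return IRQ_HANDLED;
    }

    static int __devinit my_test_msi(struct my_hw *hw, struct pci_dev *pdev)
    {
            int err;

            init_waitqueue_head(&hw->msi_wait);
            hw->msi_detected = 0;

            err = request_irq(pdev->irq, my_test_intr, SA_SHIRQ, "my_drv", hw);
            if (err)
                    return err;

            my_trigger_sw_irq(hw);                  /* hypothetical: force a software IRQ */
            wait_event_timeout(hw->msi_wait, hw->msi_detected, HZ / 10);

            free_irq(pdev->irq, hw);
            return hw->msi_detected ? 0 : -EOPNOTSUPP; /* caller then disables MSI */
    }

The caller pairs this with pci_enable_msi()/pci_disable_msi() exactly as the probe hunk does: only an -EOPNOTSUPP result drops back to INTx, while any other error aborts the probe.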
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index dce955c76f3c..d63cd5a1b71c 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h | |||
| @@ -278,13 +278,11 @@ enum { | |||
| 278 | Y2_IS_CHK_TXS1 = 1<<1, /* Descriptor error TXS 1 */ | 278 | Y2_IS_CHK_TXS1 = 1<<1, /* Descriptor error TXS 1 */ |
| 279 | Y2_IS_CHK_TXA1 = 1<<0, /* Descriptor error TXA 1 */ | 279 | Y2_IS_CHK_TXA1 = 1<<0, /* Descriptor error TXA 1 */ |
| 280 | 280 | ||
| 281 | Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU | | 281 | Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU, |
| 282 | Y2_IS_POLL_CHK | Y2_IS_TWSI_RDY | | 282 | Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1 |
| 283 | Y2_IS_IRQ_SW | Y2_IS_TIMINT, | 283 | | Y2_IS_CHK_TXA1 | Y2_IS_CHK_RX1, |
| 284 | Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1 | | 284 | Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2 |
| 285 | Y2_IS_CHK_RX1 | Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXS1, | 285 | | Y2_IS_CHK_TXA2 | Y2_IS_CHK_RX2, |
| 286 | Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2 | | ||
| 287 | Y2_IS_CHK_RX2 | Y2_IS_CHK_TXA2 | Y2_IS_CHK_TXS2, | ||
| 288 | }; | 286 | }; |
| 289 | 287 | ||
| 290 | /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ | 288 | /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ |
| @@ -1832,6 +1830,7 @@ struct sky2_port { | |||
| 1832 | struct net_device *netdev; | 1830 | struct net_device *netdev; |
| 1833 | unsigned port; | 1831 | unsigned port; |
| 1834 | u32 msg_enable; | 1832 | u32 msg_enable; |
| 1833 | spinlock_t phy_lock; | ||
| 1835 | 1834 | ||
| 1836 | spinlock_t tx_lock ____cacheline_aligned_in_smp; | 1835 | spinlock_t tx_lock ____cacheline_aligned_in_smp; |
| 1837 | struct tx_ring_info *tx_ring; | 1836 | struct tx_ring_info *tx_ring; |
| @@ -1840,7 +1839,6 @@ struct sky2_port { | |||
| 1840 | u16 tx_prod; /* next le to use */ | 1839 | u16 tx_prod; /* next le to use */ |
| 1841 | u32 tx_addr64; | 1840 | u32 tx_addr64; |
| 1842 | u16 tx_pending; | 1841 | u16 tx_pending; |
| 1843 | u16 tx_last_put; | ||
| 1844 | u16 tx_last_mss; | 1842 | u16 tx_last_mss; |
| 1845 | 1843 | ||
| 1846 | struct ring_info *rx_ring ____cacheline_aligned_in_smp; | 1844 | struct ring_info *rx_ring ____cacheline_aligned_in_smp; |
| @@ -1849,7 +1847,6 @@ struct sky2_port { | |||
| 1849 | u16 rx_next; /* next re to check */ | 1847 | u16 rx_next; /* next re to check */ |
| 1850 | u16 rx_put; /* next le index to use */ | 1848 | u16 rx_put; /* next le index to use */ |
| 1851 | u16 rx_pending; | 1849 | u16 rx_pending; |
| 1852 | u16 rx_last_put; | ||
| 1853 | u16 rx_bufsize; | 1850 | u16 rx_bufsize; |
| 1854 | #ifdef SKY2_VLAN_TAG_USED | 1851 | #ifdef SKY2_VLAN_TAG_USED |
| 1855 | u16 rx_tag; | 1852 | u16 rx_tag; |
| @@ -1865,20 +1862,15 @@ struct sky2_port { | |||
| 1865 | u8 rx_pause; | 1862 | u8 rx_pause; |
| 1866 | u8 tx_pause; | 1863 | u8 tx_pause; |
| 1867 | u8 rx_csum; | 1864 | u8 rx_csum; |
| 1868 | u8 wol; | ||
| 1869 | 1865 | ||
| 1870 | struct net_device_stats net_stats; | 1866 | struct net_device_stats net_stats; |
| 1871 | 1867 | ||
| 1872 | struct work_struct phy_task; | ||
| 1873 | struct semaphore phy_sema; | ||
| 1874 | }; | 1868 | }; |
| 1875 | 1869 | ||
| 1876 | struct sky2_hw { | 1870 | struct sky2_hw { |
| 1877 | void __iomem *regs; | 1871 | void __iomem *regs; |
| 1878 | struct pci_dev *pdev; | 1872 | struct pci_dev *pdev; |
| 1879 | struct net_device *dev[2]; | 1873 | struct net_device *dev[2]; |
| 1880 | spinlock_t hw_lock; | ||
| 1881 | u32 intr_mask; | ||
| 1882 | 1874 | ||
| 1883 | int pm_cap; | 1875 | int pm_cap; |
| 1884 | u8 chip_id; | 1876 | u8 chip_id; |
| @@ -1889,6 +1881,8 @@ struct sky2_hw { | |||
| 1889 | struct sky2_status_le *st_le; | 1881 | struct sky2_status_le *st_le; |
| 1890 | u32 st_idx; | 1882 | u32 st_idx; |
| 1891 | dma_addr_t st_dma; | 1883 | dma_addr_t st_dma; |
| 1884 | int msi_detected; | ||
| 1885 | wait_queue_head_t msi_wait; | ||
| 1892 | }; | 1886 | }; |
| 1893 | 1887 | ||
| 1894 | /* Register accessor for memory mapped device */ | 1888 | /* Register accessor for memory mapped device */ |
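In sky2.h the deferred-work machinery for PHY handling (phy_task plus phy_sema) is replaced by a single phy_lock spinlock, matching the spin_lock_init(&sky2->phy_lock) added in the sky2.c hunk above. PHY registers can then be touched directly from interrupt context instead of through a work item. A hedged sketch of the resulting locking shape; the helper is a placeholder, not a real driver function:

    /* In hard-IRQ context the plain spin_lock() form is enough;
     * process-context paths would use spin_lock_irqsave(). */
    static void my_phy_intr(struct sky2_port *sky2)
    {
            spin_lock(&sky2->phy_lock);
            my_handle_phy_change(sky2);     /* hypothetical: read and ack PHY status */
            spin_unlock(&sky2->phy_lock);
    }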
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 75e9b3b910cc..0e9833adf9fe 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c | |||
| @@ -215,15 +215,12 @@ struct smc_local { | |||
| 215 | 215 | ||
| 216 | spinlock_t lock; | 216 | spinlock_t lock; |
| 217 | 217 | ||
| 218 | #ifdef SMC_CAN_USE_DATACS | ||
| 219 | u32 __iomem *datacs; | ||
| 220 | #endif | ||
| 221 | |||
| 222 | #ifdef SMC_USE_PXA_DMA | 218 | #ifdef SMC_USE_PXA_DMA |
| 223 | /* DMA needs the physical address of the chip */ | 219 | /* DMA needs the physical address of the chip */ |
| 224 | u_long physaddr; | 220 | u_long physaddr; |
| 225 | #endif | 221 | #endif |
| 226 | void __iomem *base; | 222 | void __iomem *base; |
| 223 | void __iomem *datacs; | ||
| 227 | }; | 224 | }; |
| 228 | 225 | ||
| 229 | #if SMC_DEBUG > 0 | 226 | #if SMC_DEBUG > 0 |
| @@ -2104,9 +2101,8 @@ static int smc_enable_device(struct platform_device *pdev) | |||
| 2104 | * Set the appropriate byte/word mode. | 2101 | * Set the appropriate byte/word mode. |
| 2105 | */ | 2102 | */ |
| 2106 | ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8; | 2103 | ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8; |
| 2107 | #ifndef SMC_CAN_USE_16BIT | 2104 | if (!SMC_CAN_USE_16BIT) |
| 2108 | ecsr |= ECSR_IOIS8; | 2105 | ecsr |= ECSR_IOIS8; |
| 2109 | #endif | ||
| 2110 | writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT)); | 2106 | writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT)); |
| 2111 | local_irq_restore(flags); | 2107 | local_irq_restore(flags); |
| 2112 | 2108 | ||
| @@ -2143,40 +2139,39 @@ static void smc_release_attrib(struct platform_device *pdev) | |||
| 2143 | release_mem_region(res->start, ATTRIB_SIZE); | 2139 | release_mem_region(res->start, ATTRIB_SIZE); |
| 2144 | } | 2140 | } |
| 2145 | 2141 | ||
| 2146 | #ifdef SMC_CAN_USE_DATACS | 2142 | static inline void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) |
| 2147 | static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) | ||
| 2148 | { | 2143 | { |
| 2149 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | 2144 | if (SMC_CAN_USE_DATACS) { |
| 2150 | struct smc_local *lp = netdev_priv(ndev); | 2145 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); |
| 2146 | struct smc_local *lp = netdev_priv(ndev); | ||
| 2151 | 2147 | ||
| 2152 | if (!res) | 2148 | if (!res) |
| 2153 | return; | 2149 | return; |
| 2154 | 2150 | ||
| 2155 | if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) { | 2151 | if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) { |
| 2156 | printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME); | 2152 | printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME); |
| 2157 | return; | 2153 | return; |
| 2158 | } | 2154 | } |
| 2159 | 2155 | ||
| 2160 | lp->datacs = ioremap(res->start, SMC_DATA_EXTENT); | 2156 | lp->datacs = ioremap(res->start, SMC_DATA_EXTENT); |
| 2157 | } | ||
| 2161 | } | 2158 | } |
| 2162 | 2159 | ||
| 2163 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) | 2160 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) |
| 2164 | { | 2161 | { |
| 2165 | struct smc_local *lp = netdev_priv(ndev); | 2162 | if (SMC_CAN_USE_DATACS) { |
| 2166 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | 2163 | struct smc_local *lp = netdev_priv(ndev); |
| 2164 | struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32"); | ||
| 2167 | 2165 | ||
| 2168 | if (lp->datacs) | 2166 | if (lp->datacs) |
| 2169 | iounmap(lp->datacs); | 2167 | iounmap(lp->datacs); |
| 2170 | 2168 | ||
| 2171 | lp->datacs = NULL; | 2169 | lp->datacs = NULL; |
| 2172 | 2170 | ||
| 2173 | if (res) | 2171 | if (res) |
| 2174 | release_mem_region(res->start, SMC_DATA_EXTENT); | 2172 | release_mem_region(res->start, SMC_DATA_EXTENT); |
| 2173 | } | ||
| 2175 | } | 2174 | } |
| 2176 | #else | ||
| 2177 | static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) {} | ||
| 2178 | static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) {} | ||
| 2179 | #endif | ||
| 2180 | 2175 | ||
| 2181 | /* | 2176 | /* |
| 2182 | * smc_init(void) | 2177 | * smc_init(void) |
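The smc91x.c hunks trade #ifdef SMC_CAN_USE_DATACS and #ifndef SMC_CAN_USE_16BIT blocks for ordinary if (SMC_CAN_USE_...) tests. Since smc91x.h now guarantees each of these symbols is always defined (to 0 or 1), the condition is a compile-time constant: the disabled branch is still parsed and type-checked, then discarded as dead code, so there is no runtime cost. A self-contained illustration of the idiom; FEATURE_FOO and the functions are invented for the example:

    #define FEATURE_FOO 0           /* would normally come from a config header */

    static void setup_foo(void)
    {
            /* ... */
    }

    static void init_device(void)
    {
            /* Behaves like an #ifdef, but the dead branch still gets
             * compile-time checking before being optimized away. */
            if (FEATURE_FOO)
                    setup_foo();
    }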
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index e0efd1964e72..e1be1af51201 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h | |||
| @@ -275,7 +275,10 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) | |||
| 275 | #define SMC_insw(a,r,p,l) readsw ((void*) ((a) + (r)), p, l) | 275 | #define SMC_insw(a,r,p,l) readsw ((void*) ((a) + (r)), p, l) |
| 276 | #define SMC_outw(v,a,r) ({ writew ((v), (a) + (r)); LPD7A40X_IOBARRIER; }) | 276 | #define SMC_outw(v,a,r) ({ writew ((v), (a) + (r)); LPD7A40X_IOBARRIER; }) |
| 277 | 277 | ||
| 278 | static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) | 278 | #define SMC_outsw LPD7A40X_SMC_outsw |
| 279 | |||
| 280 | static inline void LPD7A40X_SMC_outsw(unsigned long a, int r, | ||
| 281 | unsigned char* p, int l) | ||
| 279 | { | 282 | { |
| 280 | unsigned short* ps = (unsigned short*) p; | 283 | unsigned short* ps = (unsigned short*) p; |
| 281 | while (l-- > 0) { | 284 | while (l-- > 0) { |
| @@ -342,10 +345,6 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) | |||
| 342 | 345 | ||
| 343 | #endif | 346 | #endif |
| 344 | 347 | ||
| 345 | #ifndef SMC_IRQ_FLAGS | ||
| 346 | #define SMC_IRQ_FLAGS SA_TRIGGER_RISING | ||
| 347 | #endif | ||
| 348 | |||
| 349 | #ifdef SMC_USE_PXA_DMA | 348 | #ifdef SMC_USE_PXA_DMA |
| 350 | /* | 349 | /* |
| 351 | * Let's use the DMA engine on the XScale PXA2xx for RX packets. This is | 350 | * Let's use the DMA engine on the XScale PXA2xx for RX packets. This is |
| @@ -441,10 +440,85 @@ smc_pxa_dma_irq(int dma, void *dummy, struct pt_regs *regs) | |||
| 441 | #endif /* SMC_USE_PXA_DMA */ | 440 | #endif /* SMC_USE_PXA_DMA */ |
| 442 | 441 | ||
| 443 | 442 | ||
| 444 | /* Because of bank switching, the LAN91x uses only 16 I/O ports */ | 443 | /* |
| 444 | * Everything a particular hardware setup needs should have been defined | ||
| 445 | * at this point. Add stubs for the undefined cases, mainly to avoid | ||
| 446 | * compilation warnings since they'll be optimized away, or to prevent buggy | ||
| 447 | * use of them. | ||
| 448 | */ | ||
| 449 | |||
| 450 | #if ! SMC_CAN_USE_32BIT | ||
| 451 | #define SMC_inl(ioaddr, reg) ({ BUG(); 0; }) | ||
| 452 | #define SMC_outl(x, ioaddr, reg) BUG() | ||
| 453 | #define SMC_insl(a, r, p, l) BUG() | ||
| 454 | #define SMC_outsl(a, r, p, l) BUG() | ||
| 455 | #endif | ||
| 456 | |||
| 457 | #if !defined(SMC_insl) || !defined(SMC_outsl) | ||
| 458 | #define SMC_insl(a, r, p, l) BUG() | ||
| 459 | #define SMC_outsl(a, r, p, l) BUG() | ||
| 460 | #endif | ||
| 461 | |||
| 462 | #if ! SMC_CAN_USE_16BIT | ||
| 463 | |||
| 464 | /* | ||
| 465 | * Any 16-bit access is performed with two 8-bit accesses if the hardware | ||
| 466 | * can't do it directly. Most registers are 16-bit so those are mandatory. | ||
| 467 | */ | ||
| 468 | #define SMC_outw(x, ioaddr, reg) \ | ||
| 469 | do { \ | ||
| 470 | unsigned int __val16 = (x); \ | ||
| 471 | SMC_outb( __val16, ioaddr, reg ); \ | ||
| 472 | SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ | ||
| 473 | } while (0) | ||
| 474 | #define SMC_inw(ioaddr, reg) \ | ||
| 475 | ({ \ | ||
| 476 | unsigned int __val16; \ | ||
| 477 | __val16 = SMC_inb( ioaddr, reg ); \ | ||
| 478 | __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ | ||
| 479 | __val16; \ | ||
| 480 | }) | ||
| 481 | |||
| 482 | #define SMC_insw(a, r, p, l) BUG() | ||
| 483 | #define SMC_outsw(a, r, p, l) BUG() | ||
| 484 | |||
| 485 | #endif | ||
| 486 | |||
| 487 | #if !defined(SMC_insw) || !defined(SMC_outsw) | ||
| 488 | #define SMC_insw(a, r, p, l) BUG() | ||
| 489 | #define SMC_outsw(a, r, p, l) BUG() | ||
| 490 | #endif | ||
| 491 | |||
| 492 | #if ! SMC_CAN_USE_8BIT | ||
| 493 | #define SMC_inb(ioaddr, reg) ({ BUG(); 0; }) | ||
| 494 | #define SMC_outb(x, ioaddr, reg) BUG() | ||
| 495 | #define SMC_insb(a, r, p, l) BUG() | ||
| 496 | #define SMC_outsb(a, r, p, l) BUG() | ||
| 497 | #endif | ||
| 498 | |||
| 499 | #if !defined(SMC_insb) || !defined(SMC_outsb) | ||
| 500 | #define SMC_insb(a, r, p, l) BUG() | ||
| 501 | #define SMC_outsb(a, r, p, l) BUG() | ||
| 502 | #endif | ||
| 503 | |||
| 504 | #ifndef SMC_CAN_USE_DATACS | ||
| 505 | #define SMC_CAN_USE_DATACS 0 | ||
| 506 | #endif | ||
| 507 | |||
| 445 | #ifndef SMC_IO_SHIFT | 508 | #ifndef SMC_IO_SHIFT |
| 446 | #define SMC_IO_SHIFT 0 | 509 | #define SMC_IO_SHIFT 0 |
| 447 | #endif | 510 | #endif |
| 511 | |||
| 512 | #ifndef SMC_IRQ_FLAGS | ||
| 513 | #define SMC_IRQ_FLAGS SA_TRIGGER_RISING | ||
| 514 | #endif | ||
| 515 | |||
| 516 | #ifndef SMC_INTERRUPT_PREAMBLE | ||
| 517 | #define SMC_INTERRUPT_PREAMBLE | ||
| 518 | #endif | ||
| 519 | |||
| 520 | |||
| 521 | /* Because of bank switching, the LAN91x uses only 16 I/O ports */ | ||
| 448 | #define SMC_IO_EXTENT (16 << SMC_IO_SHIFT) | 522 | #define SMC_IO_EXTENT (16 << SMC_IO_SHIFT) |
| 449 | #define SMC_DATA_EXTENT (4) | 523 | #define SMC_DATA_EXTENT (4) |
| 450 | 524 | ||
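The stub block above makes sure every access width a platform did not configure still has a definition, but one that traps: the unavailable accessors collapse to BUG(), or to ({ BUG(); 0; }) where a value is expected. Combined with the constant-folded if (SMC_CAN_USE_...) tests, common code can reference all accessors unconditionally, and any path that slips through on unsupported hardware fails loudly rather than silently. The same convention in generic form; the names are illustrative only:

    #ifndef HW_CAN_USE_32BIT
    #define HW_CAN_USE_32BIT 0
    #endif

    #if ! HW_CAN_USE_32BIT
    /* Referenced only from branches the compiler removes; trap if not. */
    #define hw_read32(base, off)            ({ BUG(); 0; })
    #define hw_write32(v, base, off)        BUG()
    #endif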
| @@ -817,6 +891,11 @@ static const char * chip_ids[ 16 ] = { | |||
| 817 | * Note: the following macros do *not* select the bank -- this must | 891 | * Note: the following macros do *not* select the bank -- this must |
| 818 | * be done separately as needed in the main code. The SMC_REG() macro | 892 | * be done separately as needed in the main code. The SMC_REG() macro |
| 819 | * only uses the bank argument for debugging purposes (when enabled). | 893 | * only uses the bank argument for debugging purposes (when enabled). |
| 894 | * | ||
| 895 | * Note: despite inline functions being safer, everything leading to this | ||
| 896 | * should preferably be macros to let BUG() display the line number in | ||
| 897 | * the core source code since we're interested in the top call site | ||
| 898 | * not in any inline function location. | ||
| 820 | */ | 899 | */ |
| 821 | 900 | ||
| 822 | #if SMC_DEBUG > 0 | 901 | #if SMC_DEBUG > 0 |
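The note above explains why these accessors stay macros rather than inline functions: BUG() records __FILE__ and __LINE__ where it is expanded, so a macro points the resulting oops at the offending call site in the driver's .c file, whereas an inline function would always report the same line inside the header. A tiny illustration of the difference, not driver code:

    /* Macro: BUG() expands at each call site, so the report names the caller. */
    #define bad_access()            BUG()

    /* Inline function: every caller's oops would cite this header line. */
    static inline void bad_access_fn(void)
    {
            BUG();
    }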
| @@ -834,62 +913,142 @@ static const char * chip_ids[ 16 ] = { | |||
| 834 | #define SMC_REG(reg, bank) (reg<<SMC_IO_SHIFT) | 913 | #define SMC_REG(reg, bank) (reg<<SMC_IO_SHIFT) |
| 835 | #endif | 914 | #endif |
| 836 | 915 | ||
| 837 | #if SMC_CAN_USE_8BIT | 916 | /* |
| 838 | #define SMC_GET_PN() SMC_inb( ioaddr, PN_REG ) | 917 | * Hack Alert: Some setups just can't write 8 or 16 bits reliably when not |
| 839 | #define SMC_SET_PN(x) SMC_outb( x, ioaddr, PN_REG ) | 918 | * aligned to a 32 bit boundary. I tell you that does exist! |
| 840 | #define SMC_GET_AR() SMC_inb( ioaddr, AR_REG ) | 919 | * Fortunately the affected register accesses can be easily worked around |
| 841 | #define SMC_GET_TXFIFO() SMC_inb( ioaddr, TXFIFO_REG ) | 920 | * since we can write zeroes to the preceeding 16 bits without adverse |
| 842 | #define SMC_GET_RXFIFO() SMC_inb( ioaddr, RXFIFO_REG ) | 921 | * since we can write zeroes to the preceding 16 bits without adverse |
| 843 | #define SMC_GET_INT() SMC_inb( ioaddr, INT_REG ) | 922 | * |
| 844 | #define SMC_ACK_INT(x) SMC_outb( x, ioaddr, INT_REG ) | 923 | * Enforce it on any 32-bit capable setup for now. |
| 845 | #define SMC_GET_INT_MASK() SMC_inb( ioaddr, IM_REG ) | 924 | */ |
| 846 | #define SMC_SET_INT_MASK(x) SMC_outb( x, ioaddr, IM_REG ) | 925 | #define SMC_MUST_ALIGN_WRITE SMC_CAN_USE_32BIT |
| 847 | #else | 926 | |
| 848 | #define SMC_GET_PN() (SMC_inw( ioaddr, PN_REG ) & 0xFF) | 927 | #define SMC_GET_PN() \ |
| 849 | #define SMC_SET_PN(x) SMC_outw( x, ioaddr, PN_REG ) | 928 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, PN_REG)) \ |
| 850 | #define SMC_GET_AR() (SMC_inw( ioaddr, PN_REG ) >> 8) | 929 | : (SMC_inw(ioaddr, PN_REG) & 0xFF) ) |
| 851 | #define SMC_GET_TXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) & 0xFF) | 930 | |
| 852 | #define SMC_GET_RXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) >> 8) | 931 | #define SMC_SET_PN(x) \ |
| 853 | #define SMC_GET_INT() (SMC_inw( ioaddr, INT_REG ) & 0xFF) | 932 | do { \ |
| 933 | if (SMC_MUST_ALIGN_WRITE) \ | ||
| 934 | SMC_outl((x)<<16, ioaddr, SMC_REG(0, 2)); \ | ||
| 935 | else if (SMC_CAN_USE_8BIT) \ | ||
| 936 | SMC_outb(x, ioaddr, PN_REG); \ | ||
| 937 | else \ | ||
| 938 | SMC_outw(x, ioaddr, PN_REG); \ | ||
| 939 | } while (0) | ||
| 940 | |||
| 941 | #define SMC_GET_AR() \ | ||
| 942 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, AR_REG)) \ | ||
| 943 | : (SMC_inw(ioaddr, PN_REG) >> 8) ) | ||
| 944 | |||
| 945 | #define SMC_GET_TXFIFO() \ | ||
| 946 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, TXFIFO_REG)) \ | ||
| 947 | : (SMC_inw(ioaddr, TXFIFO_REG) & 0xFF) ) | ||
| 948 | |||
| 949 | #define SMC_GET_RXFIFO() \ | ||
| 950 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, RXFIFO_REG)) \ | ||
| 951 | : (SMC_inw(ioaddr, TXFIFO_REG) >> 8) ) | ||
| 952 | |||
| 953 | #define SMC_GET_INT() \ | ||
| 954 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, INT_REG)) \ | ||
| 955 | : (SMC_inw(ioaddr, INT_REG) & 0xFF) ) | ||
| 956 | |||
| 854 | #define SMC_ACK_INT(x) \ | 957 | #define SMC_ACK_INT(x) \ |
| 855 | do { \ | 958 | do { \ |
| 856 | unsigned long __flags; \ | 959 | if (SMC_CAN_USE_8BIT) \ |
| 857 | int __mask; \ | 960 | SMC_outb(x, ioaddr, INT_REG); \ |
| 858 | local_irq_save(__flags); \ | 961 | else { \ |
| 859 | __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \ | 962 | unsigned long __flags; \ |
| 860 | SMC_outw( __mask | (x), ioaddr, INT_REG ); \ | 963 | int __mask; \ |
| 861 | local_irq_restore(__flags); \ | 964 | local_irq_save(__flags); \ |
| 965 | __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \ | ||
| 966 | SMC_outw( __mask | (x), ioaddr, INT_REG ); \ | ||
| 967 | local_irq_restore(__flags); \ | ||
| 968 | } \ | ||
| 969 | } while (0) | ||
| 970 | |||
| 971 | #define SMC_GET_INT_MASK() \ | ||
| 972 | ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, IM_REG)) \ | ||
| 973 | : (SMC_inw( ioaddr, INT_REG ) >> 8) ) | ||
| 974 | |||
| 975 | #define SMC_SET_INT_MASK(x) \ | ||
| 976 | do { \ | ||
| 977 | if (SMC_CAN_USE_8BIT) \ | ||
| 978 | SMC_outb(x, ioaddr, IM_REG); \ | ||
| 979 | else \ | ||
| 980 | SMC_outw((x) << 8, ioaddr, INT_REG); \ | ||
| 981 | } while (0) | ||
| 982 | |||
| 983 | #define SMC_CURRENT_BANK() SMC_inw(ioaddr, BANK_SELECT) | ||
| 984 | |||
| 985 | #define SMC_SELECT_BANK(x) \ | ||
| 986 | do { \ | ||
| 987 | if (SMC_MUST_ALIGN_WRITE) \ | ||
| 988 | SMC_outl((x)<<16, ioaddr, 12<<SMC_IO_SHIFT); \ | ||
| 989 | else \ | ||
| 990 | SMC_outw(x, ioaddr, BANK_SELECT); \ | ||
| 991 | } while (0) | ||
| 992 | |||
| 993 | #define SMC_GET_BASE() SMC_inw(ioaddr, BASE_REG) | ||
| 994 | |||
| 995 | #define SMC_SET_BASE(x) SMC_outw(x, ioaddr, BASE_REG) | ||
| 996 | |||
| 997 | #define SMC_GET_CONFIG() SMC_inw(ioaddr, CONFIG_REG) | ||
| 998 | |||
| 999 | #define SMC_SET_CONFIG(x) SMC_outw(x, ioaddr, CONFIG_REG) | ||
| 1000 | |||
| 1001 | #define SMC_GET_COUNTER() SMC_inw(ioaddr, COUNTER_REG) | ||
| 1002 | |||
| 1003 | #define SMC_GET_CTL() SMC_inw(ioaddr, CTL_REG) | ||
| 1004 | |||
| 1005 | #define SMC_SET_CTL(x) SMC_outw(x, ioaddr, CTL_REG) | ||
| 1006 | |||
| 1007 | #define SMC_GET_MII() SMC_inw(ioaddr, MII_REG) | ||
| 1008 | |||
| 1009 | #define SMC_SET_MII(x) SMC_outw(x, ioaddr, MII_REG) | ||
| 1010 | |||
| 1011 | #define SMC_GET_MIR() SMC_inw(ioaddr, MIR_REG) | ||
| 1012 | |||
| 1013 | #define SMC_SET_MIR(x) SMC_outw(x, ioaddr, MIR_REG) | ||
| 1014 | |||
| 1015 | #define SMC_GET_MMU_CMD() SMC_inw(ioaddr, MMU_CMD_REG) | ||
| 1016 | |||
| 1017 | #define SMC_SET_MMU_CMD(x) SMC_outw(x, ioaddr, MMU_CMD_REG) | ||
| 1018 | |||
| 1019 | #define SMC_GET_FIFO() SMC_inw(ioaddr, FIFO_REG) | ||
| 1020 | |||
| 1021 | #define SMC_GET_PTR() SMC_inw(ioaddr, PTR_REG) | ||
| 1022 | |||
| 1023 | #define SMC_SET_PTR(x) \ | ||
| 1024 | do { \ | ||
| 1025 | if (SMC_MUST_ALIGN_WRITE) \ | ||
| 1026 | SMC_outl((x)<<16, ioaddr, SMC_REG(4, 2)); \ | ||
| 1027 | else \ | ||
| 1028 | SMC_outw(x, ioaddr, PTR_REG); \ | ||
| 862 | } while (0) | 1029 | } while (0) |
| 863 | #define SMC_GET_INT_MASK() (SMC_inw( ioaddr, INT_REG ) >> 8) | ||
| 864 | #define SMC_SET_INT_MASK(x) SMC_outw( (x) << 8, ioaddr, INT_REG ) | ||
| 865 | #endif | ||
| 866 | 1030 | ||
| 867 | #define SMC_CURRENT_BANK() SMC_inw( ioaddr, BANK_SELECT ) | 1031 | #define SMC_GET_EPH_STATUS() SMC_inw(ioaddr, EPH_STATUS_REG) |
| 868 | #define SMC_SELECT_BANK(x) SMC_outw( x, ioaddr, BANK_SELECT ) | 1032 | |
| 869 | #define SMC_GET_BASE() SMC_inw( ioaddr, BASE_REG ) | 1033 | #define SMC_GET_RCR() SMC_inw(ioaddr, RCR_REG) |
| 870 | #define SMC_SET_BASE(x) SMC_outw( x, ioaddr, BASE_REG ) | 1034 | |
| 871 | #define SMC_GET_CONFIG() SMC_inw( ioaddr, CONFIG_REG ) | 1035 | #define SMC_SET_RCR(x) SMC_outw(x, ioaddr, RCR_REG) |
| 872 | #define SMC_SET_CONFIG(x) SMC_outw( x, ioaddr, CONFIG_REG ) | 1036 | |
| 873 | #define SMC_GET_COUNTER() SMC_inw( ioaddr, COUNTER_REG ) | 1037 | #define SMC_GET_REV() SMC_inw(ioaddr, REV_REG) |
| 874 | #define SMC_GET_CTL() SMC_inw( ioaddr, CTL_REG ) | 1038 | |
| 875 | #define SMC_SET_CTL(x) SMC_outw( x, ioaddr, CTL_REG ) | 1039 | #define SMC_GET_RPC() SMC_inw(ioaddr, RPC_REG) |
| 876 | #define SMC_GET_MII() SMC_inw( ioaddr, MII_REG ) | 1040 | |
| 877 | #define SMC_SET_MII(x) SMC_outw( x, ioaddr, MII_REG ) | 1041 | #define SMC_SET_RPC(x) \ |
| 878 | #define SMC_GET_MIR() SMC_inw( ioaddr, MIR_REG ) | 1042 | do { \ |
| 879 | #define SMC_SET_MIR(x) SMC_outw( x, ioaddr, MIR_REG ) | 1043 | if (SMC_MUST_ALIGN_WRITE) \ |
| 880 | #define SMC_GET_MMU_CMD() SMC_inw( ioaddr, MMU_CMD_REG ) | 1044 | SMC_outl((x)<<16, ioaddr, SMC_REG(8, 0)); \ |
| 881 | #define SMC_SET_MMU_CMD(x) SMC_outw( x, ioaddr, MMU_CMD_REG ) | 1045 | else \ |
| 882 | #define SMC_GET_FIFO() SMC_inw( ioaddr, FIFO_REG ) | 1046 | SMC_outw(x, ioaddr, RPC_REG); \ |
| 883 | #define SMC_GET_PTR() SMC_inw( ioaddr, PTR_REG ) | 1047 | } while (0) |
| 884 | #define SMC_SET_PTR(x) SMC_outw( x, ioaddr, PTR_REG ) | 1048 | |
| 885 | #define SMC_GET_EPH_STATUS() SMC_inw( ioaddr, EPH_STATUS_REG ) | 1049 | #define SMC_GET_TCR() SMC_inw(ioaddr, TCR_REG) |
| 886 | #define SMC_GET_RCR() SMC_inw( ioaddr, RCR_REG ) | 1050 | |
| 887 | #define SMC_SET_RCR(x) SMC_outw( x, ioaddr, RCR_REG ) | 1051 | #define SMC_SET_TCR(x) SMC_outw(x, ioaddr, TCR_REG) |
| 888 | #define SMC_GET_REV() SMC_inw( ioaddr, REV_REG ) | ||
| 889 | #define SMC_GET_RPC() SMC_inw( ioaddr, RPC_REG ) | ||
| 890 | #define SMC_SET_RPC(x) SMC_outw( x, ioaddr, RPC_REG ) | ||
| 891 | #define SMC_GET_TCR() SMC_inw( ioaddr, TCR_REG ) | ||
| 892 | #define SMC_SET_TCR(x) SMC_outw( x, ioaddr, TCR_REG ) | ||
| 893 | 1052 | ||
| 894 | #ifndef SMC_GET_MAC_ADDR | 1053 | #ifndef SMC_GET_MAC_ADDR |
| 895 | #define SMC_GET_MAC_ADDR(addr) \ | 1054 | #define SMC_GET_MAC_ADDR(addr) \ |
| @@ -920,151 +1079,84 @@ static const char * chip_ids[ 16 ] = { | |||
| 920 | SMC_outw( mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4 ); \ | 1079 | SMC_outw( mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4 ); \ |
| 921 | } while (0) | 1080 | } while (0) |
| 922 | 1081 | ||
| 923 | #if SMC_CAN_USE_32BIT | ||
| 924 | /* | ||
| 925 | * Some setups just can't write 8 or 16 bits reliably when not aligned | ||
| 926 | * to a 32 bit boundary. I tell you that exists! | ||
| 927 | * We re-do the ones here that can be easily worked around if they can have | ||
| 928 | * their low parts written to 0 without adverse effects. | ||
| 929 | */ | ||
| 930 | #undef SMC_SELECT_BANK | ||
| 931 | #define SMC_SELECT_BANK(x) SMC_outl( (x)<<16, ioaddr, 12<<SMC_IO_SHIFT ) | ||
| 932 | #undef SMC_SET_RPC | ||
| 933 | #define SMC_SET_RPC(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(8, 0) ) | ||
| 934 | #undef SMC_SET_PN | ||
| 935 | #define SMC_SET_PN(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(0, 2) ) | ||
| 936 | #undef SMC_SET_PTR | ||
| 937 | #define SMC_SET_PTR(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(4, 2) ) | ||
| 938 | #endif | ||
| 939 | |||
| 940 | #if SMC_CAN_USE_32BIT | ||
| 941 | #define SMC_PUT_PKT_HDR(status, length) \ | ||
| 942 | SMC_outl( (status) | (length) << 16, ioaddr, DATA_REG ) | ||
| 943 | #define SMC_GET_PKT_HDR(status, length) \ | ||
| 944 | do { \ | ||
| 945 | unsigned int __val = SMC_inl( ioaddr, DATA_REG ); \ | ||
| 946 | (status) = __val & 0xffff; \ | ||
| 947 | (length) = __val >> 16; \ | ||
| 948 | } while (0) | ||
| 949 | #else | ||
| 950 | #define SMC_PUT_PKT_HDR(status, length) \ | 1082 | #define SMC_PUT_PKT_HDR(status, length) \ |
| 951 | do { \ | 1083 | do { \ |
| 952 | SMC_outw( status, ioaddr, DATA_REG ); \ | 1084 | if (SMC_CAN_USE_32BIT) \ |
| 953 | SMC_outw( length, ioaddr, DATA_REG ); \ | 1085 | SMC_outl((status) | (length)<<16, ioaddr, DATA_REG); \ |
| 954 | } while (0) | 1086 | else { \ |
| 955 | #define SMC_GET_PKT_HDR(status, length) \ | 1087 | SMC_outw(status, ioaddr, DATA_REG); \ |
| 956 | do { \ | 1088 | SMC_outw(length, ioaddr, DATA_REG); \ |
| 957 | (status) = SMC_inw( ioaddr, DATA_REG ); \ | 1089 | } \ |
| 958 | (length) = SMC_inw( ioaddr, DATA_REG ); \ | ||
| 959 | } while (0) | 1090 | } while (0) |
| 960 | #endif | ||
| 961 | 1091 | ||
| 962 | #if SMC_CAN_USE_32BIT | 1092 | #define SMC_GET_PKT_HDR(status, length) \ |
| 963 | #define _SMC_PUSH_DATA(p, l) \ | ||
| 964 | do { \ | 1093 | do { \ |
| 965 | char *__ptr = (p); \ | 1094 | if (SMC_CAN_USE_32BIT) { \ |
| 966 | int __len = (l); \ | 1095 | unsigned int __val = SMC_inl(ioaddr, DATA_REG); \ |
| 967 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | 1096 | (status) = __val & 0xffff; \ |
| 968 | __len -= 2; \ | 1097 | (length) = __val >> 16; \ |
| 969 | SMC_outw( *(u16 *)__ptr, ioaddr, DATA_REG ); \ | 1098 | } else { \ |
| 970 | __ptr += 2; \ | 1099 | (status) = SMC_inw(ioaddr, DATA_REG); \ |
| 971 | } \ | 1100 | (length) = SMC_inw(ioaddr, DATA_REG); \ |
| 972 | SMC_outsl( ioaddr, DATA_REG, __ptr, __len >> 2); \ | ||
| 973 | if (__len & 2) { \ | ||
| 974 | __ptr += (__len & ~3); \ | ||
| 975 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
| 976 | } \ | 1101 | } \ |
| 977 | } while (0) | 1102 | } while (0) |
| 978 | #define _SMC_PULL_DATA(p, l) \ | ||
| 979 | do { \ | ||
| 980 | char *__ptr = (p); \ | ||
| 981 | int __len = (l); \ | ||
| 982 | if ((unsigned long)__ptr & 2) { \ | ||
| 983 | /* \ | ||
| 984 | * We want 32bit alignment here. \ | ||
| 985 | * Since some buses perform a full 32bit \ | ||
| 986 | * fetch even for 16bit data we can't use \ | ||
| 987 | * SMC_inw() here. Back both source (on chip \ | ||
| 988 | * and destination) pointers of 2 bytes. \ | ||
| 989 | */ \ | ||
| 990 | __ptr -= 2; \ | ||
| 991 | __len += 2; \ | ||
| 992 | SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \ | ||
| 993 | } \ | ||
| 994 | __len += 2; \ | ||
| 995 | SMC_insl( ioaddr, DATA_REG, __ptr, __len >> 2); \ | ||
| 996 | } while (0) | ||
| 997 | #elif SMC_CAN_USE_16BIT | ||
| 998 | #define _SMC_PUSH_DATA(p, l) SMC_outsw( ioaddr, DATA_REG, p, (l) >> 1 ) | ||
| 999 | #define _SMC_PULL_DATA(p, l) SMC_insw ( ioaddr, DATA_REG, p, (l) >> 1 ) | ||
| 1000 | #elif SMC_CAN_USE_8BIT | ||
| 1001 | #define _SMC_PUSH_DATA(p, l) SMC_outsb( ioaddr, DATA_REG, p, l ) | ||
| 1002 | #define _SMC_PULL_DATA(p, l) SMC_insb ( ioaddr, DATA_REG, p, l ) | ||
| 1003 | #endif | ||
| 1004 | 1103 | ||
| 1005 | #if ! SMC_CAN_USE_16BIT | 1104 | #define SMC_PUSH_DATA(p, l) \ |
| 1006 | #define SMC_outw(x, ioaddr, reg) \ | ||
| 1007 | do { \ | 1105 | do { \ |
| 1008 | unsigned int __val16 = (x); \ | 1106 | if (SMC_CAN_USE_32BIT) { \ |
| 1009 | SMC_outb( __val16, ioaddr, reg ); \ | 1107 | void *__ptr = (p); \ |
| 1010 | SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ | 1108 | int __len = (l); \ |
| 1109 | void *__ioaddr = ioaddr; \ | ||
| 1110 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | ||
| 1111 | __len -= 2; \ | ||
| 1112 | SMC_outw(*(u16 *)__ptr, ioaddr, DATA_REG); \ | ||
| 1113 | __ptr += 2; \ | ||
| 1114 | } \ | ||
| 1115 | if (SMC_CAN_USE_DATACS && lp->datacs) \ | ||
| 1116 | __ioaddr = lp->datacs; \ | ||
| 1117 | SMC_outsl(__ioaddr, DATA_REG, __ptr, __len>>2); \ | ||
| 1118 | if (__len & 2) { \ | ||
| 1119 | __ptr += (__len & ~3); \ | ||
| 1120 | SMC_outw(*((u16 *)__ptr), ioaddr, DATA_REG); \ | ||
| 1121 | } \ | ||
| 1122 | } else if (SMC_CAN_USE_16BIT) \ | ||
| 1123 | SMC_outsw(ioaddr, DATA_REG, p, (l) >> 1); \ | ||
| 1124 | else if (SMC_CAN_USE_8BIT) \ | ||
| 1125 | SMC_outsb(ioaddr, DATA_REG, p, l); \ | ||
| 1011 | } while (0) | 1126 | } while (0) |
| 1012 | #define SMC_inw(ioaddr, reg) \ | ||
| 1013 | ({ \ | ||
| 1014 | unsigned int __val16; \ | ||
| 1015 | __val16 = SMC_inb( ioaddr, reg ); \ | ||
| 1016 | __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ | ||
| 1017 | __val16; \ | ||
| 1018 | }) | ||
| 1019 | #endif | ||
| 1020 | |||
| 1021 | #ifdef SMC_CAN_USE_DATACS | ||
| 1022 | #define SMC_PUSH_DATA(p, l) \ | ||
| 1023 | if ( lp->datacs ) { \ | ||
| 1024 | unsigned char *__ptr = (p); \ | ||
| 1025 | int __len = (l); \ | ||
| 1026 | if (__len >= 2 && (unsigned long)__ptr & 2) { \ | ||
| 1027 | __len -= 2; \ | ||
| 1028 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
| 1029 | __ptr += 2; \ | ||
| 1030 | } \ | ||
| 1031 | outsl(lp->datacs, __ptr, __len >> 2); \ | ||
| 1032 | if (__len & 2) { \ | ||
| 1033 | __ptr += (__len & ~3); \ | ||
| 1034 | SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \ | ||
| 1035 | } \ | ||
| 1036 | } else { \ | ||
| 1037 | _SMC_PUSH_DATA(p, l); \ | ||
| 1038 | } | ||
| 1039 | 1127 | ||
| 1040 | #define SMC_PULL_DATA(p, l) \ | 1128 | #define SMC_PULL_DATA(p, l) \ |
| 1041 | if ( lp->datacs ) { \ | 1129 | do { \ |
| 1042 | unsigned char *__ptr = (p); \ | 1130 | if (SMC_CAN_USE_32BIT) { \ |
| 1043 | int __len = (l); \ | 1131 | void *__ptr = (p); \ |
| 1044 | if ((unsigned long)__ptr & 2) { \ | 1132 | int __len = (l); \ |
| 1045 | /* \ | 1133 | void *__ioaddr = ioaddr; \ |
| 1046 | * We want 32bit alignment here. \ | 1134 | if ((unsigned long)__ptr & 2) { \ |
| 1047 | * Since some buses perform a full 32bit \ | 1135 | /* \ |
| 1048 | * fetch even for 16bit data we can't use \ | 1136 | * We want 32bit alignment here. \ |
| 1049 | * SMC_inw() here. Back both source (on chip \ | 1137 | * Since some buses perform a full \ |
| 1050 | * and destination) pointers of 2 bytes. \ | 1138 | * 32bit fetch even for 16bit data \ |
| 1051 | */ \ | 1139 | * we can't use SMC_inw() here. \ |
| 1052 | __ptr -= 2; \ | 1140 | * Back both source (on-chip) and \ |
| 1141 | * destination pointers of 2 bytes. \ | ||
| 1142 | * This is possible since the call to \ | ||
| 1143 | * SMC_GET_PKT_HDR() already advanced \ | ||
| 1144 | * the source pointer of 4 bytes, and \ | ||
| 1145 | * the skb_reserve(skb, 2) advanced \ | ||
| 1146 | * the destination pointer of 2 bytes. \ | ||
| 1147 | */ \ | ||
| 1148 | __ptr -= 2; \ | ||
| 1149 | __len += 2; \ | ||
| 1150 | SMC_SET_PTR(2|PTR_READ|PTR_RCV|PTR_AUTOINC); \ | ||
| 1151 | } \ | ||
| 1152 | if (SMC_CAN_USE_DATACS && lp->datacs) \ | ||
| 1153 | __ioaddr = lp->datacs; \ | ||
| 1053 | __len += 2; \ | 1154 | __len += 2; \ |
| 1054 | SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \ | 1155 | SMC_insl(__ioaddr, DATA_REG, __ptr, __len>>2); \ |
| 1055 | } \ | 1156 | } else if (SMC_CAN_USE_16BIT) \ |
| 1056 | __len += 2; \ | 1157 | SMC_insw(ioaddr, DATA_REG, p, (l) >> 1); \ |
| 1057 | insl( lp->datacs, __ptr, __len >> 2); \ | 1158 | else if (SMC_CAN_USE_8BIT) \ |
| 1058 | } else { \ | 1159 | SMC_insb(ioaddr, DATA_REG, p, l); \ |
| 1059 | _SMC_PULL_DATA(p, l); \ | 1160 | } while (0) |
| 1060 | } | ||
| 1061 | #else | ||
| 1062 | #define SMC_PUSH_DATA(p, l) _SMC_PUSH_DATA(p, l) | ||
| 1063 | #define SMC_PULL_DATA(p, l) _SMC_PULL_DATA(p, l) | ||
| 1064 | #endif | ||
| 1065 | |||
| 1066 | #if !defined (SMC_INTERRUPT_PREAMBLE) | ||
| 1067 | # define SMC_INTERRUPT_PREAMBLE | ||
| 1068 | #endif | ||
| 1069 | 1161 | ||
| 1070 | #endif /* _SMC91X_H_ */ | 1162 | #endif /* _SMC91X_H_ */ |
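The rewritten SMC_PUSH_DATA above keeps the 32-bit fast path but peels off a leading 16-bit word when the buffer is only 2-byte aligned and a trailing 16-bit word when the length is not a multiple of four, optionally steering the 32-bit burst through the separate data chip-select window (lp->datacs). The head/body/tail split is easier to see without the macro plumbing; in this sketch out16() and out32_rep() stand in for the real SMC_outw()/SMC_outsl() accessors:

    static void push_data(void *buf, int len)
    {
            u8 *p = buf;

            if (len >= 2 && (unsigned long)p & 2) { /* unaligned head */
                    out16(*(u16 *)p);
                    p += 2;
                    len -= 2;
            }
            out32_rep(p, len >> 2);                 /* aligned 32-bit body */
            if (len & 2)                            /* leftover 16-bit tail */
                    out16(*(u16 *)(p + (len & ~3)));
    }

The tail is read from p + (len & ~3): after the head adjustment, len counts the remaining bytes, so the final 16-bit word sits just past the last full 32-bit chunk, exactly as in the macro.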
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index ff79e68b347c..7b82ff090d42 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c | |||
| @@ -3639,7 +3639,7 @@ iscsi_tcp_init(void) | |||
| 3639 | 3639 | ||
| 3640 | taskcache = kmem_cache_create("iscsi_taskcache", | 3640 | taskcache = kmem_cache_create("iscsi_taskcache", |
| 3641 | sizeof(struct iscsi_data_task), 0, | 3641 | sizeof(struct iscsi_data_task), 0, |
| 3642 | SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL); | 3642 | SLAB_HWCACHE_ALIGN, NULL, NULL); |
| 3643 | if (!taskcache) | 3643 | if (!taskcache) |
| 3644 | return -ENOMEM; | 3644 | return -ENOMEM; |
| 3645 | 3645 | ||
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index a8b05ce5de52..7405d0df95db 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c | |||
| @@ -1139,32 +1139,6 @@ sg_fasync(int fd, struct file *filp, int mode) | |||
| 1139 | return (retval < 0) ? retval : 0; | 1139 | return (retval < 0) ? retval : 0; |
| 1140 | } | 1140 | } |
| 1141 | 1141 | ||
| 1142 | /* When startFinish==1 increments page counts for pages other than the | ||
| 1143 | first of scatter gather elements obtained from alloc_pages(). | ||
| 1144 | When startFinish==0 decrements ... */ | ||
| 1145 | static void | ||
| 1146 | sg_rb_correct4mmap(Sg_scatter_hold * rsv_schp, int startFinish) | ||
| 1147 | { | ||
| 1148 | struct scatterlist *sg = rsv_schp->buffer; | ||
| 1149 | struct page *page; | ||
| 1150 | int k, m; | ||
| 1151 | |||
| 1152 | SCSI_LOG_TIMEOUT(3, printk("sg_rb_correct4mmap: startFinish=%d, scatg=%d\n", | ||
| 1153 | startFinish, rsv_schp->k_use_sg)); | ||
| 1154 | /* N.B. correction _not_ applied to base page of each allocation */ | ||
| 1155 | for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) { | ||
| 1156 | for (m = PAGE_SIZE; m < sg->length; m += PAGE_SIZE) { | ||
| 1157 | page = sg->page; | ||
| 1158 | if (startFinish) | ||
| 1159 | get_page(page); | ||
| 1160 | else { | ||
| 1161 | if (page_count(page) > 0) | ||
| 1162 | __put_page(page); | ||
| 1163 | } | ||
| 1164 | } | ||
| 1165 | } | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | static struct page * | 1142 | static struct page * |
| 1169 | sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type) | 1143 | sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type) |
| 1170 | { | 1144 | { |
| @@ -1236,10 +1210,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1236 | sa += len; | 1210 | sa += len; |
| 1237 | } | 1211 | } |
| 1238 | 1212 | ||
| 1239 | if (0 == sfp->mmap_called) { | 1213 | sfp->mmap_called = 1; |
| 1240 | sg_rb_correct4mmap(rsv_schp, 1); /* do only once per fd lifetime */ | ||
| 1241 | sfp->mmap_called = 1; | ||
| 1242 | } | ||
| 1243 | vma->vm_flags |= VM_RESERVED; | 1214 | vma->vm_flags |= VM_RESERVED; |
| 1244 | vma->vm_private_data = sfp; | 1215 | vma->vm_private_data = sfp; |
| 1245 | vma->vm_ops = &sg_mmap_vm_ops; | 1216 | vma->vm_ops = &sg_mmap_vm_ops; |
| @@ -2388,8 +2359,6 @@ __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) | |||
| 2388 | SCSI_LOG_TIMEOUT(6, | 2359 | SCSI_LOG_TIMEOUT(6, |
| 2389 | printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", | 2360 | printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", |
| 2390 | (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); | 2361 | (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); |
| 2391 | if (sfp->mmap_called) | ||
| 2392 | sg_rb_correct4mmap(&sfp->reserve, 0); /* undo correction */ | ||
| 2393 | sg_remove_scat(&sfp->reserve); | 2362 | sg_remove_scat(&sfp->reserve); |
| 2394 | } | 2363 | } |
| 2395 | sfp->parentdp = NULL; | 2364 | sfp->parentdp = NULL; |
| @@ -2471,9 +2440,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp) | |||
| 2471 | return resp; | 2440 | return resp; |
| 2472 | 2441 | ||
| 2473 | if (lowDma) | 2442 | if (lowDma) |
| 2474 | page_mask = GFP_ATOMIC | GFP_DMA | __GFP_NOWARN; | 2443 | page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; |
| 2475 | else | 2444 | else |
| 2476 | page_mask = GFP_ATOMIC | __GFP_NOWARN; | 2445 | page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; |
| 2477 | 2446 | ||
| 2478 | for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; | 2447 | for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; |
| 2479 | order++, a_size <<= 1) ; | 2448 | order++, a_size <<= 1) ; |
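The sg.c change removes the manual page-refcount fix-ups (sg_rb_correct4mmap) and instead adds __GFP_COMP to the allocation mask. With that flag a higher-order allocation becomes a compound page, so get_page()/put_page() issued by the mmap path against any constituent page are redirected to the head page and stay balanced without driver intervention. A hedged sketch of the allocation side of that trade-off; the helper name is invented:

    /* Buffer that will later be mapped into user space via mmap(). */
    static struct page *alloc_mmap_buf(unsigned int order, int low_dma)
    {
            gfp_t mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;

            if (low_dma)
                    mask |= GFP_DMA;        /* as in sg_page_malloc() */
            return alloc_pages(mask, order);
    }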
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 89e5413cc2a3..c66ef96c71b4 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig | |||
| @@ -866,7 +866,7 @@ config SERIAL_M32R_PLDSIO | |||
| 866 | 866 | ||
| 867 | config SERIAL_TXX9 | 867 | config SERIAL_TXX9 |
| 868 | bool "TMPTX39XX/49XX SIO support" | 868 | bool "TMPTX39XX/49XX SIO support" |
| 869 | depends HAS_TXX9_SERIAL && BROKEN | 869 | depends HAS_TXX9_SERIAL |
| 870 | select SERIAL_CORE | 870 | select SERIAL_CORE |
| 871 | default y | 871 | default y |
| 872 | 872 | ||
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c index ee98a867bc6d..141173efd463 100644 --- a/drivers/serial/serial_txx9.c +++ b/drivers/serial/serial_txx9.c | |||
| @@ -33,6 +33,10 @@ | |||
| 33 | * 1.02 Cleanup. (import 8250.c changes) | 33 | * 1.02 Cleanup. (import 8250.c changes) |
| 34 | * 1.03 Fix low-latency mode. (import 8250.c changes) | 34 | * 1.03 Fix low-latency mode. (import 8250.c changes) |
| 35 | * 1.04 Remove usage of deprecated functions, cleanup. | 35 | * 1.04 Remove usage of deprecated functions, cleanup. |
| 36 | * 1.05 More strict check in verify_port. Cleanup. | ||
| 37 | * 1.06 Do not insert a char caused previous overrun. | ||
| 38 | * Fix some spin_locks. | ||
| 39 | * Do not call uart_add_one_port for absent ports. | ||
| 36 | */ | 40 | */ |
| 37 | #include <linux/config.h> | 41 | #include <linux/config.h> |
| 38 | 42 | ||
| @@ -57,7 +61,7 @@ | |||
| 57 | #include <asm/io.h> | 61 | #include <asm/io.h> |
| 58 | #include <asm/irq.h> | 62 | #include <asm/irq.h> |
| 59 | 63 | ||
| 60 | static char *serial_version = "1.04"; | 64 | static char *serial_version = "1.06"; |
| 61 | static char *serial_name = "TX39/49 Serial driver"; | 65 | static char *serial_name = "TX39/49 Serial driver"; |
| 62 | 66 | ||
| 63 | #define PASS_LIMIT 256 | 67 | #define PASS_LIMIT 256 |
| @@ -94,6 +98,8 @@ static char *serial_name = "TX39/49 Serial driver"; | |||
| 94 | #define UART_NR 4 | 98 | #define UART_NR 4 |
| 95 | #endif | 99 | #endif |
| 96 | 100 | ||
| 101 | #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) | ||
| 102 | |||
| 97 | struct uart_txx9_port { | 103 | struct uart_txx9_port { |
| 98 | struct uart_port port; | 104 | struct uart_port port; |
| 99 | 105 | ||
| @@ -210,7 +216,7 @@ static inline unsigned int sio_in(struct uart_txx9_port *up, int offset) | |||
| 210 | { | 216 | { |
| 211 | switch (up->port.iotype) { | 217 | switch (up->port.iotype) { |
| 212 | default: | 218 | default: |
| 213 | return *(volatile u32 *)(up->port.membase + offset); | 219 | return __raw_readl(up->port.membase + offset); |
| 214 | case UPIO_PORT: | 220 | case UPIO_PORT: |
| 215 | return inl(up->port.iobase + offset); | 221 | return inl(up->port.iobase + offset); |
| 216 | } | 222 | } |
| @@ -221,7 +227,7 @@ sio_out(struct uart_txx9_port *up, int offset, int value) | |||
| 221 | { | 227 | { |
| 222 | switch (up->port.iotype) { | 228 | switch (up->port.iotype) { |
| 223 | default: | 229 | default: |
| 224 | *(volatile u32 *)(up->port.membase + offset) = value; | 230 | __raw_writel(value, up->port.membase + offset); |
| 225 | break; | 231 | break; |
| 226 | case UPIO_PORT: | 232 | case UPIO_PORT: |
| 227 | outl(value, up->port.iobase + offset); | 233 | outl(value, up->port.iobase + offset); |
| @@ -259,34 +265,19 @@ sio_quot_set(struct uart_txx9_port *up, int quot) | |||
| 259 | static void serial_txx9_stop_tx(struct uart_port *port) | 265 | static void serial_txx9_stop_tx(struct uart_port *port) |
| 260 | { | 266 | { |
| 261 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 267 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
| 262 | unsigned long flags; | ||
| 263 | |||
| 264 | spin_lock_irqsave(&up->port.lock, flags); | ||
| 265 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_TIE); | 268 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_TIE); |
| 266 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
| 267 | } | 269 | } |
| 268 | 270 | ||
| 269 | static void serial_txx9_start_tx(struct uart_port *port) | 271 | static void serial_txx9_start_tx(struct uart_port *port) |
| 270 | { | 272 | { |
| 271 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 273 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
| 272 | unsigned long flags; | ||
| 273 | |||
| 274 | spin_lock_irqsave(&up->port.lock, flags); | ||
| 275 | sio_set(up, TXX9_SIDICR, TXX9_SIDICR_TIE); | 274 | sio_set(up, TXX9_SIDICR, TXX9_SIDICR_TIE); |
| 276 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
| 277 | } | 275 | } |
| 278 | 276 | ||
| 279 | static void serial_txx9_stop_rx(struct uart_port *port) | 277 | static void serial_txx9_stop_rx(struct uart_port *port) |
| 280 | { | 278 | { |
| 281 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 279 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
| 282 | unsigned long flags; | ||
| 283 | |||
| 284 | spin_lock_irqsave(&up->port.lock, flags); | ||
| 285 | up->port.read_status_mask &= ~TXX9_SIDISR_RDIS; | 280 | up->port.read_status_mask &= ~TXX9_SIDISR_RDIS; |
| 286 | #if 0 | ||
| 287 | sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_RIE); | ||
| 288 | #endif | ||
| 289 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
| 290 | } | 281 | } |
| 291 | 282 | ||
| 292 | static void serial_txx9_enable_ms(struct uart_port *port) | 283 | static void serial_txx9_enable_ms(struct uart_port *port) |
| @@ -302,12 +293,16 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
| 302 | unsigned int disr = *status; | 293 | unsigned int disr = *status; |
| 303 | int max_count = 256; | 294 | int max_count = 256; |
| 304 | char flag; | 295 | char flag; |
| 296 | unsigned int next_ignore_status_mask; | ||
| 305 | 297 | ||
| 306 | do { | 298 | do { |
| 307 | ch = sio_in(up, TXX9_SIRFIFO); | 299 | ch = sio_in(up, TXX9_SIRFIFO); |
| 308 | flag = TTY_NORMAL; | 300 | flag = TTY_NORMAL; |
| 309 | up->port.icount.rx++; | 301 | up->port.icount.rx++; |
| 310 | 302 | ||
| 303 | /* mask out RFDN_MASK bit added by previous overrun */ | ||
| 304 | next_ignore_status_mask = | ||
| 305 | up->port.ignore_status_mask & ~TXX9_SIDISR_RFDN_MASK; | ||
| 311 | if (unlikely(disr & (TXX9_SIDISR_UBRK | TXX9_SIDISR_UPER | | 306 | if (unlikely(disr & (TXX9_SIDISR_UBRK | TXX9_SIDISR_UPER | |
| 312 | TXX9_SIDISR_UFER | TXX9_SIDISR_UOER))) { | 307 | TXX9_SIDISR_UFER | TXX9_SIDISR_UOER))) { |
| 313 | /* | 308 | /* |
| @@ -328,8 +323,17 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
| 328 | up->port.icount.parity++; | 323 | up->port.icount.parity++; |
| 329 | else if (disr & TXX9_SIDISR_UFER) | 324 | else if (disr & TXX9_SIDISR_UFER) |
| 330 | up->port.icount.frame++; | 325 | up->port.icount.frame++; |
| 331 | if (disr & TXX9_SIDISR_UOER) | 326 | if (disr & TXX9_SIDISR_UOER) { |
| 332 | up->port.icount.overrun++; | 327 | up->port.icount.overrun++; |
| 328 | /* | ||
| 329 | * The receiver read buffer still holds ||
| 330 | * the char which caused the overrun. ||
| 331 | * Ignore next char by adding RFDN_MASK | ||
| 332 | * to ignore_status_mask temporarily. | ||
| 333 | */ | ||
| 334 | next_ignore_status_mask |= | ||
| 335 | TXX9_SIDISR_RFDN_MASK; | ||
| 336 | } | ||
| 333 | 337 | ||
| 334 | /* | 338 | /* |
| 335 | * Mask off conditions which should be ignored. | 339 | * Mask off conditions which should be ignored. |
| @@ -349,6 +353,7 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r | |||
| 349 | uart_insert_char(&up->port, disr, TXX9_SIDISR_UOER, ch, flag); | 353 | uart_insert_char(&up->port, disr, TXX9_SIDISR_UOER, ch, flag); |
| 350 | 354 | ||
| 351 | ignore_char: | 355 | ignore_char: |
| 356 | up->port.ignore_status_mask = next_ignore_status_mask; | ||
| 352 | disr = sio_in(up, TXX9_SIDISR); | 357 | disr = sio_in(up, TXX9_SIDISR); |
| 353 | } while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0)); | 358 | } while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0)); |
| 354 | spin_unlock(&up->port.lock); | 359 | spin_unlock(&up->port.lock); |
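The receive loop above works around a TXX9 quirk: after an overrun the RX FIFO still delivers the character that caused it. The fix computes a next_ignore_status_mask on each pass, ORs TXX9_SIDISR_RFDN_MASK into it only when an overrun was seen, and installs it at the ignore_char label, so exactly one following character is suppressed. The pattern, stripped of the hardware details; every name here is a placeholder:

    unsigned int ignore = 0;

    do {
            unsigned int next_ignore = ignore & ~IGNORE_NEXT_CHAR;
            unsigned int ch = read_rx_fifo();       /* hypothetical */
            unsigned int st = read_status();        /* hypothetical */

            if (st & OVERRUN)
                    next_ignore |= IGNORE_NEXT_CHAR; /* drop the stale char */

            if (!(st & ignore))
                    deliver_char(ch, st);

            ignore = next_ignore;                   /* effective for one pass only */
    } while (rx_data_pending());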
| @@ -450,14 +455,11 @@ static unsigned int serial_txx9_get_mctrl(struct uart_port *port) | |||
| 450 | static void serial_txx9_set_mctrl(struct uart_port *port, unsigned int mctrl) | 455 | static void serial_txx9_set_mctrl(struct uart_port *port, unsigned int mctrl) |
| 451 | { | 456 | { |
| 452 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; | 457 | struct uart_txx9_port *up = (struct uart_txx9_port *)port; |
| 453 | unsigned long flags; | ||
| 454 | 458 | ||
| 455 | spin_lock_irqsave(&up->port.lock, flags); | ||
| 456 | if (mctrl & TIOCM_RTS) | 459 | if (mctrl & TIOCM_RTS) |
| 457 | sio_mask(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); | 460 | sio_mask(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); |
| 458 | else | 461 | else |
| 459 | sio_set(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); | 462 | sio_set(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC); |
| 460 | spin_unlock_irqrestore(&up->port.lock, flags); | ||
| 461 | } | 463 | } |
| 462 | 464 | ||
| 463 | static void serial_txx9_break_ctl(struct uart_port *port, int break_state) | 465 | static void serial_txx9_break_ctl(struct uart_port *port, int break_state) |
| @@ -784,8 +786,14 @@ static void serial_txx9_config_port(struct uart_port *port, int uflags) | |||
| 784 | static int | 786 | static int |
| 785 | serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser) | 787 | serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser) |
| 786 | { | 788 | { |
| 787 | if (ser->irq < 0 || | 789 | unsigned long new_port = ser->port; |
| 788 | ser->baud_base < 9600 || ser->type != PORT_TXX9) | 790 | if (HIGH_BITS_OFFSET) |
| 791 | new_port += (unsigned long)ser->port_high << HIGH_BITS_OFFSET; | ||
| 792 | if (ser->type != port->type || | ||
| 793 | ser->irq != port->irq || | ||
| 794 | ser->io_type != port->iotype || | ||
| 795 | new_port != port->iobase || | ||
| 796 | (unsigned long)ser->iomem_base != port->mapbase) | ||
| 789 | return -EINVAL; | 797 | return -EINVAL; |
| 790 | return 0; | 798 | return 0; |
| 791 | } | 799 | } |
| @@ -827,7 +835,8 @@ static void __init serial_txx9_register_ports(struct uart_driver *drv) | |||
| 827 | 835 | ||
| 828 | up->port.line = i; | 836 | up->port.line = i; |
| 829 | up->port.ops = &serial_txx9_pops; | 837 | up->port.ops = &serial_txx9_pops; |
| 830 | uart_add_one_port(drv, &up->port); | 838 | if (up->port.iobase || up->port.mapbase) |
| 839 | uart_add_one_port(drv, &up->port); | ||
| 831 | } | 840 | } |
| 832 | } | 841 | } |
| 833 | 842 | ||
| @@ -927,11 +936,6 @@ static int serial_txx9_console_setup(struct console *co, char *options) | |||
| 927 | return -ENODEV; | 936 | return -ENODEV; |
| 928 | 937 | ||
| 929 | /* | 938 | /* |
| 930 | * Temporary fix. | ||
| 931 | */ | ||
| 932 | spin_lock_init(&port->lock); | ||
| 933 | |||
| 934 | /* | ||
| 935 | * Disable UART interrupts, set DTR and RTS high | 939 | * Disable UART interrupts, set DTR and RTS high |
| 936 | * and set speed. | 940 | * and set speed. |
| 937 | */ | 941 | */ |
| @@ -1041,11 +1045,10 @@ static int __devinit serial_txx9_register_port(struct uart_port *port) | |||
| 1041 | mutex_lock(&serial_txx9_mutex); | 1045 | mutex_lock(&serial_txx9_mutex); |
| 1042 | for (i = 0; i < UART_NR; i++) { | 1046 | for (i = 0; i < UART_NR; i++) { |
| 1043 | uart = &serial_txx9_ports[i]; | 1047 | uart = &serial_txx9_ports[i]; |
| 1044 | if (uart->port.type == PORT_UNKNOWN) | 1048 | if (!(uart->port.iobase || uart->port.mapbase)) |
| 1045 | break; | 1049 | break; |
| 1046 | } | 1050 | } |
| 1047 | if (i < UART_NR) { | 1051 | if (i < UART_NR) { |
| 1048 | uart_remove_one_port(&serial_txx9_reg, &uart->port); | ||
| 1049 | uart->port.iobase = port->iobase; | 1052 | uart->port.iobase = port->iobase; |
| 1050 | uart->port.membase = port->membase; | 1053 | uart->port.membase = port->membase; |
| 1051 | uart->port.irq = port->irq; | 1054 | uart->port.irq = port->irq; |
| @@ -1080,9 +1083,8 @@ static void __devexit serial_txx9_unregister_port(int line) | |||
| 1080 | uart->port.type = PORT_UNKNOWN; | 1083 | uart->port.type = PORT_UNKNOWN; |
| 1081 | uart->port.iobase = 0; | 1084 | uart->port.iobase = 0; |
| 1082 | uart->port.mapbase = 0; | 1085 | uart->port.mapbase = 0; |
| 1083 | uart->port.membase = 0; | 1086 | uart->port.membase = NULL; |
| 1084 | uart->port.dev = NULL; | 1087 | uart->port.dev = NULL; |
| 1085 | uart_add_one_port(&serial_txx9_reg, &uart->port); | ||
| 1086 | mutex_unlock(&serial_txx9_mutex); | 1088 | mutex_unlock(&serial_txx9_mutex); |
| 1087 | } | 1089 | } |
| 1088 | 1090 | ||
| @@ -1198,8 +1200,11 @@ static void __exit serial_txx9_exit(void) | |||
| 1198 | #ifdef ENABLE_SERIAL_TXX9_PCI | 1200 | #ifdef ENABLE_SERIAL_TXX9_PCI |
| 1199 | pci_unregister_driver(&serial_txx9_pci_driver); | 1201 | pci_unregister_driver(&serial_txx9_pci_driver); |
| 1200 | #endif | 1202 | #endif |
| 1201 | for (i = 0; i < UART_NR; i++) | 1203 | for (i = 0; i < UART_NR; i++) { |
| 1202 | uart_remove_one_port(&serial_txx9_reg, &serial_txx9_ports[i].port); | 1204 | struct uart_txx9_port *up = &serial_txx9_ports[i]; |
| 1205 | if (up->port.iobase || up->port.mapbase) | ||
| 1206 | uart_remove_one_port(&serial_txx9_reg, &up->port); | ||
| 1207 | } | ||
| 1203 | 1208 | ||
| 1204 | uart_unregister_driver(&serial_txx9_reg); | 1209 | uart_unregister_driver(&serial_txx9_reg); |
| 1205 | } | 1210 | } |
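serial_txx9_verify_port() is now genuinely strict: it rebuilds the full I/O base from the two halves user space passes in struct serial_struct and refuses any request that would change type, irq, iotype, iobase or mapbase. The reconstruction hinges on HIGH_BITS_OFFSET, which is 32 when long is 64-bit and 0 when long and int have the same width; in the latter case port_high carries no information and must be skipped, which is what the guard does. A sketch of that arithmetic as a standalone helper (full_io_base is hypothetical):

    #define HIGH_BITS_OFFSET        ((sizeof(long) - sizeof(int)) * 8)

    static unsigned long full_io_base(const struct serial_struct *ser)
    {
            /* 64-bit kernel: (8 - 4) * 8 == 32, so port_high supplies the top half.
             * 32-bit kernel: the macro is 0 and port_high is ignored entirely
             * (adding it unshifted would corrupt the value). */
            unsigned long base = ser->port;

            if (HIGH_BITS_OFFSET)
                    base += (unsigned long)ser->port_high << HIGH_BITS_OFFSET;
            return base;
    }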
diff --git a/drivers/serial/vr41xx_siu.c b/drivers/serial/vr41xx_siu.c index d61494d185cd..bd6294132c18 100644 --- a/drivers/serial/vr41xx_siu.c +++ b/drivers/serial/vr41xx_siu.c | |||
| @@ -919,7 +919,7 @@ static struct uart_driver siu_uart_driver = { | |||
| 919 | .cons = SERIAL_VR41XX_CONSOLE, | 919 | .cons = SERIAL_VR41XX_CONSOLE, |
| 920 | }; | 920 | }; |
| 921 | 921 | ||
| 922 | static int siu_probe(struct platform_device *dev) | 922 | static int __devinit siu_probe(struct platform_device *dev) |
| 923 | { | 923 | { |
| 924 | struct uart_port *port; | 924 | struct uart_port *port; |
| 925 | int num, i, retval; | 925 | int num, i, retval; |
| @@ -953,7 +953,7 @@ static int siu_probe(struct platform_device *dev) | |||
| 953 | return 0; | 953 | return 0; |
| 954 | } | 954 | } |
| 955 | 955 | ||
| 956 | static int siu_remove(struct platform_device *dev) | 956 | static int __devexit siu_remove(struct platform_device *dev) |
| 957 | { | 957 | { |
| 958 | struct uart_port *port; | 958 | struct uart_port *port; |
| 959 | int i; | 959 | int i; |
| @@ -1006,21 +1006,28 @@ static struct platform_device *siu_platform_device; | |||
| 1006 | 1006 | ||
| 1007 | static struct platform_driver siu_device_driver = { | 1007 | static struct platform_driver siu_device_driver = { |
| 1008 | .probe = siu_probe, | 1008 | .probe = siu_probe, |
| 1009 | .remove = siu_remove, | 1009 | .remove = __devexit_p(siu_remove), |
| 1010 | .suspend = siu_suspend, | 1010 | .suspend = siu_suspend, |
| 1011 | .resume = siu_resume, | 1011 | .resume = siu_resume, |
| 1012 | .driver = { | 1012 | .driver = { |
| 1013 | .name = "SIU", | 1013 | .name = "SIU", |
| 1014 | .owner = THIS_MODULE, | ||
| 1014 | }, | 1015 | }, |
| 1015 | }; | 1016 | }; |
| 1016 | 1017 | ||
| 1017 | static int __devinit vr41xx_siu_init(void) | 1018 | static int __init vr41xx_siu_init(void) |
| 1018 | { | 1019 | { |
| 1019 | int retval; | 1020 | int retval; |
| 1020 | 1021 | ||
| 1021 | siu_platform_device = platform_device_register_simple("SIU", -1, NULL, 0); | 1022 | siu_platform_device = platform_device_alloc("SIU", -1); |
| 1022 | if (IS_ERR(siu_platform_device)) | 1023 | if (!siu_platform_device) |
| 1023 | return PTR_ERR(siu_platform_device); | 1024 | return -ENOMEM; |
| 1025 | |||
| 1026 | retval = platform_device_add(siu_platform_device); | ||
| 1027 | if (retval < 0) { | ||
| 1028 | platform_device_put(siu_platform_device); | ||
| 1029 | return retval; | ||
| 1030 | } | ||
| 1024 | 1031 | ||
| 1025 | retval = platform_driver_register(&siu_device_driver); | 1032 | retval = platform_driver_register(&siu_device_driver); |
| 1026 | if (retval < 0) | 1033 | if (retval < 0) |
| @@ -1029,10 +1036,9 @@ static int __devinit vr41xx_siu_init(void) | |||
| 1029 | return retval; | 1036 | return retval; |
| 1030 | } | 1037 | } |
| 1031 | 1038 | ||
| 1032 | static void __devexit vr41xx_siu_exit(void) | 1039 | static void __exit vr41xx_siu_exit(void) |
| 1033 | { | 1040 | { |
| 1034 | platform_driver_unregister(&siu_device_driver); | 1041 | platform_driver_unregister(&siu_device_driver); |
| 1035 | |||
| 1036 | platform_device_unregister(siu_platform_device); | 1042 | platform_device_unregister(siu_platform_device); |
| 1037 | } | 1043 | } |
| 1038 | 1044 | ||
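vr41xx_siu_init() now uses the two-step platform_device_alloc()/platform_device_add() sequence instead of platform_device_register_simple(), which makes the error path explicit: a failed add must be answered with platform_device_put() so the last reference, and with it the allocation, is dropped. The shape of the pattern as used above:

    struct platform_device *pdev;
    int ret;

    pdev = platform_device_alloc("SIU", -1);
    if (!pdev)
            return -ENOMEM;

    ret = platform_device_add(pdev);
    if (ret < 0) {
            platform_device_put(pdev);      /* drops the only reference */
            return ret;
    }
    /* From here on, platform_device_unregister() undoes both steps. */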
diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c index ea75b3d0612b..67140a5804f5 100644 --- a/drivers/sn/ioc4.c +++ b/drivers/sn/ioc4.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <linux/ioc4.h> | 31 | #include <linux/ioc4.h> |
| 32 | #include <linux/mmtimer.h> | 32 | #include <linux/mmtimer.h> |
| 33 | #include <linux/rtc.h> | 33 | #include <linux/rtc.h> |
| 34 | #include <linux/rwsem.h> | 34 | #include <linux/mutex.h> |
| 35 | #include <asm/sn/addrs.h> | 35 | #include <asm/sn/addrs.h> |
| 36 | #include <asm/sn/clksupport.h> | 36 | #include <asm/sn/clksupport.h> |
| 37 | #include <asm/sn/shub_mmr.h> | 37 | #include <asm/sn/shub_mmr.h> |
| @@ -54,11 +54,10 @@ | |||
| 54 | * Submodule management * | 54 | * Submodule management * |
| 55 | ************************/ | 55 | ************************/ |
| 56 | 56 | ||
| 57 | static LIST_HEAD(ioc4_devices); | 57 | static DEFINE_MUTEX(ioc4_mutex); |
| 58 | static DECLARE_RWSEM(ioc4_devices_rwsem); | ||
| 59 | 58 | ||
| 59 | static LIST_HEAD(ioc4_devices); | ||
| 60 | static LIST_HEAD(ioc4_submodules); | 60 | static LIST_HEAD(ioc4_submodules); |
| 61 | static DECLARE_RWSEM(ioc4_submodules_rwsem); | ||
| 62 | 61 | ||
| 63 | /* Register an IOC4 submodule */ | 62 | /* Register an IOC4 submodule */ |
| 64 | int | 63 | int |
| @@ -66,15 +65,13 @@ ioc4_register_submodule(struct ioc4_submodule *is) | |||
| 66 | { | 65 | { |
| 67 | struct ioc4_driver_data *idd; | 66 | struct ioc4_driver_data *idd; |
| 68 | 67 | ||
| 69 | down_write(&ioc4_submodules_rwsem); | 68 | mutex_lock(&ioc4_mutex); |
| 70 | list_add(&is->is_list, &ioc4_submodules); | 69 | list_add(&is->is_list, &ioc4_submodules); |
| 71 | up_write(&ioc4_submodules_rwsem); | ||
| 72 | 70 | ||
| 73 | /* Initialize submodule for each IOC4 */ | 71 | /* Initialize submodule for each IOC4 */ |
| 74 | if (!is->is_probe) | 72 | if (!is->is_probe) |
| 75 | return 0; | 73 | goto out; |
| 76 | 74 | ||
| 77 | down_read(&ioc4_devices_rwsem); | ||
| 78 | list_for_each_entry(idd, &ioc4_devices, idd_list) { | 75 | list_for_each_entry(idd, &ioc4_devices, idd_list) { |
| 79 | if (is->is_probe(idd)) { | 76 | if (is->is_probe(idd)) { |
| 80 | printk(KERN_WARNING | 77 | printk(KERN_WARNING |
| @@ -84,8 +81,8 @@ ioc4_register_submodule(struct ioc4_submodule *is) | |||
| 84 | pci_name(idd->idd_pdev)); | 81 | pci_name(idd->idd_pdev)); |
| 85 | } | 82 | } |
| 86 | } | 83 | } |
| 87 | up_read(&ioc4_devices_rwsem); | 84 | out: |
| 88 | 85 | mutex_unlock(&ioc4_mutex); | |
| 89 | return 0; | 86 | return 0; |
| 90 | } | 87 | } |
| 91 | 88 | ||
| @@ -95,15 +92,13 @@ ioc4_unregister_submodule(struct ioc4_submodule *is) | |||
| 95 | { | 92 | { |
| 96 | struct ioc4_driver_data *idd; | 93 | struct ioc4_driver_data *idd; |
| 97 | 94 | ||
| 98 | down_write(&ioc4_submodules_rwsem); | 95 | mutex_lock(&ioc4_mutex); |
| 99 | list_del(&is->is_list); | 96 | list_del(&is->is_list); |
| 100 | up_write(&ioc4_submodules_rwsem); | ||
| 101 | 97 | ||
| 102 | /* Remove submodule for each IOC4 */ | 98 | /* Remove submodule for each IOC4 */ |
| 103 | if (!is->is_remove) | 99 | if (!is->is_remove) |
| 104 | return; | 100 | goto out; |
| 105 | 101 | ||
| 106 | down_read(&ioc4_devices_rwsem); | ||
| 107 | list_for_each_entry(idd, &ioc4_devices, idd_list) { | 102 | list_for_each_entry(idd, &ioc4_devices, idd_list) { |
| 108 | if (is->is_remove(idd)) { | 103 | if (is->is_remove(idd)) { |
| 109 | printk(KERN_WARNING | 104 | printk(KERN_WARNING |
| @@ -113,7 +108,8 @@ ioc4_unregister_submodule(struct ioc4_submodule *is) | |||
| 113 | pci_name(idd->idd_pdev)); | 108 | pci_name(idd->idd_pdev)); |
| 114 | } | 109 | } |
| 115 | } | 110 | } |
| 116 | up_read(&ioc4_devices_rwsem); | 111 | out: |
| 112 | mutex_unlock(&ioc4_mutex); | ||
| 117 | } | 113 | } |
| 118 | 114 | ||
| 119 | /********************* | 115 | /********************* |
| @@ -312,12 +308,11 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) | |||
| 312 | /* Track PCI-device specific data */ | 308 | /* Track PCI-device specific data */ |
| 313 | idd->idd_serial_data = NULL; | 309 | idd->idd_serial_data = NULL; |
| 314 | pci_set_drvdata(idd->idd_pdev, idd); | 310 | pci_set_drvdata(idd->idd_pdev, idd); |
| 315 | down_write(&ioc4_devices_rwsem); | 311 | |
| 312 | mutex_lock(&ioc4_mutex); | ||
| 316 | list_add(&idd->idd_list, &ioc4_devices); | 313 | list_add(&idd->idd_list, &ioc4_devices); |
| 317 | up_write(&ioc4_devices_rwsem); | ||
| 318 | 314 | ||
| 319 | /* Add this IOC4 to all submodules */ | 315 | /* Add this IOC4 to all submodules */ |
| 320 | down_read(&ioc4_submodules_rwsem); | ||
| 321 | list_for_each_entry(is, &ioc4_submodules, is_list) { | 316 | list_for_each_entry(is, &ioc4_submodules, is_list) { |
| 322 | if (is->is_probe && is->is_probe(idd)) { | 317 | if (is->is_probe && is->is_probe(idd)) { |
| 323 | printk(KERN_WARNING | 318 | printk(KERN_WARNING |
| @@ -327,7 +322,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) | |||
| 327 | pci_name(idd->idd_pdev)); | 322 | pci_name(idd->idd_pdev)); |
| 328 | } | 323 | } |
| 329 | } | 324 | } |
| 330 | up_read(&ioc4_submodules_rwsem); | 325 | mutex_unlock(&ioc4_mutex); |
| 331 | 326 | ||
| 332 | return 0; | 327 | return 0; |
| 333 | 328 | ||
| @@ -351,7 +346,7 @@ ioc4_remove(struct pci_dev *pdev) | |||
| 351 | idd = pci_get_drvdata(pdev); | 346 | idd = pci_get_drvdata(pdev); |
| 352 | 347 | ||
| 353 | /* Remove this IOC4 from all submodules */ | 348 | /* Remove this IOC4 from all submodules */ |
| 354 | down_read(&ioc4_submodules_rwsem); | 349 | mutex_lock(&ioc4_mutex); |
| 355 | list_for_each_entry(is, &ioc4_submodules, is_list) { | 350 | list_for_each_entry(is, &ioc4_submodules, is_list) { |
| 356 | if (is->is_remove && is->is_remove(idd)) { | 351 | if (is->is_remove && is->is_remove(idd)) { |
| 357 | printk(KERN_WARNING | 352 | printk(KERN_WARNING |
| @@ -361,7 +356,7 @@ ioc4_remove(struct pci_dev *pdev) | |||
| 361 | pci_name(idd->idd_pdev)); | 356 | pci_name(idd->idd_pdev)); |
| 362 | } | 357 | } |
| 363 | } | 358 | } |
| 364 | up_read(&ioc4_submodules_rwsem); | 359 | mutex_unlock(&ioc4_mutex); |
| 365 | 360 | ||
| 366 | /* Release resources */ | 361 | /* Release resources */ |
| 367 | iounmap(idd->idd_misc_regs); | 362 | iounmap(idd->idd_misc_regs); |
| @@ -377,9 +372,9 @@ ioc4_remove(struct pci_dev *pdev) | |||
| 377 | pci_disable_device(pdev); | 372 | pci_disable_device(pdev); |
| 378 | 373 | ||
| 379 | /* Remove and free driver data */ | 374 | /* Remove and free driver data */ |
| 380 | down_write(&ioc4_devices_rwsem); | 375 | mutex_lock(&ioc4_mutex); |
| 381 | list_del(&idd->idd_list); | 376 | list_del(&idd->idd_list); |
| 382 | up_write(&ioc4_devices_rwsem); | 377 | mutex_unlock(&ioc4_mutex); |
| 383 | kfree(idd); | 378 | kfree(idd); |
| 384 | } | 379 | } |
| 385 | 380 | ||
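
The ioc4 change collapses the two reader/writer semaphores into a single mutex covering both the device and submodule lists, with a goto label so every exit path unlocks. A rough sketch of that locking shape, with invented names and a simplified probe callback standing in for the real submodule hooks:

	#include <linux/list.h>
	#include <linux/mutex.h>

	struct item {
		struct list_head list;
	};

	static DEFINE_MUTEX(example_mutex);	/* one lock for both lists */
	static LIST_HEAD(example_devices);
	static LIST_HEAD(example_submodules);

	static int example_register(struct item *new, int (*probe)(struct item *))
	{
		struct item *it;

		mutex_lock(&example_mutex);
		list_add(&new->list, &example_submodules);

		if (!probe)
			goto out;		/* early exit still unlocks */

		/* Walk the other list under the same mutex. */
		list_for_each_entry(it, &example_devices, list)
			probe(it);
	out:
		mutex_unlock(&example_mutex);
		return 0;
	}
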
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c index b058273527bb..76448d6ae896 100644 --- a/drivers/video/acornfb.c +++ b/drivers/video/acornfb.c | |||
| @@ -1269,7 +1269,7 @@ free_unused_pages(unsigned int virtual_start, unsigned int virtual_end) | |||
| 1269 | */ | 1269 | */ |
| 1270 | page = virt_to_page(virtual_start); | 1270 | page = virt_to_page(virtual_start); |
| 1271 | ClearPageReserved(page); | 1271 | ClearPageReserved(page); |
| 1272 | set_page_count(page, 1); | 1272 | init_page_count(page); |
| 1273 | free_page(virtual_start); | 1273 | free_page(virtual_start); |
| 1274 | 1274 | ||
| 1275 | virtual_start += PAGE_SIZE; | 1275 | virtual_start += PAGE_SIZE; |
diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index d8467c03b49f..788297e9d59e 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c | |||
| @@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor) | |||
| 1508 | int size = ((cursor->image.width + 7) >> 3) * | 1508 | int size = ((cursor->image.width + 7) >> 3) * |
| 1509 | cursor->image.height; | 1509 | cursor->image.height; |
| 1510 | int i; | 1510 | int i; |
| 1511 | u8 *data = kmalloc(64 * 8, GFP_KERNEL); | 1511 | u8 *data = kmalloc(64 * 8, GFP_ATOMIC); |
| 1512 | 1512 | ||
| 1513 | if (data == NULL) | 1513 | if (data == NULL) |
| 1514 | return -ENOMEM; | 1514 | return -ENOMEM; |
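
The i810 cursor path can be entered in atomic context, hence the switch to GFP_ATOMIC. A tiny sketch of the flag choice, with an invented helper name; GFP_ATOMIC never sleeps but is allowed to fail more readily:

	#include <linux/types.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static u8 *alloc_cursor_bitmap(size_t size, int can_sleep)
	{
		/* Sleeping allocators are only safe outside atomic context. */
		u8 *data = kmalloc(size, can_sleep ? GFP_KERNEL : GFP_ATOMIC);

		if (data)
			memset(data, 0, size);
		return data;
	}
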
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 3ad8455f8577..651a9e14d9a9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
| @@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 614 | 614 | ||
| 615 | sb = dir->i_sb; | 615 | sb = dir->i_sb; |
| 616 | v9ses = v9fs_inode2v9ses(dir); | 616 | v9ses = v9fs_inode2v9ses(dir); |
| 617 | dentry->d_op = &v9fs_dentry_operations; | ||
| 617 | dirfid = v9fs_fid_lookup(dentry->d_parent); | 618 | dirfid = v9fs_fid_lookup(dentry->d_parent); |
| 618 | 619 | ||
| 619 | if (!dirfid) { | 620 | if (!dirfid) { |
| @@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 681 | goto FreeFcall; | 682 | goto FreeFcall; |
| 682 | 683 | ||
| 683 | fid->qid = fcall->params.rstat.stat.qid; | 684 | fid->qid = fcall->params.rstat.stat.qid; |
| 684 | |||
| 685 | dentry->d_op = &v9fs_dentry_operations; | ||
| 686 | v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); | 685 | v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); |
| 687 | 686 | ||
| 688 | d_add(dentry, inode); | 687 | d_add(dentry, inode); |
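
Moving the d_op assignment to the top of the lookup means even dentries released on an error path carry the filesystem's dentry operations. A minimal sketch of that ordering in a 2.6-era lookup method; the filesystem name and operations table are hypothetical:

	#include <linux/fs.h>
	#include <linux/dcache.h>
	#include <linux/namei.h>

	static struct dentry_operations examplefs_dentry_operations;

	static struct dentry *examplefs_lookup(struct inode *dir,
					       struct dentry *dentry,
					       struct nameidata *nd)
	{
		struct inode *inode;

		/*
		 * Install d_op before anything can fail, so error paths that
		 * release the dentry still go through these operations.
		 */
		dentry->d_op = &examplefs_dentry_operations;

		inode = NULL;		/* ... real lookup work would go here ... */

		d_add(dentry, inode);	/* NULL inode makes a negative dentry */
		return NULL;
	}
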
diff --git a/fs/buffer.c b/fs/buffer.c index a9b399402007..1d3683d496f8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -3051,68 +3051,6 @@ asmlinkage long sys_bdflush(int func, long data) | |||
| 3051 | } | 3051 | } |
| 3052 | 3052 | ||
| 3053 | /* | 3053 | /* |
| 3054 | * Migration function for pages with buffers. This function can only be used | ||
| 3055 | * if the underlying filesystem guarantees that no other references to "page" | ||
| 3056 | * exist. | ||
| 3057 | */ | ||
| 3058 | #ifdef CONFIG_MIGRATION | ||
| 3059 | int buffer_migrate_page(struct page *newpage, struct page *page) | ||
| 3060 | { | ||
| 3061 | struct address_space *mapping = page->mapping; | ||
| 3062 | struct buffer_head *bh, *head; | ||
| 3063 | int rc; | ||
| 3064 | |||
| 3065 | if (!mapping) | ||
| 3066 | return -EAGAIN; | ||
| 3067 | |||
| 3068 | if (!page_has_buffers(page)) | ||
| 3069 | return migrate_page(newpage, page); | ||
| 3070 | |||
| 3071 | head = page_buffers(page); | ||
| 3072 | |||
| 3073 | rc = migrate_page_remove_references(newpage, page, 3); | ||
| 3074 | if (rc) | ||
| 3075 | return rc; | ||
| 3076 | |||
| 3077 | bh = head; | ||
| 3078 | do { | ||
| 3079 | get_bh(bh); | ||
| 3080 | lock_buffer(bh); | ||
| 3081 | bh = bh->b_this_page; | ||
| 3082 | |||
| 3083 | } while (bh != head); | ||
| 3084 | |||
| 3085 | ClearPagePrivate(page); | ||
| 3086 | set_page_private(newpage, page_private(page)); | ||
| 3087 | set_page_private(page, 0); | ||
| 3088 | put_page(page); | ||
| 3089 | get_page(newpage); | ||
| 3090 | |||
| 3091 | bh = head; | ||
| 3092 | do { | ||
| 3093 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
| 3094 | bh = bh->b_this_page; | ||
| 3095 | |||
| 3096 | } while (bh != head); | ||
| 3097 | |||
| 3098 | SetPagePrivate(newpage); | ||
| 3099 | |||
| 3100 | migrate_page_copy(newpage, page); | ||
| 3101 | |||
| 3102 | bh = head; | ||
| 3103 | do { | ||
| 3104 | unlock_buffer(bh); | ||
| 3105 | put_bh(bh); | ||
| 3106 | bh = bh->b_this_page; | ||
| 3107 | |||
| 3108 | } while (bh != head); | ||
| 3109 | |||
| 3110 | return 0; | ||
| 3111 | } | ||
| 3112 | EXPORT_SYMBOL(buffer_migrate_page); | ||
| 3113 | #endif | ||
| 3114 | |||
| 3115 | /* | ||
| 3116 | * Buffer-head allocation | 3054 | * Buffer-head allocation |
| 3117 | */ | 3055 | */ |
| 3118 | static kmem_cache_t *bh_cachep; | 3056 | static kmem_cache_t *bh_cachep; |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b35195289945..25fa8bba8cb5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -56,48 +56,10 @@ static void huge_pagevec_release(struct pagevec *pvec) | |||
| 56 | pagevec_reinit(pvec); | 56 | pagevec_reinit(pvec); |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | /* | ||
| 60 | * huge_pages_needed tries to determine the number of new huge pages that | ||
| 61 | * will be required to fully populate this VMA. This will be equal to | ||
| 62 | * the size of the VMA in huge pages minus the number of huge pages | ||
| 63 | * (covered by this VMA) that are found in the page cache. | ||
| 64 | * | ||
| 65 | * Result is in bytes to be compatible with is_hugepage_mem_enough() | ||
| 66 | */ | ||
| 67 | static unsigned long | ||
| 68 | huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) | ||
| 69 | { | ||
| 70 | int i; | ||
| 71 | struct pagevec pvec; | ||
| 72 | unsigned long start = vma->vm_start; | ||
| 73 | unsigned long end = vma->vm_end; | ||
| 74 | unsigned long hugepages = (end - start) >> HPAGE_SHIFT; | ||
| 75 | pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); | ||
| 76 | pgoff_t endpg = next + hugepages; | ||
| 77 | |||
| 78 | pagevec_init(&pvec, 0); | ||
| 79 | while (next < endpg) { | ||
| 80 | if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) | ||
| 81 | break; | ||
| 82 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
| 83 | struct page *page = pvec.pages[i]; | ||
| 84 | if (page->index > next) | ||
| 85 | next = page->index; | ||
| 86 | if (page->index >= endpg) | ||
| 87 | break; | ||
| 88 | next++; | ||
| 89 | hugepages--; | ||
| 90 | } | ||
| 91 | huge_pagevec_release(&pvec); | ||
| 92 | } | ||
| 93 | return hugepages << HPAGE_SHIFT; | ||
| 94 | } | ||
| 95 | |||
| 96 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 59 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
| 97 | { | 60 | { |
| 98 | struct inode *inode = file->f_dentry->d_inode; | 61 | struct inode *inode = file->f_dentry->d_inode; |
| 99 | struct address_space *mapping = inode->i_mapping; | 62 | struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); |
| 100 | unsigned long bytes; | ||
| 101 | loff_t len, vma_len; | 63 | loff_t len, vma_len; |
| 102 | int ret; | 64 | int ret; |
| 103 | 65 | ||
| @@ -113,10 +75,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 113 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) | 75 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) |
| 114 | return -EINVAL; | 76 | return -EINVAL; |
| 115 | 77 | ||
| 116 | bytes = huge_pages_needed(mapping, vma); | ||
| 117 | if (!is_hugepage_mem_enough(bytes)) | ||
| 118 | return -ENOMEM; | ||
| 119 | |||
| 120 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); | 78 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); |
| 121 | 79 | ||
| 122 | mutex_lock(&inode->i_mutex); | 80 | mutex_lock(&inode->i_mutex); |
| @@ -129,6 +87,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 129 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) | 87 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) |
| 130 | goto out; | 88 | goto out; |
| 131 | 89 | ||
| 90 | if (vma->vm_flags & VM_MAYSHARE) | ||
| 91 | if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) | ||
| 92 | goto out; | ||
| 93 | |||
| 132 | ret = 0; | 94 | ret = 0; |
| 133 | hugetlb_prefault_arch_hook(vma->vm_mm); | 95 | hugetlb_prefault_arch_hook(vma->vm_mm); |
| 134 | if (inode->i_size < len) | 96 | if (inode->i_size < len) |
| @@ -227,13 +189,18 @@ static void truncate_huge_page(struct page *page) | |||
| 227 | put_page(page); | 189 | put_page(page); |
| 228 | } | 190 | } |
| 229 | 191 | ||
| 230 | static void truncate_hugepages(struct address_space *mapping, loff_t lstart) | 192 | static void truncate_hugepages(struct inode *inode, loff_t lstart) |
| 231 | { | 193 | { |
| 194 | struct address_space *mapping = &inode->i_data; | ||
| 232 | const pgoff_t start = lstart >> HPAGE_SHIFT; | 195 | const pgoff_t start = lstart >> HPAGE_SHIFT; |
| 233 | struct pagevec pvec; | 196 | struct pagevec pvec; |
| 234 | pgoff_t next; | 197 | pgoff_t next; |
| 235 | int i; | 198 | int i; |
| 236 | 199 | ||
| 200 | hugetlb_truncate_reservation(HUGETLBFS_I(inode), | ||
| 201 | lstart >> HPAGE_SHIFT); | ||
| 202 | if (!mapping->nrpages) | ||
| 203 | return; | ||
| 237 | pagevec_init(&pvec, 0); | 204 | pagevec_init(&pvec, 0); |
| 238 | next = start; | 205 | next = start; |
| 239 | while (1) { | 206 | while (1) { |
| @@ -262,8 +229,7 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart) | |||
| 262 | 229 | ||
| 263 | static void hugetlbfs_delete_inode(struct inode *inode) | 230 | static void hugetlbfs_delete_inode(struct inode *inode) |
| 264 | { | 231 | { |
| 265 | if (inode->i_data.nrpages) | 232 | truncate_hugepages(inode, 0); |
| 266 | truncate_hugepages(&inode->i_data, 0); | ||
| 267 | clear_inode(inode); | 233 | clear_inode(inode); |
| 268 | } | 234 | } |
| 269 | 235 | ||
| @@ -296,8 +262,7 @@ static void hugetlbfs_forget_inode(struct inode *inode) | |||
| 296 | inode->i_state |= I_FREEING; | 262 | inode->i_state |= I_FREEING; |
| 297 | inodes_stat.nr_inodes--; | 263 | inodes_stat.nr_inodes--; |
| 298 | spin_unlock(&inode_lock); | 264 | spin_unlock(&inode_lock); |
| 299 | if (inode->i_data.nrpages) | 265 | truncate_hugepages(inode, 0); |
| 300 | truncate_hugepages(&inode->i_data, 0); | ||
| 301 | clear_inode(inode); | 266 | clear_inode(inode); |
| 302 | destroy_inode(inode); | 267 | destroy_inode(inode); |
| 303 | } | 268 | } |
| @@ -356,7 +321,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | |||
| 356 | if (!prio_tree_empty(&mapping->i_mmap)) | 321 | if (!prio_tree_empty(&mapping->i_mmap)) |
| 357 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); | 322 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); |
| 358 | spin_unlock(&mapping->i_mmap_lock); | 323 | spin_unlock(&mapping->i_mmap_lock); |
| 359 | truncate_hugepages(mapping, offset); | 324 | truncate_hugepages(inode, offset); |
| 360 | return 0; | 325 | return 0; |
| 361 | } | 326 | } |
| 362 | 327 | ||
| @@ -573,6 +538,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) | |||
| 573 | hugetlbfs_inc_free_inodes(sbinfo); | 538 | hugetlbfs_inc_free_inodes(sbinfo); |
| 574 | return NULL; | 539 | return NULL; |
| 575 | } | 540 | } |
| 541 | p->prereserved_hpages = 0; | ||
| 576 | return &p->vfs_inode; | 542 | return &p->vfs_inode; |
| 577 | } | 543 | } |
| 578 | 544 | ||
| @@ -771,21 +737,6 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
| 771 | 737 | ||
| 772 | static struct vfsmount *hugetlbfs_vfsmount; | 738 | static struct vfsmount *hugetlbfs_vfsmount; |
| 773 | 739 | ||
| 774 | /* | ||
| 775 | * Return the next identifier for a shm file | ||
| 776 | */ | ||
| 777 | static unsigned long hugetlbfs_counter(void) | ||
| 778 | { | ||
| 779 | static DEFINE_SPINLOCK(lock); | ||
| 780 | static unsigned long counter; | ||
| 781 | unsigned long ret; | ||
| 782 | |||
| 783 | spin_lock(&lock); | ||
| 784 | ret = ++counter; | ||
| 785 | spin_unlock(&lock); | ||
| 786 | return ret; | ||
| 787 | } | ||
| 788 | |||
| 789 | static int can_do_hugetlb_shm(void) | 740 | static int can_do_hugetlb_shm(void) |
| 790 | { | 741 | { |
| 791 | return likely(capable(CAP_IPC_LOCK) || | 742 | return likely(capable(CAP_IPC_LOCK) || |
| @@ -801,18 +752,16 @@ struct file *hugetlb_zero_setup(size_t size) | |||
| 801 | struct dentry *dentry, *root; | 752 | struct dentry *dentry, *root; |
| 802 | struct qstr quick_string; | 753 | struct qstr quick_string; |
| 803 | char buf[16]; | 754 | char buf[16]; |
| 755 | static atomic_t counter; | ||
| 804 | 756 | ||
| 805 | if (!can_do_hugetlb_shm()) | 757 | if (!can_do_hugetlb_shm()) |
| 806 | return ERR_PTR(-EPERM); | 758 | return ERR_PTR(-EPERM); |
| 807 | 759 | ||
| 808 | if (!is_hugepage_mem_enough(size)) | ||
| 809 | return ERR_PTR(-ENOMEM); | ||
| 810 | |||
| 811 | if (!user_shm_lock(size, current->user)) | 760 | if (!user_shm_lock(size, current->user)) |
| 812 | return ERR_PTR(-ENOMEM); | 761 | return ERR_PTR(-ENOMEM); |
| 813 | 762 | ||
| 814 | root = hugetlbfs_vfsmount->mnt_root; | 763 | root = hugetlbfs_vfsmount->mnt_root; |
| 815 | snprintf(buf, 16, "%lu", hugetlbfs_counter()); | 764 | snprintf(buf, 16, "%u", atomic_inc_return(&counter)); |
| 816 | quick_string.name = buf; | 765 | quick_string.name = buf; |
| 817 | quick_string.len = strlen(quick_string.name); | 766 | quick_string.len = strlen(quick_string.name); |
| 818 | quick_string.hash = 0; | 767 | quick_string.hash = 0; |
| @@ -831,6 +780,11 @@ struct file *hugetlb_zero_setup(size_t size) | |||
| 831 | if (!inode) | 780 | if (!inode) |
| 832 | goto out_file; | 781 | goto out_file; |
| 833 | 782 | ||
| 783 | error = -ENOMEM; | ||
| 784 | if (hugetlb_extend_reservation(HUGETLBFS_I(inode), | ||
| 785 | size >> HPAGE_SHIFT) != 0) | ||
| 786 | goto out_inode; | ||
| 787 | |||
| 834 | d_instantiate(dentry, inode); | 788 | d_instantiate(dentry, inode); |
| 835 | inode->i_size = size; | 789 | inode->i_size = size; |
| 836 | inode->i_nlink = 0; | 790 | inode->i_nlink = 0; |
| @@ -841,6 +795,8 @@ struct file *hugetlb_zero_setup(size_t size) | |||
| 841 | file->f_mode = FMODE_WRITE | FMODE_READ; | 795 | file->f_mode = FMODE_WRITE | FMODE_READ; |
| 842 | return file; | 796 | return file; |
| 843 | 797 | ||
| 798 | out_inode: | ||
| 799 | iput(inode); | ||
| 844 | out_file: | 800 | out_file: |
| 845 | put_filp(file); | 801 | put_filp(file); |
| 846 | out_dentry: | 802 | out_dentry: |
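
Besides moving huge-page accounting to an up-front reservation (hugetlb_extend_reservation() at mmap and zero-setup time, hugetlb_truncate_reservation() on truncate), the patch replaces the spinlock-protected shm name counter with an atomic one. A self-contained sketch of that counter idiom, with invented names:

	#include <linux/kernel.h>
	#include <linux/types.h>
	#include <asm/atomic.h>

	static atomic_t shm_counter;

	static void make_shm_name(char *buf, size_t len)
	{
		/* atomic_inc_return() hands each caller a distinct value. */
		snprintf(buf, len, "%u", atomic_inc_return(&shm_counter));
	}
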
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8dd3aafec499..09e1c57a86a0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(void) | |||
| 959 | ocfs2_lock_cache = kmem_cache_create("ocfs2_lock", | 959 | ocfs2_lock_cache = kmem_cache_create("ocfs2_lock", |
| 960 | sizeof(struct ocfs2_journal_lock), | 960 | sizeof(struct ocfs2_journal_lock), |
| 961 | 0, | 961 | 0, |
| 962 | SLAB_NO_REAP|SLAB_HWCACHE_ALIGN, | 962 | SLAB_HWCACHE_ALIGN, |
| 963 | NULL, NULL); | 963 | NULL, NULL); |
| 964 | if (!ocfs2_lock_cache) | 964 | if (!ocfs2_lock_cache) |
| 965 | return -ENOMEM; | 965 | return -ENOMEM; |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 3f810acd0bfa..b1ca234068f6 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -87,8 +87,7 @@ static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
| 87 | xpages = 1UL << order; | 87 | xpages = 1UL << order; |
| 88 | npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; | 88 | npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 89 | 89 | ||
| 90 | for (loop = 0; loop < npages; loop++) | 90 | split_page(pages, order); |
| 91 | set_page_count(pages + loop, 1); | ||
| 92 | 91 | ||
| 93 | /* trim off any pages we don't actually require */ | 92 | /* trim off any pages we don't actually require */ |
| 94 | for (loop = npages; loop < xpages; loop++) | 93 | for (loop = npages; loop < xpages; loop++) |
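
split_page() gives every page in a high-order allocation its own reference count, replacing the open-coded set_page_count() loop. A short sketch of the surrounding pattern as used here (allocate a power-of-two block, split it, free the unused tail); the helper name is invented:

	#include <linux/gfp.h>
	#include <linux/mm.h>

	static struct page *alloc_exact_pages(unsigned long npages)
	{
		unsigned int order = get_order(npages << PAGE_SHIFT);
		unsigned long xpages = 1UL << order;
		unsigned long loop;
		struct page *pages;

		pages = alloc_pages(GFP_KERNEL, order);
		if (!pages)
			return NULL;

		/* Each of the 2^order pages now has an independent refcount. */
		split_page(pages, order);

		/* Return the pages that were over-allocated. */
		for (loop = npages; loop < xpages; loop++)
			__free_page(pages + loop);

		return pages;
	}
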
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index bfb4f2917bb6..8cdfa4151659 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/blkdev.h> | 29 | #include <linux/blkdev.h> |
| 30 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
| 31 | #include <linux/kthread.h> | 31 | #include <linux/kthread.h> |
| 32 | #include <linux/migrate.h> | ||
| 32 | #include "xfs_linux.h" | 33 | #include "xfs_linux.h" |
| 33 | 34 | ||
| 34 | STATIC kmem_zone_t *xfs_buf_zone; | 35 | STATIC kmem_zone_t *xfs_buf_zone; |
diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 55059abf9c95..20f523954218 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h | |||
| @@ -103,6 +103,12 @@ __acpi_release_global_lock (unsigned int *lock) | |||
| 103 | :"=r"(n_hi), "=r"(n_lo) \ | 103 | :"=r"(n_hi), "=r"(n_lo) \ |
| 104 | :"0"(n_hi), "1"(n_lo)) | 104 | :"0"(n_hi), "1"(n_lo)) |
| 105 | 105 | ||
| 106 | #ifdef CONFIG_X86_IO_APIC | ||
| 107 | extern void check_acpi_pci(void); | ||
| 108 | #else | ||
| 109 | static inline void check_acpi_pci(void) { } | ||
| 110 | #endif | ||
| 111 | |||
| 106 | #ifdef CONFIG_ACPI | 112 | #ifdef CONFIG_ACPI |
| 107 | extern int acpi_lapic; | 113 | extern int acpi_lapic; |
| 108 | extern int acpi_ioapic; | 114 | extern int acpi_ioapic; |
| @@ -128,8 +134,6 @@ extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); | |||
| 128 | extern int skip_ioapic_setup; | 134 | extern int skip_ioapic_setup; |
| 129 | extern int acpi_skip_timer_override; | 135 | extern int acpi_skip_timer_override; |
| 130 | 136 | ||
| 131 | extern void check_acpi_pci(void); | ||
| 132 | |||
| 133 | static inline void disable_ioapic_setup(void) | 137 | static inline void disable_ioapic_setup(void) |
| 134 | { | 138 | { |
| 135 | skip_ioapic_setup = 1; | 139 | skip_ioapic_setup = 1; |
| @@ -142,8 +146,6 @@ static inline int ioapic_setup_disabled(void) | |||
| 142 | 146 | ||
| 143 | #else | 147 | #else |
| 144 | static inline void disable_ioapic_setup(void) { } | 148 | static inline void disable_ioapic_setup(void) { } |
| 145 | static inline void check_acpi_pci(void) { } | ||
| 146 | |||
| 147 | #endif | 149 | #endif |
| 148 | 150 | ||
| 149 | static inline void acpi_noirq_set(void) { acpi_noirq = 1; } | 151 | static inline void acpi_noirq_set(void) { acpi_noirq = 1; } |
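
check_acpi_pci() now follows the usual "real declaration or static inline stub" arrangement keyed off CONFIG_X86_IO_APIC rather than CONFIG_ACPI. The general shape of that pattern, shown with a made-up config symbol so callers never need their own #ifdefs:

	#ifdef CONFIG_EXAMPLE_FEATURE
	extern void example_feature_init(void);
	#else
	/* Empty inline stub: the call compiles away when the option is off. */
	static inline void example_feature_init(void) { }
	#endif
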
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 088a945bf26b..ee056c41a9fb 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h | |||
| @@ -219,13 +219,12 @@ extern unsigned long pg0[]; | |||
| 219 | * The following only work if pte_present() is true. | 219 | * The following only work if pte_present() is true. |
| 220 | * Undefined behaviour if not.. | 220 | * Undefined behaviour if not.. |
| 221 | */ | 221 | */ |
| 222 | #define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT) | ||
| 223 | static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } | 222 | static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } |
| 224 | static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } | 223 | static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } |
| 225 | static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } | 224 | static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } |
| 226 | static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } | 225 | static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } |
| 227 | static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } | 226 | static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } |
| 228 | static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; } | 227 | static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } |
| 229 | 228 | ||
| 230 | /* | 229 | /* |
| 231 | * The following only works if pte_present() is not true. | 230 | * The following only works if pte_present() is not true. |
| @@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return | |||
| 242 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } | 241 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } |
| 243 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } | 242 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } |
| 244 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } | 243 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } |
| 245 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; } | 244 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } |
| 246 | 245 | ||
| 247 | #ifdef CONFIG_X86_PAE | 246 | #ifdef CONFIG_X86_PAE |
| 248 | # include <asm/pgtable-3level.h> | 247 | # include <asm/pgtable-3level.h> |
diff --git a/include/asm-ia64/intel_intrin.h b/include/asm-ia64/intel_intrin.h index a7122d850177..d069b6acddce 100644 --- a/include/asm-ia64/intel_intrin.h +++ b/include/asm-ia64/intel_intrin.h | |||
| @@ -5,113 +5,10 @@ | |||
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com> | 6 | * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com> |
| 7 | * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> | 7 | * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> |
| 8 | * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com> | ||
| 8 | * | 9 | * |
| 9 | */ | 10 | */ |
| 10 | #include <asm/types.h> | 11 | #include <ia64intrin.h> |
| 11 | |||
| 12 | void __lfetch(int lfhint, void *y); | ||
| 13 | void __lfetch_excl(int lfhint, void *y); | ||
| 14 | void __lfetch_fault(int lfhint, void *y); | ||
| 15 | void __lfetch_fault_excl(int lfhint, void *y); | ||
| 16 | |||
| 17 | /* In the following, whichFloatReg should be an integer from 0-127 */ | ||
| 18 | void __ldfs(const int whichFloatReg, void *src); | ||
| 19 | void __ldfd(const int whichFloatReg, void *src); | ||
| 20 | void __ldfe(const int whichFloatReg, void *src); | ||
| 21 | void __ldf8(const int whichFloatReg, void *src); | ||
| 22 | void __ldf_fill(const int whichFloatReg, void *src); | ||
| 23 | void __stfs(void *dst, const int whichFloatReg); | ||
| 24 | void __stfd(void *dst, const int whichFloatReg); | ||
| 25 | void __stfe(void *dst, const int whichFloatReg); | ||
| 26 | void __stf8(void *dst, const int whichFloatReg); | ||
| 27 | void __stf_spill(void *dst, const int whichFloatReg); | ||
| 28 | |||
| 29 | void __st1_rel(void *dst, const __s8 value); | ||
| 30 | void __st2_rel(void *dst, const __s16 value); | ||
| 31 | void __st4_rel(void *dst, const __s32 value); | ||
| 32 | void __st8_rel(void *dst, const __s64 value); | ||
| 33 | __u8 __ld1_acq(void *src); | ||
| 34 | __u16 __ld2_acq(void *src); | ||
| 35 | __u32 __ld4_acq(void *src); | ||
| 36 | __u64 __ld8_acq(void *src); | ||
| 37 | |||
| 38 | __u64 __fetchadd4_acq(__u32 *addend, const int increment); | ||
| 39 | __u64 __fetchadd4_rel(__u32 *addend, const int increment); | ||
| 40 | __u64 __fetchadd8_acq(__u64 *addend, const int increment); | ||
| 41 | __u64 __fetchadd8_rel(__u64 *addend, const int increment); | ||
| 42 | |||
| 43 | __u64 __getf_exp(double d); | ||
| 44 | |||
| 45 | /* OS Related Itanium(R) Intrinsics */ | ||
| 46 | |||
| 47 | /* The names to use for whichReg and whichIndReg below come from | ||
| 48 | the include file asm/ia64regs.h */ | ||
| 49 | |||
| 50 | __u64 __getIndReg(const int whichIndReg, __s64 index); | ||
| 51 | __u64 __getReg(const int whichReg); | ||
| 52 | |||
| 53 | void __setIndReg(const int whichIndReg, __s64 index, __u64 value); | ||
| 54 | void __setReg(const int whichReg, __u64 value); | ||
| 55 | |||
| 56 | void __mf(void); | ||
| 57 | void __mfa(void); | ||
| 58 | void __synci(void); | ||
| 59 | void __itcd(__s64 pa); | ||
| 60 | void __itci(__s64 pa); | ||
| 61 | void __itrd(__s64 whichTransReg, __s64 pa); | ||
| 62 | void __itri(__s64 whichTransReg, __s64 pa); | ||
| 63 | void __ptce(__s64 va); | ||
| 64 | void __ptcl(__s64 va, __s64 pagesz); | ||
| 65 | void __ptcg(__s64 va, __s64 pagesz); | ||
| 66 | void __ptcga(__s64 va, __s64 pagesz); | ||
| 67 | void __ptri(__s64 va, __s64 pagesz); | ||
| 68 | void __ptrd(__s64 va, __s64 pagesz); | ||
| 69 | void __invala (void); | ||
| 70 | void __invala_gr(const int whichGeneralReg /* 0-127 */ ); | ||
| 71 | void __invala_fr(const int whichFloatReg /* 0-127 */ ); | ||
| 72 | void __nop(const int); | ||
| 73 | void __fc(__u64 *addr); | ||
| 74 | void __sum(int mask); | ||
| 75 | void __rum(int mask); | ||
| 76 | void __ssm(int mask); | ||
| 77 | void __rsm(int mask); | ||
| 78 | __u64 __thash(__s64); | ||
| 79 | __u64 __ttag(__s64); | ||
| 80 | __s64 __tpa(__s64); | ||
| 81 | |||
| 82 | /* Intrinsics for implementing get/put_user macros */ | ||
| 83 | void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val); | ||
| 84 | void __ld_user(const char *tableName, __u64 addr, char size, char relocType); | ||
| 85 | |||
| 86 | /* This intrinsic does not generate code, it creates a barrier across which | ||
| 87 | * the compiler will not schedule data access instructions. | ||
| 88 | */ | ||
| 89 | void __memory_barrier(void); | ||
| 90 | |||
| 91 | void __isrlz(void); | ||
| 92 | void __dsrlz(void); | ||
| 93 | |||
| 94 | __u64 _m64_mux1(__u64 a, const int n); | ||
| 95 | __u64 __thash(__u64); | ||
| 96 | |||
| 97 | /* Lock and Atomic Operation Related Intrinsics */ | ||
| 98 | __u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value); | ||
| 99 | __u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value); | ||
| 100 | __s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value); | ||
| 101 | __s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value); | ||
| 102 | |||
| 103 | __u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp); | ||
| 104 | __u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp); | ||
| 105 | __u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp); | ||
| 106 | __u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp); | ||
| 107 | __u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp); | ||
| 108 | __u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp); | ||
| 109 | __u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp); | ||
| 110 | __u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp); | ||
| 111 | |||
| 112 | __s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len); | ||
| 113 | __s64 _m64_shrp(__s64 a, __s64 b, const int count); | ||
| 114 | __s64 _m64_popcnt(__s64 a); | ||
| 115 | 12 | ||
| 116 | #define ia64_barrier() __memory_barrier() | 13 | #define ia64_barrier() __memory_barrier() |
| 117 | 14 | ||
| @@ -122,15 +19,16 @@ __s64 _m64_popcnt(__s64 a); | |||
| 122 | #define ia64_getreg __getReg | 19 | #define ia64_getreg __getReg |
| 123 | #define ia64_setreg __setReg | 20 | #define ia64_setreg __setReg |
| 124 | 21 | ||
| 125 | #define ia64_hint(x) | 22 | #define ia64_hint __hint |
| 23 | #define ia64_hint_pause __hint_pause | ||
| 126 | 24 | ||
| 127 | #define ia64_mux1_brcst 0 | 25 | #define ia64_mux1_brcst _m64_mux1_brcst |
| 128 | #define ia64_mux1_mix 8 | 26 | #define ia64_mux1_mix _m64_mux1_mix |
| 129 | #define ia64_mux1_shuf 9 | 27 | #define ia64_mux1_shuf _m64_mux1_shuf |
| 130 | #define ia64_mux1_alt 10 | 28 | #define ia64_mux1_alt _m64_mux1_alt |
| 131 | #define ia64_mux1_rev 11 | 29 | #define ia64_mux1_rev _m64_mux1_rev |
| 132 | 30 | ||
| 133 | #define ia64_mux1 _m64_mux1 | 31 | #define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v))) |
| 134 | #define ia64_popcnt _m64_popcnt | 32 | #define ia64_popcnt _m64_popcnt |
| 135 | #define ia64_getf_exp __getf_exp | 33 | #define ia64_getf_exp __getf_exp |
| 136 | #define ia64_shrp _m64_shrp | 34 | #define ia64_shrp _m64_shrp |
| @@ -158,7 +56,7 @@ __s64 _m64_popcnt(__s64 a); | |||
| 158 | #define ia64_stf8 __stf8 | 56 | #define ia64_stf8 __stf8 |
| 159 | #define ia64_stf_spill __stf_spill | 57 | #define ia64_stf_spill __stf_spill |
| 160 | 58 | ||
| 161 | #define ia64_mf __mf | 59 | #define ia64_mf __mf |
| 162 | #define ia64_mfa __mfa | 60 | #define ia64_mfa __mfa |
| 163 | 61 | ||
| 164 | #define ia64_fetchadd4_acq __fetchadd4_acq | 62 | #define ia64_fetchadd4_acq __fetchadd4_acq |
| @@ -234,10 +132,10 @@ __s64 _m64_popcnt(__s64 a); | |||
| 234 | 132 | ||
| 235 | /* Values for lfhint in __lfetch and __lfetch_fault */ | 133 | /* Values for lfhint in __lfetch and __lfetch_fault */ |
| 236 | 134 | ||
| 237 | #define ia64_lfhint_none 0 | 135 | #define ia64_lfhint_none __lfhint_none |
| 238 | #define ia64_lfhint_nt1 1 | 136 | #define ia64_lfhint_nt1 __lfhint_nt1 |
| 239 | #define ia64_lfhint_nt2 2 | 137 | #define ia64_lfhint_nt2 __lfhint_nt2 |
| 240 | #define ia64_lfhint_nta 3 | 138 | #define ia64_lfhint_nta __lfhint_nta |
| 241 | 139 | ||
| 242 | #define ia64_lfetch __lfetch | 140 | #define ia64_lfetch __lfetch |
| 243 | #define ia64_lfetch_excl __lfetch_excl | 141 | #define ia64_lfetch_excl __lfetch_excl |
| @@ -254,4 +152,6 @@ do { \ | |||
| 254 | } \ | 152 | } \ |
| 255 | } while (0) | 153 | } while (0) |
| 256 | 154 | ||
| 155 | #define __builtin_trap() __break(0); | ||
| 156 | |||
| 257 | #endif /* _ASM_IA64_INTEL_INTRIN_H */ | 157 | #endif /* _ASM_IA64_INTEL_INTRIN_H */ |
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index ca5ea994d688..c3e4ed8a3e17 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h | |||
| @@ -20,6 +20,7 @@ struct scatterlist; | |||
| 20 | struct page; | 20 | struct page; |
| 21 | struct mm_struct; | 21 | struct mm_struct; |
| 22 | struct pci_bus; | 22 | struct pci_bus; |
| 23 | struct task_struct; | ||
| 23 | 24 | ||
| 24 | typedef void ia64_mv_setup_t (char **); | 25 | typedef void ia64_mv_setup_t (char **); |
| 25 | typedef void ia64_mv_cpu_init_t (void); | 26 | typedef void ia64_mv_cpu_init_t (void); |
| @@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val, | |||
| 34 | u8 size); | 35 | u8 size); |
| 35 | typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, | 36 | typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, |
| 36 | u8 size); | 37 | u8 size); |
| 38 | typedef void ia64_mv_migrate_t(struct task_struct * task); | ||
| 37 | 39 | ||
| 38 | /* DMA-mapping interface: */ | 40 | /* DMA-mapping interface: */ |
| 39 | typedef void ia64_mv_dma_init (void); | 41 | typedef void ia64_mv_dma_init (void); |
| @@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm) | |||
| 85 | { | 87 | { |
| 86 | } | 88 | } |
| 87 | 89 | ||
| 90 | static inline void | ||
| 91 | machvec_noop_task (struct task_struct *task) | ||
| 92 | { | ||
| 93 | } | ||
| 94 | |||
| 88 | extern void machvec_setup (char **); | 95 | extern void machvec_setup (char **); |
| 89 | extern void machvec_timer_interrupt (int, void *, struct pt_regs *); | 96 | extern void machvec_timer_interrupt (int, void *, struct pt_regs *); |
| 90 | extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); | 97 | extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); |
| @@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); | |||
| 146 | # define platform_readw_relaxed ia64_mv.readw_relaxed | 153 | # define platform_readw_relaxed ia64_mv.readw_relaxed |
| 147 | # define platform_readl_relaxed ia64_mv.readl_relaxed | 154 | # define platform_readl_relaxed ia64_mv.readl_relaxed |
| 148 | # define platform_readq_relaxed ia64_mv.readq_relaxed | 155 | # define platform_readq_relaxed ia64_mv.readq_relaxed |
| 156 | # define platform_migrate ia64_mv.migrate | ||
| 149 | # endif | 157 | # endif |
| 150 | 158 | ||
| 151 | /* __attribute__((__aligned__(16))) is required to make size of the | 159 | /* __attribute__((__aligned__(16))) is required to make size of the |
| @@ -194,6 +202,7 @@ struct ia64_machine_vector { | |||
| 194 | ia64_mv_readw_relaxed_t *readw_relaxed; | 202 | ia64_mv_readw_relaxed_t *readw_relaxed; |
| 195 | ia64_mv_readl_relaxed_t *readl_relaxed; | 203 | ia64_mv_readl_relaxed_t *readl_relaxed; |
| 196 | ia64_mv_readq_relaxed_t *readq_relaxed; | 204 | ia64_mv_readq_relaxed_t *readq_relaxed; |
| 205 | ia64_mv_migrate_t *migrate; | ||
| 197 | } __attribute__((__aligned__(16))); /* align attrib? see above comment */ | 206 | } __attribute__((__aligned__(16))); /* align attrib? see above comment */ |
| 198 | 207 | ||
| 199 | #define MACHVEC_INIT(name) \ | 208 | #define MACHVEC_INIT(name) \ |
| @@ -238,6 +247,7 @@ struct ia64_machine_vector { | |||
| 238 | platform_readw_relaxed, \ | 247 | platform_readw_relaxed, \ |
| 239 | platform_readl_relaxed, \ | 248 | platform_readl_relaxed, \ |
| 240 | platform_readq_relaxed, \ | 249 | platform_readq_relaxed, \ |
| 250 | platform_migrate, \ | ||
| 241 | } | 251 | } |
| 242 | 252 | ||
| 243 | extern struct ia64_machine_vector ia64_mv; | 253 | extern struct ia64_machine_vector ia64_mv; |
| @@ -386,5 +396,8 @@ extern ia64_mv_dma_supported swiotlb_dma_supported; | |||
| 386 | #ifndef platform_readq_relaxed | 396 | #ifndef platform_readq_relaxed |
| 387 | # define platform_readq_relaxed __ia64_readq_relaxed | 397 | # define platform_readq_relaxed __ia64_readq_relaxed |
| 388 | #endif | 398 | #endif |
| 399 | #ifndef platform_migrate | ||
| 400 | # define platform_migrate machvec_noop_task | ||
| 401 | #endif | ||
| 389 | 402 | ||
| 390 | #endif /* _ASM_IA64_MACHVEC_H */ | 403 | #endif /* _ASM_IA64_MACHVEC_H */ |
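
The machine-vector change adds a per-platform migrate hook with a no-op default (machvec_noop_task), selected through the usual "#ifndef platform_migrate" fallback. A compressed sketch of that hook-with-default pattern, using invented names rather than the real ia64 machvec plumbing:

	#include <linux/sched.h>

	typedef void example_migrate_t(struct task_struct *task);

	/* Default: most platforms need no migration fixup. */
	static void example_migrate_noop(struct task_struct *task)
	{
	}

	struct example_machvec {
		example_migrate_t *migrate;
	};

	/* A platform that cares overrides the default... */
	static void sn_style_migrate(struct task_struct *task)
	{
		/* platform-specific synchronization would go here */
	}

	static struct example_machvec mv = {
		.migrate = sn_style_migrate,
	};

	/* ...and generic code always calls through the vector. */
	static inline void example_platform_migrate(struct task_struct *task)
	{
		mv.migrate(task);
	}
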
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index 03d00faf03b5..da1d43755afe 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc. All Rights Reserved. |
| 3 | * | 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
| 5 | * under the terms of version 2 of the GNU General Public License | 5 | * under the terms of version 2 of the GNU General Public License |
| @@ -66,6 +66,7 @@ extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device; | |||
| 66 | extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; | 66 | extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; |
| 67 | extern ia64_mv_dma_mapping_error sn_dma_mapping_error; | 67 | extern ia64_mv_dma_mapping_error sn_dma_mapping_error; |
| 68 | extern ia64_mv_dma_supported sn_dma_supported; | 68 | extern ia64_mv_dma_supported sn_dma_supported; |
| 69 | extern ia64_mv_migrate_t sn_migrate; | ||
| 69 | 70 | ||
| 70 | /* | 71 | /* |
| 71 | * This stuff has dual use! | 72 | * This stuff has dual use! |
| @@ -115,6 +116,7 @@ extern ia64_mv_dma_supported sn_dma_supported; | |||
| 115 | #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device | 116 | #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device |
| 116 | #define platform_dma_mapping_error sn_dma_mapping_error | 117 | #define platform_dma_mapping_error sn_dma_mapping_error |
| 117 | #define platform_dma_supported sn_dma_supported | 118 | #define platform_dma_supported sn_dma_supported |
| 119 | #define platform_migrate sn_migrate | ||
| 118 | 120 | ||
| 119 | #include <asm/sn/io.h> | 121 | #include <asm/sn/io.h> |
| 120 | 122 | ||
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index c7d9c9ed38ba..bfbbb8da79c7 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h | |||
| @@ -131,6 +131,8 @@ struct ia64_mca_cpu { | |||
| 131 | /* Array of physical addresses of each CPU's MCA area. */ | 131 | /* Array of physical addresses of each CPU's MCA area. */ |
| 132 | extern unsigned long __per_cpu_mca[NR_CPUS]; | 132 | extern unsigned long __per_cpu_mca[NR_CPUS]; |
| 133 | 133 | ||
| 134 | extern int cpe_vector; | ||
| 135 | extern int ia64_cpe_irq; | ||
| 134 | extern void ia64_mca_init(void); | 136 | extern void ia64_mca_init(void); |
| 135 | extern void ia64_mca_cpu_init(void *); | 137 | extern void ia64_mca_cpu_init(void *); |
| 136 | extern void ia64_os_mca_dispatch(void); | 138 | extern void ia64_os_mca_dispatch(void); |
diff --git a/include/asm-ia64/mutex.h b/include/asm-ia64/mutex.h index 458c1f7fbc18..5a3224f6af38 100644 --- a/include/asm-ia64/mutex.h +++ b/include/asm-ia64/mutex.h | |||
| @@ -1,9 +1,92 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Pull in the generic implementation for the mutex fastpath. | 2 | * ia64 implementation of the mutex fastpath. |
| 3 | * | 3 | * |
| 4 | * TODO: implement optimized primitives instead, or leave the generic | 4 | * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com> |
| 5 | * implementation in place, or pick the atomic_xchg() based generic | 5 | * |
| 6 | * implementation. (see asm-generic/mutex-xchg.h for details) | 6 | */ |
| 7 | |||
| 8 | #ifndef _ASM_MUTEX_H | ||
| 9 | #define _ASM_MUTEX_H | ||
| 10 | |||
| 11 | /** | ||
| 12 | * __mutex_fastpath_lock - try to take the lock by moving the count | ||
| 13 | * from 1 to a 0 value | ||
| 14 | * @count: pointer of type atomic_t | ||
| 15 | * @fail_fn: function to call if the original value was not 1 | ||
| 16 | * | ||
| 17 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
| 18 | * it wasn't 1 originally. This function MUST leave the value lower than | ||
| 19 | * 1 even when the "1" assertion wasn't true. | ||
| 20 | */ | ||
| 21 | static inline void | ||
| 22 | __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
| 23 | { | ||
| 24 | if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) | ||
| 25 | fail_fn(count); | ||
| 26 | } | ||
| 27 | |||
| 28 | /** | ||
| 29 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | ||
| 30 | * from 1 to a 0 value | ||
| 31 | * @count: pointer of type atomic_t | ||
| 32 | * @fail_fn: function to call if the original value was not 1 | ||
| 33 | * | ||
| 34 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
| 35 | * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, | ||
| 36 | * or anything the slow path function returns. | ||
| 37 | */ | ||
| 38 | static inline int | ||
| 39 | __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
| 40 | { | ||
| 41 | if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) | ||
| 42 | return fail_fn(count); | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | |||
| 46 | /** | ||
| 47 | * __mutex_fastpath_unlock - try to promote the count from 0 to 1 | ||
| 48 | * @count: pointer of type atomic_t | ||
| 49 | * @fail_fn: function to call if the original value was not 0 | ||
| 50 | * | ||
| 51 | * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. | ||
| 52 | * In the failure case, this function is allowed to either set the value to | ||
| 53 | * 1, or to set it to a value lower than 1. | ||
| 54 | * | ||
| 55 | * If the implementation sets it to a value of lower than 1, then the | ||
| 56 | * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs | ||
| 57 | * to return 0 otherwise. | ||
| 58 | */ | ||
| 59 | static inline void | ||
| 60 | __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
| 61 | { | ||
| 62 | int ret = ia64_fetchadd4_rel(count, 1); | ||
| 63 | if (unlikely(ret < 0)) | ||
| 64 | fail_fn(count); | ||
| 65 | } | ||
| 66 | |||
| 67 | #define __mutex_slowpath_needs_to_unlock() 1 | ||
| 68 | |||
| 69 | /** | ||
| 70 | * __mutex_fastpath_trylock - try to acquire the mutex, without waiting | ||
| 71 | * | ||
| 72 | * @count: pointer of type atomic_t | ||
| 73 | * @fail_fn: fallback function | ||
| 74 | * | ||
| 75 | * Change the count from 1 to a value lower than 1, and return 0 (failure) | ||
| 76 | * if it wasn't 1 originally, or return 1 (success) otherwise. This function | ||
| 77 | * MUST leave the value lower than 1 even when the "1" assertion wasn't true. | ||
| 78 | * Additionally, if the value was < 0 originally, this function must not leave | ||
| 79 | * it to 0 on failure. | ||
| 80 | * | ||
| 81 | * If the architecture has no effective trylock variant, it should call the | ||
| 82 | * <fail_fn> spinlock-based trylock variant unconditionally. | ||
| 7 | */ | 83 | */ |
| 84 | static inline int | ||
| 85 | __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
| 86 | { | ||
| 87 | if (likely(cmpxchg_acq(count, 1, 0)) == 1) | ||
| 88 | return 1; | ||
| 89 | return 0; | ||
| 90 | } | ||
| 8 | 91 | ||
| 9 | #include <asm-generic/mutex-dec.h> | 92 | #endif |
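
The new ia64 fastpaths implement the standard mutex count protocol (1 = unlocked, 0 = locked, negative = locked with waiters) using acquire/release fetchadds and one cmpxchg. For clarity, here is roughly the same protocol written with generic atomics, close to what asm-generic/mutex-dec.h does; the example_* names are invented and this is not the ia64 code itself:

	#include <asm/atomic.h>

	static inline void example_mutex_lock(atomic_t *count,
					      void (*slowpath)(atomic_t *))
	{
		/* Decrement with acquire semantics; only 1 -> 0 is uncontended. */
		if (atomic_dec_return(count) != 0)
			slowpath(count);
	}

	static inline void example_mutex_unlock(atomic_t *count,
						void (*slowpath)(atomic_t *))
	{
		/* Increment with release semantics; waiters leave count < 0. */
		if (atomic_inc_return(count) <= 0)
			slowpath(count);
	}

	static inline int example_mutex_trylock(atomic_t *count)
	{
		/* Single compare-and-swap: succeed only on the 1 -> 0 transition. */
		return atomic_cmpxchg(count, 1, 0) == 1;
	}
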
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 5e6362a786b7..3ab27333dae4 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h | |||
| @@ -57,6 +57,8 @@ | |||
| 57 | 57 | ||
| 58 | # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 58 | # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
| 59 | # define ARCH_HAS_HUGEPAGE_ONLY_RANGE | 59 | # define ARCH_HAS_HUGEPAGE_ONLY_RANGE |
| 60 | # define ARCH_HAS_PREPARE_HUGEPAGE_RANGE | ||
| 61 | # define ARCH_HAS_HUGETLB_FREE_PGD_RANGE | ||
| 60 | #endif /* CONFIG_HUGETLB_PAGE */ | 62 | #endif /* CONFIG_HUGETLB_PAGE */ |
| 61 | 63 | ||
| 62 | #ifdef __ASSEMBLY__ | 64 | #ifdef __ASSEMBLY__ |
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index e2560c58384b..c0f8144f2349 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h | |||
| @@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr) | |||
| 314 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) | 314 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) |
| 315 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) | 315 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) |
| 316 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) | 316 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) |
| 317 | #define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P)) | 317 | #define pte_mkhuge(pte) (__pte(pte_val(pte))) |
| 318 | 318 | ||
| 319 | /* | 319 | /* |
| 320 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a | 320 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a |
| @@ -505,9 +505,6 @@ extern struct page *zero_page_memmap_ptr; | |||
| 505 | #define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) | 505 | #define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) |
| 506 | #define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) | 506 | #define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) |
| 507 | #define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) | 507 | #define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) |
| 508 | struct mmu_gather; | ||
| 509 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, | ||
| 510 | unsigned long end, unsigned long floor, unsigned long ceiling); | ||
| 511 | #endif | 508 | #endif |
| 512 | 509 | ||
| 513 | /* | 510 | /* |
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 23c8e1be1911..128fefd8056f 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h | |||
| @@ -50,7 +50,8 @@ | |||
| 50 | #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ | 50 | #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ |
| 51 | #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ | 51 | #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ |
| 52 | #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ | 52 | #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ |
| 53 | /* bit 5 is currently unused */ | 53 | #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration |
| 54 | sync at ctx sw */ | ||
| 54 | #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ | 55 | #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ |
| 55 | #define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ | 56 | #define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ |
| 56 | 57 | ||
diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h index 608168d713d3..5e328ed5d01d 100644 --- a/include/asm-ia64/signal.h +++ b/include/asm-ia64/signal.h | |||
| @@ -158,8 +158,6 @@ struct k_sigaction { | |||
| 158 | 158 | ||
| 159 | #define ptrace_signal_deliver(regs, cookie) do { } while (0) | 159 | #define ptrace_signal_deliver(regs, cookie) do { } while (0) |
| 160 | 160 | ||
| 161 | void set_sigdelayed(pid_t pid, int signo, int code, void __user *addr); | ||
| 162 | |||
| 163 | #endif /* __KERNEL__ */ | 161 | #endif /* __KERNEL__ */ |
| 164 | 162 | ||
| 165 | # endif /* !__ASSEMBLY__ */ | 163 | # endif /* !__ASSEMBLY__ */ |
diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 2c32e4b77b54..1d9efe541662 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h | |||
| @@ -283,5 +283,13 @@ | |||
| 283 | #define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) | 283 | #define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) |
| 284 | #define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) | 284 | #define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) |
| 285 | 285 | ||
| 286 | /* | ||
| 287 | * Coretalk address breakdown | ||
| 288 | */ | ||
| 289 | #define CTALK_NASID_SHFT 40 | ||
| 290 | #define CTALK_NASID_MASK (0x3FFFULL << CTALK_NASID_SHFT) | ||
| 291 | #define CTALK_CID_SHFT 38 | ||
| 292 | #define CTALK_CID_MASK (0x3ULL << CTALK_CID_SHFT) | ||
| 293 | #define CTALK_NODE_OFFSET 0x3FFFFFFFFF | ||
| 286 | 294 | ||
| 287 | #endif /* _ASM_IA64_SN_ADDRS_H */ | 295 | #endif /* _ASM_IA64_SN_ADDRS_H */ |
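
The new Coretalk macros carve an address into nasid, cid, and node-offset fields. A small illustrative decoder built from those same masks (the #defines are copied from the hunk so the sketch is self-contained; the helper itself is not part of the patch):

	#include <linux/types.h>

	#define CTALK_NASID_SHFT	40
	#define CTALK_NASID_MASK	(0x3FFFULL << CTALK_NASID_SHFT)
	#define CTALK_CID_SHFT		38
	#define CTALK_CID_MASK		(0x3ULL << CTALK_CID_SHFT)
	#define CTALK_NODE_OFFSET	0x3FFFFFFFFF

	static inline void ctalk_decode(u64 addr, u64 *nasid, u64 *cid, u64 *off)
	{
		*nasid = (addr & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
		*cid   = (addr & CTALK_CID_MASK) >> CTALK_CID_SHFT;
		*off   = addr & CTALK_NODE_OFFSET;
	}
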
diff --git a/include/asm-ia64/sn/rw_mmr.h b/include/asm-ia64/sn/rw_mmr.h index f40fd1a5510d..2d78f4c5a45e 100644 --- a/include/asm-ia64/sn/rw_mmr.h +++ b/include/asm-ia64/sn/rw_mmr.h | |||
| @@ -3,15 +3,14 @@ | |||
| 3 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
| 4 | * for more details. | 4 | * for more details. |
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. | 6 | * Copyright (C) 2002-2006 Silicon Graphics, Inc. All Rights Reserved. |
| 7 | */ | 7 | */ |
| 8 | #ifndef _ASM_IA64_SN_RW_MMR_H | 8 | #ifndef _ASM_IA64_SN_RW_MMR_H |
| 9 | #define _ASM_IA64_SN_RW_MMR_H | 9 | #define _ASM_IA64_SN_RW_MMR_H |
| 10 | 10 | ||
| 11 | 11 | ||
| 12 | /* | 12 | /* |
| 13 | * This file contains macros used to access MMR registers via | 13 | * This file that access MMRs via uncached physical addresses. |
| 14 | * uncached physical addresses. | ||
| 15 | * pio_phys_read_mmr - read an MMR | 14 | * pio_phys_read_mmr - read an MMR |
| 16 | * pio_phys_write_mmr - write an MMR | 15 | * pio_phys_write_mmr - write an MMR |
| 17 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 | 16 | * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 |
| @@ -22,53 +21,8 @@ | |||
| 22 | */ | 21 | */ |
| 23 | 22 | ||
| 24 | 23 | ||
| 25 | extern inline long | 24 | extern long pio_phys_read_mmr(volatile long *mmr); |
| 26 | pio_phys_read_mmr(volatile long *mmr) | 25 | extern void pio_phys_write_mmr(volatile long *mmr, long val); |
| 27 | { | 26 | extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); |
| 28 | long val; | ||
| 29 | asm volatile | ||
| 30 | ("mov r2=psr;;" | ||
| 31 | "rsm psr.i | psr.dt;;" | ||
| 32 | "srlz.i;;" | ||
| 33 | "ld8.acq %0=[%1];;" | ||
| 34 | "mov psr.l=r2;;" | ||
| 35 | "srlz.i;;" | ||
| 36 | : "=r"(val) | ||
| 37 | : "r"(mmr) | ||
| 38 | : "r2"); | ||
| 39 | return val; | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | |||
| 44 | extern inline void | ||
| 45 | pio_phys_write_mmr(volatile long *mmr, long val) | ||
| 46 | { | ||
| 47 | asm volatile | ||
| 48 | ("mov r2=psr;;" | ||
| 49 | "rsm psr.i | psr.dt;;" | ||
| 50 | "srlz.i;;" | ||
| 51 | "st8.rel [%0]=%1;;" | ||
| 52 | "mov psr.l=r2;;" | ||
| 53 | "srlz.i;;" | ||
| 54 | :: "r"(mmr), "r"(val) | ||
| 55 | : "r2", "memory"); | ||
| 56 | } | ||
| 57 | |||
| 58 | extern inline void | ||
| 59 | pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2) | ||
| 60 | { | ||
| 61 | asm volatile | ||
| 62 | ("mov r2=psr;;" | ||
| 63 | "rsm psr.i | psr.dt | psr.ic;;" | ||
| 64 | "cmp.ne p9,p0=%2,r0;" | ||
| 65 | "srlz.i;;" | ||
| 66 | "st8.rel [%0]=%1;" | ||
| 67 | "(p9) st8.rel [%2]=%3;;" | ||
| 68 | "mov psr.l=r2;;" | ||
| 69 | "srlz.i;;" | ||
| 70 | :: "r"(mmr1), "r"(val1), "r"(mmr2), "r"(val2) | ||
| 71 | : "p9", "r2", "memory"); | ||
| 72 | } | ||
| 73 | 27 | ||
| 74 | #endif /* _ASM_IA64_SN_RW_MMR_H */ | 28 | #endif /* _ASM_IA64_SN_RW_MMR_H */ |
diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h index d4c990712eac..893468e1b41b 100644 --- a/include/asm-ia64/sn/tioce.h +++ b/include/asm-ia64/sn/tioce.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | /* CE ASIC part & mfgr information */ | 12 | /* CE ASIC part & mfgr information */ |
| 13 | #define TIOCE_PART_NUM 0xCE00 | 13 | #define TIOCE_PART_NUM 0xCE00 |
| 14 | #define TIOCE_MFGR_NUM 0x36 | 14 | #define TIOCE_SRC_ID 0x01 |
| 15 | #define TIOCE_REV_A 0x1 | 15 | #define TIOCE_REV_A 0x1 |
| 16 | 16 | ||
| 17 | /* CE Virtual PPB Vendor/Device IDs */ | 17 | /* CE Virtual PPB Vendor/Device IDs */ |
| @@ -20,7 +20,7 @@ | |||
| 20 | 20 | ||
| 21 | /* CE Host Bridge Vendor/Device IDs */ | 21 | /* CE Host Bridge Vendor/Device IDs */ |
| 22 | #define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 | 22 | #define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 |
| 23 | #define CE_HOST_BRIDGE_DEVICE_ID 0x4003 | 23 | #define CE_HOST_BRIDGE_DEVICE_ID 0x4001 |
| 24 | 24 | ||
| 25 | 25 | ||
| 26 | #define TIOCE_NUM_M40_ATES 4096 | 26 | #define TIOCE_NUM_M40_ATES 4096 |
| @@ -463,6 +463,25 @@ typedef volatile struct tioce { | |||
| 463 | u64 ce_end_of_struct; /* 0x044400 */ | 463 | u64 ce_end_of_struct; /* 0x044400 */ |
| 464 | } tioce_t; | 464 | } tioce_t; |
| 465 | 465 | ||
| 466 | /* ce_lsiX_gb_cfg1 register bit masks & shifts */ | ||
| 467 | #define CE_LSI_GB_CFG1_RXL0S_THS_SHFT 0 | ||
| 468 | #define CE_LSI_GB_CFG1_RXL0S_THS_MASK (0xffULL << 0) | ||
| 469 | #define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT 8 | ||
| 470 | #define CE_LSI_GB_CFG1_RXL0S_SMP_MASK (0xfULL << 8) | ||
| 471 | #define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT 12 | ||
| 472 | #define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK (0x7ULL << 12) | ||
| 473 | #define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT 15 | ||
| 474 | #define CE_LSI_GB_CFG1_RXL0S_FLT_MASK (0x1ULL << 15) | ||
| 475 | #define CE_LSI_GB_CFG1_LPBK_SEL_SHFT 16 | ||
| 476 | #define CE_LSI_GB_CFG1_LPBK_SEL_MASK (0x3ULL << 16) | ||
| 477 | #define CE_LSI_GB_CFG1_LPBK_EN_SHFT 18 | ||
| 478 | #define CE_LSI_GB_CFG1_LPBK_EN_MASK (0x1ULL << 18) | ||
| 479 | #define CE_LSI_GB_CFG1_RVRS_LB_SHFT 19 | ||
| 480 | #define CE_LSI_GB_CFG1_RVRS_LB_MASK (0x1ULL << 19) | ||
| 481 | #define CE_LSI_GB_CFG1_RVRS_CLK_SHFT 20 | ||
| 482 | #define CE_LSI_GB_CFG1_RVRS_CLK_MASK (0x3ULL << 20) | ||
| 483 | #define CE_LSI_GB_CFG1_SLF_TS_SHFT 24 | ||
| 484 | #define CE_LSI_GB_CFG1_SLF_TS_MASK (0xfULL << 24) | ||
| 466 | 485 | ||
| 467 | /* ce_adm_int_mask/ce_adm_int_status register bit defines */ | 486 | /* ce_adm_int_mask/ce_adm_int_status register bit defines */ |
| 468 | #define CE_ADM_INT_CE_ERROR_SHFT 0 | 487 | #define CE_ADM_INT_CE_ERROR_SHFT 0 |
| @@ -592,6 +611,11 @@ typedef volatile struct tioce { | |||
| 592 | #define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) | 611 | #define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) |
| 593 | #define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) | 612 | #define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) |
| 594 | #define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) | 613 | #define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) |
| 614 | #define CE_URE_WRT_MRG_TIMER_SHFT 12 | ||
| 615 | #define CE_URE_WRT_MRG_TIMER_MASK (0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT) | ||
| 616 | #define CE_URE_WRT_MRG_TIMER(x) (((u64)(x) << \ | ||
| 617 | CE_URE_WRT_MRG_TIMER_SHFT) & \ | ||
| 618 | CE_URE_WRT_MRG_TIMER_MASK) | ||
| 595 | #define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) | 619 | #define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) |
| 596 | #define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) | 620 | #define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) |
| 597 | #define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) | 621 | #define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) |
| @@ -653,8 +677,12 @@ typedef volatile struct tioce { | |||
| 653 | #define CE_URE_SI (0x1ULL << 0) | 677 | #define CE_URE_SI (0x1ULL << 0) |
| 654 | #define CE_URE_ELAL_SHFT 4 | 678 | #define CE_URE_ELAL_SHFT 4 |
| 655 | #define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) | 679 | #define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) |
| 680 | #define CE_URE_ELAL_SET(n) (((u64)(n) << CE_URE_ELAL_SHFT) & \ | ||
| 681 | CE_URE_ELAL_MASK) | ||
| 656 | #define CE_URE_ELAL1_SHFT 8 | 682 | #define CE_URE_ELAL1_SHFT 8 |
| 657 | #define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) | 683 | #define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) |
| 684 | #define CE_URE_ELAL1_SET(n) (((u64)(n) << CE_URE_ELAL1_SHFT) & \ | ||
| 685 | CE_URE_ELAL1_MASK) | ||
| 658 | #define CE_URE_SCC (0x1ULL << 12) | 686 | #define CE_URE_SCC (0x1ULL << 12) |
| 659 | #define CE_URE_PN1_SHFT 16 | 687 | #define CE_URE_PN1_SHFT 16 |
| 660 | #define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) | 688 | #define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) |
| @@ -675,8 +703,12 @@ typedef volatile struct tioce { | |||
| 675 | #define CE_URE_HPC (0x1ULL << 6) | 703 | #define CE_URE_HPC (0x1ULL << 6) |
| 676 | #define CE_URE_SPLV_SHFT 7 | 704 | #define CE_URE_SPLV_SHFT 7 |
| 677 | #define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) | 705 | #define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) |
| 706 | #define CE_URE_SPLV_SET(n) (((u64)(n) << CE_URE_SPLV_SHFT) & \ | ||
| 707 | CE_URE_SPLV_MASK) | ||
| 678 | #define CE_URE_SPLS_SHFT 15 | 708 | #define CE_URE_SPLS_SHFT 15 |
| 679 | #define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) | 709 | #define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) |
| 710 | #define CE_URE_SPLS_SET(n) (((u64)(n) << CE_URE_SPLS_SHFT) & \ | ||
| 711 | CE_URE_SPLS_MASK) | ||
| 680 | #define CE_URE_PSN1_SHFT 19 | 712 | #define CE_URE_PSN1_SHFT 19 |
| 681 | #define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) | 713 | #define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) |
| 682 | #define CE_URE_PSN2_SHFT 32 | 714 | #define CE_URE_PSN2_SHFT 32 |
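
The new ce_lsi/ce_ure helpers follow the header's existing shift-and-mask pattern; a register field is assembled roughly like this (the values are illustrative only):

    /* sketch: build an upstream-request config value from the macros above */
    u64 ure = CE_URE_RD_MRG_ENABLE |
              CE_URE_WRT_MRG_ENABLE1 |
              CE_URE_WRT_MRG_TIMER(0x100);  /* timer count is a made-up example */
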
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index df7f5f4f3cde..aa3b8ace9030 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h | |||
| @@ -1227,28 +1227,6 @@ xpc_map_bte_errors(bte_result_t error) | |||
| 1227 | 1227 | ||
| 1228 | 1228 | ||
| 1229 | 1229 | ||
| 1230 | static inline void * | ||
| 1231 | xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) | ||
| 1232 | { | ||
| 1233 | /* see if kmalloc will give us cachline aligned memory by default */ | ||
| 1234 | *base = kmalloc(size, flags); | ||
| 1235 | if (*base == NULL) { | ||
| 1236 | return NULL; | ||
| 1237 | } | ||
| 1238 | if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { | ||
| 1239 | return *base; | ||
| 1240 | } | ||
| 1241 | kfree(*base); | ||
| 1242 | |||
| 1243 | /* nope, we'll have to do it ourselves */ | ||
| 1244 | *base = kmalloc(size + L1_CACHE_BYTES, flags); | ||
| 1245 | if (*base == NULL) { | ||
| 1246 | return NULL; | ||
| 1247 | } | ||
| 1248 | return (void *) L1_CACHE_ALIGN((u64) *base); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | |||
| 1252 | /* | 1230 | /* |
| 1253 | * Check to see if there is any channel activity to/from the specified | 1231 | * Check to see if there is any channel activity to/from the specified |
| 1254 | * partition. | 1232 | * partition. |
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 062538715623..cd4233d66f15 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h | |||
| @@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task_struct *task); | |||
| 244 | __ia64_save_fpu((prev)->thread.fph); \ | 244 | __ia64_save_fpu((prev)->thread.fph); \ |
| 245 | } \ | 245 | } \ |
| 246 | __switch_to(prev, next, last); \ | 246 | __switch_to(prev, next, last); \ |
| 247 | /* "next" in old context is "current" in new context */ \ | ||
| 248 | if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ | ||
| 249 | (task_cpu(current) != \ | ||
| 250 | task_thread_info(current)->last_cpu))) { \ | ||
| 251 | platform_migrate(current); \ | ||
| 252 | task_thread_info(current)->last_cpu = task_cpu(current); \ | ||
| 253 | } \ | ||
| 247 | } while (0) | 254 | } while (0) |
| 248 | #else | 255 | #else |
| 249 | # define switch_to(prev,next,last) __switch_to(prev, next, last) | 256 | # define switch_to(prev,next,last) __switch_to(prev, next, last) |
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 1d6518fe1f02..56394a2c7055 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h | |||
| @@ -26,16 +26,10 @@ struct thread_info { | |||
| 26 | struct exec_domain *exec_domain;/* execution domain */ | 26 | struct exec_domain *exec_domain;/* execution domain */ |
| 27 | __u32 flags; /* thread_info flags (see TIF_*) */ | 27 | __u32 flags; /* thread_info flags (see TIF_*) */ |
| 28 | __u32 cpu; /* current CPU */ | 28 | __u32 cpu; /* current CPU */ |
| 29 | __u32 last_cpu; /* Last CPU thread ran on */ | ||
| 29 | mm_segment_t addr_limit; /* user-level address space limit */ | 30 | mm_segment_t addr_limit; /* user-level address space limit */ |
| 30 | int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ | 31 | int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ |
| 31 | struct restart_block restart_block; | 32 | struct restart_block restart_block; |
| 32 | struct { | ||
| 33 | int signo; | ||
| 34 | int code; | ||
| 35 | void __user *addr; | ||
| 36 | unsigned long start_time; | ||
| 37 | pid_t pid; | ||
| 38 | } sigdelayed; /* Saved information for TIF_SIGDELAYED */ | ||
| 39 | }; | 33 | }; |
| 40 | 34 | ||
| 41 | #define THREAD_SIZE KERNEL_STACK_SIZE | 35 | #define THREAD_SIZE KERNEL_STACK_SIZE |
| @@ -89,7 +83,6 @@ struct thread_info { | |||
| 89 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | 83 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ |
| 90 | #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ | 84 | #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ |
| 91 | #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ | 85 | #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ |
| 92 | #define TIF_SIGDELAYED 5 /* signal delayed from MCA/INIT/NMI/PMI context */ | ||
| 93 | #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ | 86 | #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ |
| 94 | #define TIF_MEMDIE 17 | 87 | #define TIF_MEMDIE 17 |
| 95 | #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ | 88 | #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ |
| @@ -101,13 +94,12 @@ struct thread_info { | |||
| 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 94 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
| 102 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | 95 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) |
| 103 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | 96 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) |
| 104 | #define _TIF_SIGDELAYED (1 << TIF_SIGDELAYED) | ||
| 105 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) | 97 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) |
| 106 | #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) | 98 | #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) |
| 107 | #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) | 99 | #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) |
| 108 | 100 | ||
| 109 | /* "work to do on user-return" bits */ | 101 | /* "work to do on user-return" bits */ |
| 110 | #define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED) | 102 | #define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) |
| 111 | /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ | 103 | /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ |
| 112 | #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) | 104 | #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) |
| 113 | 105 | ||
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index e38931379a72..185ee15963a1 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h | |||
| @@ -468,11 +468,6 @@ extern pgd_t swapper_pg_dir[]; | |||
| 468 | 468 | ||
| 469 | extern void paging_init(void); | 469 | extern void paging_init(void); |
| 470 | 470 | ||
| 471 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 472 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | ||
| 473 | free_pgd_range(tlb, addr, end, floor, ceiling) | ||
| 474 | #endif | ||
| 475 | |||
| 476 | /* | 471 | /* |
| 477 | * This gets called at the end of handling a page fault, when | 472 | * This gets called at the end of handling a page fault, when |
| 478 | * the kernel has put a new PTE into the page table for the process. | 473 | * the kernel has put a new PTE into the page table for the process. |
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 3417dd71ab43..e28aaf28e4a8 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h | |||
| @@ -158,11 +158,4 @@ static inline void pte_free(struct page *pte) | |||
| 158 | 158 | ||
| 159 | #define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte) | 159 | #define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte) |
| 160 | 160 | ||
| 161 | /* | ||
| 162 | * This establishes kernel virtual mappings (e.g., as a result of a | ||
| 163 | * vmalloc call). Since s390-esame uses a separate kernel page table, | ||
| 164 | * there is nothing to do here... :) | ||
| 165 | */ | ||
| 166 | #define set_pgdir(addr,entry) do { } while(0) | ||
| 167 | |||
| 168 | #endif /* _S390_PGALLOC_H */ | 161 | #endif /* _S390_PGALLOC_H */ |
diff --git a/include/asm-sh64/pgalloc.h b/include/asm-sh64/pgalloc.h index 678251ac1db8..b29dd468817e 100644 --- a/include/asm-sh64/pgalloc.h +++ b/include/asm-sh64/pgalloc.h | |||
| @@ -167,22 +167,6 @@ static __inline__ void pmd_free(pmd_t *pmd) | |||
| 167 | 167 | ||
| 168 | extern int do_check_pgt_cache(int, int); | 168 | extern int do_check_pgt_cache(int, int); |
| 169 | 169 | ||
| 170 | static inline void set_pgdir(unsigned long address, pgd_t entry) | ||
| 171 | { | ||
| 172 | struct task_struct * p; | ||
| 173 | pgd_t *pgd; | ||
| 174 | |||
| 175 | read_lock(&tasklist_lock); | ||
| 176 | for_each_process(p) { | ||
| 177 | if (!p->mm) | ||
| 178 | continue; | ||
| 179 | *pgd_offset(p->mm,address) = entry; | ||
| 180 | } | ||
| 181 | read_unlock(&tasklist_lock); | ||
| 182 | for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd) | ||
| 183 | pgd[address >> PGDIR_SHIFT] = entry; | ||
| 184 | } | ||
| 185 | |||
| 186 | #define pmd_populate_kernel(mm, pmd, pte) \ | 170 | #define pmd_populate_kernel(mm, pmd, pte) \ |
| 187 | set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte))) | 171 | set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte))) |
| 188 | 172 | ||
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 715fd94cf577..a617d364d08d 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h | |||
| @@ -273,7 +273,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } | |||
| 273 | static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } | 273 | static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } |
| 274 | static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } | 274 | static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } |
| 275 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } | 275 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } |
| 276 | static inline int pte_huge(pte_t pte) { return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; } | 276 | static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } |
| 277 | 277 | ||
| 278 | static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 278 | static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
| 279 | static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 279 | static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
| @@ -285,7 +285,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _ | |||
| 285 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } | 285 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } |
| 286 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } | 286 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } |
| 287 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } | 287 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } |
| 288 | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; } | 288 | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } |
| 289 | 289 | ||
| 290 | struct vm_area_struct; | 290 | struct vm_area_struct; |
| 291 | 291 | ||
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 68d82ad6b17c..d6f1019625af 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
| @@ -20,10 +20,7 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long) | |||
| 20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); | 20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); |
| 21 | int hugetlb_report_meminfo(char *); | 21 | int hugetlb_report_meminfo(char *); |
| 22 | int hugetlb_report_node_meminfo(int, char *); | 22 | int hugetlb_report_node_meminfo(int, char *); |
| 23 | int is_hugepage_mem_enough(size_t); | ||
| 24 | unsigned long hugetlb_total_pages(void); | 23 | unsigned long hugetlb_total_pages(void); |
| 25 | struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); | ||
| 26 | void free_huge_page(struct page *); | ||
| 27 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 24 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
| 28 | unsigned long address, int write_access); | 25 | unsigned long address, int write_access); |
| 29 | 26 | ||
| @@ -39,18 +36,35 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | |||
| 39 | int write); | 36 | int write); |
| 40 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 37 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
| 41 | pmd_t *pmd, int write); | 38 | pmd_t *pmd, int write); |
| 42 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len); | ||
| 43 | int pmd_huge(pmd_t pmd); | 39 | int pmd_huge(pmd_t pmd); |
| 40 | void hugetlb_change_protection(struct vm_area_struct *vma, | ||
| 41 | unsigned long address, unsigned long end, pgprot_t newprot); | ||
| 44 | 42 | ||
| 45 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE | 43 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE |
| 46 | #define is_hugepage_only_range(mm, addr, len) 0 | 44 | #define is_hugepage_only_range(mm, addr, len) 0 |
| 47 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 45 | #endif |
| 48 | do { } while (0) | 46 | |
| 47 | #ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE | ||
| 48 | #define hugetlb_free_pgd_range free_pgd_range | ||
| 49 | #else | ||
| 50 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, | ||
| 51 | unsigned long end, unsigned long floor, | ||
| 52 | unsigned long ceiling); | ||
| 49 | #endif | 53 | #endif |
| 50 | 54 | ||
| 51 | #ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE | 55 | #ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE |
| 52 | #define prepare_hugepage_range(addr, len) \ | 56 | /* |
| 53 | is_aligned_hugepage_range(addr, len) | 57 | * If the arch doesn't supply something else, assume that hugepage |
| 58 | * size aligned regions are ok without further preparation. | ||
| 59 | */ | ||
| 60 | static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) | ||
| 61 | { | ||
| 62 | if (len & ~HPAGE_MASK) | ||
| 63 | return -EINVAL; | ||
| 64 | if (addr & ~HPAGE_MASK) | ||
| 65 | return -EINVAL; | ||
| 66 | return 0; | ||
| 67 | } | ||
| 54 | #else | 68 | #else |
| 55 | int prepare_hugepage_range(unsigned long addr, unsigned long len); | 69 | int prepare_hugepage_range(unsigned long addr, unsigned long len); |
| 56 | #endif | 70 | #endif |
| @@ -87,20 +101,17 @@ static inline unsigned long hugetlb_total_pages(void) | |||
| 87 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) | 101 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) |
| 88 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) | 102 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) |
| 89 | #define unmap_hugepage_range(vma, start, end) BUG() | 103 | #define unmap_hugepage_range(vma, start, end) BUG() |
| 90 | #define is_hugepage_mem_enough(size) 0 | ||
| 91 | #define hugetlb_report_meminfo(buf) 0 | 104 | #define hugetlb_report_meminfo(buf) 0 |
| 92 | #define hugetlb_report_node_meminfo(n, buf) 0 | 105 | #define hugetlb_report_node_meminfo(n, buf) 0 |
| 93 | #define follow_huge_pmd(mm, addr, pmd, write) NULL | 106 | #define follow_huge_pmd(mm, addr, pmd, write) NULL |
| 94 | #define is_aligned_hugepage_range(addr, len) 0 | ||
| 95 | #define prepare_hugepage_range(addr, len) (-EINVAL) | 107 | #define prepare_hugepage_range(addr, len) (-EINVAL) |
| 96 | #define pmd_huge(x) 0 | 108 | #define pmd_huge(x) 0 |
| 97 | #define is_hugepage_only_range(mm, addr, len) 0 | 109 | #define is_hugepage_only_range(mm, addr, len) 0 |
| 98 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 110 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) |
| 99 | do { } while (0) | ||
| 100 | #define alloc_huge_page(vma, addr) ({ NULL; }) | ||
| 101 | #define free_huge_page(p) ({ (void)(p); BUG(); }) | ||
| 102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) | 111 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) |
| 103 | 112 | ||
| 113 | #define hugetlb_change_protection(vma, address, end, newprot) | ||
| 114 | |||
| 104 | #ifndef HPAGE_MASK | 115 | #ifndef HPAGE_MASK |
| 105 | #define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ | 116 | #define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ |
| 106 | #define HPAGE_SIZE PAGE_SIZE | 117 | #define HPAGE_SIZE PAGE_SIZE |
| @@ -128,6 +139,8 @@ struct hugetlbfs_sb_info { | |||
| 128 | 139 | ||
| 129 | struct hugetlbfs_inode_info { | 140 | struct hugetlbfs_inode_info { |
| 130 | struct shared_policy policy; | 141 | struct shared_policy policy; |
| 142 | /* Protected by the (global) hugetlb_lock */ | ||
| 143 | unsigned long prereserved_hpages; | ||
| 131 | struct inode vfs_inode; | 144 | struct inode vfs_inode; |
| 132 | }; | 145 | }; |
| 133 | 146 | ||
| @@ -144,6 +157,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) | |||
| 144 | extern struct file_operations hugetlbfs_file_operations; | 157 | extern struct file_operations hugetlbfs_file_operations; |
| 145 | extern struct vm_operations_struct hugetlb_vm_ops; | 158 | extern struct vm_operations_struct hugetlb_vm_ops; |
| 146 | struct file *hugetlb_zero_setup(size_t); | 159 | struct file *hugetlb_zero_setup(size_t); |
| 160 | int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, | ||
| 161 | unsigned long atleast_hpages); | ||
| 162 | void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, | ||
| 163 | unsigned long atmost_hpages); | ||
| 147 | int hugetlb_get_quota(struct address_space *mapping); | 164 | int hugetlb_get_quota(struct address_space *mapping); |
| 148 | void hugetlb_put_quota(struct address_space *mapping); | 165 | void hugetlb_put_quota(struct address_space *mapping); |
| 149 | 166 | ||
diff --git a/include/linux/migrate.h b/include/linux/migrate.h new file mode 100644 index 000000000000..7d09962c3c0b --- /dev/null +++ b/include/linux/migrate.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | #ifndef _LINUX_MIGRATE_H | ||
| 2 | #define _LINUX_MIGRATE_H | ||
| 3 | |||
| 4 | #include <linux/config.h> | ||
| 5 | #include <linux/mm.h> | ||
| 6 | |||
| 7 | #ifdef CONFIG_MIGRATION | ||
| 8 | extern int isolate_lru_page(struct page *p, struct list_head *pagelist); | ||
| 9 | extern int putback_lru_pages(struct list_head *l); | ||
| 10 | extern int migrate_page(struct page *, struct page *); | ||
| 11 | extern void migrate_page_copy(struct page *, struct page *); | ||
| 12 | extern int migrate_page_remove_references(struct page *, struct page *, int); | ||
| 13 | extern int migrate_pages(struct list_head *l, struct list_head *t, | ||
| 14 | struct list_head *moved, struct list_head *failed); | ||
| 15 | int migrate_pages_to(struct list_head *pagelist, | ||
| 16 | struct vm_area_struct *vma, int dest); | ||
| 17 | extern int fail_migrate_page(struct page *, struct page *); | ||
| 18 | |||
| 19 | extern int migrate_prep(void); | ||
| 20 | |||
| 21 | #else | ||
| 22 | |||
| 23 | static inline int isolate_lru_page(struct page *p, struct list_head *list) | ||
| 24 | { return -ENOSYS; } | ||
| 25 | static inline int putback_lru_pages(struct list_head *l) { return 0; } | ||
| 26 | static inline int migrate_pages(struct list_head *l, struct list_head *t, | ||
| 27 | struct list_head *moved, struct list_head *failed) { return -ENOSYS; } | ||
| 28 | |||
| 29 | static inline int migrate_prep(void) { return -ENOSYS; } | ||
| 30 | |||
| 31 | /* Possible settings for the migrate_page() method in address_operations */ | ||
| 32 | #define migrate_page NULL | ||
| 33 | #define fail_migrate_page NULL | ||
| 34 | |||
| 35 | #endif /* CONFIG_MIGRATION */ | ||
| 36 | #endif /* _LINUX_MIGRATE_H */ | ||
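
A sketch of how the new <linux/migrate.h> entry points are meant to be strung together; the helper name, the empty target list, and the return-value convention assumed in the comments are illustrative, not taken from the patch:

    #include <linux/migrate.h>
    #include <linux/list.h>
    #include <linux/mm.h>

    /* sketch only: isolate one page, migrate the list, put back failures */
    static void migrate_one_page_sketch(struct page *page)
    {
            LIST_HEAD(pagelist);    /* pages taken off the LRU */
            LIST_HEAD(newlist);     /* optional pre-allocated target pages */
            LIST_HEAD(moved);
            LIST_HEAD(failed);

            migrate_prep();
            /* assumption: a nonzero return means the page could not be isolated */
            if (isolate_lru_page(page, &pagelist))
                    return;
            migrate_pages(&pagelist, &newlist, &moved, &failed);
            putback_lru_pages(&failed);     /* re-add whatever could not be moved */
    }
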
diff --git a/include/linux/mm.h b/include/linux/mm.h index 498ff8778fb6..6aa016f1d3ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -286,43 +286,34 @@ struct page { | |||
| 286 | * | 286 | * |
| 287 | * Also, many kernel routines increase the page count before a critical | 287 | * Also, many kernel routines increase the page count before a critical |
| 288 | * routine so they can be sure the page doesn't go away from under them. | 288 | * routine so they can be sure the page doesn't go away from under them. |
| 289 | * | ||
| 290 | * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we | ||
| 291 | * can use atomic_add_negative(-1, page->_count) to detect when the page | ||
| 292 | * becomes free and so that we can also use atomic_inc_and_test to atomically | ||
| 293 | * detect when we just tried to grab a ref on a page which some other CPU has | ||
| 294 | * already deemed to be freeable. | ||
| 295 | * | ||
| 296 | * NO code should make assumptions about this internal detail! Use the provided | ||
| 297 | * macros which retain the old rules: page_count(page) == 0 is a free page. | ||
| 298 | */ | 289 | */ |
| 299 | 290 | ||
| 300 | /* | 291 | /* |
| 301 | * Drop a ref, return true if the logical refcount fell to zero (the page has | 292 | * Drop a ref, return true if the logical refcount fell to zero (the page has |
| 302 | * no users) | 293 | * no users) |
| 303 | */ | 294 | */ |
| 304 | #define put_page_testzero(p) \ | 295 | static inline int put_page_testzero(struct page *page) |
| 305 | ({ \ | 296 | { |
| 306 | BUG_ON(atomic_read(&(p)->_count) == -1);\ | 297 | BUG_ON(atomic_read(&page->_count) == 0); |
| 307 | atomic_add_negative(-1, &(p)->_count); \ | 298 | return atomic_dec_and_test(&page->_count); |
| 308 | }) | 299 | } |
| 309 | 300 | ||
| 310 | /* | 301 | /* |
| 311 | * Grab a ref, return true if the page previously had a logical refcount of | 302 | * Try to grab a ref unless the page has a refcount of zero, return false if |
| 312 | * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page | 303 | * that is the case. |
| 313 | */ | 304 | */ |
| 314 | #define get_page_testone(p) atomic_inc_and_test(&(p)->_count) | 305 | static inline int get_page_unless_zero(struct page *page) |
| 315 | 306 | { | |
| 316 | #define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1) | 307 | return atomic_inc_not_zero(&page->_count); |
| 317 | #define __put_page(p) atomic_dec(&(p)->_count) | 308 | } |
| 318 | 309 | ||
| 319 | extern void FASTCALL(__page_cache_release(struct page *)); | 310 | extern void FASTCALL(__page_cache_release(struct page *)); |
| 320 | 311 | ||
| 321 | static inline int page_count(struct page *page) | 312 | static inline int page_count(struct page *page) |
| 322 | { | 313 | { |
| 323 | if (PageCompound(page)) | 314 | if (unlikely(PageCompound(page))) |
| 324 | page = (struct page *)page_private(page); | 315 | page = (struct page *)page_private(page); |
| 325 | return atomic_read(&page->_count) + 1; | 316 | return atomic_read(&page->_count); |
| 326 | } | 317 | } |
| 327 | 318 | ||
| 328 | static inline void get_page(struct page *page) | 319 | static inline void get_page(struct page *page) |
| @@ -332,8 +323,19 @@ static inline void get_page(struct page *page) | |||
| 332 | atomic_inc(&page->_count); | 323 | atomic_inc(&page->_count); |
| 333 | } | 324 | } |
| 334 | 325 | ||
| 326 | /* | ||
| 327 | * Setup the page count before being freed into the page allocator for | ||
| 328 | * the first time (boot or memory hotplug) | ||
| 329 | */ | ||
| 330 | static inline void init_page_count(struct page *page) | ||
| 331 | { | ||
| 332 | atomic_set(&page->_count, 1); | ||
| 333 | } | ||
| 334 | |||
| 335 | void put_page(struct page *page); | 335 | void put_page(struct page *page); |
| 336 | 336 | ||
| 337 | void split_page(struct page *page, unsigned int order); | ||
| 338 | |||
| 337 | /* | 339 | /* |
| 338 | * Multiple processes may "see" the same page. E.g. for untouched | 340 | * Multiple processes may "see" the same page. E.g. for untouched |
| 339 | * mappings of /dev/null, all processes see the same page full of | 341 | * mappings of /dev/null, all processes see the same page full of |
| @@ -1046,7 +1048,7 @@ int in_gate_area_no_task(unsigned long addr); | |||
| 1046 | 1048 | ||
| 1047 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | 1049 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, |
| 1048 | void __user *, size_t *, loff_t *); | 1050 | void __user *, size_t *, loff_t *); |
| 1049 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 1051 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, |
| 1050 | unsigned long lru_pages); | 1052 | unsigned long lru_pages); |
| 1051 | void drop_pagecache(void); | 1053 | void drop_pagecache(void); |
| 1052 | void drop_slab(void); | 1054 | void drop_slab(void); |
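
With _count now holding the real reference count (a free page reads 0), the new helpers support the usual speculative-pin idiom; the caller below is hypothetical:

    #include <linux/mm.h>

    /* sketch: try to pin a page that may be freed concurrently */
    static int try_pin_page_sketch(struct page *page)
    {
            if (!get_page_unless_zero(page))        /* refcount was zero: page is (being) freed */
                    return 0;

            /* ... page is safely pinned here ... */

            put_page(page);                         /* put_page_testzero() runs under the hood */
            return 1;
    }
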
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8ac854f7f190..3b6723dfaff3 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
| @@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, struct page *page) | |||
| 32 | { | 32 | { |
| 33 | list_del(&page->lru); | 33 | list_del(&page->lru); |
| 34 | if (PageActive(page)) { | 34 | if (PageActive(page)) { |
| 35 | ClearPageActive(page); | 35 | __ClearPageActive(page); |
| 36 | zone->nr_active--; | 36 | zone->nr_active--; |
| 37 | } else { | 37 | } else { |
| 38 | zone->nr_inactive--; | 38 | zone->nr_inactive--; |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d52999c43336..9ea629c02a4b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
| @@ -86,8 +86,9 @@ | |||
| 86 | * - The __xxx_page_state variants can be used safely when interrupts are | 86 | * - The __xxx_page_state variants can be used safely when interrupts are |
| 87 | * disabled. | 87 | * disabled. |
| 88 | * - The __xxx_page_state variants can be used if the field is only | 88 | * - The __xxx_page_state variants can be used if the field is only |
| 89 | * modified from process context, or only modified from interrupt context. | 89 | * modified from process context and protected from preemption, or only |
| 90 | * In this case, the field should be commented here. | 90 | * modified from interrupt context. In this case, the field should be |
| 91 | * commented here. | ||
| 91 | */ | 92 | */ |
| 92 | struct page_state { | 93 | struct page_state { |
| 93 | unsigned long nr_dirty; /* Dirty writeable pages */ | 94 | unsigned long nr_dirty; /* Dirty writeable pages */ |
| @@ -239,22 +240,19 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta); | |||
| 239 | #define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) | 240 | #define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) |
| 240 | #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) | 241 | #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) |
| 241 | 242 | ||
| 242 | #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) | ||
| 243 | #define PageLRU(page) test_bit(PG_lru, &(page)->flags) | 243 | #define PageLRU(page) test_bit(PG_lru, &(page)->flags) |
| 244 | #define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) | 244 | #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) |
| 245 | #define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) | 245 | #define ClearPageLRU(page) clear_bit(PG_lru, &(page)->flags) |
| 246 | #define __ClearPageLRU(page) __clear_bit(PG_lru, &(page)->flags) | ||
| 246 | 247 | ||
| 247 | #define PageActive(page) test_bit(PG_active, &(page)->flags) | 248 | #define PageActive(page) test_bit(PG_active, &(page)->flags) |
| 248 | #define SetPageActive(page) set_bit(PG_active, &(page)->flags) | 249 | #define SetPageActive(page) set_bit(PG_active, &(page)->flags) |
| 249 | #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) | 250 | #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) |
| 250 | #define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags) | 251 | #define __ClearPageActive(page) __clear_bit(PG_active, &(page)->flags) |
| 251 | #define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags) | ||
| 252 | 252 | ||
| 253 | #define PageSlab(page) test_bit(PG_slab, &(page)->flags) | 253 | #define PageSlab(page) test_bit(PG_slab, &(page)->flags) |
| 254 | #define SetPageSlab(page) set_bit(PG_slab, &(page)->flags) | 254 | #define __SetPageSlab(page) __set_bit(PG_slab, &(page)->flags) |
| 255 | #define ClearPageSlab(page) clear_bit(PG_slab, &(page)->flags) | 255 | #define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags) |
| 256 | #define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags) | ||
| 257 | #define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags) | ||
| 258 | 256 | ||
| 259 | #ifdef CONFIG_HIGHMEM | 257 | #ifdef CONFIG_HIGHMEM |
| 260 | #define PageHighMem(page) is_highmem(page_zone(page)) | 258 | #define PageHighMem(page) is_highmem(page_zone(page)) |
| @@ -329,8 +327,8 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta); | |||
| 329 | #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) | 327 | #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) |
| 330 | 328 | ||
| 331 | #define PageCompound(page) test_bit(PG_compound, &(page)->flags) | 329 | #define PageCompound(page) test_bit(PG_compound, &(page)->flags) |
| 332 | #define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) | 330 | #define __SetPageCompound(page) __set_bit(PG_compound, &(page)->flags) |
| 333 | #define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) | 331 | #define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags) |
| 334 | 332 | ||
| 335 | #ifdef CONFIG_SWAP | 333 | #ifdef CONFIG_SWAP |
| 336 | #define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) | 334 | #define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) |
diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0b2ba67ff13c..b739ac1f7ca0 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h | |||
| @@ -11,8 +11,6 @@ | |||
| 11 | #ifndef _LINUX_RTC_H_ | 11 | #ifndef _LINUX_RTC_H_ |
| 12 | #define _LINUX_RTC_H_ | 12 | #define _LINUX_RTC_H_ |
| 13 | 13 | ||
| 14 | #include <linux/interrupt.h> | ||
| 15 | |||
| 16 | /* | 14 | /* |
| 17 | * The struct used to pass data via the following ioctl. Similar to the | 15 | * The struct used to pass data via the following ioctl. Similar to the |
| 18 | * struct tm in <time.h>, but it needs to be here so that the kernel | 16 | * struct tm in <time.h>, but it needs to be here so that the kernel |
| @@ -95,6 +93,8 @@ struct rtc_pll_info { | |||
| 95 | 93 | ||
| 96 | #ifdef __KERNEL__ | 94 | #ifdef __KERNEL__ |
| 97 | 95 | ||
| 96 | #include <linux/interrupt.h> | ||
| 97 | |||
| 98 | typedef struct rtc_task { | 98 | typedef struct rtc_task { |
| 99 | void (*func)(void *private_data); | 99 | void (*func)(void *private_data); |
| 100 | void *private_data; | 100 | void *private_data; |
diff --git a/include/linux/slab.h b/include/linux/slab.h index 8cf52939d0ab..2b28c849d75a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
| @@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t; | |||
| 38 | #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ | 38 | #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ |
| 39 | #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ | 39 | #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ |
| 40 | #define SLAB_POISON 0x00000800UL /* Poison objects */ | 40 | #define SLAB_POISON 0x00000800UL /* Poison objects */ |
| 41 | #define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ | ||
| 42 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ | 41 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ |
| 43 | #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ | 42 | #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ |
| 44 | #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ | 43 | #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ |
| @@ -118,7 +117,7 @@ extern void *kzalloc(size_t, gfp_t); | |||
| 118 | */ | 117 | */ |
| 119 | static inline void *kcalloc(size_t n, size_t size, gfp_t flags) | 118 | static inline void *kcalloc(size_t n, size_t size, gfp_t flags) |
| 120 | { | 119 | { |
| 121 | if (n != 0 && size > INT_MAX / n) | 120 | if (n != 0 && size > ULONG_MAX / n) |
| 122 | return NULL; | 121 | return NULL; |
| 123 | return kzalloc(n * size, flags); | 122 | return kzalloc(n * size, flags); |
| 124 | } | 123 | } |
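
The kcalloc() bound moves from INT_MAX to ULONG_MAX so that only products which genuinely overflow size_t are rejected; a user-space analog of the same guard, with SIZE_MAX standing in for the kernel's ULONG_MAX on these platforms:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* sketch: reject n * size only when the multiplication would wrap */
    static void *checked_calloc_sketch(size_t n, size_t size)
    {
            if (n != 0 && size > SIZE_MAX / n)
                    return NULL;            /* n * size would overflow */
            void *p = malloc(n * size);
            if (p)
                    memset(p, 0, n * size);
            return p;
    }
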
diff --git a/include/linux/smp.h b/include/linux/smp.h index 44153fdf73fc..d699a16b0cb2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
| @@ -52,23 +52,12 @@ extern void smp_cpus_done(unsigned int max_cpus); | |||
| 52 | /* | 52 | /* |
| 53 | * Call a function on all other processors | 53 | * Call a function on all other processors |
| 54 | */ | 54 | */ |
| 55 | extern int smp_call_function (void (*func) (void *info), void *info, | 55 | int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); |
| 56 | int retry, int wait); | ||
| 57 | 56 | ||
| 58 | /* | 57 | /* |
| 59 | * Call a function on all processors | 58 | * Call a function on all processors |
| 60 | */ | 59 | */ |
| 61 | static inline int on_each_cpu(void (*func) (void *info), void *info, | 60 | int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); |
| 62 | int retry, int wait) | ||
| 63 | { | ||
| 64 | int ret = 0; | ||
| 65 | |||
| 66 | preempt_disable(); | ||
| 67 | ret = smp_call_function(func, info, retry, wait); | ||
| 68 | func(info); | ||
| 69 | preempt_enable(); | ||
| 70 | return ret; | ||
| 71 | } | ||
| 72 | 61 | ||
| 73 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ | 62 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ |
| 74 | #define MSG_ALL 0x8001 | 63 | #define MSG_ALL 0x8001 |
| @@ -94,7 +83,13 @@ void smp_prepare_boot_cpu(void); | |||
| 94 | #define raw_smp_processor_id() 0 | 83 | #define raw_smp_processor_id() 0 |
| 95 | #define hard_smp_processor_id() 0 | 84 | #define hard_smp_processor_id() 0 |
| 96 | #define smp_call_function(func,info,retry,wait) ({ 0; }) | 85 | #define smp_call_function(func,info,retry,wait) ({ 0; }) |
| 97 | #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) | 86 | #define on_each_cpu(func,info,retry,wait) \ |
| 87 | ({ \ | ||
| 88 | local_irq_disable(); \ | ||
| 89 | func(info); \ | ||
| 90 | local_irq_enable(); \ | ||
| 91 | 0; \ | ||
| 92 | }) | ||
| 98 | static inline void smp_send_reschedule(int cpu) { } | 93 | static inline void smp_send_reschedule(int cpu) { } |
| 99 | #define num_booting_cpus() 1 | 94 | #define num_booting_cpus() 1 |
| 100 | #define smp_prepare_boot_cpu() do {} while (0) | 95 | #define smp_prepare_boot_cpu() do {} while (0) |
diff --git a/include/linux/swap.h b/include/linux/swap.h index d572b19afb7d..12415dd94451 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
| @@ -172,9 +172,24 @@ extern int rotate_reclaimable_page(struct page *page); | |||
| 172 | extern void swap_setup(void); | 172 | extern void swap_setup(void); |
| 173 | 173 | ||
| 174 | /* linux/mm/vmscan.c */ | 174 | /* linux/mm/vmscan.c */ |
| 175 | extern int try_to_free_pages(struct zone **, gfp_t); | 175 | extern unsigned long try_to_free_pages(struct zone **, gfp_t); |
| 176 | extern int shrink_all_memory(int); | 176 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
| 177 | extern int vm_swappiness; | 177 | extern int vm_swappiness; |
| 178 | extern int remove_mapping(struct address_space *mapping, struct page *page); | ||
| 179 | |||
| 180 | /* possible outcome of pageout() */ | ||
| 181 | typedef enum { | ||
| 182 | /* failed to write page out, page is locked */ | ||
| 183 | PAGE_KEEP, | ||
| 184 | /* move page to the active list, page is locked */ | ||
| 185 | PAGE_ACTIVATE, | ||
| 186 | /* page has been sent to the disk successfully, page is unlocked */ | ||
| 187 | PAGE_SUCCESS, | ||
| 188 | /* page is clean and locked */ | ||
| 189 | PAGE_CLEAN, | ||
| 190 | } pageout_t; | ||
| 191 | |||
| 192 | extern pageout_t pageout(struct page *page, struct address_space *mapping); | ||
| 178 | 193 | ||
| 179 | #ifdef CONFIG_NUMA | 194 | #ifdef CONFIG_NUMA |
| 180 | extern int zone_reclaim_mode; | 195 | extern int zone_reclaim_mode; |
| @@ -188,25 +203,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) | |||
| 188 | } | 203 | } |
| 189 | #endif | 204 | #endif |
| 190 | 205 | ||
| 191 | #ifdef CONFIG_MIGRATION | ||
| 192 | extern int isolate_lru_page(struct page *p); | ||
| 193 | extern int putback_lru_pages(struct list_head *l); | ||
| 194 | extern int migrate_page(struct page *, struct page *); | ||
| 195 | extern void migrate_page_copy(struct page *, struct page *); | ||
| 196 | extern int migrate_page_remove_references(struct page *, struct page *, int); | ||
| 197 | extern int migrate_pages(struct list_head *l, struct list_head *t, | ||
| 198 | struct list_head *moved, struct list_head *failed); | ||
| 199 | extern int fail_migrate_page(struct page *, struct page *); | ||
| 200 | #else | ||
| 201 | static inline int isolate_lru_page(struct page *p) { return -ENOSYS; } | ||
| 202 | static inline int putback_lru_pages(struct list_head *l) { return 0; } | ||
| 203 | static inline int migrate_pages(struct list_head *l, struct list_head *t, | ||
| 204 | struct list_head *moved, struct list_head *failed) { return -ENOSYS; } | ||
| 205 | /* Possible settings for the migrate_page() method in address_operations */ | ||
| 206 | #define migrate_page NULL | ||
| 207 | #define fail_migrate_page NULL | ||
| 208 | #endif | ||
| 209 | |||
| 210 | #ifdef CONFIG_MMU | 206 | #ifdef CONFIG_MMU |
| 211 | /* linux/mm/shmem.c */ | 207 | /* linux/mm/shmem.c */ |
| 212 | extern int shmem_unuse(swp_entry_t entry, struct page *page); | 208 | extern int shmem_unuse(swp_entry_t entry, struct page *page); |
diff --git a/kernel/fork.c b/kernel/fork.c index b373322ca497..9bd7b65ee418 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -1534,6 +1534,12 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
| 1534 | 1534 | ||
| 1535 | check_unshare_flags(&unshare_flags); | 1535 | check_unshare_flags(&unshare_flags); |
| 1536 | 1536 | ||
| 1537 | /* Return -EINVAL for all unsupported flags */ | ||
| 1538 | err = -EINVAL; | ||
| 1539 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
| 1540 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM)) | ||
| 1541 | goto bad_unshare_out; | ||
| 1542 | |||
| 1537 | if ((err = unshare_thread(unshare_flags))) | 1543 | if ((err = unshare_thread(unshare_flags))) |
| 1538 | goto bad_unshare_out; | 1544 | goto bad_unshare_out; |
| 1539 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1545 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
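
From user space the effect is that unshare(2) now rejects unsupported flag bits with EINVAL up front; a minimal caller, assuming a libc that exposes the unshare() wrapper:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            /* give this process a private fs_struct (cwd, umask, ...) */
            if (unshare(CLONE_FS) == -1) {
                    perror("unshare");      /* unsupported bits come back as EINVAL */
                    return 1;
            }
            return 0;
    }
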
diff --git a/kernel/sched.c b/kernel/sched.c index 4d46e90f59c3..6b6e0d70eb30 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -707,12 +707,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now) | |||
| 707 | DEF_TIMESLICE); | 707 | DEF_TIMESLICE); |
| 708 | } else { | 708 | } else { |
| 709 | /* | 709 | /* |
| 710 | * The lower the sleep avg a task has the more | ||
| 711 | * rapidly it will rise with sleep time. | ||
| 712 | */ | ||
| 713 | sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; | ||
| 714 | |||
| 715 | /* | ||
| 716 | * Tasks waking from uninterruptible sleep are | 710 | * Tasks waking from uninterruptible sleep are |
| 717 | * limited in their sleep_avg rise as they | 711 | * limited in their sleep_avg rise as they |
| 718 | * are likely to be waiting on I/O | 712 | * are likely to be waiting on I/O |
diff --git a/kernel/softirq.c b/kernel/softirq.c index ad3295cdded5..ec8fed42a86f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 17 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
| 18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
| 19 | #include <linux/smp.h> | ||
| 19 | 20 | ||
| 20 | #include <asm/irq.h> | 21 | #include <asm/irq.h> |
| 21 | /* | 22 | /* |
| @@ -495,3 +496,22 @@ __init int spawn_ksoftirqd(void) | |||
| 495 | register_cpu_notifier(&cpu_nfb); | 496 | register_cpu_notifier(&cpu_nfb); |
| 496 | return 0; | 497 | return 0; |
| 497 | } | 498 | } |
| 499 | |||
| 500 | #ifdef CONFIG_SMP | ||
| 501 | /* | ||
| 502 | * Call a function on all processors | ||
| 503 | */ | ||
| 504 | int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) | ||
| 505 | { | ||
| 506 | int ret = 0; | ||
| 507 | |||
| 508 | preempt_disable(); | ||
| 509 | ret = smp_call_function(func, info, retry, wait); | ||
| 510 | local_irq_disable(); | ||
| 511 | func(info); | ||
| 512 | local_irq_enable(); | ||
| 513 | preempt_enable(); | ||
| 514 | return ret; | ||
| 515 | } | ||
| 516 | EXPORT_SYMBOL(on_each_cpu); | ||
| 517 | #endif | ||
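
on_each_cpu() keeps its calling convention but is now out of line, and the local invocation runs with interrupts disabled in both the UP and SMP paths above; a typical caller looks like this (the flush routine is made up for illustration):

    #include <linux/smp.h>

    /* sketch: runs on every online CPU; the local state access is hypothetical */
    static void flush_local_sketch(void *info)
    {
            /* ... touch this CPU's private data ... */
    }

    static void flush_all_cpus_sketch(void)
    {
            /* retry = 0, wait = 1: return only after every CPU has run it */
            on_each_cpu(flush_local_sketch, NULL, 0, 1);
    }
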
diff --git a/lib/string.c b/lib/string.c index 037a48acedbb..b3c28a3f6332 100644 --- a/lib/string.c +++ b/lib/string.c | |||
| @@ -403,7 +403,6 @@ char *strpbrk(const char *cs, const char *ct) | |||
| 403 | } | 403 | } |
| 404 | return NULL; | 404 | return NULL; |
| 405 | } | 405 | } |
| 406 | EXPORT_SYMBOL(strpbrk); | ||
| 407 | #endif | 406 | #endif |
| 408 | 407 | ||
| 409 | #ifndef __HAVE_ARCH_STRSEP | 408 | #ifndef __HAVE_ARCH_STRSEP |
diff --git a/mm/Kconfig b/mm/Kconfig index a9cb80ae6409..bd80460360db 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -137,5 +137,11 @@ config SPLIT_PTLOCK_CPUS | |||
| 137 | # support for page migration | 137 | # support for page migration |
| 138 | # | 138 | # |
| 139 | config MIGRATION | 139 | config MIGRATION |
| 140 | bool "Page migration" | ||
| 140 | def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM | 141 | def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM |
| 141 | depends on SWAP | 142 | depends on SWAP |
| 143 | help | ||
| 144 | Allows the migration of the physical location of pages of processes | ||
| 145 | while the virtual addresses are not changed. This is useful for | ||
| 146 | example on NUMA systems to put pages nearer to the processors accessing | ||
| 147 | the page. | ||
diff --git a/mm/Makefile b/mm/Makefile index 9aa03fa1dcc3..f10c753dce6d 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -22,3 +22,5 @@ obj-$(CONFIG_SLOB) += slob.o | |||
| 22 | obj-$(CONFIG_SLAB) += slab.o | 22 | obj-$(CONFIG_SLAB) += slab.o |
| 23 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 23 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
| 24 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 24 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
| 25 | obj-$(CONFIG_MIGRATION) += migrate.o | ||
| 26 | |||
diff --git a/mm/filemap.c b/mm/filemap.c index 44da3d476994..e8f58f7dd7a5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -30,6 +30,8 @@ | |||
| 30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
| 31 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
| 32 | #include "filemap.h" | 32 | #include "filemap.h" |
| 33 | #include "internal.h" | ||
| 34 | |||
| 33 | /* | 35 | /* |
| 34 | * FIXME: remove all knowledge of the buffer layer from the core VM | 36 | * FIXME: remove all knowledge of the buffer layer from the core VM |
| 35 | */ | 37 | */ |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 508707704d2c..ebad6bbb3501 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -13,24 +13,48 @@ | |||
| 13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
| 14 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
| 15 | #include <linux/cpuset.h> | 15 | #include <linux/cpuset.h> |
| 16 | #include <linux/mutex.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/page.h> | 18 | #include <asm/page.h> |
| 18 | #include <asm/pgtable.h> | 19 | #include <asm/pgtable.h> |
| 19 | 20 | ||
| 20 | #include <linux/hugetlb.h> | 21 | #include <linux/hugetlb.h> |
| 22 | #include "internal.h" | ||
| 21 | 23 | ||
| 22 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 24 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
| 23 | static unsigned long nr_huge_pages, free_huge_pages; | 25 | static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages; |
| 24 | unsigned long max_huge_pages; | 26 | unsigned long max_huge_pages; |
| 25 | static struct list_head hugepage_freelists[MAX_NUMNODES]; | 27 | static struct list_head hugepage_freelists[MAX_NUMNODES]; |
| 26 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; | 28 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; |
| 27 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; | 29 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; |
| 28 | |||
| 29 | /* | 30 | /* |
| 30 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | 31 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages |
| 31 | */ | 32 | */ |
| 32 | static DEFINE_SPINLOCK(hugetlb_lock); | 33 | static DEFINE_SPINLOCK(hugetlb_lock); |
| 33 | 34 | ||
| 35 | static void clear_huge_page(struct page *page, unsigned long addr) | ||
| 36 | { | ||
| 37 | int i; | ||
| 38 | |||
| 39 | might_sleep(); | ||
| 40 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { | ||
| 41 | cond_resched(); | ||
| 42 | clear_user_highpage(page + i, addr); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | static void copy_huge_page(struct page *dst, struct page *src, | ||
| 47 | unsigned long addr) | ||
| 48 | { | ||
| 49 | int i; | ||
| 50 | |||
| 51 | might_sleep(); | ||
| 52 | for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { | ||
| 53 | cond_resched(); | ||
| 54 | copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 34 | static void enqueue_huge_page(struct page *page) | 58 | static void enqueue_huge_page(struct page *page) |
| 35 | { | 59 | { |
| 36 | int nid = page_to_nid(page); | 60 | int nid = page_to_nid(page); |
| @@ -64,57 +88,176 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, | |||
| 64 | return page; | 88 | return page; |
| 65 | } | 89 | } |
| 66 | 90 | ||
| 67 | static struct page *alloc_fresh_huge_page(void) | 91 | static void free_huge_page(struct page *page) |
| 92 | { | ||
| 93 | BUG_ON(page_count(page)); | ||
| 94 | |||
| 95 | INIT_LIST_HEAD(&page->lru); | ||
| 96 | |||
| 97 | spin_lock(&hugetlb_lock); | ||
| 98 | enqueue_huge_page(page); | ||
| 99 | spin_unlock(&hugetlb_lock); | ||
| 100 | } | ||
| 101 | |||
| 102 | static int alloc_fresh_huge_page(void) | ||
| 68 | { | 103 | { |
| 69 | static int nid = 0; | 104 | static int nid = 0; |
| 70 | struct page *page; | 105 | struct page *page; |
| 71 | page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, | 106 | page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, |
| 72 | HUGETLB_PAGE_ORDER); | 107 | HUGETLB_PAGE_ORDER); |
| 73 | nid = (nid + 1) % num_online_nodes(); | 108 | nid = next_node(nid, node_online_map); |
| 109 | if (nid == MAX_NUMNODES) | ||
| 110 | nid = first_node(node_online_map); | ||
| 74 | if (page) { | 111 | if (page) { |
| 112 | page[1].lru.next = (void *)free_huge_page; /* dtor */ | ||
| 75 | spin_lock(&hugetlb_lock); | 113 | spin_lock(&hugetlb_lock); |
| 76 | nr_huge_pages++; | 114 | nr_huge_pages++; |
| 77 | nr_huge_pages_node[page_to_nid(page)]++; | 115 | nr_huge_pages_node[page_to_nid(page)]++; |
| 78 | spin_unlock(&hugetlb_lock); | 116 | spin_unlock(&hugetlb_lock); |
| 117 | put_page(page); /* free it into the hugepage allocator */ | ||
| 118 | return 1; | ||
| 79 | } | 119 | } |
| 80 | return page; | 120 | return 0; |
| 81 | } | 121 | } |
| 82 | 122 | ||
| 83 | void free_huge_page(struct page *page) | 123 | static struct page *alloc_huge_page(struct vm_area_struct *vma, |
| 124 | unsigned long addr) | ||
| 84 | { | 125 | { |
| 85 | BUG_ON(page_count(page)); | 126 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
| 127 | struct page *page; | ||
| 128 | int use_reserve = 0; | ||
| 129 | unsigned long idx; | ||
| 86 | 130 | ||
| 87 | INIT_LIST_HEAD(&page->lru); | 131 | spin_lock(&hugetlb_lock); |
| 88 | page[1].lru.next = NULL; /* reset dtor */ | 132 | |
| 133 | if (vma->vm_flags & VM_MAYSHARE) { | ||
| 134 | |||
| 135 | /* idx = radix tree index, i.e. offset into file in | ||
| 136 | * HPAGE_SIZE units */ | ||
| 137 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
| 138 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
| 139 | |||
| 140 | /* The hugetlbfs specific inode info stores the number | ||
| 141 | * of "guaranteed available" (huge) pages. That is, | ||
| 142 | * the first 'prereserved_hpages' pages of the inode | ||
| 143 | * are either already instantiated, or have been | ||
| 144 | * pre-reserved (by hugetlb_reserve_for_inode()). Here | ||
| 145 | * we're in the process of instantiating the page, so | ||
| 146 | * we use this to determine whether to draw from the | ||
| 147 | * pre-reserved pool or the truly free pool. */ | ||
| 148 | if (idx < HUGETLBFS_I(inode)->prereserved_hpages) | ||
| 149 | use_reserve = 1; | ||
| 150 | } | ||
| 151 | |||
| 152 | if (!use_reserve) { | ||
| 153 | if (free_huge_pages <= reserved_huge_pages) | ||
| 154 | goto fail; | ||
| 155 | } else { | ||
| 156 | BUG_ON(reserved_huge_pages == 0); | ||
| 157 | reserved_huge_pages--; | ||
| 158 | } | ||
| 159 | |||
| 160 | page = dequeue_huge_page(vma, addr); | ||
| 161 | if (!page) | ||
| 162 | goto fail; | ||
| 163 | |||
| 164 | spin_unlock(&hugetlb_lock); | ||
| 165 | set_page_refcounted(page); | ||
| 166 | return page; | ||
| 167 | |||
| 168 | fail: | ||
| 169 | WARN_ON(use_reserve); /* reserved allocations shouldn't fail */ | ||
| 170 | spin_unlock(&hugetlb_lock); | ||
| 171 | return NULL; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* hugetlb_extend_reservation() | ||
| 175 | * | ||
| 176 | * Ensure that at least 'atleast' hugepages are, and will remain, | ||
| 177 | * available to instantiate the first 'atleast' pages of the given | ||
| 178 | * inode. If the inode doesn't already have this many pages reserved | ||
| 179 | * or instantiated, set aside some hugepages in the reserved pool to | ||
| 180 | * satisfy later faults (or fail now if there aren't enough, rather | ||
| 181 | * than getting the SIGBUS later). | ||
| 182 | */ | ||
| 183 | int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, | ||
| 184 | unsigned long atleast) | ||
| 185 | { | ||
| 186 | struct inode *inode = &info->vfs_inode; | ||
| 187 | unsigned long change_in_reserve = 0; | ||
| 188 | int ret = 0; | ||
| 89 | 189 | ||
| 90 | spin_lock(&hugetlb_lock); | 190 | spin_lock(&hugetlb_lock); |
| 91 | enqueue_huge_page(page); | 191 | read_lock_irq(&inode->i_mapping->tree_lock); |
| 192 | |||
| 193 | if (info->prereserved_hpages >= atleast) | ||
| 194 | goto out; | ||
| 195 | |||
| 196 | /* Because we always call this on shared mappings, none of the | ||
| 197 | * pages beyond info->prereserved_hpages can have been | ||
| 198 | * instantiated, so we need to reserve all of them now. */ | ||
| 199 | change_in_reserve = atleast - info->prereserved_hpages; | ||
| 200 | |||
| 201 | if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) { | ||
| 202 | ret = -ENOMEM; | ||
| 203 | goto out; | ||
| 204 | } | ||
| 205 | |||
| 206 | reserved_huge_pages += change_in_reserve; | ||
| 207 | info->prereserved_hpages = atleast; | ||
| 208 | |||
| 209 | out: | ||
| 210 | read_unlock_irq(&inode->i_mapping->tree_lock); | ||
| 92 | spin_unlock(&hugetlb_lock); | 211 | spin_unlock(&hugetlb_lock); |
| 212 | |||
| 213 | return ret; | ||
| 93 | } | 214 | } |
| 94 | 215 | ||
| 95 | struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) | 216 | /* hugetlb_truncate_reservation() |
| 217 | * | ||
| 218 | * This returns pages reserved for the given inode to the general free | ||
| 219 | * hugepage pool. If the inode has any pages prereserved, but not | ||
| 220 | * instantiated, beyond offset (atmost << HPAGE_SHIFT), then release | ||
| 221 | * them. | ||
| 222 | */ | ||
| 223 | void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, | ||
| 224 | unsigned long atmost) | ||
| 96 | { | 225 | { |
| 226 | struct inode *inode = &info->vfs_inode; | ||
| 227 | struct address_space *mapping = inode->i_mapping; | ||
| 228 | unsigned long idx; | ||
| 229 | unsigned long change_in_reserve = 0; | ||
| 97 | struct page *page; | 230 | struct page *page; |
| 98 | int i; | ||
| 99 | 231 | ||
| 100 | spin_lock(&hugetlb_lock); | 232 | spin_lock(&hugetlb_lock); |
| 101 | page = dequeue_huge_page(vma, addr); | 233 | read_lock_irq(&inode->i_mapping->tree_lock); |
| 102 | if (!page) { | 234 | |
| 103 | spin_unlock(&hugetlb_lock); | 235 | if (info->prereserved_hpages <= atmost) |
| 104 | return NULL; | 236 | goto out; |
| 237 | |||
| 238 | /* Count pages which were reserved, but not instantiated, and | ||
| 239 | * which we can now release. */ | ||
| 240 | for (idx = atmost; idx < info->prereserved_hpages; idx++) { | ||
| 241 | page = radix_tree_lookup(&mapping->page_tree, idx); | ||
| 242 | if (!page) | ||
| 243 | /* Pages which are already instantiated can't | ||
| 244 | * be unreserved (and in fact have already | ||
| 245 | * been removed from the reserved pool) */ | ||
| 246 | change_in_reserve++; | ||
| 105 | } | 247 | } |
| 248 | |||
| 249 | BUG_ON(reserved_huge_pages < change_in_reserve); | ||
| 250 | reserved_huge_pages -= change_in_reserve; | ||
| 251 | info->prereserved_hpages = atmost; | ||
| 252 | |||
| 253 | out: | ||
| 254 | read_unlock_irq(&inode->i_mapping->tree_lock); | ||
| 106 | spin_unlock(&hugetlb_lock); | 255 | spin_unlock(&hugetlb_lock); |
| 107 | set_page_count(page, 1); | ||
| 108 | page[1].lru.next = (void *)free_huge_page; /* set dtor */ | ||
| 109 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) | ||
| 110 | clear_user_highpage(&page[i], addr); | ||
| 111 | return page; | ||
| 112 | } | 256 | } |
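
Taken together, hugetlb_extend_reservation() and hugetlb_truncate_reservation() maintain two counters: the per-inode prereserved_hpages and the global reserved_huge_pages. The sketch below is a userspace model of that bookkeeping, simplified by ignoring already-instantiated pages (which the kernel code above must skip when truncating); the counters are stand-ins for illustration, not the kernel symbols.

/* Userspace model of the extend/truncate reservation bookkeeping. */
#include <stdio.h>

static unsigned long free_huge_pages = 8;
static unsigned long reserved_huge_pages;
static unsigned long prereserved_hpages;

static int extend_reservation(unsigned long atleast)
{
	unsigned long change;

	if (prereserved_hpages >= atleast)
		return 0;
	change = atleast - prereserved_hpages;
	if (reserved_huge_pages + change > free_huge_pages)
		return -1;			/* -ENOMEM in the kernel */
	reserved_huge_pages += change;
	prereserved_hpages = atleast;
	return 0;
}

static void truncate_reservation(unsigned long atmost)
{
	if (prereserved_hpages <= atmost)
		return;
	reserved_huge_pages -= prereserved_hpages - atmost;
	prereserved_hpages = atmost;
}

int main(void)
{
	extend_reservation(5);			/* reserve 5 pages up front */
	truncate_reservation(2);		/* give 3 of them back */
	printf("reserved=%lu prereserved=%lu free=%lu\n",
	       reserved_huge_pages, prereserved_hpages, free_huge_pages);
	return 0;
}
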
| 113 | 257 | ||
| 114 | static int __init hugetlb_init(void) | 258 | static int __init hugetlb_init(void) |
| 115 | { | 259 | { |
| 116 | unsigned long i; | 260 | unsigned long i; |
| 117 | struct page *page; | ||
| 118 | 261 | ||
| 119 | if (HPAGE_SHIFT == 0) | 262 | if (HPAGE_SHIFT == 0) |
| 120 | return 0; | 263 | return 0; |
| @@ -123,12 +266,8 @@ static int __init hugetlb_init(void) | |||
| 123 | INIT_LIST_HEAD(&hugepage_freelists[i]); | 266 | INIT_LIST_HEAD(&hugepage_freelists[i]); |
| 124 | 267 | ||
| 125 | for (i = 0; i < max_huge_pages; ++i) { | 268 | for (i = 0; i < max_huge_pages; ++i) { |
| 126 | page = alloc_fresh_huge_page(); | 269 | if (!alloc_fresh_huge_page()) |
| 127 | if (!page) | ||
| 128 | break; | 270 | break; |
| 129 | spin_lock(&hugetlb_lock); | ||
| 130 | enqueue_huge_page(page); | ||
| 131 | spin_unlock(&hugetlb_lock); | ||
| 132 | } | 271 | } |
| 133 | max_huge_pages = free_huge_pages = nr_huge_pages = i; | 272 | max_huge_pages = free_huge_pages = nr_huge_pages = i; |
| 134 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); | 273 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); |
| @@ -154,9 +293,9 @@ static void update_and_free_page(struct page *page) | |||
| 154 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | | 293 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | |
| 155 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | | 294 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | |
| 156 | 1 << PG_private | 1<< PG_writeback); | 295 | 1 << PG_private | 1<< PG_writeback); |
| 157 | set_page_count(&page[i], 0); | ||
| 158 | } | 296 | } |
| 159 | set_page_count(page, 1); | 297 | page[1].lru.next = NULL; |
| 298 | set_page_refcounted(page); | ||
| 160 | __free_pages(page, HUGETLB_PAGE_ORDER); | 299 | __free_pages(page, HUGETLB_PAGE_ORDER); |
| 161 | } | 300 | } |
| 162 | 301 | ||
| @@ -188,12 +327,8 @@ static inline void try_to_free_low(unsigned long count) | |||
| 188 | static unsigned long set_max_huge_pages(unsigned long count) | 327 | static unsigned long set_max_huge_pages(unsigned long count) |
| 189 | { | 328 | { |
| 190 | while (count > nr_huge_pages) { | 329 | while (count > nr_huge_pages) { |
| 191 | struct page *page = alloc_fresh_huge_page(); | 330 | if (!alloc_fresh_huge_page()) |
| 192 | if (!page) | ||
| 193 | return nr_huge_pages; | 331 | return nr_huge_pages; |
| 194 | spin_lock(&hugetlb_lock); | ||
| 195 | enqueue_huge_page(page); | ||
| 196 | spin_unlock(&hugetlb_lock); | ||
| 197 | } | 332 | } |
| 198 | if (count >= nr_huge_pages) | 333 | if (count >= nr_huge_pages) |
| 199 | return nr_huge_pages; | 334 | return nr_huge_pages; |
| @@ -225,9 +360,11 @@ int hugetlb_report_meminfo(char *buf) | |||
| 225 | return sprintf(buf, | 360 | return sprintf(buf, |
| 226 | "HugePages_Total: %5lu\n" | 361 | "HugePages_Total: %5lu\n" |
| 227 | "HugePages_Free: %5lu\n" | 362 | "HugePages_Free: %5lu\n" |
| 363 | "HugePages_Rsvd: %5lu\n" | ||
| 228 | "Hugepagesize: %5lu kB\n", | 364 | "Hugepagesize: %5lu kB\n", |
| 229 | nr_huge_pages, | 365 | nr_huge_pages, |
| 230 | free_huge_pages, | 366 | free_huge_pages, |
| 367 | reserved_huge_pages, | ||
| 231 | HPAGE_SIZE/1024); | 368 | HPAGE_SIZE/1024); |
| 232 | } | 369 | } |
| 233 | 370 | ||
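
Assuming the hugetlbfs side calls hugetlb_extend_reservation() at mmap() time (as the companion hugetlbfs changes are meant to do), the new HugePages_Rsvd line should rise as soon as a shared mapping is created, before any page is touched. Below is a small userspace check along those lines; the mount point /mnt/huge and the 2 MB huge page size are assumptions, adjust for your system.

/* Userspace peek at the new HugePages_Rsvd field (paths/sizes assumed). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define HPAGE	(2UL * 1024 * 1024)

static void show_rsvd(const char *when)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	while (f && fgets(line, sizeof(line), f))
		if (!strncmp(line, "HugePages_Rsvd", 14))
			printf("%s: %s", when, line);
	if (f)
		fclose(f);
}

int main(void)
{
	int fd = open("/mnt/huge/demo", O_CREAT | O_RDWR, 0600);

	show_rsvd("before mmap");
	if (fd >= 0 && mmap(NULL, 4 * HPAGE, PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, 0) != MAP_FAILED)
		show_rsvd("after mmap");	/* reservation visible here */
	return 0;
}
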
| @@ -240,11 +377,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf) | |||
| 240 | nid, free_huge_pages_node[nid]); | 377 | nid, free_huge_pages_node[nid]); |
| 241 | } | 378 | } |
| 242 | 379 | ||
| 243 | int is_hugepage_mem_enough(size_t size) | ||
| 244 | { | ||
| 245 | return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; | ||
| 246 | } | ||
| 247 | |||
| 248 | /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ | 380 | /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */ |
| 249 | unsigned long hugetlb_total_pages(void) | 381 | unsigned long hugetlb_total_pages(void) |
| 250 | { | 382 | { |
| @@ -374,7 +506,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 374 | unsigned long address, pte_t *ptep, pte_t pte) | 506 | unsigned long address, pte_t *ptep, pte_t pte) |
| 375 | { | 507 | { |
| 376 | struct page *old_page, *new_page; | 508 | struct page *old_page, *new_page; |
| 377 | int i, avoidcopy; | 509 | int avoidcopy; |
| 378 | 510 | ||
| 379 | old_page = pte_page(pte); | 511 | old_page = pte_page(pte); |
| 380 | 512 | ||
| @@ -395,9 +527,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 395 | } | 527 | } |
| 396 | 528 | ||
| 397 | spin_unlock(&mm->page_table_lock); | 529 | spin_unlock(&mm->page_table_lock); |
| 398 | for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) | 530 | copy_huge_page(new_page, old_page, address); |
| 399 | copy_user_highpage(new_page + i, old_page + i, | ||
| 400 | address + i*PAGE_SIZE); | ||
| 401 | spin_lock(&mm->page_table_lock); | 531 | spin_lock(&mm->page_table_lock); |
| 402 | 532 | ||
| 403 | ptep = huge_pte_offset(mm, address & HPAGE_MASK); | 533 | ptep = huge_pte_offset(mm, address & HPAGE_MASK); |
| @@ -442,6 +572,7 @@ retry: | |||
| 442 | ret = VM_FAULT_OOM; | 572 | ret = VM_FAULT_OOM; |
| 443 | goto out; | 573 | goto out; |
| 444 | } | 574 | } |
| 575 | clear_huge_page(page, address); | ||
| 445 | 576 | ||
| 446 | if (vma->vm_flags & VM_SHARED) { | 577 | if (vma->vm_flags & VM_SHARED) { |
| 447 | int err; | 578 | int err; |
| @@ -496,14 +627,24 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 496 | pte_t *ptep; | 627 | pte_t *ptep; |
| 497 | pte_t entry; | 628 | pte_t entry; |
| 498 | int ret; | 629 | int ret; |
| 630 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); | ||
| 499 | 631 | ||
| 500 | ptep = huge_pte_alloc(mm, address); | 632 | ptep = huge_pte_alloc(mm, address); |
| 501 | if (!ptep) | 633 | if (!ptep) |
| 502 | return VM_FAULT_OOM; | 634 | return VM_FAULT_OOM; |
| 503 | 635 | ||
| 636 | /* | ||
| 637 | * Serialize hugepage allocation and instantiation, so that we don't | ||
| 638 | * get spurious allocation failures if two CPUs race to instantiate | ||
| 639 | * the same page in the page cache. | ||
| 640 | */ | ||
| 641 | mutex_lock(&hugetlb_instantiation_mutex); | ||
| 504 | entry = *ptep; | 642 | entry = *ptep; |
| 505 | if (pte_none(entry)) | 643 | if (pte_none(entry)) { |
| 506 | return hugetlb_no_page(mm, vma, address, ptep, write_access); | 644 | ret = hugetlb_no_page(mm, vma, address, ptep, write_access); |
| 645 | mutex_unlock(&hugetlb_instantiation_mutex); | ||
| 646 | return ret; | ||
| 647 | } | ||
| 507 | 648 | ||
| 508 | ret = VM_FAULT_MINOR; | 649 | ret = VM_FAULT_MINOR; |
| 509 | 650 | ||
| @@ -513,6 +654,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 513 | if (write_access && !pte_write(entry)) | 654 | if (write_access && !pte_write(entry)) |
| 514 | ret = hugetlb_cow(mm, vma, address, ptep, entry); | 655 | ret = hugetlb_cow(mm, vma, address, ptep, entry); |
| 515 | spin_unlock(&mm->page_table_lock); | 656 | spin_unlock(&mm->page_table_lock); |
| 657 | mutex_unlock(&hugetlb_instantiation_mutex); | ||
| 516 | 658 | ||
| 517 | return ret; | 659 | return ret; |
| 518 | } | 660 | } |
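
The new hugetlb_instantiation_mutex makes hugepage instantiation single-threaded: if two CPUs fault on the same missing hugepage, the second one waits and then finds the entry already present instead of trying to allocate a second page and failing spuriously. A userspace model of that pattern follows; the threads stand in for the faulting CPUs and an int stands in for the huge PTE.

/* Userspace model of serializing instantiation behind one mutex. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t instantiation_mutex = PTHREAD_MUTEX_INITIALIZER;
static int entry_present;	/* stands in for the huge-page PTE */
static int allocations;		/* would be huge pages dequeued */

static void *fault(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&instantiation_mutex);
	if (!entry_present) {		/* like the pte_none() path */
		allocations++;
		entry_present = 1;
	}
	pthread_mutex_unlock(&instantiation_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, fault, NULL);
	pthread_create(&b, NULL, fault, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("huge pages allocated: %d\n", allocations);	/* 1, not 2 */
	return 0;
}
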
| @@ -521,10 +663,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 521 | struct page **pages, struct vm_area_struct **vmas, | 663 | struct page **pages, struct vm_area_struct **vmas, |
| 522 | unsigned long *position, int *length, int i) | 664 | unsigned long *position, int *length, int i) |
| 523 | { | 665 | { |
| 524 | unsigned long vpfn, vaddr = *position; | 666 | unsigned long pfn_offset; |
| 667 | unsigned long vaddr = *position; | ||
| 525 | int remainder = *length; | 668 | int remainder = *length; |
| 526 | 669 | ||
| 527 | vpfn = vaddr/PAGE_SIZE; | ||
| 528 | spin_lock(&mm->page_table_lock); | 670 | spin_lock(&mm->page_table_lock); |
| 529 | while (vaddr < vma->vm_end && remainder) { | 671 | while (vaddr < vma->vm_end && remainder) { |
| 530 | pte_t *pte; | 672 | pte_t *pte; |
| @@ -552,19 +694,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 552 | break; | 694 | break; |
| 553 | } | 695 | } |
| 554 | 696 | ||
| 555 | if (pages) { | 697 | pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; |
| 556 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | 698 | page = pte_page(*pte); |
| 557 | get_page(page); | 699 | same_page: |
| 558 | pages[i] = page; | 700 | get_page(page); |
| 559 | } | 701 | if (pages) |
| 702 | pages[i] = page + pfn_offset; | ||
| 560 | 703 | ||
| 561 | if (vmas) | 704 | if (vmas) |
| 562 | vmas[i] = vma; | 705 | vmas[i] = vma; |
| 563 | 706 | ||
| 564 | vaddr += PAGE_SIZE; | 707 | vaddr += PAGE_SIZE; |
| 565 | ++vpfn; | 708 | ++pfn_offset; |
| 566 | --remainder; | 709 | --remainder; |
| 567 | ++i; | 710 | ++i; |
| 711 | if (vaddr < vma->vm_end && remainder && | ||
| 712 | pfn_offset < HPAGE_SIZE/PAGE_SIZE) { | ||
| 713 | /* | ||
| 714 | * We use pfn_offset to avoid touching the pageframes | ||
| 715 | * of this compound page. | ||
| 716 | */ | ||
| 717 | goto same_page; | ||
| 718 | } | ||
| 568 | } | 719 | } |
| 569 | spin_unlock(&mm->page_table_lock); | 720 | spin_unlock(&mm->page_table_lock); |
| 570 | *length = remainder; | 721 | *length = remainder; |
| @@ -572,3 +723,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 572 | 723 | ||
| 573 | return i; | 724 | return i; |
| 574 | } | 725 | } |
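
The rewritten loop indexes subpages of the compound head page with pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT rather than a global virtual pfn. A worked example of that arithmetic, assuming 4 KB base pages (PAGE_SHIFT = 12) and 2 MB huge pages (HPAGE_SHIFT = 21), so the low 21 address bits are the offset within the huge page:

/* Worked example of the pfn_offset calculation (page sizes assumed). */
#include <stdio.h>

int main(void)
{
	unsigned long hpage_mask = ~((1UL << 21) - 1);	/* HPAGE_MASK */
	unsigned long vaddr = 0x60000000UL + 5 * 4096 + 123;

	/* index of the 4 KB subpage within the huge page backing vaddr */
	unsigned long pfn_offset = (vaddr & ~hpage_mask) >> 12;

	printf("pfn_offset = %lu\n", pfn_offset);	/* prints 5 */
	return 0;
}
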
| 726 | |||
| 727 | void hugetlb_change_protection(struct vm_area_struct *vma, | ||
| 728 | unsigned long address, unsigned long end, pgprot_t newprot) | ||
| 729 | { | ||
| 730 | struct mm_struct *mm = vma->vm_mm; | ||
| 731 | unsigned long start = address; | ||
| 732 | pte_t *ptep; | ||
| 733 | pte_t pte; | ||
| 734 | |||
| 735 | BUG_ON(address >= end); | ||
| 736 | flush_cache_range(vma, address, end); | ||
| 737 | |||
| 738 | spin_lock(&mm->page_table_lock); | ||
| 739 | for (; address < end; address += HPAGE_SIZE) { | ||
| 740 | ptep = huge_pte_offset(mm, address); | ||
| 741 | if (!ptep) | ||
| 742 | continue; | ||
| 743 | if (!pte_none(*ptep)) { | ||
| 744 | pte = huge_ptep_get_and_clear(mm, address, ptep); | ||
| 745 | pte = pte_mkhuge(pte_modify(pte, newprot)); | ||
| 746 | set_huge_pte_at(mm, address, ptep, pte); | ||
| 747 | lazy_mmu_prot_update(pte); | ||
| 748 | } | ||
| 749 | } | ||
| 750 | spin_unlock(&mm->page_table_lock); | ||
| 751 | |||
| 752 | flush_tlb_range(vma, start, end); | ||
| 753 | } | ||
| 754 | |||
diff --git a/mm/internal.h b/mm/internal.h index 17256bb2f4ef..d20e3cc4aef0 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
| @@ -8,23 +8,33 @@ | |||
| 8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
| 10 | */ | 10 | */ |
| 11 | #ifndef __MM_INTERNAL_H | ||
| 12 | #define __MM_INTERNAL_H | ||
| 11 | 13 | ||
| 12 | static inline void set_page_refs(struct page *page, int order) | 14 | #include <linux/mm.h> |
| 15 | |||
| 16 | static inline void set_page_count(struct page *page, int v) | ||
| 17 | { | ||
| 18 | atomic_set(&page->_count, v); | ||
| 19 | } | ||
| 20 | |||
| 21 | /* | ||
| 22 | * Turn a non-refcounted page (->_count == 0) into refcounted with | ||
| 23 | * a count of one. | ||
| 24 | */ | ||
| 25 | static inline void set_page_refcounted(struct page *page) | ||
| 13 | { | 26 | { |
| 14 | #ifdef CONFIG_MMU | 27 | BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); |
| 28 | BUG_ON(atomic_read(&page->_count)); | ||
| 15 | set_page_count(page, 1); | 29 | set_page_count(page, 1); |
| 16 | #else | 30 | } |
| 17 | int i; | ||
| 18 | 31 | ||
| 19 | /* | 32 | static inline void __put_page(struct page *page) |
| 20 | * We need to reference all the pages for this order, otherwise if | 33 | { |
| 21 | * anyone accesses one of the pages with (get/put) it will be freed. | 34 | atomic_dec(&page->_count); |
| 22 | * - eg: access_process_vm() | ||
| 23 | */ | ||
| 24 | for (i = 0; i < (1 << order); i++) | ||
| 25 | set_page_count(page + i, 1); | ||
| 26 | #endif /* CONFIG_MMU */ | ||
| 27 | } | 35 | } |
| 28 | 36 | ||
| 29 | extern void fastcall __init __free_pages_bootmem(struct page *page, | 37 | extern void fastcall __init __free_pages_bootmem(struct page *page, |
| 30 | unsigned int order); | 38 | unsigned int order); |
| 39 | |||
| 40 | #endif | ||
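
set_page_refcounted() is only legal on a page whose _count is zero; it turns such a page into a normally refcounted one with a count of one, and the BUG_ON catches misuse. Below is a toy userspace model of that contract using C11 atomics; the struct is a stand-in, not the kernel's struct page.

/* Toy model of the "zero count -> count of one" contract. */
#include <assert.h>
#include <stdatomic.h>

struct toy_page { atomic_int _count; };

static void toy_set_page_refcounted(struct toy_page *page)
{
	assert(atomic_load(&page->_count) == 0);	/* BUG_ON in the kernel */
	atomic_store(&page->_count, 1);
}

int main(void)
{
	struct toy_page p;

	atomic_init(&p._count, 0);	/* freshly allocated, not yet refcounted */
	toy_set_page_refcounted(&p);	/* now safe to reference normally */
	return 0;
}
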
diff --git a/mm/memory.c b/mm/memory.c index 85e80a57db29..80c3fb370f91 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, | |||
| 277 | anon_vma_unlink(vma); | 277 | anon_vma_unlink(vma); |
| 278 | unlink_file_vma(vma); | 278 | unlink_file_vma(vma); |
| 279 | 279 | ||
| 280 | if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) { | 280 | if (is_vm_hugetlb_page(vma)) { |
| 281 | hugetlb_free_pgd_range(tlb, addr, vma->vm_end, | 281 | hugetlb_free_pgd_range(tlb, addr, vma->vm_end, |
| 282 | floor, next? next->vm_start: ceiling); | 282 | floor, next? next->vm_start: ceiling); |
| 283 | } else { | 283 | } else { |
| @@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, | |||
| 285 | * Optimization: gather nearby vmas into one call down | 285 | * Optimization: gather nearby vmas into one call down |
| 286 | */ | 286 | */ |
| 287 | while (next && next->vm_start <= vma->vm_end + PMD_SIZE | 287 | while (next && next->vm_start <= vma->vm_end + PMD_SIZE |
| 288 | && !is_hugepage_only_range(vma->vm_mm, next->vm_start, | 288 | && !is_vm_hugetlb_page(next)) { |
| 289 | HPAGE_SIZE)) { | ||
| 290 | vma = next; | 289 | vma = next; |
| 291 | next = vma->vm_next; | 290 | next = vma->vm_next; |
| 292 | anon_vma_unlink(vma); | 291 | anon_vma_unlink(vma); |
| @@ -388,7 +387,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_ | |||
| 388 | { | 387 | { |
| 389 | unsigned long pfn = pte_pfn(pte); | 388 | unsigned long pfn = pte_pfn(pte); |
| 390 | 389 | ||
| 391 | if (vma->vm_flags & VM_PFNMAP) { | 390 | if (unlikely(vma->vm_flags & VM_PFNMAP)) { |
| 392 | unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; | 391 | unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; |
| 393 | if (pfn == vma->vm_pgoff + off) | 392 | if (pfn == vma->vm_pgoff + off) |
| 394 | return NULL; | 393 | return NULL; |
| @@ -396,18 +395,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_ | |||
| 396 | return NULL; | 395 | return NULL; |
| 397 | } | 396 | } |
| 398 | 397 | ||
| 399 | /* | 398 | #ifdef CONFIG_DEBUG_VM |
| 400 | * Add some anal sanity checks for now. Eventually, | ||
| 401 | * we should just do "return pfn_to_page(pfn)", but | ||
| 402 | * in the meantime we check that we get a valid pfn, | ||
| 403 | * and that the resulting page looks ok. | ||
| 404 | * | ||
| 405 | * Remove this test eventually! | ||
| 406 | */ | ||
| 407 | if (unlikely(!pfn_valid(pfn))) { | 399 | if (unlikely(!pfn_valid(pfn))) { |
| 408 | print_bad_pte(vma, pte, addr); | 400 | print_bad_pte(vma, pte, addr); |
| 409 | return NULL; | 401 | return NULL; |
| 410 | } | 402 | } |
| 403 | #endif | ||
| 411 | 404 | ||
| 412 | /* | 405 | /* |
| 413 | * NOTE! We still have PageReserved() pages in the page | 406 | * NOTE! We still have PageReserved() pages in the page |
| @@ -1221,9 +1214,7 @@ out: | |||
| 1221 | * The page has to be a nice clean _individual_ kernel allocation. | 1214 | * The page has to be a nice clean _individual_ kernel allocation. |
| 1222 | * If you allocate a compound page, you need to have marked it as | 1215 | * If you allocate a compound page, you need to have marked it as |
| 1223 | * such (__GFP_COMP), or manually just split the page up yourself | 1216 | * such (__GFP_COMP), or manually just split the page up yourself |
| 1224 | * (which is mainly an issue of doing "set_page_count(page, 1)" for | 1217 | * (see split_page()). |
| 1225 | * each sub-page, and then freeing them one by one when you free | ||
| 1226 | * them rather than freeing it as a compound page). | ||
| 1227 | * | 1218 | * |
| 1228 | * NOTE! Traditionally this was done with "remap_pfn_range()" which | 1219 | * NOTE! Traditionally this was done with "remap_pfn_range()" which |
| 1229 | * took an arbitrary page protection parameter. This doesn't allow | 1220 | * took an arbitrary page protection parameter. This doesn't allow |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b21869a39f0b..e93cc740c22b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
| @@ -86,6 +86,7 @@ | |||
| 86 | #include <linux/swap.h> | 86 | #include <linux/swap.h> |
| 87 | #include <linux/seq_file.h> | 87 | #include <linux/seq_file.h> |
| 88 | #include <linux/proc_fs.h> | 88 | #include <linux/proc_fs.h> |
| 89 | #include <linux/migrate.h> | ||
| 89 | 90 | ||
| 90 | #include <asm/tlbflush.h> | 91 | #include <asm/tlbflush.h> |
| 91 | #include <asm/uaccess.h> | 92 | #include <asm/uaccess.h> |
| @@ -95,11 +96,8 @@ | |||
| 95 | #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ | 96 | #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ |
| 96 | #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ | 97 | #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ |
| 97 | 98 | ||
| 98 | /* The number of pages to migrate per call to migrate_pages() */ | 99 | static struct kmem_cache *policy_cache; |
| 99 | #define MIGRATE_CHUNK_SIZE 256 | 100 | static struct kmem_cache *sn_cache; |
| 100 | |||
| 101 | static kmem_cache_t *policy_cache; | ||
| 102 | static kmem_cache_t *sn_cache; | ||
| 103 | 101 | ||
| 104 | #define PDprintk(fmt...) | 102 | #define PDprintk(fmt...) |
| 105 | 103 | ||
| @@ -331,17 +329,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
| 331 | struct vm_area_struct *first, *vma, *prev; | 329 | struct vm_area_struct *first, *vma, *prev; |
| 332 | 330 | ||
| 333 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | 331 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { |
| 334 | /* Must have swap device for migration */ | ||
| 335 | if (nr_swap_pages <= 0) | ||
| 336 | return ERR_PTR(-ENODEV); | ||
| 337 | 332 | ||
| 338 | /* | 333 | err = migrate_prep(); |
| 339 | * Clear the LRU lists so pages can be isolated. | 334 | if (err) |
| 340 | * Note that pages may be moved off the LRU after we have | 335 | return ERR_PTR(err); |
| 341 | * drained them. Those pages will fail to migrate like other | ||
| 342 | * pages that may be busy. | ||
| 343 | */ | ||
| 344 | lru_add_drain_all(); | ||
| 345 | } | 336 | } |
| 346 | 337 | ||
| 347 | first = find_vma(mm, start); | 338 | first = find_vma(mm, start); |
| @@ -550,92 +541,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
| 550 | return err; | 541 | return err; |
| 551 | } | 542 | } |
| 552 | 543 | ||
| 544 | #ifdef CONFIG_MIGRATION | ||
| 553 | /* | 545 | /* |
| 554 | * page migration | 546 | * page migration |
| 555 | */ | 547 | */ |
| 556 | |||
| 557 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | 548 | static void migrate_page_add(struct page *page, struct list_head *pagelist, |
| 558 | unsigned long flags) | 549 | unsigned long flags) |
| 559 | { | 550 | { |
| 560 | /* | 551 | /* |
| 561 | * Avoid migrating a page that is shared with others. | 552 | * Avoid migrating a page that is shared with others. |
| 562 | */ | 553 | */ |
| 563 | if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { | 554 | if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) |
| 564 | if (isolate_lru_page(page)) | 555 | isolate_lru_page(page, pagelist); |
| 565 | list_add_tail(&page->lru, pagelist); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 569 | /* | ||
| 570 | * Migrate the list 'pagelist' of pages to a certain destination. | ||
| 571 | * | ||
| 572 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
| 573 | * Return the number of pages not migrated or error code | ||
| 574 | */ | ||
| 575 | static int migrate_pages_to(struct list_head *pagelist, | ||
| 576 | struct vm_area_struct *vma, int dest) | ||
| 577 | { | ||
| 578 | LIST_HEAD(newlist); | ||
| 579 | LIST_HEAD(moved); | ||
| 580 | LIST_HEAD(failed); | ||
| 581 | int err = 0; | ||
| 582 | unsigned long offset = 0; | ||
| 583 | int nr_pages; | ||
| 584 | struct page *page; | ||
| 585 | struct list_head *p; | ||
| 586 | |||
| 587 | redo: | ||
| 588 | nr_pages = 0; | ||
| 589 | list_for_each(p, pagelist) { | ||
| 590 | if (vma) { | ||
| 591 | /* | ||
| 592 | * The address passed to alloc_page_vma is used to | ||
| 593 | * generate the proper interleave behavior. We fake | ||
| 594 | * the address here by an increasing offset in order | ||
| 595 | * to get the proper distribution of pages. | ||
| 596 | * | ||
| 597 | * No decision has been made as to which page | ||
| 598 | * a certain old page is moved to so we cannot | ||
| 599 | * specify the correct address. | ||
| 600 | */ | ||
| 601 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
| 602 | offset + vma->vm_start); | ||
| 603 | offset += PAGE_SIZE; | ||
| 604 | } | ||
| 605 | else | ||
| 606 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
| 607 | |||
| 608 | if (!page) { | ||
| 609 | err = -ENOMEM; | ||
| 610 | goto out; | ||
| 611 | } | ||
| 612 | list_add_tail(&page->lru, &newlist); | ||
| 613 | nr_pages++; | ||
| 614 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
| 615 | break; | ||
| 616 | } | ||
| 617 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | ||
| 618 | |||
| 619 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | ||
| 620 | |||
| 621 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
| 622 | goto redo; | ||
| 623 | out: | ||
| 624 | /* Return leftover allocated pages */ | ||
| 625 | while (!list_empty(&newlist)) { | ||
| 626 | page = list_entry(newlist.next, struct page, lru); | ||
| 627 | list_del(&page->lru); | ||
| 628 | __free_page(page); | ||
| 629 | } | ||
| 630 | list_splice(&failed, pagelist); | ||
| 631 | if (err < 0) | ||
| 632 | return err; | ||
| 633 | |||
| 634 | /* Calculate number of leftover pages */ | ||
| 635 | nr_pages = 0; | ||
| 636 | list_for_each(p, pagelist) | ||
| 637 | nr_pages++; | ||
| 638 | return nr_pages; | ||
| 639 | } | 556 | } |
| 640 | 557 | ||
| 641 | /* | 558 | /* |
| @@ -742,8 +659,23 @@ int do_migrate_pages(struct mm_struct *mm, | |||
| 742 | if (err < 0) | 659 | if (err < 0) |
| 743 | return err; | 660 | return err; |
| 744 | return busy; | 661 | return busy; |
| 662 | |||
| 745 | } | 663 | } |
| 746 | 664 | ||
| 665 | #else | ||
| 666 | |||
| 667 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | ||
| 668 | unsigned long flags) | ||
| 669 | { | ||
| 670 | } | ||
| 671 | |||
| 672 | int do_migrate_pages(struct mm_struct *mm, | ||
| 673 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | ||
| 674 | { | ||
| 675 | return -ENOSYS; | ||
| 676 | } | ||
| 677 | #endif | ||
| 678 | |||
| 747 | long do_mbind(unsigned long start, unsigned long len, | 679 | long do_mbind(unsigned long start, unsigned long len, |
| 748 | unsigned long mode, nodemask_t *nmask, unsigned long flags) | 680 | unsigned long mode, nodemask_t *nmask, unsigned long flags) |
| 749 | { | 681 | { |
| @@ -808,6 +740,7 @@ long do_mbind(unsigned long start, unsigned long len, | |||
| 808 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) | 740 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) |
| 809 | err = -EIO; | 741 | err = -EIO; |
| 810 | } | 742 | } |
| 743 | |||
| 811 | if (!list_empty(&pagelist)) | 744 | if (!list_empty(&pagelist)) |
| 812 | putback_lru_pages(&pagelist); | 745 | putback_lru_pages(&pagelist); |
| 813 | 746 | ||
diff --git a/mm/mempool.c b/mm/mempool.c index 1a99b80480d3..f71893ed3543 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
| @@ -278,14 +278,14 @@ EXPORT_SYMBOL(mempool_free); | |||
| 278 | */ | 278 | */ |
| 279 | void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) | 279 | void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) |
| 280 | { | 280 | { |
| 281 | kmem_cache_t *mem = (kmem_cache_t *) pool_data; | 281 | struct kmem_cache *mem = pool_data; |
| 282 | return kmem_cache_alloc(mem, gfp_mask); | 282 | return kmem_cache_alloc(mem, gfp_mask); |
| 283 | } | 283 | } |
| 284 | EXPORT_SYMBOL(mempool_alloc_slab); | 284 | EXPORT_SYMBOL(mempool_alloc_slab); |
| 285 | 285 | ||
| 286 | void mempool_free_slab(void *element, void *pool_data) | 286 | void mempool_free_slab(void *element, void *pool_data) |
| 287 | { | 287 | { |
| 288 | kmem_cache_t *mem = (kmem_cache_t *) pool_data; | 288 | struct kmem_cache *mem = pool_data; |
| 289 | kmem_cache_free(mem, element); | 289 | kmem_cache_free(mem, element); |
| 290 | } | 290 | } |
| 291 | EXPORT_SYMBOL(mempool_free_slab); | 291 | EXPORT_SYMBOL(mempool_free_slab); |
diff --git a/mm/migrate.c b/mm/migrate.c new file mode 100644 index 000000000000..09f6e4aa87fc --- /dev/null +++ b/mm/migrate.c | |||
| @@ -0,0 +1,655 @@ | |||
| 1 | /* | ||
| 2 | * Memory Migration functionality - linux/mm/migration.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter | ||
| 5 | * | ||
| 6 | * Page migration was first developed in the context of the memory hotplug | ||
| 7 | * project. The main authors of the migration code are: | ||
| 8 | * | ||
| 9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | ||
| 10 | * Hirokazu Takahashi <taka@valinux.co.jp> | ||
| 11 | * Dave Hansen <haveblue@us.ibm.com> | ||
| 12 | * Christoph Lameter <clameter@sgi.com> | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/migrate.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/swap.h> | ||
| 18 | #include <linux/pagemap.h> | ||
| 19 | #include <linux/buffer_head.h> /* for try_to_release_page(), | ||
| 20 | buffer_heads_over_limit */ | ||
| 21 | #include <linux/mm_inline.h> | ||
| 22 | #include <linux/pagevec.h> | ||
| 23 | #include <linux/rmap.h> | ||
| 24 | #include <linux/topology.h> | ||
| 25 | #include <linux/cpu.h> | ||
| 26 | #include <linux/cpuset.h> | ||
| 27 | #include <linux/swapops.h> | ||
| 28 | |||
| 29 | #include "internal.h" | ||
| 30 | |||
| 32 | |||
| 33 | /* The maximum number of pages to take off the LRU for migration */ | ||
| 34 | #define MIGRATE_CHUNK_SIZE 256 | ||
| 35 | |||
| 36 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | ||
| 37 | |||
| 38 | /* | ||
| 39 | * Isolate one page from the LRU lists. If successful put it onto | ||
| 40 | * the indicated list with elevated page count. | ||
| 41 | * | ||
| 42 | * Result: | ||
| 43 | * -EBUSY: page not on LRU list | ||
| 44 | * 0: page removed from LRU list and added to the specified list. | ||
| 45 | */ | ||
| 46 | int isolate_lru_page(struct page *page, struct list_head *pagelist) | ||
| 47 | { | ||
| 48 | int ret = -EBUSY; | ||
| 49 | |||
| 50 | if (PageLRU(page)) { | ||
| 51 | struct zone *zone = page_zone(page); | ||
| 52 | |||
| 53 | spin_lock_irq(&zone->lru_lock); | ||
| 54 | if (PageLRU(page)) { | ||
| 55 | ret = 0; | ||
| 56 | get_page(page); | ||
| 57 | ClearPageLRU(page); | ||
| 58 | if (PageActive(page)) | ||
| 59 | del_page_from_active_list(zone, page); | ||
| 60 | else | ||
| 61 | del_page_from_inactive_list(zone, page); | ||
| 62 | list_add_tail(&page->lru, pagelist); | ||
| 63 | } | ||
| 64 | spin_unlock_irq(&zone->lru_lock); | ||
| 65 | } | ||
| 66 | return ret; | ||
| 67 | } | ||
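
isolate_lru_page() tests PageLRU twice: once without the lock as a cheap fast path, and again under zone->lru_lock before actually taking the page off the list, since the flag can change in between. A generic userspace sketch of that check-lock-recheck pattern follows; the boolean stands in for the page flag.

/* Generic sketch of the check-lock-recheck pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static bool on_list;		/* stands in for PageLRU() */

static int try_take(void)
{
	int ret = -1;		/* -EBUSY in the kernel code */

	if (on_list) {				/* unlocked fast-path test */
		pthread_mutex_lock(&list_lock);
		if (on_list) {			/* recheck under the lock */
			on_list = false;
			ret = 0;
		}
		pthread_mutex_unlock(&list_lock);
	}
	return ret;
}

int main(void)
{
	int first, second;

	on_list = true;
	first = try_take();		/* succeeds, returns 0 */
	second = try_take();		/* already taken, returns -1 */
	printf("%d %d\n", first, second);
	return 0;
}
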
| 68 | |||
| 69 | /* | ||
| 70 | * migrate_prep() needs to be called after we have compiled the list of pages | ||
| 71 | * to be migrated using isolate_lru_page() but before we begin a series of calls | ||
| 72 | * to migrate_pages(). | ||
| 73 | */ | ||
| 74 | int migrate_prep(void) | ||
| 75 | { | ||
| 76 | /* Must have swap device for migration */ | ||
| 77 | if (nr_swap_pages <= 0) | ||
| 78 | return -ENODEV; | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Clear the LRU lists so pages can be isolated. | ||
| 82 | * Note that pages may be moved off the LRU after we have | ||
| 83 | * drained them. Those pages will fail to migrate like other | ||
| 84 | * pages that may be busy. | ||
| 85 | */ | ||
| 86 | lru_add_drain_all(); | ||
| 87 | |||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | static inline void move_to_lru(struct page *page) | ||
| 92 | { | ||
| 93 | list_del(&page->lru); | ||
| 94 | if (PageActive(page)) { | ||
| 95 | /* | ||
| 96 | * lru_cache_add_active checks that | ||
| 97 | * the PG_active bit is off. | ||
| 98 | */ | ||
| 99 | ClearPageActive(page); | ||
| 100 | lru_cache_add_active(page); | ||
| 101 | } else { | ||
| 102 | lru_cache_add(page); | ||
| 103 | } | ||
| 104 | put_page(page); | ||
| 105 | } | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Add isolated pages on the list back to the LRU. | ||
| 109 | * | ||
| 110 | * returns the number of pages put back. | ||
| 111 | */ | ||
| 112 | int putback_lru_pages(struct list_head *l) | ||
| 113 | { | ||
| 114 | struct page *page; | ||
| 115 | struct page *page2; | ||
| 116 | int count = 0; | ||
| 117 | |||
| 118 | list_for_each_entry_safe(page, page2, l, lru) { | ||
| 119 | move_to_lru(page); | ||
| 120 | count++; | ||
| 121 | } | ||
| 122 | return count; | ||
| 123 | } | ||
| 124 | |||
| 125 | /* | ||
| 126 | * Non migratable page | ||
| 127 | */ | ||
| 128 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
| 129 | { | ||
| 130 | return -EIO; | ||
| 131 | } | ||
| 132 | EXPORT_SYMBOL(fail_migrate_page); | ||
| 133 | |||
| 134 | /* | ||
| 135 | * swapout a single page | ||
| 136 | * page is locked upon entry, unlocked on exit | ||
| 137 | */ | ||
| 138 | static int swap_page(struct page *page) | ||
| 139 | { | ||
| 140 | struct address_space *mapping = page_mapping(page); | ||
| 141 | |||
| 142 | if (page_mapped(page) && mapping) | ||
| 143 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | ||
| 144 | goto unlock_retry; | ||
| 145 | |||
| 146 | if (PageDirty(page)) { | ||
| 147 | /* Page is dirty, try to write it out here */ | ||
| 148 | switch(pageout(page, mapping)) { | ||
| 149 | case PAGE_KEEP: | ||
| 150 | case PAGE_ACTIVATE: | ||
| 151 | goto unlock_retry; | ||
| 152 | |||
| 153 | case PAGE_SUCCESS: | ||
| 154 | goto retry; | ||
| 155 | |||
| 156 | case PAGE_CLEAN: | ||
| 157 | ; /* try to free the page below */ | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | if (PagePrivate(page)) { | ||
| 162 | if (!try_to_release_page(page, GFP_KERNEL) || | ||
| 163 | (!mapping && page_count(page) == 1)) | ||
| 164 | goto unlock_retry; | ||
| 165 | } | ||
| 166 | |||
| 167 | if (remove_mapping(mapping, page)) { | ||
| 168 | /* Success */ | ||
| 169 | unlock_page(page); | ||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | |||
| 173 | unlock_retry: | ||
| 174 | unlock_page(page); | ||
| 175 | |||
| 176 | retry: | ||
| 177 | return -EAGAIN; | ||
| 178 | } | ||
| 179 | EXPORT_SYMBOL(swap_page); | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Remove references for a page and establish the new page with the correct | ||
| 183 | * basic settings to be able to stop accesses to the page. | ||
| 184 | */ | ||
| 185 | int migrate_page_remove_references(struct page *newpage, | ||
| 186 | struct page *page, int nr_refs) | ||
| 187 | { | ||
| 188 | struct address_space *mapping = page_mapping(page); | ||
| 189 | struct page **radix_pointer; | ||
| 190 | |||
| 191 | /* | ||
| 192 | * Avoid doing any of the following work if the page count | ||
| 193 | * indicates that the page is in use or truncate has removed | ||
| 194 | * the page. | ||
| 195 | */ | ||
| 196 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
| 197 | return -EAGAIN; | ||
| 198 | |||
| 199 | /* | ||
| 200 | * Establish swap ptes for anonymous pages or destroy pte | ||
| 201 | * maps for files. | ||
| 202 | * | ||
| 203 | * In order to reestablish file backed mappings the fault handlers | ||
| 204 | * will take the radix tree_lock which may then be used to stop | ||
| 205 | * processes from accessing this page until the new page is ready. | ||
| 206 | * | ||
| 207 | * A process accessing via a swap pte (an anonymous page) will take a | ||
| 208 | * page_lock on the old page which will block the process until the | ||
| 209 | * migration attempt is complete. At that time the PageSwapCache bit | ||
| 210 | * will be examined. If the page was migrated then the PageSwapCache | ||
| 211 | * bit will be clear and the operation to retrieve the page will be | ||
| 212 | * retried which will find the new page in the radix tree. Then a new | ||
| 213 | * direct mapping may be generated based on the radix tree contents. | ||
| 214 | * | ||
| 215 | * If the page was not migrated then the PageSwapCache bit | ||
| 216 | * is still set and the operation may continue. | ||
| 217 | */ | ||
| 218 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
| 219 | /* A vma has VM_LOCKED set -> permanent failure */ | ||
| 220 | return -EPERM; | ||
| 221 | |||
| 222 | /* | ||
| 223 | * Give up if we were unable to remove all mappings. | ||
| 224 | */ | ||
| 225 | if (page_mapcount(page)) | ||
| 226 | return -EAGAIN; | ||
| 227 | |||
| 228 | write_lock_irq(&mapping->tree_lock); | ||
| 229 | |||
| 230 | radix_pointer = (struct page **)radix_tree_lookup_slot( | ||
| 231 | &mapping->page_tree, | ||
| 232 | page_index(page)); | ||
| 233 | |||
| 234 | if (!page_mapping(page) || page_count(page) != nr_refs || | ||
| 235 | *radix_pointer != page) { | ||
| 236 | write_unlock_irq(&mapping->tree_lock); | ||
| 237 | return 1; | ||
| 238 | } | ||
| 239 | |||
| 240 | /* | ||
| 241 | * Now we know that no one else is looking at the page. | ||
| 242 | * | ||
| 243 | * Certain minimal information about a page must be available | ||
| 244 | * in order for other subsystems to properly handle the page if they | ||
| 245 | * find it through the radix tree update before we are finished | ||
| 246 | * copying the page. | ||
| 247 | */ | ||
| 248 | get_page(newpage); | ||
| 249 | newpage->index = page->index; | ||
| 250 | newpage->mapping = page->mapping; | ||
| 251 | if (PageSwapCache(page)) { | ||
| 252 | SetPageSwapCache(newpage); | ||
| 253 | set_page_private(newpage, page_private(page)); | ||
| 254 | } | ||
| 255 | |||
| 256 | *radix_pointer = newpage; | ||
| 257 | __put_page(page); | ||
| 258 | write_unlock_irq(&mapping->tree_lock); | ||
| 259 | |||
| 260 | return 0; | ||
| 261 | } | ||
| 262 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Copy the page to its new location | ||
| 266 | */ | ||
| 267 | void migrate_page_copy(struct page *newpage, struct page *page) | ||
| 268 | { | ||
| 269 | copy_highpage(newpage, page); | ||
| 270 | |||
| 271 | if (PageError(page)) | ||
| 272 | SetPageError(newpage); | ||
| 273 | if (PageReferenced(page)) | ||
| 274 | SetPageReferenced(newpage); | ||
| 275 | if (PageUptodate(page)) | ||
| 276 | SetPageUptodate(newpage); | ||
| 277 | if (PageActive(page)) | ||
| 278 | SetPageActive(newpage); | ||
| 279 | if (PageChecked(page)) | ||
| 280 | SetPageChecked(newpage); | ||
| 281 | if (PageMappedToDisk(page)) | ||
| 282 | SetPageMappedToDisk(newpage); | ||
| 283 | |||
| 284 | if (PageDirty(page)) { | ||
| 285 | clear_page_dirty_for_io(page); | ||
| 286 | set_page_dirty(newpage); | ||
| 287 | } | ||
| 288 | |||
| 289 | ClearPageSwapCache(page); | ||
| 290 | ClearPageActive(page); | ||
| 291 | ClearPagePrivate(page); | ||
| 292 | set_page_private(page, 0); | ||
| 293 | page->mapping = NULL; | ||
| 294 | |||
| 295 | /* | ||
| 296 | * If any waiters have accumulated on the new page then | ||
| 297 | * wake them up. | ||
| 298 | */ | ||
| 299 | if (PageWriteback(newpage)) | ||
| 300 | end_page_writeback(newpage); | ||
| 301 | } | ||
| 302 | EXPORT_SYMBOL(migrate_page_copy); | ||
| 303 | |||
| 304 | /* | ||
| 305 | * Common logic to directly migrate a single page suitable for | ||
| 306 | * pages that do not use PagePrivate. | ||
| 307 | * | ||
| 308 | * Pages are locked upon entry and exit. | ||
| 309 | */ | ||
| 310 | int migrate_page(struct page *newpage, struct page *page) | ||
| 311 | { | ||
| 312 | int rc; | ||
| 313 | |||
| 314 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | ||
| 315 | |||
| 316 | rc = migrate_page_remove_references(newpage, page, 2); | ||
| 317 | |||
| 318 | if (rc) | ||
| 319 | return rc; | ||
| 320 | |||
| 321 | migrate_page_copy(newpage, page); | ||
| 322 | |||
| 323 | /* | ||
| 324 | * Remove auxiliary swap entries and replace | ||
| 325 | * them with real ptes. | ||
| 326 | * | ||
| 327 | * Note that a real pte entry will allow processes that are not | ||
| 328 | * waiting on the page lock to use the new page via the page tables | ||
| 329 | * before the new page is unlocked. | ||
| 330 | */ | ||
| 331 | remove_from_swap(newpage); | ||
| 332 | return 0; | ||
| 333 | } | ||
| 334 | EXPORT_SYMBOL(migrate_page); | ||
| 335 | |||
| 336 | /* | ||
| 337 | * migrate_pages | ||
| 338 | * | ||
| 339 | * Two lists are passed to this function. The first list | ||
| 340 | * contains the pages isolated from the LRU to be migrated. | ||
| 341 | * The second list contains new pages that the pages isolated | ||
| 342 | * can be moved to. If the second list is NULL then all | ||
| 343 | * pages are swapped out. | ||
| 344 | * | ||
| 345 | * The function returns after 10 attempts or if no pages | ||
| 346 | * are movable anymore because "to" has become empty | ||
| 347 | * or no retryable pages exist anymore. | ||
| 348 | * | ||
| 349 | * Return: Number of pages not migrated when "to" ran empty. | ||
| 350 | */ | ||
| 351 | int migrate_pages(struct list_head *from, struct list_head *to, | ||
| 352 | struct list_head *moved, struct list_head *failed) | ||
| 353 | { | ||
| 354 | int retry; | ||
| 355 | int nr_failed = 0; | ||
| 356 | int pass = 0; | ||
| 357 | struct page *page; | ||
| 358 | struct page *page2; | ||
| 359 | int swapwrite = current->flags & PF_SWAPWRITE; | ||
| 360 | int rc; | ||
| 361 | |||
| 362 | if (!swapwrite) | ||
| 363 | current->flags |= PF_SWAPWRITE; | ||
| 364 | |||
| 365 | redo: | ||
| 366 | retry = 0; | ||
| 367 | |||
| 368 | list_for_each_entry_safe(page, page2, from, lru) { | ||
| 369 | struct page *newpage = NULL; | ||
| 370 | struct address_space *mapping; | ||
| 371 | |||
| 372 | cond_resched(); | ||
| 373 | |||
| 374 | rc = 0; | ||
| 375 | if (page_count(page) == 1) | ||
| 376 | /* page was freed from under us. So we are done. */ | ||
| 377 | goto next; | ||
| 378 | |||
| 379 | if (to && list_empty(to)) | ||
| 380 | break; | ||
| 381 | |||
| 382 | /* | ||
| 383 | * Skip locked pages during the first two passes to give the | ||
| 384 | * functions holding the lock time to release the page. Later we | ||
| 385 | * use lock_page() to have a higher chance of acquiring the | ||
| 386 | * lock. | ||
| 387 | */ | ||
| 388 | rc = -EAGAIN; | ||
| 389 | if (pass > 2) | ||
| 390 | lock_page(page); | ||
| 391 | else | ||
| 392 | if (TestSetPageLocked(page)) | ||
| 393 | goto next; | ||
| 394 | |||
| 395 | /* | ||
| 396 | * Only wait on writeback if we have already done a pass where | ||
| 397 | * we may have triggered writeouts for lots of pages. | ||
| 398 | */ | ||
| 399 | if (pass > 0) { | ||
| 400 | wait_on_page_writeback(page); | ||
| 401 | } else { | ||
| 402 | if (PageWriteback(page)) | ||
| 403 | goto unlock_page; | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Anonymous pages must have swap cache references otherwise | ||
| 408 | * the information contained in the page maps cannot be | ||
| 409 | * preserved. | ||
| 410 | */ | ||
| 411 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
| 412 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
| 413 | rc = -ENOMEM; | ||
| 414 | goto unlock_page; | ||
| 415 | } | ||
| 416 | } | ||
| 417 | |||
| 418 | if (!to) { | ||
| 419 | rc = swap_page(page); | ||
| 420 | goto next; | ||
| 421 | } | ||
| 422 | |||
| 423 | newpage = lru_to_page(to); | ||
| 424 | lock_page(newpage); | ||
| 425 | |||
| 426 | /* | ||
| 427 | * Pages are properly locked and writeback is complete. | ||
| 428 | * Try to migrate the page. | ||
| 429 | */ | ||
| 430 | mapping = page_mapping(page); | ||
| 431 | if (!mapping) | ||
| 432 | goto unlock_both; | ||
| 433 | |||
| 434 | if (mapping->a_ops->migratepage) { | ||
| 435 | /* | ||
| 436 | * Most pages have a mapping and most filesystems | ||
| 437 | * should provide a migration function. Anonymous | ||
| 438 | * pages are part of swap space which also has its | ||
| 439 | * own migration function. This is the most common | ||
| 440 | * path for page migration. | ||
| 441 | */ | ||
| 442 | rc = mapping->a_ops->migratepage(newpage, page); | ||
| 443 | goto unlock_both; | ||
| 444 | } | ||
| 445 | |||
| 446 | /* | ||
| 447 | * Default handling if a filesystem does not provide | ||
| 448 | * a migration function. We can only migrate clean | ||
| 449 | * pages so try to write out any dirty pages first. | ||
| 450 | */ | ||
| 451 | if (PageDirty(page)) { | ||
| 452 | switch (pageout(page, mapping)) { | ||
| 453 | case PAGE_KEEP: | ||
| 454 | case PAGE_ACTIVATE: | ||
| 455 | goto unlock_both; | ||
| 456 | |||
| 457 | case PAGE_SUCCESS: | ||
| 458 | unlock_page(newpage); | ||
| 459 | goto next; | ||
| 460 | |||
| 461 | case PAGE_CLEAN: | ||
| 462 | ; /* try to migrate the page below */ | ||
| 463 | } | ||
| 464 | } | ||
| 465 | |||
| 466 | /* | ||
| 467 | * Buffers are managed in a filesystem specific way. | ||
| 468 | * We must have no buffers or drop them. | ||
| 469 | */ | ||
| 470 | if (!page_has_buffers(page) || | ||
| 471 | try_to_release_page(page, GFP_KERNEL)) { | ||
| 472 | rc = migrate_page(newpage, page); | ||
| 473 | goto unlock_both; | ||
| 474 | } | ||
| 475 | |||
| 476 | /* | ||
| 477 | * On early passes with mapped pages simply | ||
| 478 | * retry. There may be a lock held for some | ||
| 479 | * buffers that may go away. Later | ||
| 480 | * swap them out. | ||
| 481 | */ | ||
| 482 | if (pass > 4) { | ||
| 483 | /* | ||
| 484 | * Persistently unable to drop buffers..... As a | ||
| 485 | * measure of last resort we fall back to | ||
| 486 | * swap_page(). | ||
| 487 | */ | ||
| 488 | unlock_page(newpage); | ||
| 489 | newpage = NULL; | ||
| 490 | rc = swap_page(page); | ||
| 491 | goto next; | ||
| 492 | } | ||
| 493 | |||
| 494 | unlock_both: | ||
| 495 | unlock_page(newpage); | ||
| 496 | |||
| 497 | unlock_page: | ||
| 498 | unlock_page(page); | ||
| 499 | |||
| 500 | next: | ||
| 501 | if (rc == -EAGAIN) { | ||
| 502 | retry++; | ||
| 503 | } else if (rc) { | ||
| 504 | /* Permanent failure */ | ||
| 505 | list_move(&page->lru, failed); | ||
| 506 | nr_failed++; | ||
| 507 | } else { | ||
| 508 | if (newpage) { | ||
| 509 | /* Successful migration. Return page to LRU */ | ||
| 510 | move_to_lru(newpage); | ||
| 511 | } | ||
| 512 | list_move(&page->lru, moved); | ||
| 513 | } | ||
| 514 | } | ||
| 515 | if (retry && pass++ < 10) | ||
| 516 | goto redo; | ||
| 517 | |||
| 518 | if (!swapwrite) | ||
| 519 | current->flags &= ~PF_SWAPWRITE; | ||
| 520 | |||
| 521 | return nr_failed + retry; | ||
| 522 | } | ||
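
The control flow of migrate_pages() sorts every page into one of three outcomes per pass: moved, permanently failed, or -EAGAIN, and only the -EAGAIN pages are retried, for at most 10 passes. A userspace model of that retry policy is sketched below; attempt() is a made-up stub that just exercises the three outcomes.

/* Userspace model of the migrate_pages() retry policy. */
#include <stdio.h>

enum result { MOVED, FAILED, AGAIN };

static enum result attempt(int item, int pass)
{
	if (item == 2)
		return FAILED;		/* e.g. a VM_LOCKED mapping */
	if (item == 1 && pass < 3)
		return AGAIN;		/* e.g. page temporarily locked */
	return MOVED;
}

int main(void)
{
	int done[3] = { 0, 0, 0 };
	int pass = 0, retry, moved = 0, failed = 0;

	do {
		retry = 0;
		for (int i = 0; i < 3; i++) {
			if (done[i])
				continue;
			switch (attempt(i, pass)) {
			case AGAIN:
				retry++;
				break;
			case FAILED:
				failed++;
				done[i] = 1;
				break;
			case MOVED:
				moved++;
				done[i] = 1;
				break;
			}
		}
	} while (retry && pass++ < 10);

	printf("moved=%d failed=%d still pending=%d\n", moved, failed, retry);
	return 0;
}
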
| 523 | |||
| 524 | /* | ||
| 525 | * Migration function for pages with buffers. This function can only be used | ||
| 526 | * if the underlying filesystem guarantees that no other references to "page" | ||
| 527 | * exist. | ||
| 528 | */ | ||
| 529 | int buffer_migrate_page(struct page *newpage, struct page *page) | ||
| 530 | { | ||
| 531 | struct address_space *mapping = page->mapping; | ||
| 532 | struct buffer_head *bh, *head; | ||
| 533 | int rc; | ||
| 534 | |||
| 535 | if (!mapping) | ||
| 536 | return -EAGAIN; | ||
| 537 | |||
| 538 | if (!page_has_buffers(page)) | ||
| 539 | return migrate_page(newpage, page); | ||
| 540 | |||
| 541 | head = page_buffers(page); | ||
| 542 | |||
| 543 | rc = migrate_page_remove_references(newpage, page, 3); | ||
| 544 | |||
| 545 | if (rc) | ||
| 546 | return rc; | ||
| 547 | |||
| 548 | bh = head; | ||
| 549 | do { | ||
| 550 | get_bh(bh); | ||
| 551 | lock_buffer(bh); | ||
| 552 | bh = bh->b_this_page; | ||
| 553 | |||
| 554 | } while (bh != head); | ||
| 555 | |||
| 556 | ClearPagePrivate(page); | ||
| 557 | set_page_private(newpage, page_private(page)); | ||
| 558 | set_page_private(page, 0); | ||
| 559 | put_page(page); | ||
| 560 | get_page(newpage); | ||
| 561 | |||
| 562 | bh = head; | ||
| 563 | do { | ||
| 564 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
| 565 | bh = bh->b_this_page; | ||
| 566 | |||
| 567 | } while (bh != head); | ||
| 568 | |||
| 569 | SetPagePrivate(newpage); | ||
| 570 | |||
| 571 | migrate_page_copy(newpage, page); | ||
| 572 | |||
| 573 | bh = head; | ||
| 574 | do { | ||
| 575 | unlock_buffer(bh); | ||
| 576 | put_bh(bh); | ||
| 577 | bh = bh->b_this_page; | ||
| 578 | |||
| 579 | } while (bh != head); | ||
| 580 | |||
| 581 | return 0; | ||
| 582 | } | ||
| 583 | EXPORT_SYMBOL(buffer_migrate_page); | ||
| 584 | |||
| 585 | /* | ||
| 586 | * Migrate the list 'pagelist' of pages to a certain destination. | ||
| 587 | * | ||
| 588 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
| 589 | * Return the number of pages not migrated or error code | ||
| 590 | */ | ||
| 591 | int migrate_pages_to(struct list_head *pagelist, | ||
| 592 | struct vm_area_struct *vma, int dest) | ||
| 593 | { | ||
| 594 | LIST_HEAD(newlist); | ||
| 595 | LIST_HEAD(moved); | ||
| 596 | LIST_HEAD(failed); | ||
| 597 | int err = 0; | ||
| 598 | unsigned long offset = 0; | ||
| 599 | int nr_pages; | ||
| 600 | struct page *page; | ||
| 601 | struct list_head *p; | ||
| 602 | |||
| 603 | redo: | ||
| 604 | nr_pages = 0; | ||
| 605 | list_for_each(p, pagelist) { | ||
| 606 | if (vma) { | ||
| 607 | /* | ||
| 608 | * The address passed to alloc_page_vma is used to | ||
| 609 | * generate the proper interleave behavior. We fake | ||
| 610 | * the address here by an increasing offset in order | ||
| 611 | * to get the proper distribution of pages. | ||
| 612 | * | ||
| 613 | * No decision has been made as to which page | ||
| 614 | * a certain old page is moved to so we cannot | ||
| 615 | * specify the correct address. | ||
| 616 | */ | ||
| 617 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
| 618 | offset + vma->vm_start); | ||
| 619 | offset += PAGE_SIZE; | ||
| 620 | } | ||
| 621 | else | ||
| 622 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
| 623 | |||
| 624 | if (!page) { | ||
| 625 | err = -ENOMEM; | ||
| 626 | goto out; | ||
| 627 | } | ||
| 628 | list_add_tail(&page->lru, &newlist); | ||
| 629 | nr_pages++; | ||
| 630 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
| 631 | break; | ||
| 632 | } | ||
| 633 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | ||
| 634 | |||
| 635 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | ||
| 636 | |||
| 637 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
| 638 | goto redo; | ||
| 639 | out: | ||
| 640 | /* Return leftover allocated pages */ | ||
| 641 | while (!list_empty(&newlist)) { | ||
| 642 | page = list_entry(newlist.next, struct page, lru); | ||
| 643 | list_del(&page->lru); | ||
| 644 | __free_page(page); | ||
| 645 | } | ||
| 646 | list_splice(&failed, pagelist); | ||
| 647 | if (err < 0) | ||
| 648 | return err; | ||
| 649 | |||
| 650 | /* Calculate number of leftover pages */ | ||
| 651 | nr_pages = 0; | ||
| 652 | list_for_each(p, pagelist) | ||
| 653 | nr_pages++; | ||
| 654 | return nr_pages; | ||
| 655 | } | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
| @@ -612,7 +612,7 @@ again: remove_next = 1 + (end > next->vm_end); | |||
| 612 | * If the vma has a ->close operation then the driver probably needs to release | 612 | * If the vma has a ->close operation then the driver probably needs to release |
| 613 | * per-vma resources, so we don't attempt to merge those. | 613 | * per-vma resources, so we don't attempt to merge those. |
| 614 | */ | 614 | */ |
| 615 | #define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | 615 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) |
| 616 | 616 | ||
| 617 | static inline int is_mergeable_vma(struct vm_area_struct *vma, | 617 | static inline int is_mergeable_vma(struct vm_area_struct *vma, |
| 618 | struct file *file, unsigned long vm_flags) | 618 | struct file *file, unsigned long vm_flags) |
| @@ -845,14 +845,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags, | |||
| 845 | const unsigned long stack_flags | 845 | const unsigned long stack_flags |
| 846 | = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); | 846 | = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); |
| 847 | 847 | ||
| 848 | #ifdef CONFIG_HUGETLB | ||
| 849 | if (flags & VM_HUGETLB) { | ||
| 850 | if (!(flags & VM_DONTCOPY)) | ||
| 851 | mm->shared_vm += pages; | ||
| 852 | return; | ||
| 853 | } | ||
| 854 | #endif /* CONFIG_HUGETLB */ | ||
| 855 | |||
| 856 | if (file) { | 848 | if (file) { |
| 857 | mm->shared_vm += pages; | 849 | mm->shared_vm += pages; |
| 858 | if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) | 850 | if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) |
diff --git a/mm/mprotect.c b/mm/mprotect.c index 653b8571c1ed..4c14d4289b61 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
| @@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
| 124 | * a MAP_NORESERVE private mapping to writable will now reserve. | 124 | * a MAP_NORESERVE private mapping to writable will now reserve. |
| 125 | */ | 125 | */ |
| 126 | if (newflags & VM_WRITE) { | 126 | if (newflags & VM_WRITE) { |
| 127 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { | 127 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { |
| 128 | charged = nrpages; | 128 | charged = nrpages; |
| 129 | if (security_vm_enough_memory(charged)) | 129 | if (security_vm_enough_memory(charged)) |
| 130 | return -ENOMEM; | 130 | return -ENOMEM; |
| @@ -166,7 +166,10 @@ success: | |||
| 166 | */ | 166 | */ |
| 167 | vma->vm_flags = newflags; | 167 | vma->vm_flags = newflags; |
| 168 | vma->vm_page_prot = newprot; | 168 | vma->vm_page_prot = newprot; |
| 169 | change_protection(vma, start, end, newprot); | 169 | if (is_vm_hugetlb_page(vma)) |
| 170 | hugetlb_change_protection(vma, start, end, newprot); | ||
| 171 | else | ||
| 172 | change_protection(vma, start, end, newprot); | ||
| 170 | vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); | 173 | vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); |
| 171 | vm_stat_account(mm, newflags, vma->vm_file, nrpages); | 174 | vm_stat_account(mm, newflags, vma->vm_file, nrpages); |
| 172 | return 0; | 175 | return 0; |
| @@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) | |||
| 240 | 243 | ||
| 241 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ | 244 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ |
| 242 | 245 | ||
| 243 | if (is_vm_hugetlb_page(vma)) { | ||
| 244 | error = -EACCES; | ||
| 245 | goto out; | ||
| 246 | } | ||
| 247 | |||
| 248 | newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); | 246 | newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); |
| 249 | 247 | ||
| 250 | /* newflags >> 4 shift VM_MAY% in place of VM_% */ | 248 | /* newflags >> 4 shift VM_MAY% in place of VM_% */ |
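
With the hunks above, mprotect() on a hugetlb mapping is no longer rejected with -EACCES; it is routed to hugetlb_change_protection() instead. A userspace sketch of the behaviour this enables follows; the mount point /mnt/huge and the 2 MB huge page size are assumptions for the example.

/* Userspace sketch: mprotect() on a hugetlbfs mapping (paths/sizes assumed). */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>

#define HPAGE	(2UL * 1024 * 1024)

int main(void)
{
	int fd = open("/mnt/huge/demo", O_CREAT | O_RDWR, 0600);
	char *p = mmap(NULL, HPAGE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (fd < 0 || p == MAP_FAILED)
		return 1;
	p[0] = 1;				/* fault the huge page in */
	if (mprotect(p, HPAGE, PROT_READ))	/* previously always -EACCES */
		perror("mprotect");
	return 0;
}
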
diff --git a/mm/nommu.c b/mm/nommu.c index 4951f4786f28..db45efac17cc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
| @@ -159,7 +159,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | |||
| 159 | /* | 159 | /* |
| 160 | * kmalloc doesn't like __GFP_HIGHMEM for some reason | 160 | * kmalloc doesn't like __GFP_HIGHMEM for some reason |
| 161 | */ | 161 | */ |
| 162 | return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM); | 162 | return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | struct page * vmalloc_to_page(void *addr) | 165 | struct page * vmalloc_to_page(void *addr) |
| @@ -623,7 +623,7 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) | |||
| 623 | * - note that this may not return a page-aligned address if the object | 623 | * - note that this may not return a page-aligned address if the object |
| 624 | * we're allocating is smaller than a page | 624 | * we're allocating is smaller than a page |
| 625 | */ | 625 | */ |
| 626 | base = kmalloc(len, GFP_KERNEL); | 626 | base = kmalloc(len, GFP_KERNEL|__GFP_COMP); |
| 627 | if (!base) | 627 | if (!base) |
| 628 | goto enomem; | 628 | goto enomem; |
| 629 | 629 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 234bd4895d14..b7f14a4799a5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -55,7 +55,6 @@ unsigned long totalhigh_pages __read_mostly; | |||
| 55 | long nr_swap_pages; | 55 | long nr_swap_pages; |
| 56 | int percpu_pagelist_fraction; | 56 | int percpu_pagelist_fraction; |
| 57 | 57 | ||
| 58 | static void fastcall free_hot_cold_page(struct page *page, int cold); | ||
| 59 | static void __free_pages_ok(struct page *page, unsigned int order); | 58 | static void __free_pages_ok(struct page *page, unsigned int order); |
| 60 | 59 | ||
| 61 | /* | 60 | /* |
| @@ -190,7 +189,7 @@ static void prep_compound_page(struct page *page, unsigned long order) | |||
| 190 | for (i = 0; i < nr_pages; i++) { | 189 | for (i = 0; i < nr_pages; i++) { |
| 191 | struct page *p = page + i; | 190 | struct page *p = page + i; |
| 192 | 191 | ||
| 193 | SetPageCompound(p); | 192 | __SetPageCompound(p); |
| 194 | set_page_private(p, (unsigned long)page); | 193 | set_page_private(p, (unsigned long)page); |
| 195 | } | 194 | } |
| 196 | } | 195 | } |
| @@ -209,10 +208,24 @@ static void destroy_compound_page(struct page *page, unsigned long order) | |||
| 209 | if (unlikely(!PageCompound(p) | | 208 | if (unlikely(!PageCompound(p) | |
| 210 | (page_private(p) != (unsigned long)page))) | 209 | (page_private(p) != (unsigned long)page))) |
| 211 | bad_page(page); | 210 | bad_page(page); |
| 212 | ClearPageCompound(p); | 211 | __ClearPageCompound(p); |
| 213 | } | 212 | } |
| 214 | } | 213 | } |
| 215 | 214 | ||
| 215 | static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | ||
| 216 | { | ||
| 217 | int i; | ||
| 218 | |||
| 219 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | ||
| 220 | /* | ||
| 221 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO | ||
| 222 | * and __GFP_HIGHMEM from hard or soft interrupt context. | ||
| 223 | */ | ||
| 224 | BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); | ||
| 225 | for (i = 0; i < (1 << order); i++) | ||
| 226 | clear_highpage(page + i); | ||
| 227 | } | ||
| 228 | |||
| 216 | /* | 229 | /* |
| 217 | * function for dealing with page's order in buddy system. | 230 | * function for dealing with page's order in buddy system. |
| 218 | * zone->lock is already acquired when we use these. | 231 | * zone->lock is already acquired when we use these. |
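prep_zero_page() moves up in page_alloc.c ahead of its new call site in prep_new_page(); its BUG_ON()s encode that zeroing a highmem page goes through clear_highpage() (and therefore kmap with KM_USER0), so __GFP_ZERO plus __GFP_HIGHMEM may only be requested from process context. A usage sketch under that assumption (hypothetical caller, not from the patch):

/*
 * Sketch: requesting a zeroed highmem page is fine from process context,
 * where kmap and sleeping are allowed, but not from interrupt context.
 */
static struct page *grab_zeroed_user_page(void)
{
	return alloc_pages(GFP_HIGHUSER | __GFP_ZERO, 0);
}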
| @@ -423,11 +436,6 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
| 423 | mutex_debug_check_no_locks_freed(page_address(page), | 436 | mutex_debug_check_no_locks_freed(page_address(page), |
| 424 | PAGE_SIZE<<order); | 437 | PAGE_SIZE<<order); |
| 425 | 438 | ||
| 426 | #ifndef CONFIG_MMU | ||
| 427 | for (i = 1 ; i < (1 << order) ; ++i) | ||
| 428 | __put_page(page + i); | ||
| 429 | #endif | ||
| 430 | |||
| 431 | for (i = 0 ; i < (1 << order) ; ++i) | 439 | for (i = 0 ; i < (1 << order) ; ++i) |
| 432 | reserved += free_pages_check(page + i); | 440 | reserved += free_pages_check(page + i); |
| 433 | if (reserved) | 441 | if (reserved) |
| @@ -448,28 +456,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order) | |||
| 448 | if (order == 0) { | 456 | if (order == 0) { |
| 449 | __ClearPageReserved(page); | 457 | __ClearPageReserved(page); |
| 450 | set_page_count(page, 0); | 458 | set_page_count(page, 0); |
| 451 | 459 | set_page_refcounted(page); | |
| 452 | free_hot_cold_page(page, 0); | 460 | __free_page(page); |
| 453 | } else { | 461 | } else { |
| 454 | LIST_HEAD(list); | ||
| 455 | int loop; | 462 | int loop; |
| 456 | 463 | ||
| 464 | prefetchw(page); | ||
| 457 | for (loop = 0; loop < BITS_PER_LONG; loop++) { | 465 | for (loop = 0; loop < BITS_PER_LONG; loop++) { |
| 458 | struct page *p = &page[loop]; | 466 | struct page *p = &page[loop]; |
| 459 | 467 | ||
| 460 | if (loop + 16 < BITS_PER_LONG) | 468 | if (loop + 1 < BITS_PER_LONG) |
| 461 | prefetchw(p + 16); | 469 | prefetchw(p + 1); |
| 462 | __ClearPageReserved(p); | 470 | __ClearPageReserved(p); |
| 463 | set_page_count(p, 0); | 471 | set_page_count(p, 0); |
| 464 | } | 472 | } |
| 465 | 473 | ||
| 466 | arch_free_page(page, order); | 474 | set_page_refcounted(page); |
| 467 | 475 | __free_pages(page, order); | |
| 468 | mod_page_state(pgfree, 1 << order); | ||
| 469 | |||
| 470 | list_add(&page->lru, &list); | ||
| 471 | kernel_map_pages(page, 1 << order, 0); | ||
| 472 | free_pages_bulk(page_zone(page), 1, &list, order); | ||
| 473 | } | 476 | } |
| 474 | } | 477 | } |
| 475 | 478 | ||
| @@ -507,7 +510,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
| 507 | /* | 510 | /* |
| 508 | * This page is about to be returned from the page allocator | 511 | * This page is about to be returned from the page allocator |
| 509 | */ | 512 | */ |
| 510 | static int prep_new_page(struct page *page, int order) | 513 | static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) |
| 511 | { | 514 | { |
| 512 | if (unlikely(page_mapcount(page) | | 515 | if (unlikely(page_mapcount(page) | |
| 513 | (page->mapping != NULL) | | 516 | (page->mapping != NULL) | |
| @@ -536,8 +539,15 @@ static int prep_new_page(struct page *page, int order) | |||
| 536 | 1 << PG_referenced | 1 << PG_arch_1 | | 539 | 1 << PG_referenced | 1 << PG_arch_1 | |
| 537 | 1 << PG_checked | 1 << PG_mappedtodisk); | 540 | 1 << PG_checked | 1 << PG_mappedtodisk); |
| 538 | set_page_private(page, 0); | 541 | set_page_private(page, 0); |
| 539 | set_page_refs(page, order); | 542 | set_page_refcounted(page); |
| 540 | kernel_map_pages(page, 1 << order, 1); | 543 | kernel_map_pages(page, 1 << order, 1); |
| 544 | |||
| 545 | if (gfp_flags & __GFP_ZERO) | ||
| 546 | prep_zero_page(page, order, gfp_flags); | ||
| 547 | |||
| 548 | if (order && (gfp_flags & __GFP_COMP)) | ||
| 549 | prep_compound_page(page, order); | ||
| 550 | |||
| 541 | return 0; | 551 | return 0; |
| 542 | } | 552 | } |
| 543 | 553 | ||
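prep_new_page() now receives the gfp flags and performs the __GFP_ZERO clearing and __GFP_COMP compound setup itself, so the buffered_rmqueue() path below no longer duplicates that work and every allocation path is treated the same. A one-line sketch of what a caller can then rely on (hypothetical helper, not from the patch):

/* Sketch: one request that relies on prep_new_page() doing both the
 * zeroing and the compound-page setup. */
static struct page *alloc_zeroed_compound(unsigned int order)
{
	return alloc_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, order);
}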
| @@ -593,13 +603,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
| 593 | /* | 603 | /* |
| 594 | * Called from the slab reaper to drain pagesets on a particular node that | 604 | * Called from the slab reaper to drain pagesets on a particular node that |
| 595 | * belong to the currently executing processor. | 605 | * belong to the currently executing processor. |
| 606 | * Note that this function must be called with the thread pinned to | ||
| 607 | * a single processor. | ||
| 596 | */ | 608 | */ |
| 597 | void drain_node_pages(int nodeid) | 609 | void drain_node_pages(int nodeid) |
| 598 | { | 610 | { |
| 599 | int i, z; | 611 | int i, z; |
| 600 | unsigned long flags; | 612 | unsigned long flags; |
| 601 | 613 | ||
| 602 | local_irq_save(flags); | ||
| 603 | for (z = 0; z < MAX_NR_ZONES; z++) { | 614 | for (z = 0; z < MAX_NR_ZONES; z++) { |
| 604 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | 615 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; |
| 605 | struct per_cpu_pageset *pset; | 616 | struct per_cpu_pageset *pset; |
| @@ -609,11 +620,14 @@ void drain_node_pages(int nodeid) | |||
| 609 | struct per_cpu_pages *pcp; | 620 | struct per_cpu_pages *pcp; |
| 610 | 621 | ||
| 611 | pcp = &pset->pcp[i]; | 622 | pcp = &pset->pcp[i]; |
| 612 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | 623 | if (pcp->count) { |
| 613 | pcp->count = 0; | 624 | local_irq_save(flags); |
| 625 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | ||
| 626 | pcp->count = 0; | ||
| 627 | local_irq_restore(flags); | ||
| 628 | } | ||
| 614 | } | 629 | } |
| 615 | } | 630 | } |
| 616 | local_irq_restore(flags); | ||
| 617 | } | 631 | } |
| 618 | #endif | 632 | #endif |
| 619 | 633 | ||
| @@ -743,13 +757,22 @@ void fastcall free_cold_page(struct page *page) | |||
| 743 | free_hot_cold_page(page, 1); | 757 | free_hot_cold_page(page, 1); |
| 744 | } | 758 | } |
| 745 | 759 | ||
| 746 | static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | 760 | /* |
| 761 | * split_page takes a non-compound higher-order page, and splits it into | ||
| 762 | * n (1<<order) sub-pages: page[0..n] | ||
| 763 | * Each sub-page must be freed individually. | ||
| 764 | * | ||
| 765 | * Note: this is probably too low level an operation for use in drivers. | ||
| 766 | * Please consult with lkml before using this in your driver. | ||
| 767 | */ | ||
| 768 | void split_page(struct page *page, unsigned int order) | ||
| 747 | { | 769 | { |
| 748 | int i; | 770 | int i; |
| 749 | 771 | ||
| 750 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | 772 | BUG_ON(PageCompound(page)); |
| 751 | for(i = 0; i < (1 << order); i++) | 773 | BUG_ON(!page_count(page)); |
| 752 | clear_highpage(page + i); | 774 | for (i = 1; i < (1 << order); i++) |
| 775 | set_page_refcounted(page + i); | ||
| 753 | } | 776 | } |
| 754 | 777 | ||
| 755 | /* | 778 | /* |
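split_page() is new in this patch: it turns a non-compound higher-order allocation into independently refcounted sub-pages, each of which must then be freed on its own. A usage sketch under that rule (the caller is hypothetical):

/*
 * Sketch: split one order-2 allocation into four independently
 * refcounted pages and drop the last one; the rest stay usable.
 */
static struct page *grab_three_pages(void)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;
	split_page(page, 2);
	__free_page(page + 3);	/* keep page[0..2], free the fourth */
	return page;
}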
| @@ -795,14 +818,8 @@ again: | |||
| 795 | put_cpu(); | 818 | put_cpu(); |
| 796 | 819 | ||
| 797 | BUG_ON(bad_range(zone, page)); | 820 | BUG_ON(bad_range(zone, page)); |
| 798 | if (prep_new_page(page, order)) | 821 | if (prep_new_page(page, order, gfp_flags)) |
| 799 | goto again; | 822 | goto again; |
| 800 | |||
| 801 | if (gfp_flags & __GFP_ZERO) | ||
| 802 | prep_zero_page(page, order, gfp_flags); | ||
| 803 | |||
| 804 | if (order && (gfp_flags & __GFP_COMP)) | ||
| 805 | prep_compound_page(page, order); | ||
| 806 | return page; | 823 | return page; |
| 807 | 824 | ||
| 808 | failed: | 825 | failed: |
| @@ -1214,24 +1231,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0; | |||
| 1214 | 1231 | ||
| 1215 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) | 1232 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
| 1216 | { | 1233 | { |
| 1217 | int cpu = 0; | 1234 | unsigned cpu; |
| 1218 | 1235 | ||
| 1219 | memset(ret, 0, nr * sizeof(unsigned long)); | 1236 | memset(ret, 0, nr * sizeof(unsigned long)); |
| 1220 | cpus_and(*cpumask, *cpumask, cpu_online_map); | 1237 | cpus_and(*cpumask, *cpumask, cpu_online_map); |
| 1221 | 1238 | ||
| 1222 | cpu = first_cpu(*cpumask); | 1239 | for_each_cpu_mask(cpu, *cpumask) { |
| 1223 | while (cpu < NR_CPUS) { | 1240 | unsigned long *in; |
| 1224 | unsigned long *in, *out, off; | 1241 | unsigned long *out; |
| 1225 | 1242 | unsigned off; | |
| 1226 | if (!cpu_isset(cpu, *cpumask)) | 1243 | unsigned next_cpu; |
| 1227 | continue; | ||
| 1228 | 1244 | ||
| 1229 | in = (unsigned long *)&per_cpu(page_states, cpu); | 1245 | in = (unsigned long *)&per_cpu(page_states, cpu); |
| 1230 | 1246 | ||
| 1231 | cpu = next_cpu(cpu, *cpumask); | 1247 | next_cpu = next_cpu(cpu, *cpumask); |
| 1232 | 1248 | if (likely(next_cpu < NR_CPUS)) | |
| 1233 | if (likely(cpu < NR_CPUS)) | 1249 | prefetch(&per_cpu(page_states, next_cpu)); |
| 1234 | prefetch(&per_cpu(page_states, cpu)); | ||
| 1235 | 1250 | ||
| 1236 | out = (unsigned long *)ret; | 1251 | out = (unsigned long *)ret; |
| 1237 | for (off = 0; off < nr; off++) | 1252 | for (off = 0; off < nr; off++) |
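__get_page_state() drops the open-coded first_cpu()/next_cpu() loop in favour of for_each_cpu_mask(), prefetching the next CPU's page_states while the current one is summed. A generic sketch of the same walk, using a hypothetical per-CPU counter rather than page_states:

/*
 * Sketch: visit only the CPUs present in the mask and prefetch the next
 * CPU's per-cpu data while accumulating the current one.
 */
static DEFINE_PER_CPU(unsigned long, my_stat);

static unsigned long sum_my_stat(cpumask_t mask)
{
	unsigned long sum = 0;
	unsigned int cpu;

	for_each_cpu_mask(cpu, mask) {
		unsigned int next = next_cpu(cpu, mask);

		if (next < NR_CPUS)
			prefetch(&per_cpu(my_stat, next));
		sum += per_cpu(my_stat, cpu);
	}
	return sum;
}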
| @@ -1764,7 +1779,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
| 1764 | continue; | 1779 | continue; |
| 1765 | page = pfn_to_page(pfn); | 1780 | page = pfn_to_page(pfn); |
| 1766 | set_page_links(page, zone, nid, pfn); | 1781 | set_page_links(page, zone, nid, pfn); |
| 1767 | set_page_count(page, 1); | 1782 | init_page_count(page); |
| 1768 | reset_page_mapcount(page); | 1783 | reset_page_mapcount(page); |
| 1769 | SetPageReserved(page); | 1784 | SetPageReserved(page); |
| 1770 | INIT_LIST_HEAD(&page->lru); | 1785 | INIT_LIST_HEAD(&page->lru); |
diff --git a/mm/readahead.c b/mm/readahead.c index 8d6eeaaa6296..301b36c4a0ce 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
| @@ -52,13 +52,24 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra) | |||
| 52 | return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 52 | return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | static inline void reset_ahead_window(struct file_ra_state *ra) | ||
| 56 | { | ||
| 57 | /* | ||
| 58 | * ... but preserve ahead_start + ahead_size value, | ||
| 59 | * see 'recheck:' label in page_cache_readahead(). | ||
| 60 | * Note: We never use ->ahead_size as rvalue without | ||
| 61 | * checking ->ahead_start != 0 first. | ||
| 62 | */ | ||
| 63 | ra->ahead_size += ra->ahead_start; | ||
| 64 | ra->ahead_start = 0; | ||
| 65 | } | ||
| 66 | |||
| 55 | static inline void ra_off(struct file_ra_state *ra) | 67 | static inline void ra_off(struct file_ra_state *ra) |
| 56 | { | 68 | { |
| 57 | ra->start = 0; | 69 | ra->start = 0; |
| 58 | ra->flags = 0; | 70 | ra->flags = 0; |
| 59 | ra->size = 0; | 71 | ra->size = 0; |
| 60 | ra->ahead_start = 0; | 72 | reset_ahead_window(ra); |
| 61 | ra->ahead_size = 0; | ||
| 62 | return; | 73 | return; |
| 63 | } | 74 | } |
| 64 | 75 | ||
| @@ -72,10 +83,10 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) | |||
| 72 | { | 83 | { |
| 73 | unsigned long newsize = roundup_pow_of_two(size); | 84 | unsigned long newsize = roundup_pow_of_two(size); |
| 74 | 85 | ||
| 75 | if (newsize <= max / 64) | 86 | if (newsize <= max / 32) |
| 76 | newsize = newsize * newsize; | 87 | newsize = newsize * 4; |
| 77 | else if (newsize <= max / 4) | 88 | else if (newsize <= max / 4) |
| 78 | newsize = max / 4; | 89 | newsize = newsize * 2; |
| 79 | else | 90 | else |
| 80 | newsize = max; | 91 | newsize = max; |
| 81 | return newsize; | 92 | return newsize; |
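The initial readahead window sizing is retuned here: a request that rounds up to at most max/32 pages now quadruples (instead of being squared), one up to max/4 doubles (instead of jumping straight to max/4), and anything larger still gets max. A standalone userspace model of the new rule with a few worked values (sizes in pages; roundup_pow_of_two() is open-coded for the sketch):

#include <stdio.h>

static unsigned long init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = 1;

	while (newsize < size)		/* roundup_pow_of_two(size) */
		newsize <<= 1;
	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;
	return newsize;
}

int main(void)
{
	/* with a 32-page maximum (128KB of 4KB pages): 1 -> 4, 4 -> 8, 16 -> 32 */
	printf("%lu %lu %lu\n", init_ra_size(1, 32),
	       init_ra_size(4, 32), init_ra_size(16, 32));
	return 0;
}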
| @@ -426,8 +437,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, | |||
| 426 | * congestion. The ahead window will any way be closed | 437 | * congestion. The ahead window will any way be closed |
| 427 | * in case we failed due to excessive page cache hits. | 438 | * in case we failed due to excessive page cache hits. |
| 428 | */ | 439 | */ |
| 429 | ra->ahead_start = 0; | 440 | reset_ahead_window(ra); |
| 430 | ra->ahead_size = 0; | ||
| 431 | } | 441 | } |
| 432 | 442 | ||
| 433 | return ret; | 443 | return ret; |
| @@ -520,11 +530,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, | |||
| 520 | * If we get here we are doing sequential IO and this was not the first | 530 | * If we get here we are doing sequential IO and this was not the first |
| 521 | * occurence (ie we have an existing window) | 531 | * occurence (ie we have an existing window) |
| 522 | */ | 532 | */ |
| 523 | |||
| 524 | if (ra->ahead_start == 0) { /* no ahead window yet */ | 533 | if (ra->ahead_start == 0) { /* no ahead window yet */ |
| 525 | if (!make_ahead_window(mapping, filp, ra, 0)) | 534 | if (!make_ahead_window(mapping, filp, ra, 0)) |
| 526 | goto out; | 535 | goto recheck; |
| 527 | } | 536 | } |
| 537 | |||
| 528 | /* | 538 | /* |
| 529 | * Already have an ahead window, check if we crossed into it. | 539 | * Already have an ahead window, check if we crossed into it. |
| 530 | * If so, shift windows and issue a new ahead window. | 540 | * If so, shift windows and issue a new ahead window. |
| @@ -536,6 +546,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, | |||
| 536 | ra->start = ra->ahead_start; | 546 | ra->start = ra->ahead_start; |
| 537 | ra->size = ra->ahead_size; | 547 | ra->size = ra->ahead_size; |
| 538 | make_ahead_window(mapping, filp, ra, 0); | 548 | make_ahead_window(mapping, filp, ra, 0); |
| 549 | recheck: | ||
| 550 | /* prev_page shouldn't overrun the ahead window */ | ||
| 551 | ra->prev_page = min(ra->prev_page, | ||
| 552 | ra->ahead_start + ra->ahead_size - 1); | ||
| 539 | } | 553 | } |
| 540 | 554 | ||
| 541 | out: | 555 | out: |
| @@ -56,13 +56,11 @@ | |||
| 56 | 56 | ||
| 57 | #include <asm/tlbflush.h> | 57 | #include <asm/tlbflush.h> |
| 58 | 58 | ||
| 59 | //#define RMAP_DEBUG /* can be enabled only for debugging */ | 59 | struct kmem_cache *anon_vma_cachep; |
| 60 | |||
| 61 | kmem_cache_t *anon_vma_cachep; | ||
| 62 | 60 | ||
| 63 | static inline void validate_anon_vma(struct vm_area_struct *find_vma) | 61 | static inline void validate_anon_vma(struct vm_area_struct *find_vma) |
| 64 | { | 62 | { |
| 65 | #ifdef RMAP_DEBUG | 63 | #ifdef CONFIG_DEBUG_VM |
| 66 | struct anon_vma *anon_vma = find_vma->anon_vma; | 64 | struct anon_vma *anon_vma = find_vma->anon_vma; |
| 67 | struct vm_area_struct *vma; | 65 | struct vm_area_struct *vma; |
| 68 | unsigned int mapcount = 0; | 66 | unsigned int mapcount = 0; |
| @@ -166,7 +164,8 @@ void anon_vma_unlink(struct vm_area_struct *vma) | |||
| 166 | anon_vma_free(anon_vma); | 164 | anon_vma_free(anon_vma); |
| 167 | } | 165 | } |
| 168 | 166 | ||
| 169 | static void anon_vma_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) | 167 | static void anon_vma_ctor(void *data, struct kmem_cache *cachep, |
| 168 | unsigned long flags) | ||
| 170 | { | 169 | { |
| 171 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | 170 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == |
| 172 | SLAB_CTOR_CONSTRUCTOR) { | 171 | SLAB_CTOR_CONSTRUCTOR) { |
| @@ -550,13 +549,14 @@ void page_add_file_rmap(struct page *page) | |||
| 550 | void page_remove_rmap(struct page *page) | 549 | void page_remove_rmap(struct page *page) |
| 551 | { | 550 | { |
| 552 | if (atomic_add_negative(-1, &page->_mapcount)) { | 551 | if (atomic_add_negative(-1, &page->_mapcount)) { |
| 553 | if (page_mapcount(page) < 0) { | 552 | #ifdef CONFIG_DEBUG_VM |
| 553 | if (unlikely(page_mapcount(page) < 0)) { | ||
| 554 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); | 554 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); |
| 555 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); | 555 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); |
| 556 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); | 556 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); |
| 557 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); | 557 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); |
| 558 | } | 558 | } |
| 559 | 559 | #endif | |
| 560 | BUG_ON(page_mapcount(page) < 0); | 560 | BUG_ON(page_mapcount(page) < 0); |
| 561 | /* | 561 | /* |
| 562 | * It would be tidy to reset the PageAnon mapping here, | 562 | * It would be tidy to reset the PageAnon mapping here, |
diff --git a/mm/shmem.c b/mm/shmem.c index 7c455fbaff7b..37eaf42ed2c6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -875,7 +875,7 @@ redirty: | |||
| 875 | } | 875 | } |
| 876 | 876 | ||
| 877 | #ifdef CONFIG_NUMA | 877 | #ifdef CONFIG_NUMA |
| 878 | static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) | 878 | static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) |
| 879 | { | 879 | { |
| 880 | char *nodelist = strchr(value, ':'); | 880 | char *nodelist = strchr(value, ':'); |
| 881 | int err = 1; | 881 | int err = 1; |
| @@ -2119,7 +2119,7 @@ failed: | |||
| 2119 | return err; | 2119 | return err; |
| 2120 | } | 2120 | } |
| 2121 | 2121 | ||
| 2122 | static kmem_cache_t *shmem_inode_cachep; | 2122 | static struct kmem_cache *shmem_inode_cachep; |
| 2123 | 2123 | ||
| 2124 | static struct inode *shmem_alloc_inode(struct super_block *sb) | 2124 | static struct inode *shmem_alloc_inode(struct super_block *sb) |
| 2125 | { | 2125 | { |
| @@ -2139,7 +2139,8 @@ static void shmem_destroy_inode(struct inode *inode) | |||
| 2139 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | 2139 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); |
| 2140 | } | 2140 | } |
| 2141 | 2141 | ||
| 2142 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | 2142 | static void init_once(void *foo, struct kmem_cache *cachep, |
| 2143 | unsigned long flags) | ||
| 2143 | { | 2144 | { |
| 2144 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; | 2145 | struct shmem_inode_info *p = (struct shmem_inode_info *) foo; |
| 2145 | 2146 | ||
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
| @@ -50,7 +50,7 @@ | |||
| 50 | * The head array is strictly LIFO and should improve the cache hit rates. | 50 | * The head array is strictly LIFO and should improve the cache hit rates. |
| 51 | * On SMP, it additionally reduces the spinlock operations. | 51 | * On SMP, it additionally reduces the spinlock operations. |
| 52 | * | 52 | * |
| 53 | * The c_cpuarray may not be read with enabled local interrupts - | 53 | * The c_cpuarray may not be read with enabled local interrupts - |
| 54 | * it's changed with a smp_call_function(). | 54 | * it's changed with a smp_call_function(). |
| 55 | * | 55 | * |
| 56 | * SMP synchronization: | 56 | * SMP synchronization: |
| @@ -170,12 +170,12 @@ | |||
| 170 | #if DEBUG | 170 | #if DEBUG |
| 171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ | 171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ |
| 172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ | 172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ |
| 173 | SLAB_NO_REAP | SLAB_CACHE_DMA | \ | 173 | SLAB_CACHE_DMA | \ |
| 174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ | 174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ |
| 175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 176 | SLAB_DESTROY_BY_RCU) | 176 | SLAB_DESTROY_BY_RCU) |
| 177 | #else | 177 | #else |
| 178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ | 178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
| 179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ | 179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ |
| 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 181 | SLAB_DESTROY_BY_RCU) | 181 | SLAB_DESTROY_BY_RCU) |
| @@ -266,16 +266,17 @@ struct array_cache { | |||
| 266 | unsigned int batchcount; | 266 | unsigned int batchcount; |
| 267 | unsigned int touched; | 267 | unsigned int touched; |
| 268 | spinlock_t lock; | 268 | spinlock_t lock; |
| 269 | void *entry[0]; /* | 269 | void *entry[0]; /* |
| 270 | * Must have this definition in here for the proper | 270 | * Must have this definition in here for the proper |
| 271 | * alignment of array_cache. Also simplifies accessing | 271 | * alignment of array_cache. Also simplifies accessing |
| 272 | * the entries. | 272 | * the entries. |
| 273 | * [0] is for gcc 2.95. It should really be []. | 273 | * [0] is for gcc 2.95. It should really be []. |
| 274 | */ | 274 | */ |
| 275 | }; | 275 | }; |
| 276 | 276 | ||
| 277 | /* bootstrap: The caches do not work without cpuarrays anymore, | 277 | /* |
| 278 | * but the cpuarrays are allocated from the generic caches... | 278 | * bootstrap: The caches do not work without cpuarrays anymore, but the |
| 279 | * cpuarrays are allocated from the generic caches... | ||
| 279 | */ | 280 | */ |
| 280 | #define BOOT_CPUCACHE_ENTRIES 1 | 281 | #define BOOT_CPUCACHE_ENTRIES 1 |
| 281 | struct arraycache_init { | 282 | struct arraycache_init { |
| @@ -291,13 +292,13 @@ struct kmem_list3 { | |||
| 291 | struct list_head slabs_full; | 292 | struct list_head slabs_full; |
| 292 | struct list_head slabs_free; | 293 | struct list_head slabs_free; |
| 293 | unsigned long free_objects; | 294 | unsigned long free_objects; |
| 294 | unsigned long next_reap; | ||
| 295 | int free_touched; | ||
| 296 | unsigned int free_limit; | 295 | unsigned int free_limit; |
| 297 | unsigned int colour_next; /* Per-node cache coloring */ | 296 | unsigned int colour_next; /* Per-node cache coloring */ |
| 298 | spinlock_t list_lock; | 297 | spinlock_t list_lock; |
| 299 | struct array_cache *shared; /* shared per node */ | 298 | struct array_cache *shared; /* shared per node */ |
| 300 | struct array_cache **alien; /* on other nodes */ | 299 | struct array_cache **alien; /* on other nodes */ |
| 300 | unsigned long next_reap; /* updated without locking */ | ||
| 301 | int free_touched; /* updated without locking */ | ||
| 301 | }; | 302 | }; |
| 302 | 303 | ||
| 303 | /* | 304 | /* |
| @@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | |||
| 310 | #define SIZE_L3 (1 + MAX_NUMNODES) | 311 | #define SIZE_L3 (1 + MAX_NUMNODES) |
| 311 | 312 | ||
| 312 | /* | 313 | /* |
| 313 | * This function must be completely optimized away if | 314 | * This function must be completely optimized away if a constant is passed to |
| 314 | * a constant is passed to it. Mostly the same as | 315 | * it. Mostly the same as what is in linux/slab.h except it returns an index. |
| 315 | * what is in linux/slab.h except it returns an | ||
| 316 | * index. | ||
| 317 | */ | 316 | */ |
| 318 | static __always_inline int index_of(const size_t size) | 317 | static __always_inline int index_of(const size_t size) |
| 319 | { | 318 | { |
| @@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
| 351 | parent->free_touched = 0; | 350 | parent->free_touched = 0; |
| 352 | } | 351 | } |
| 353 | 352 | ||
| 354 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 353 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
| 355 | do { \ | 354 | do { \ |
| 356 | INIT_LIST_HEAD(listp); \ | 355 | INIT_LIST_HEAD(listp); \ |
| 357 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ | 356 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ |
| 358 | } while (0) | 357 | } while (0) |
| 359 | 358 | ||
| 360 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ | 359 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ |
| 361 | do { \ | 360 | do { \ |
| 362 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ | 361 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ |
| 363 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ | 362 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ |
| 364 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 363 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
| @@ -373,28 +372,30 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
| 373 | struct kmem_cache { | 372 | struct kmem_cache { |
| 374 | /* 1) per-cpu data, touched during every alloc/free */ | 373 | /* 1) per-cpu data, touched during every alloc/free */ |
| 375 | struct array_cache *array[NR_CPUS]; | 374 | struct array_cache *array[NR_CPUS]; |
| 375 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
| 376 | unsigned int batchcount; | 376 | unsigned int batchcount; |
| 377 | unsigned int limit; | 377 | unsigned int limit; |
| 378 | unsigned int shared; | 378 | unsigned int shared; |
| 379 | |||
| 379 | unsigned int buffer_size; | 380 | unsigned int buffer_size; |
| 380 | /* 2) touched by every alloc & free from the backend */ | 381 | /* 3) touched by every alloc & free from the backend */ |
| 381 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 382 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
| 382 | unsigned int flags; /* constant flags */ | ||
| 383 | unsigned int num; /* # of objs per slab */ | ||
| 384 | spinlock_t spinlock; | ||
| 385 | 383 | ||
| 386 | /* 3) cache_grow/shrink */ | 384 | unsigned int flags; /* constant flags */ |
| 385 | unsigned int num; /* # of objs per slab */ | ||
| 386 | |||
| 387 | /* 4) cache_grow/shrink */ | ||
| 387 | /* order of pgs per slab (2^n) */ | 388 | /* order of pgs per slab (2^n) */ |
| 388 | unsigned int gfporder; | 389 | unsigned int gfporder; |
| 389 | 390 | ||
| 390 | /* force GFP flags, e.g. GFP_DMA */ | 391 | /* force GFP flags, e.g. GFP_DMA */ |
| 391 | gfp_t gfpflags; | 392 | gfp_t gfpflags; |
| 392 | 393 | ||
| 393 | size_t colour; /* cache colouring range */ | 394 | size_t colour; /* cache colouring range */ |
| 394 | unsigned int colour_off; /* colour offset */ | 395 | unsigned int colour_off; /* colour offset */ |
| 395 | struct kmem_cache *slabp_cache; | 396 | struct kmem_cache *slabp_cache; |
| 396 | unsigned int slab_size; | 397 | unsigned int slab_size; |
| 397 | unsigned int dflags; /* dynamic flags */ | 398 | unsigned int dflags; /* dynamic flags */ |
| 398 | 399 | ||
| 399 | /* constructor func */ | 400 | /* constructor func */ |
| 400 | void (*ctor) (void *, struct kmem_cache *, unsigned long); | 401 | void (*ctor) (void *, struct kmem_cache *, unsigned long); |
| @@ -402,11 +403,11 @@ struct kmem_cache { | |||
| 402 | /* de-constructor func */ | 403 | /* de-constructor func */ |
| 403 | void (*dtor) (void *, struct kmem_cache *, unsigned long); | 404 | void (*dtor) (void *, struct kmem_cache *, unsigned long); |
| 404 | 405 | ||
| 405 | /* 4) cache creation/removal */ | 406 | /* 5) cache creation/removal */ |
| 406 | const char *name; | 407 | const char *name; |
| 407 | struct list_head next; | 408 | struct list_head next; |
| 408 | 409 | ||
| 409 | /* 5) statistics */ | 410 | /* 6) statistics */ |
| 410 | #if STATS | 411 | #if STATS |
| 411 | unsigned long num_active; | 412 | unsigned long num_active; |
| 412 | unsigned long num_allocations; | 413 | unsigned long num_allocations; |
| @@ -438,8 +439,9 @@ struct kmem_cache { | |||
| 438 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 439 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
| 439 | 440 | ||
| 440 | #define BATCHREFILL_LIMIT 16 | 441 | #define BATCHREFILL_LIMIT 16 |
| 441 | /* Optimization question: fewer reaps means less | 442 | /* |
| 442 | * probability for unnessary cpucache drain/refill cycles. | 443 | * Optimization question: fewer reaps means less probability for unnessary |
| 444 | * cpucache drain/refill cycles. | ||
| 443 | * | 445 | * |
| 444 | * OTOH the cpuarrays can contain lots of objects, | 446 | * OTOH the cpuarrays can contain lots of objects, |
| 445 | * which could lock up otherwise freeable slabs. | 447 | * which could lock up otherwise freeable slabs. |
| @@ -453,17 +455,19 @@ struct kmem_cache { | |||
| 453 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) | 455 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) |
| 454 | #define STATS_INC_GROWN(x) ((x)->grown++) | 456 | #define STATS_INC_GROWN(x) ((x)->grown++) |
| 455 | #define STATS_INC_REAPED(x) ((x)->reaped++) | 457 | #define STATS_INC_REAPED(x) ((x)->reaped++) |
| 456 | #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ | 458 | #define STATS_SET_HIGH(x) \ |
| 457 | (x)->high_mark = (x)->num_active; \ | 459 | do { \ |
| 458 | } while (0) | 460 | if ((x)->num_active > (x)->high_mark) \ |
| 461 | (x)->high_mark = (x)->num_active; \ | ||
| 462 | } while (0) | ||
| 459 | #define STATS_INC_ERR(x) ((x)->errors++) | 463 | #define STATS_INC_ERR(x) ((x)->errors++) |
| 460 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) | 464 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) |
| 461 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) | 465 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) |
| 462 | #define STATS_SET_FREEABLE(x, i) \ | 466 | #define STATS_SET_FREEABLE(x, i) \ |
| 463 | do { if ((x)->max_freeable < i) \ | 467 | do { \ |
| 464 | (x)->max_freeable = i; \ | 468 | if ((x)->max_freeable < i) \ |
| 465 | } while (0) | 469 | (x)->max_freeable = i; \ |
| 466 | 470 | } while (0) | |
| 467 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) | 471 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) |
| 468 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) | 472 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) |
| 469 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) | 473 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) |
| @@ -478,9 +482,7 @@ struct kmem_cache { | |||
| 478 | #define STATS_INC_ERR(x) do { } while (0) | 482 | #define STATS_INC_ERR(x) do { } while (0) |
| 479 | #define STATS_INC_NODEALLOCS(x) do { } while (0) | 483 | #define STATS_INC_NODEALLOCS(x) do { } while (0) |
| 480 | #define STATS_INC_NODEFREES(x) do { } while (0) | 484 | #define STATS_INC_NODEFREES(x) do { } while (0) |
| 481 | #define STATS_SET_FREEABLE(x, i) \ | 485 | #define STATS_SET_FREEABLE(x, i) do { } while (0) |
| 482 | do { } while (0) | ||
| 483 | |||
| 484 | #define STATS_INC_ALLOCHIT(x) do { } while (0) | 486 | #define STATS_INC_ALLOCHIT(x) do { } while (0) |
| 485 | #define STATS_INC_ALLOCMISS(x) do { } while (0) | 487 | #define STATS_INC_ALLOCMISS(x) do { } while (0) |
| 486 | #define STATS_INC_FREEHIT(x) do { } while (0) | 488 | #define STATS_INC_FREEHIT(x) do { } while (0) |
| @@ -488,7 +490,8 @@ struct kmem_cache { | |||
| 488 | #endif | 490 | #endif |
| 489 | 491 | ||
| 490 | #if DEBUG | 492 | #if DEBUG |
| 491 | /* Magic nums for obj red zoning. | 493 | /* |
| 494 | * Magic nums for obj red zoning. | ||
| 492 | * Placed in the first word before and the first word after an obj. | 495 | * Placed in the first word before and the first word after an obj. |
| 493 | */ | 496 | */ |
| 494 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ | 497 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ |
| @@ -499,7 +502,8 @@ struct kmem_cache { | |||
| 499 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ | 502 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ |
| 500 | #define POISON_END 0xa5 /* end-byte of poisoning */ | 503 | #define POISON_END 0xa5 /* end-byte of poisoning */ |
| 501 | 504 | ||
| 502 | /* memory layout of objects: | 505 | /* |
| 506 | * memory layout of objects: | ||
| 503 | * 0 : objp | 507 | * 0 : objp |
| 504 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that | 508 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that |
| 505 | * the end of an object is aligned with the end of the real | 509 | * the end of an object is aligned with the end of the real |
| @@ -508,7 +512,8 @@ struct kmem_cache { | |||
| 508 | * redzone word. | 512 | * redzone word. |
| 509 | * cachep->obj_offset: The real object. | 513 | * cachep->obj_offset: The real object. |
| 510 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] | 514 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] |
| 511 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] | 515 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address |
| 516 | * [BYTES_PER_WORD long] | ||
| 512 | */ | 517 | */ |
| 513 | static int obj_offset(struct kmem_cache *cachep) | 518 | static int obj_offset(struct kmem_cache *cachep) |
| 514 | { | 519 | { |
| @@ -552,8 +557,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
| 552 | #endif | 557 | #endif |
| 553 | 558 | ||
| 554 | /* | 559 | /* |
| 555 | * Maximum size of an obj (in 2^order pages) | 560 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp |
| 556 | * and absolute limit for the gfp order. | 561 | * order. |
| 557 | */ | 562 | */ |
| 558 | #if defined(CONFIG_LARGE_ALLOCS) | 563 | #if defined(CONFIG_LARGE_ALLOCS) |
| 559 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ | 564 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ |
| @@ -573,9 +578,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
| 573 | #define BREAK_GFP_ORDER_LO 0 | 578 | #define BREAK_GFP_ORDER_LO 0 |
| 574 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; | 579 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; |
| 575 | 580 | ||
| 576 | /* Functions for storing/retrieving the cachep and or slab from the | 581 | /* |
| 577 | * global 'mem_map'. These are used to find the slab an obj belongs to. | 582 | * Functions for storing/retrieving the cachep and or slab from the page |
| 578 | * With kfree(), these are used to find the cache which an obj belongs to. | 583 | * allocator. These are used to find the slab an obj belongs to. With kfree(), |
| 584 | * these are used to find the cache which an obj belongs to. | ||
| 579 | */ | 585 | */ |
| 580 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | 586 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) |
| 581 | { | 587 | { |
| @@ -584,6 +590,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | |||
| 584 | 590 | ||
| 585 | static inline struct kmem_cache *page_get_cache(struct page *page) | 591 | static inline struct kmem_cache *page_get_cache(struct page *page) |
| 586 | { | 592 | { |
| 593 | if (unlikely(PageCompound(page))) | ||
| 594 | page = (struct page *)page_private(page); | ||
| 587 | return (struct kmem_cache *)page->lru.next; | 595 | return (struct kmem_cache *)page->lru.next; |
| 588 | } | 596 | } |
| 589 | 597 | ||
| @@ -594,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) | |||
| 594 | 602 | ||
| 595 | static inline struct slab *page_get_slab(struct page *page) | 603 | static inline struct slab *page_get_slab(struct page *page) |
| 596 | { | 604 | { |
| 605 | if (unlikely(PageCompound(page))) | ||
| 606 | page = (struct page *)page_private(page); | ||
| 597 | return (struct slab *)page->lru.prev; | 607 | return (struct slab *)page->lru.prev; |
| 598 | } | 608 | } |
| 599 | 609 | ||
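Both slab lookups above now step from a tail page to its head via page_private() before reading the cache/slab pointers kept in page->lru, so kfree()/ksize() on an address that lands inside a compound page still finds the owning cache. A sketch of that single step (hypothetical helper name; the real code inlines it in the two functions shown):

/*
 * Sketch: a tail page of a compound allocation stores its head page in
 * ->private, so resolve to the head before reading slab bookkeeping.
 */
static inline struct page *slab_head_page(struct page *page)
{
	if (unlikely(PageCompound(page)))
		page = (struct page *)page_private(page);
	return page;
}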
| @@ -609,7 +619,21 @@ static inline struct slab *virt_to_slab(const void *obj) | |||
| 609 | return page_get_slab(page); | 619 | return page_get_slab(page); |
| 610 | } | 620 | } |
| 611 | 621 | ||
| 612 | /* These are the default caches for kmalloc. Custom caches can have other sizes. */ | 622 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, |
| 623 | unsigned int idx) | ||
| 624 | { | ||
| 625 | return slab->s_mem + cache->buffer_size * idx; | ||
| 626 | } | ||
| 627 | |||
| 628 | static inline unsigned int obj_to_index(struct kmem_cache *cache, | ||
| 629 | struct slab *slab, void *obj) | ||
| 630 | { | ||
| 631 | return (unsigned)(obj - slab->s_mem) / cache->buffer_size; | ||
| 632 | } | ||
| 633 | |||
| 634 | /* | ||
| 635 | * These are the default caches for kmalloc. Custom caches can have other sizes. | ||
| 636 | */ | ||
| 613 | struct cache_sizes malloc_sizes[] = { | 637 | struct cache_sizes malloc_sizes[] = { |
| 614 | #define CACHE(x) { .cs_size = (x) }, | 638 | #define CACHE(x) { .cs_size = (x) }, |
| 615 | #include <linux/kmalloc_sizes.h> | 639 | #include <linux/kmalloc_sizes.h> |
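index_to_obj()/obj_to_index() replace the open-coded "s_mem + idx * buffer_size" arithmetic scattered through slab.c (see the check_poison_obj() hunk near the end of this diff). A standalone toy model with a worked value; the types and the test are illustrative only:

#include <assert.h>
#include <stddef.h>

/* Objects sit back-to-back starting at s_mem, buffer_size bytes apart. */
struct toy_cache { size_t buffer_size; };
struct toy_slab  { char *s_mem; };

static void *index_to_obj(struct toy_cache *c, struct toy_slab *s, unsigned int idx)
{
	return s->s_mem + c->buffer_size * idx;
}

static unsigned int obj_to_index(struct toy_cache *c, struct toy_slab *s, void *obj)
{
	return (unsigned int)(((char *)obj - s->s_mem) / c->buffer_size);
}

int main(void)
{
	static char mem[8 * 256];
	struct toy_slab slab = { mem };
	struct toy_cache cache = { 256 };

	/* object 3 of a 256-byte cache lives 768 bytes past s_mem, and back */
	assert(index_to_obj(&cache, &slab, 3) == (void *)(mem + 768));
	assert(obj_to_index(&cache, &slab, mem + 768) == 3);
	return 0;
}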
| @@ -642,8 +666,6 @@ static struct kmem_cache cache_cache = { | |||
| 642 | .limit = BOOT_CPUCACHE_ENTRIES, | 666 | .limit = BOOT_CPUCACHE_ENTRIES, |
| 643 | .shared = 1, | 667 | .shared = 1, |
| 644 | .buffer_size = sizeof(struct kmem_cache), | 668 | .buffer_size = sizeof(struct kmem_cache), |
| 645 | .flags = SLAB_NO_REAP, | ||
| 646 | .spinlock = SPIN_LOCK_UNLOCKED, | ||
| 647 | .name = "kmem_cache", | 669 | .name = "kmem_cache", |
| 648 | #if DEBUG | 670 | #if DEBUG |
| 649 | .obj_size = sizeof(struct kmem_cache), | 671 | .obj_size = sizeof(struct kmem_cache), |
| @@ -655,8 +677,8 @@ static DEFINE_MUTEX(cache_chain_mutex); | |||
| 655 | static struct list_head cache_chain; | 677 | static struct list_head cache_chain; |
| 656 | 678 | ||
| 657 | /* | 679 | /* |
| 658 | * vm_enough_memory() looks at this to determine how many | 680 | * vm_enough_memory() looks at this to determine how many slab-allocated pages |
| 659 | * slab-allocated pages are possibly freeable under pressure | 681 | * are possibly freeable under pressure |
| 660 | * | 682 | * |
| 661 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab | 683 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab |
| 662 | */ | 684 | */ |
| @@ -675,7 +697,8 @@ static enum { | |||
| 675 | 697 | ||
| 676 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 698 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
| 677 | 699 | ||
| 678 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); | 700 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
| 701 | int node); | ||
| 679 | static void enable_cpucache(struct kmem_cache *cachep); | 702 | static void enable_cpucache(struct kmem_cache *cachep); |
| 680 | static void cache_reap(void *unused); | 703 | static void cache_reap(void *unused); |
| 681 | static int __node_shrink(struct kmem_cache *cachep, int node); | 704 | static int __node_shrink(struct kmem_cache *cachep, int node); |
| @@ -685,7 +708,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
| 685 | return cachep->array[smp_processor_id()]; | 708 | return cachep->array[smp_processor_id()]; |
| 686 | } | 709 | } |
| 687 | 710 | ||
| 688 | static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) | 711 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
| 712 | gfp_t gfpflags) | ||
| 689 | { | 713 | { |
| 690 | struct cache_sizes *csizep = malloc_sizes; | 714 | struct cache_sizes *csizep = malloc_sizes; |
| 691 | 715 | ||
| @@ -720,8 +744,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align) | |||
| 720 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 744 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); |
| 721 | } | 745 | } |
| 722 | 746 | ||
| 723 | /* Calculate the number of objects and left-over bytes for a given | 747 | /* |
| 724 | buffer size. */ | 748 | * Calculate the number of objects and left-over bytes for a given buffer size. |
| 749 | */ | ||
| 725 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 750 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, |
| 726 | size_t align, int flags, size_t *left_over, | 751 | size_t align, int flags, size_t *left_over, |
| 727 | unsigned int *num) | 752 | unsigned int *num) |
| @@ -782,7 +807,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 782 | 807 | ||
| 783 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) | 808 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) |
| 784 | 809 | ||
| 785 | static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) | 810 | static void __slab_error(const char *function, struct kmem_cache *cachep, |
| 811 | char *msg) | ||
| 786 | { | 812 | { |
| 787 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 813 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
| 788 | function, cachep->name, msg); | 814 | function, cachep->name, msg); |
| @@ -804,7 +830,7 @@ static void init_reap_node(int cpu) | |||
| 804 | 830 | ||
| 805 | node = next_node(cpu_to_node(cpu), node_online_map); | 831 | node = next_node(cpu_to_node(cpu), node_online_map); |
| 806 | if (node == MAX_NUMNODES) | 832 | if (node == MAX_NUMNODES) |
| 807 | node = 0; | 833 | node = first_node(node_online_map); |
| 808 | 834 | ||
| 809 | __get_cpu_var(reap_node) = node; | 835 | __get_cpu_var(reap_node) = node; |
| 810 | } | 836 | } |
| @@ -906,10 +932,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) | |||
| 906 | 932 | ||
| 907 | if (!ac_ptr) | 933 | if (!ac_ptr) |
| 908 | return; | 934 | return; |
| 909 | |||
| 910 | for_each_node(i) | 935 | for_each_node(i) |
| 911 | kfree(ac_ptr[i]); | 936 | kfree(ac_ptr[i]); |
| 912 | |||
| 913 | kfree(ac_ptr); | 937 | kfree(ac_ptr); |
| 914 | } | 938 | } |
| 915 | 939 | ||
| @@ -943,7 +967,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | |||
| 943 | } | 967 | } |
| 944 | } | 968 | } |
| 945 | 969 | ||
| 946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 970 | static void drain_alien_cache(struct kmem_cache *cachep, |
| 971 | struct array_cache **alien) | ||
| 947 | { | 972 | { |
| 948 | int i = 0; | 973 | int i = 0; |
| 949 | struct array_cache *ac; | 974 | struct array_cache *ac; |
| @@ -986,20 +1011,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 986 | switch (action) { | 1011 | switch (action) { |
| 987 | case CPU_UP_PREPARE: | 1012 | case CPU_UP_PREPARE: |
| 988 | mutex_lock(&cache_chain_mutex); | 1013 | mutex_lock(&cache_chain_mutex); |
| 989 | /* we need to do this right in the beginning since | 1014 | /* |
| 1015 | * We need to do this right in the beginning since | ||
| 990 | * alloc_arraycache's are going to use this list. | 1016 | * alloc_arraycache's are going to use this list. |
| 991 | * kmalloc_node allows us to add the slab to the right | 1017 | * kmalloc_node allows us to add the slab to the right |
| 992 | * kmem_list3 and not this cpu's kmem_list3 | 1018 | * kmem_list3 and not this cpu's kmem_list3 |
| 993 | */ | 1019 | */ |
| 994 | 1020 | ||
| 995 | list_for_each_entry(cachep, &cache_chain, next) { | 1021 | list_for_each_entry(cachep, &cache_chain, next) { |
| 996 | /* setup the size64 kmemlist for cpu before we can | 1022 | /* |
| 1023 | * Set up the size64 kmemlist for cpu before we can | ||
| 997 | * begin anything. Make sure some other cpu on this | 1024 | * begin anything. Make sure some other cpu on this |
| 998 | * node has not already allocated this | 1025 | * node has not already allocated this |
| 999 | */ | 1026 | */ |
| 1000 | if (!cachep->nodelists[node]) { | 1027 | if (!cachep->nodelists[node]) { |
| 1001 | if (!(l3 = kmalloc_node(memsize, | 1028 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); |
| 1002 | GFP_KERNEL, node))) | 1029 | if (!l3) |
| 1003 | goto bad; | 1030 | goto bad; |
| 1004 | kmem_list3_init(l3); | 1031 | kmem_list3_init(l3); |
| 1005 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1032 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
| @@ -1015,13 +1042,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1015 | 1042 | ||
| 1016 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 1043 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
| 1017 | cachep->nodelists[node]->free_limit = | 1044 | cachep->nodelists[node]->free_limit = |
| 1018 | (1 + nr_cpus_node(node)) * | 1045 | (1 + nr_cpus_node(node)) * |
| 1019 | cachep->batchcount + cachep->num; | 1046 | cachep->batchcount + cachep->num; |
| 1020 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 1047 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
| 1021 | } | 1048 | } |
| 1022 | 1049 | ||
| 1023 | /* Now we can go ahead with allocating the shared array's | 1050 | /* |
| 1024 | & array cache's */ | 1051 | * Now we can go ahead with allocating the shared arrays and |
| 1052 | * array caches | ||
| 1053 | */ | ||
| 1025 | list_for_each_entry(cachep, &cache_chain, next) { | 1054 | list_for_each_entry(cachep, &cache_chain, next) { |
| 1026 | struct array_cache *nc; | 1055 | struct array_cache *nc; |
| 1027 | struct array_cache *shared; | 1056 | struct array_cache *shared; |
| @@ -1041,7 +1070,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1041 | if (!alien) | 1070 | if (!alien) |
| 1042 | goto bad; | 1071 | goto bad; |
| 1043 | cachep->array[cpu] = nc; | 1072 | cachep->array[cpu] = nc; |
| 1044 | |||
| 1045 | l3 = cachep->nodelists[node]; | 1073 | l3 = cachep->nodelists[node]; |
| 1046 | BUG_ON(!l3); | 1074 | BUG_ON(!l3); |
| 1047 | 1075 | ||
| @@ -1061,7 +1089,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1061 | } | 1089 | } |
| 1062 | #endif | 1090 | #endif |
| 1063 | spin_unlock_irq(&l3->list_lock); | 1091 | spin_unlock_irq(&l3->list_lock); |
| 1064 | |||
| 1065 | kfree(shared); | 1092 | kfree(shared); |
| 1066 | free_alien_cache(alien); | 1093 | free_alien_cache(alien); |
| 1067 | } | 1094 | } |
| @@ -1083,7 +1110,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1083 | /* fall thru */ | 1110 | /* fall thru */ |
| 1084 | case CPU_UP_CANCELED: | 1111 | case CPU_UP_CANCELED: |
| 1085 | mutex_lock(&cache_chain_mutex); | 1112 | mutex_lock(&cache_chain_mutex); |
| 1086 | |||
| 1087 | list_for_each_entry(cachep, &cache_chain, next) { | 1113 | list_for_each_entry(cachep, &cache_chain, next) { |
| 1088 | struct array_cache *nc; | 1114 | struct array_cache *nc; |
| 1089 | struct array_cache *shared; | 1115 | struct array_cache *shared; |
| @@ -1150,7 +1176,7 @@ free_array_cache: | |||
| 1150 | #endif | 1176 | #endif |
| 1151 | } | 1177 | } |
| 1152 | return NOTIFY_OK; | 1178 | return NOTIFY_OK; |
| 1153 | bad: | 1179 | bad: |
| 1154 | mutex_unlock(&cache_chain_mutex); | 1180 | mutex_unlock(&cache_chain_mutex); |
| 1155 | return NOTIFY_BAD; | 1181 | return NOTIFY_BAD; |
| 1156 | } | 1182 | } |
| @@ -1160,7 +1186,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; | |||
| 1160 | /* | 1186 | /* |
| 1161 | * swap the static kmem_list3 with kmalloced memory | 1187 | * swap the static kmem_list3 with kmalloced memory |
| 1162 | */ | 1188 | */ |
| 1163 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) | 1189 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
| 1190 | int nodeid) | ||
| 1164 | { | 1191 | { |
| 1165 | struct kmem_list3 *ptr; | 1192 | struct kmem_list3 *ptr; |
| 1166 | 1193 | ||
| @@ -1175,8 +1202,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no | |||
| 1175 | local_irq_enable(); | 1202 | local_irq_enable(); |
| 1176 | } | 1203 | } |
| 1177 | 1204 | ||
| 1178 | /* Initialisation. | 1205 | /* |
| 1179 | * Called after the gfp() functions have been enabled, and before smp_init(). | 1206 | * Initialisation. Called after the page allocator have been initialised and |
| 1207 | * before smp_init(). | ||
| 1180 | */ | 1208 | */ |
| 1181 | void __init kmem_cache_init(void) | 1209 | void __init kmem_cache_init(void) |
| 1182 | { | 1210 | { |
| @@ -1201,9 +1229,9 @@ void __init kmem_cache_init(void) | |||
| 1201 | 1229 | ||
| 1202 | /* Bootstrap is tricky, because several objects are allocated | 1230 | /* Bootstrap is tricky, because several objects are allocated |
| 1203 | * from caches that do not exist yet: | 1231 | * from caches that do not exist yet: |
| 1204 | * 1) initialize the cache_cache cache: it contains the struct kmem_cache | 1232 | * 1) initialize the cache_cache cache: it contains the struct |
| 1205 | * structures of all caches, except cache_cache itself: cache_cache | 1233 | * kmem_cache structures of all caches, except cache_cache itself: |
| 1206 | * is statically allocated. | 1234 | * cache_cache is statically allocated. |
| 1207 | * Initially an __init data area is used for the head array and the | 1235 | * Initially an __init data area is used for the head array and the |
| 1208 | * kmem_list3 structures, it's replaced with a kmalloc allocated | 1236 | * kmem_list3 structures, it's replaced with a kmalloc allocated |
| 1209 | * array at the end of the bootstrap. | 1237 | * array at the end of the bootstrap. |
| @@ -1226,7 +1254,8 @@ void __init kmem_cache_init(void) | |||
| 1226 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; | 1254 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; |
| 1227 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; | 1255 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; |
| 1228 | 1256 | ||
| 1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1257 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, |
| 1258 | cache_line_size()); | ||
| 1230 | 1259 | ||
| 1231 | for (order = 0; order < MAX_ORDER; order++) { | 1260 | for (order = 0; order < MAX_ORDER; order++) { |
| 1232 | cache_estimate(order, cache_cache.buffer_size, | 1261 | cache_estimate(order, cache_cache.buffer_size, |
| @@ -1245,24 +1274,26 @@ void __init kmem_cache_init(void) | |||
| 1245 | sizes = malloc_sizes; | 1274 | sizes = malloc_sizes; |
| 1246 | names = cache_names; | 1275 | names = cache_names; |
| 1247 | 1276 | ||
| 1248 | /* Initialize the caches that provide memory for the array cache | 1277 | /* |
| 1249 | * and the kmem_list3 structures first. | 1278 | * Initialize the caches that provide memory for the array cache and the |
| 1250 | * Without this, further allocations will bug | 1279 | * kmem_list3 structures first. Without this, further allocations will |
| 1280 | * bug. | ||
| 1251 | */ | 1281 | */ |
| 1252 | 1282 | ||
| 1253 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1283 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
| 1254 | sizes[INDEX_AC].cs_size, | 1284 | sizes[INDEX_AC].cs_size, |
| 1255 | ARCH_KMALLOC_MINALIGN, | 1285 | ARCH_KMALLOC_MINALIGN, |
| 1256 | (ARCH_KMALLOC_FLAGS | | 1286 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1257 | SLAB_PANIC), NULL, NULL); | 1287 | NULL, NULL); |
| 1258 | 1288 | ||
| 1259 | if (INDEX_AC != INDEX_L3) | 1289 | if (INDEX_AC != INDEX_L3) { |
| 1260 | sizes[INDEX_L3].cs_cachep = | 1290 | sizes[INDEX_L3].cs_cachep = |
| 1261 | kmem_cache_create(names[INDEX_L3].name, | 1291 | kmem_cache_create(names[INDEX_L3].name, |
| 1262 | sizes[INDEX_L3].cs_size, | 1292 | sizes[INDEX_L3].cs_size, |
| 1263 | ARCH_KMALLOC_MINALIGN, | 1293 | ARCH_KMALLOC_MINALIGN, |
| 1264 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, | 1294 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1265 | NULL); | 1295 | NULL, NULL); |
| 1296 | } | ||
| 1266 | 1297 | ||
| 1267 | while (sizes->cs_size != ULONG_MAX) { | 1298 | while (sizes->cs_size != ULONG_MAX) { |
| 1268 | /* | 1299 | /* |
| @@ -1272,13 +1303,13 @@ void __init kmem_cache_init(void) | |||
| 1272 | * Note for systems short on memory removing the alignment will | 1303 | * Note for systems short on memory removing the alignment will |
| 1273 | * allow tighter packing of the smaller caches. | 1304 | * allow tighter packing of the smaller caches. |
| 1274 | */ | 1305 | */ |
| 1275 | if (!sizes->cs_cachep) | 1306 | if (!sizes->cs_cachep) { |
| 1276 | sizes->cs_cachep = kmem_cache_create(names->name, | 1307 | sizes->cs_cachep = kmem_cache_create(names->name, |
| 1277 | sizes->cs_size, | 1308 | sizes->cs_size, |
| 1278 | ARCH_KMALLOC_MINALIGN, | 1309 | ARCH_KMALLOC_MINALIGN, |
| 1279 | (ARCH_KMALLOC_FLAGS | 1310 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1280 | | SLAB_PANIC), | 1311 | NULL, NULL); |
| 1281 | NULL, NULL); | 1312 | } |
| 1282 | 1313 | ||
| 1283 | /* Inc off-slab bufctl limit until the ceiling is hit. */ | 1314 | /* Inc off-slab bufctl limit until the ceiling is hit. */ |
| 1284 | if (!(OFF_SLAB(sizes->cs_cachep))) { | 1315 | if (!(OFF_SLAB(sizes->cs_cachep))) { |
| @@ -1287,13 +1318,11 @@ void __init kmem_cache_init(void) | |||
| 1287 | } | 1318 | } |
| 1288 | 1319 | ||
| 1289 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1320 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
| 1290 | sizes->cs_size, | 1321 | sizes->cs_size, |
| 1291 | ARCH_KMALLOC_MINALIGN, | 1322 | ARCH_KMALLOC_MINALIGN, |
| 1292 | (ARCH_KMALLOC_FLAGS | | 1323 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| |
| 1293 | SLAB_CACHE_DMA | | 1324 | SLAB_PANIC, |
| 1294 | SLAB_PANIC), NULL, | 1325 | NULL, NULL); |
| 1295 | NULL); | ||
| 1296 | |||
| 1297 | sizes++; | 1326 | sizes++; |
| 1298 | names++; | 1327 | names++; |
| 1299 | } | 1328 | } |
| @@ -1345,20 +1374,22 @@ void __init kmem_cache_init(void) | |||
| 1345 | struct kmem_cache *cachep; | 1374 | struct kmem_cache *cachep; |
| 1346 | mutex_lock(&cache_chain_mutex); | 1375 | mutex_lock(&cache_chain_mutex); |
| 1347 | list_for_each_entry(cachep, &cache_chain, next) | 1376 | list_for_each_entry(cachep, &cache_chain, next) |
| 1348 | enable_cpucache(cachep); | 1377 | enable_cpucache(cachep); |
| 1349 | mutex_unlock(&cache_chain_mutex); | 1378 | mutex_unlock(&cache_chain_mutex); |
| 1350 | } | 1379 | } |
| 1351 | 1380 | ||
| 1352 | /* Done! */ | 1381 | /* Done! */ |
| 1353 | g_cpucache_up = FULL; | 1382 | g_cpucache_up = FULL; |
| 1354 | 1383 | ||
| 1355 | /* Register a cpu startup notifier callback | 1384 | /* |
| 1356 | * that initializes cpu_cache_get for all new cpus | 1385 | * Register a cpu startup notifier callback that initializes |
| 1386 | * cpu_cache_get for all new cpus | ||
| 1357 | */ | 1387 | */ |
| 1358 | register_cpu_notifier(&cpucache_notifier); | 1388 | register_cpu_notifier(&cpucache_notifier); |
| 1359 | 1389 | ||
| 1360 | /* The reap timers are started later, with a module init call: | 1390 | /* |
| 1361 | * That part of the kernel is not yet operational. | 1391 | * The reap timers are started later, with a module init call: That part |
| 1392 | * of the kernel is not yet operational. | ||
| 1362 | */ | 1393 | */ |
| 1363 | } | 1394 | } |
| 1364 | 1395 | ||
| @@ -1366,16 +1397,13 @@ static int __init cpucache_init(void) | |||
| 1366 | { | 1397 | { |
| 1367 | int cpu; | 1398 | int cpu; |
| 1368 | 1399 | ||
| 1369 | /* | 1400 | /* |
| 1370 | * Register the timers that return unneeded | 1401 | * Register the timers that return unneeded pages to the page allocator |
| 1371 | * pages to gfp. | ||
| 1372 | */ | 1402 | */ |
| 1373 | for_each_online_cpu(cpu) | 1403 | for_each_online_cpu(cpu) |
| 1374 | start_cpu_timer(cpu); | 1404 | start_cpu_timer(cpu); |
| 1375 | |||
| 1376 | return 0; | 1405 | return 0; |
| 1377 | } | 1406 | } |
| 1378 | |||
| 1379 | __initcall(cpucache_init); | 1407 | __initcall(cpucache_init); |
| 1380 | 1408 | ||
| 1381 | /* | 1409 | /* |
| @@ -1402,7 +1430,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1402 | atomic_add(i, &slab_reclaim_pages); | 1430 | atomic_add(i, &slab_reclaim_pages); |
| 1403 | add_page_state(nr_slab, i); | 1431 | add_page_state(nr_slab, i); |
| 1404 | while (i--) { | 1432 | while (i--) { |
| 1405 | SetPageSlab(page); | 1433 | __SetPageSlab(page); |
| 1406 | page++; | 1434 | page++; |
| 1407 | } | 1435 | } |
| 1408 | return addr; | 1436 | return addr; |
| @@ -1418,8 +1446,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1418 | const unsigned long nr_freed = i; | 1446 | const unsigned long nr_freed = i; |
| 1419 | 1447 | ||
| 1420 | while (i--) { | 1448 | while (i--) { |
| 1421 | if (!TestClearPageSlab(page)) | 1449 | BUG_ON(!PageSlab(page)); |
| 1422 | BUG(); | 1450 | __ClearPageSlab(page); |
| 1423 | page++; | 1451 | page++; |
| 1424 | } | 1452 | } |
| 1425 | sub_page_state(nr_slab, nr_freed); | 1453 | sub_page_state(nr_slab, nr_freed); |
| @@ -1489,9 +1517,8 @@ static void dump_line(char *data, int offset, int limit) | |||
| 1489 | { | 1517 | { |
| 1490 | int i; | 1518 | int i; |
| 1491 | printk(KERN_ERR "%03x:", offset); | 1519 | printk(KERN_ERR "%03x:", offset); |
| 1492 | for (i = 0; i < limit; i++) { | 1520 | for (i = 0; i < limit; i++) |
| 1493 | printk(" %02x", (unsigned char)data[offset + i]); | 1521 | printk(" %02x", (unsigned char)data[offset + i]); |
| 1494 | } | ||
| 1495 | printk("\n"); | 1522 | printk("\n"); |
| 1496 | } | 1523 | } |
| 1497 | #endif | 1524 | #endif |
| @@ -1505,15 +1532,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) | |||
| 1505 | 1532 | ||
| 1506 | if (cachep->flags & SLAB_RED_ZONE) { | 1533 | if (cachep->flags & SLAB_RED_ZONE) { |
| 1507 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", | 1534 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", |
| 1508 | *dbg_redzone1(cachep, objp), | 1535 | *dbg_redzone1(cachep, objp), |
| 1509 | *dbg_redzone2(cachep, objp)); | 1536 | *dbg_redzone2(cachep, objp)); |
| 1510 | } | 1537 | } |
| 1511 | 1538 | ||
| 1512 | if (cachep->flags & SLAB_STORE_USER) { | 1539 | if (cachep->flags & SLAB_STORE_USER) { |
| 1513 | printk(KERN_ERR "Last user: [<%p>]", | 1540 | printk(KERN_ERR "Last user: [<%p>]", |
| 1514 | *dbg_userword(cachep, objp)); | 1541 | *dbg_userword(cachep, objp)); |
| 1515 | print_symbol("(%s)", | 1542 | print_symbol("(%s)", |
| 1516 | (unsigned long)*dbg_userword(cachep, objp)); | 1543 | (unsigned long)*dbg_userword(cachep, objp)); |
| 1517 | printk("\n"); | 1544 | printk("\n"); |
| 1518 | } | 1545 | } |
| 1519 | realobj = (char *)objp + obj_offset(cachep); | 1546 | realobj = (char *)objp + obj_offset(cachep); |
| @@ -1546,8 +1573,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1546 | /* Print header */ | 1573 | /* Print header */ |
| 1547 | if (lines == 0) { | 1574 | if (lines == 0) { |
| 1548 | printk(KERN_ERR | 1575 | printk(KERN_ERR |
| 1549 | "Slab corruption: start=%p, len=%d\n", | 1576 | "Slab corruption: start=%p, len=%d\n", |
| 1550 | realobj, size); | 1577 | realobj, size); |
| 1551 | print_objinfo(cachep, objp, 0); | 1578 | print_objinfo(cachep, objp, 0); |
| 1552 | } | 1579 | } |
| 1553 | /* Hexdump the affected line */ | 1580 | /* Hexdump the affected line */ |
| @@ -1568,18 +1595,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1568 | * exist: | 1595 | * exist: |
| 1569 | */ | 1596 | */ |
| 1570 | struct slab *slabp = virt_to_slab(objp); | 1597 | struct slab *slabp = virt_to_slab(objp); |
| 1571 | int objnr; | 1598 | unsigned int objnr; |
| 1572 | 1599 | ||
| 1573 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 1600 | objnr = obj_to_index(cachep, slabp, objp); |
| 1574 | if (objnr) { | 1601 | if (objnr) { |
| 1575 | objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; | 1602 | objp = index_to_obj(cachep, slabp, objnr - 1); |
| 1576 | realobj = (char *)objp + obj_offset(cachep); | 1603 | realobj = (char *)objp + obj_offset(cachep); |
| 1577 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1604 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
| 1578 | realobj, size); | 1605 | realobj, size); |
| 1579 | print_objinfo(cachep, objp, 2); | 1606 | print_objinfo(cachep, objp, 2); |
| 1580 | } | 1607 | } |
| 1581 | if (objnr + 1 < cachep->num) { | 1608 | if (objnr + 1 < cachep->num) { |
| 1582 | objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; | 1609 | objp = index_to_obj(cachep, slabp, objnr + 1); |
| 1583 | realobj = (char *)objp + obj_offset(cachep); | 1610 | realobj = (char *)objp + obj_offset(cachep); |
| 1584 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1611 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
| 1585 | realobj, size); | 1612 | realobj, size); |
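
The arithmetic that used to be open-coded here (objp minus slabp->s_mem divided by buffer_size, and s_mem plus index times buffer_size) now sits behind obj_to_index() and index_to_obj(). Those helpers are defined elsewhere in slab.c; judging by the expressions they replace, they are thin wrappers along these lines (a sketch, not the exact definitions):

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
				 unsigned int idx)
{
	return slab->s_mem + cache->buffer_size * idx;
}

static inline unsigned int obj_to_index(struct kmem_cache *cache,
					struct slab *slab, void *obj)
{
	return (unsigned int)(obj - slab->s_mem) / cache->buffer_size;
}
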
| @@ -1591,22 +1618,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1591 | 1618 | ||
| 1592 | #if DEBUG | 1619 | #if DEBUG |
| 1593 | /** | 1620 | /** |
| 1594 | * slab_destroy_objs - call the registered destructor for each object in | 1621 | * slab_destroy_objs - destroy a slab and its objects |
| 1595 | * a slab that is to be destroyed. | 1622 | * @cachep: cache pointer being destroyed |
| 1623 | * @slabp: slab pointer being destroyed | ||
| 1624 | * | ||
| 1625 | * Call the registered destructor for each object in a slab that is being | ||
| 1626 | * destroyed. | ||
| 1596 | */ | 1627 | */ |
| 1597 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1628 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
| 1598 | { | 1629 | { |
| 1599 | int i; | 1630 | int i; |
| 1600 | for (i = 0; i < cachep->num; i++) { | 1631 | for (i = 0; i < cachep->num; i++) { |
| 1601 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1632 | void *objp = index_to_obj(cachep, slabp, i); |
| 1602 | 1633 | ||
| 1603 | if (cachep->flags & SLAB_POISON) { | 1634 | if (cachep->flags & SLAB_POISON) { |
| 1604 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1635 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 1605 | if ((cachep->buffer_size % PAGE_SIZE) == 0 | 1636 | if (cachep->buffer_size % PAGE_SIZE == 0 && |
| 1606 | && OFF_SLAB(cachep)) | 1637 | OFF_SLAB(cachep)) |
| 1607 | kernel_map_pages(virt_to_page(objp), | 1638 | kernel_map_pages(virt_to_page(objp), |
| 1608 | cachep->buffer_size / PAGE_SIZE, | 1639 | cachep->buffer_size / PAGE_SIZE, 1); |
| 1609 | 1); | ||
| 1610 | else | 1640 | else |
| 1611 | check_poison_obj(cachep, objp); | 1641 | check_poison_obj(cachep, objp); |
| 1612 | #else | 1642 | #else |
| @@ -1631,7 +1661,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1631 | if (cachep->dtor) { | 1661 | if (cachep->dtor) { |
| 1632 | int i; | 1662 | int i; |
| 1633 | for (i = 0; i < cachep->num; i++) { | 1663 | for (i = 0; i < cachep->num; i++) { |
| 1634 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1664 | void *objp = index_to_obj(cachep, slabp, i); |
| 1635 | (cachep->dtor) (objp, cachep, 0); | 1665 | (cachep->dtor) (objp, cachep, 0); |
| 1636 | } | 1666 | } |
| 1637 | } | 1667 | } |
| @@ -1639,9 +1669,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1639 | #endif | 1669 | #endif |
| 1640 | 1670 | ||
| 1641 | /** | 1671 | /** |
| 1672 | * slab_destroy - destroy and release all objects in a slab | ||
| 1673 | * @cachep: cache pointer being destroyed | ||
| 1674 | * @slabp: slab pointer being destroyed | ||
| 1675 | * | ||
| 1642 | * Destroy all the objs in a slab, and release the mem back to the system. | 1676 | * Destroy all the objs in a slab, and release the mem back to the system. |
| 1643 | * Before calling the slab must have been unlinked from the cache. | 1677 | * Before calling the slab must have been unlinked from the cache. The |
| 1644 | * The cache-lock is not held/needed. | 1678 | * cache-lock is not held/needed. |
| 1645 | */ | 1679 | */ |
| 1646 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1680 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
| 1647 | { | 1681 | { |
| @@ -1662,8 +1696,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1662 | } | 1696 | } |
| 1663 | } | 1697 | } |
| 1664 | 1698 | ||
| 1665 | /* For setting up all the kmem_list3s for cache whose buffer_size is same | 1699 | /* |
| 1666 | as size of kmem_list3. */ | 1700 | * For setting up all the kmem_list3s for cache whose buffer_size is same as |
| 1701 | * size of kmem_list3. | ||
| 1702 | */ | ||
| 1667 | static void set_up_list3s(struct kmem_cache *cachep, int index) | 1703 | static void set_up_list3s(struct kmem_cache *cachep, int index) |
| 1668 | { | 1704 | { |
| 1669 | int node; | 1705 | int node; |
| @@ -1689,13 +1725,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) | |||
| 1689 | * high order pages for slabs. When the gfp() functions are more friendly | 1725 | * high order pages for slabs. When the gfp() functions are more friendly |
| 1690 | * towards high-order requests, this should be changed. | 1726 | * towards high-order requests, this should be changed. |
| 1691 | */ | 1727 | */ |
| 1692 | static inline size_t calculate_slab_order(struct kmem_cache *cachep, | 1728 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
| 1693 | size_t size, size_t align, unsigned long flags) | 1729 | size_t size, size_t align, unsigned long flags) |
| 1694 | { | 1730 | { |
| 1695 | size_t left_over = 0; | 1731 | size_t left_over = 0; |
| 1696 | int gfporder; | 1732 | int gfporder; |
| 1697 | 1733 | ||
| 1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { | 1734 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { |
| 1699 | unsigned int num; | 1735 | unsigned int num; |
| 1700 | size_t remainder; | 1736 | size_t remainder; |
| 1701 | 1737 | ||
| @@ -1730,12 +1766,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1730 | /* | 1766 | /* |
| 1731 | * Acceptable internal fragmentation? | 1767 | * Acceptable internal fragmentation? |
| 1732 | */ | 1768 | */ |
| 1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | 1769 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) |
| 1734 | break; | 1770 | break; |
| 1735 | } | 1771 | } |
| 1736 | return left_over; | 1772 | return left_over; |
| 1737 | } | 1773 | } |
| 1738 | 1774 | ||
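
For reference, the acceptance test in calculate_slab_order() above admits a candidate order once the wasted tail is at most one eighth of the slab. With a 4096-byte page (assumed only for the example), that means up to 512 bytes of internal fragmentation at order 0 and up to 1024 bytes at order 1:

/* Illustrative restatement of the check above, not new logic. */
static int frag_acceptable(size_t left_over, int gfporder)
{
	/* order 0: 4096-byte slab -> left_over <= 512; order 1: 8192 -> <= 1024 */
	return left_over * 8 <= (PAGE_SIZE << gfporder);
}
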
| 1775 | static void setup_cpu_cache(struct kmem_cache *cachep) | ||
| 1776 | { | ||
| 1777 | if (g_cpucache_up == FULL) { | ||
| 1778 | enable_cpucache(cachep); | ||
| 1779 | return; | ||
| 1780 | } | ||
| 1781 | if (g_cpucache_up == NONE) { | ||
| 1782 | /* | ||
| 1783 | * Note: the first kmem_cache_create must create the cache | ||
| 1784 | * that's used by kmalloc(24), otherwise the creation of | ||
| 1785 | * further caches will BUG(). | ||
| 1786 | */ | ||
| 1787 | cachep->array[smp_processor_id()] = &initarray_generic.cache; | ||
| 1788 | |||
| 1789 | /* | ||
| 1790 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is | ||
| 1791 | * the first cache, then we need to set up all its list3s, | ||
| 1792 | * otherwise the creation of further caches will BUG(). | ||
| 1793 | */ | ||
| 1794 | set_up_list3s(cachep, SIZE_AC); | ||
| 1795 | if (INDEX_AC == INDEX_L3) | ||
| 1796 | g_cpucache_up = PARTIAL_L3; | ||
| 1797 | else | ||
| 1798 | g_cpucache_up = PARTIAL_AC; | ||
| 1799 | } else { | ||
| 1800 | cachep->array[smp_processor_id()] = | ||
| 1801 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
| 1802 | |||
| 1803 | if (g_cpucache_up == PARTIAL_AC) { | ||
| 1804 | set_up_list3s(cachep, SIZE_L3); | ||
| 1805 | g_cpucache_up = PARTIAL_L3; | ||
| 1806 | } else { | ||
| 1807 | int node; | ||
| 1808 | for_each_online_node(node) { | ||
| 1809 | cachep->nodelists[node] = | ||
| 1810 | kmalloc_node(sizeof(struct kmem_list3), | ||
| 1811 | GFP_KERNEL, node); | ||
| 1812 | BUG_ON(!cachep->nodelists[node]); | ||
| 1813 | kmem_list3_init(cachep->nodelists[node]); | ||
| 1814 | } | ||
| 1815 | } | ||
| 1816 | } | ||
| 1817 | cachep->nodelists[numa_node_id()]->next_reap = | ||
| 1818 | jiffies + REAPTIMEOUT_LIST3 + | ||
| 1819 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 1820 | |||
| 1821 | cpu_cache_get(cachep)->avail = 0; | ||
| 1822 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 1823 | cpu_cache_get(cachep)->batchcount = 1; | ||
| 1824 | cpu_cache_get(cachep)->touched = 0; | ||
| 1825 | cachep->batchcount = 1; | ||
| 1826 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 1827 | } | ||
| 1828 | |||
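
The new setup_cpu_cache() above is the bootstrap ladder that used to be inlined in kmem_cache_create(). The g_cpucache_up states it steps through are defined earlier in slab.c; roughly as follows (a sketch of the presumed definition, with the state names taken from this diff):

static enum {
	NONE,		/* no kmalloc caches usable yet */
	PARTIAL_AC,	/* the struct arraycache_init cache exists */
	PARTIAL_L3,	/* the struct kmem_list3 cache exists as well */
	FULL		/* slab fully up: enable_cpucache() can be called */
} g_cpucache_up;
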
| 1739 | /** | 1829 | /** |
| 1740 | * kmem_cache_create - Create a cache. | 1830 | * kmem_cache_create - Create a cache. |
| 1741 | * @name: A string which is used in /proc/slabinfo to identify this cache. | 1831 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
| @@ -1751,9 +1841,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1751 | * and the @dtor is run before the pages are handed back. | 1841 | * and the @dtor is run before the pages are handed back. |
| 1752 | * | 1842 | * |
| 1753 | * @name must be valid until the cache is destroyed. This implies that | 1843 | * @name must be valid until the cache is destroyed. This implies that |
| 1754 | * the module calling this has to destroy the cache before getting | 1844 | * the module calling this has to destroy the cache before getting unloaded. |
| 1755 | * unloaded. | 1845 | * |
| 1756 | * | ||
| 1757 | * The flags are | 1846 | * The flags are |
| 1758 | * | 1847 | * |
| 1759 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) | 1848 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
| @@ -1762,16 +1851,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1762 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check | 1851 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
| 1763 | * for buffer overruns. | 1852 | * for buffer overruns. |
| 1764 | * | 1853 | * |
| 1765 | * %SLAB_NO_REAP - Don't automatically reap this cache when we're under | ||
| 1766 | * memory pressure. | ||
| 1767 | * | ||
| 1768 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware | 1854 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
| 1769 | * cacheline. This can be beneficial if you're counting cycles as closely | 1855 | * cacheline. This can be beneficial if you're counting cycles as closely |
| 1770 | * as davem. | 1856 | * as davem. |
| 1771 | */ | 1857 | */ |
| 1772 | struct kmem_cache * | 1858 | struct kmem_cache * |
| 1773 | kmem_cache_create (const char *name, size_t size, size_t align, | 1859 | kmem_cache_create (const char *name, size_t size, size_t align, |
| 1774 | unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), | 1860 | unsigned long flags, |
| 1861 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | ||
| 1775 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | 1862 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) |
| 1776 | { | 1863 | { |
| 1777 | size_t left_over, slab_size, ralign; | 1864 | size_t left_over, slab_size, ralign; |
| @@ -1781,12 +1868,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1781 | /* | 1868 | /* |
| 1782 | * Sanity checks... these are all serious usage bugs. | 1869 | * Sanity checks... these are all serious usage bugs. |
| 1783 | */ | 1870 | */ |
| 1784 | if ((!name) || | 1871 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || |
| 1785 | in_interrupt() || | ||
| 1786 | (size < BYTES_PER_WORD) || | ||
| 1787 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { | 1872 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
| 1788 | printk(KERN_ERR "%s: Early error in slab %s\n", | 1873 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, |
| 1789 | __FUNCTION__, name); | 1874 | name); |
| 1790 | BUG(); | 1875 | BUG(); |
| 1791 | } | 1876 | } |
| 1792 | 1877 | ||
| @@ -1840,8 +1925,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1840 | * above the next power of two: caches with object sizes just above a | 1925 | * above the next power of two: caches with object sizes just above a |
| 1841 | * power of two have a significant amount of internal fragmentation. | 1926 | * power of two have a significant amount of internal fragmentation. |
| 1842 | */ | 1927 | */ |
| 1843 | if ((size < 4096 | 1928 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) |
| 1844 | || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) | ||
| 1845 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | 1929 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; |
| 1846 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 1930 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
| 1847 | flags |= SLAB_POISON; | 1931 | flags |= SLAB_POISON; |
| @@ -1853,13 +1937,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1853 | BUG_ON(dtor); | 1937 | BUG_ON(dtor); |
| 1854 | 1938 | ||
| 1855 | /* | 1939 | /* |
| 1856 | * Always checks flags, a caller might be expecting debug | 1940 | * Always checks flags, a caller might be expecting debug support which |
| 1857 | * support which isn't available. | 1941 | * isn't available. |
| 1858 | */ | 1942 | */ |
| 1859 | if (flags & ~CREATE_MASK) | 1943 | if (flags & ~CREATE_MASK) |
| 1860 | BUG(); | 1944 | BUG(); |
| 1861 | 1945 | ||
| 1862 | /* Check that size is in terms of words. This is needed to avoid | 1946 | /* |
| 1947 | * Check that size is in terms of words. This is needed to avoid | ||
| 1863 | * unaligned accesses for some archs when redzoning is used, and makes | 1948 | * unaligned accesses for some archs when redzoning is used, and makes |
| 1864 | * sure any on-slab bufctl's are also correctly aligned. | 1949 | * sure any on-slab bufctl's are also correctly aligned. |
| 1865 | */ | 1950 | */ |
| @@ -1868,12 +1953,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1868 | size &= ~(BYTES_PER_WORD - 1); | 1953 | size &= ~(BYTES_PER_WORD - 1); |
| 1869 | } | 1954 | } |
| 1870 | 1955 | ||
| 1871 | /* calculate out the final buffer alignment: */ | 1956 | /* calculate the final buffer alignment: */ |
| 1957 | |||
| 1872 | /* 1) arch recommendation: can be overridden for debug */ | 1958 | /* 1) arch recommendation: can be overridden for debug */ |
| 1873 | if (flags & SLAB_HWCACHE_ALIGN) { | 1959 | if (flags & SLAB_HWCACHE_ALIGN) { |
| 1874 | /* Default alignment: as specified by the arch code. | 1960 | /* |
| 1875 | * Except if an object is really small, then squeeze multiple | 1961 | * Default alignment: as specified by the arch code. Except if |
| 1876 | * objects into one cacheline. | 1962 | * an object is really small, then squeeze multiple objects into |
| 1963 | * one cacheline. | ||
| 1877 | */ | 1964 | */ |
| 1878 | ralign = cache_line_size(); | 1965 | ralign = cache_line_size(); |
| 1879 | while (size <= ralign / 2) | 1966 | while (size <= ralign / 2) |
| @@ -1893,7 +1980,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1893 | if (ralign > BYTES_PER_WORD) | 1980 | if (ralign > BYTES_PER_WORD) |
| 1894 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 1981 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
| 1895 | } | 1982 | } |
| 1896 | /* 4) Store it. Note that the debug code below can reduce | 1983 | /* |
| 1984 | * 4) Store it. Note that the debug code below can reduce | ||
| 1897 | * the alignment to BYTES_PER_WORD. | 1985 | * the alignment to BYTES_PER_WORD. |
| 1898 | */ | 1986 | */ |
| 1899 | align = ralign; | 1987 | align = ralign; |
| @@ -1978,7 +2066,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1978 | cachep->gfpflags = 0; | 2066 | cachep->gfpflags = 0; |
| 1979 | if (flags & SLAB_CACHE_DMA) | 2067 | if (flags & SLAB_CACHE_DMA) |
| 1980 | cachep->gfpflags |= GFP_DMA; | 2068 | cachep->gfpflags |= GFP_DMA; |
| 1981 | spin_lock_init(&cachep->spinlock); | ||
| 1982 | cachep->buffer_size = size; | 2069 | cachep->buffer_size = size; |
| 1983 | 2070 | ||
| 1984 | if (flags & CFLGS_OFF_SLAB) | 2071 | if (flags & CFLGS_OFF_SLAB) |
| @@ -1988,64 +2075,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1988 | cachep->name = name; | 2075 | cachep->name = name; |
| 1989 | 2076 | ||
| 1990 | 2077 | ||
| 1991 | if (g_cpucache_up == FULL) { | 2078 | setup_cpu_cache(cachep); |
| 1992 | enable_cpucache(cachep); | ||
| 1993 | } else { | ||
| 1994 | if (g_cpucache_up == NONE) { | ||
| 1995 | /* Note: the first kmem_cache_create must create | ||
| 1996 | * the cache that's used by kmalloc(24), otherwise | ||
| 1997 | * the creation of further caches will BUG(). | ||
| 1998 | */ | ||
| 1999 | cachep->array[smp_processor_id()] = | ||
| 2000 | &initarray_generic.cache; | ||
| 2001 | |||
| 2002 | /* If the cache that's used by | ||
| 2003 | * kmalloc(sizeof(kmem_list3)) is the first cache, | ||
| 2004 | * then we need to set up all its list3s, otherwise | ||
| 2005 | * the creation of further caches will BUG(). | ||
| 2006 | */ | ||
| 2007 | set_up_list3s(cachep, SIZE_AC); | ||
| 2008 | if (INDEX_AC == INDEX_L3) | ||
| 2009 | g_cpucache_up = PARTIAL_L3; | ||
| 2010 | else | ||
| 2011 | g_cpucache_up = PARTIAL_AC; | ||
| 2012 | } else { | ||
| 2013 | cachep->array[smp_processor_id()] = | ||
| 2014 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
| 2015 | |||
| 2016 | if (g_cpucache_up == PARTIAL_AC) { | ||
| 2017 | set_up_list3s(cachep, SIZE_L3); | ||
| 2018 | g_cpucache_up = PARTIAL_L3; | ||
| 2019 | } else { | ||
| 2020 | int node; | ||
| 2021 | for_each_online_node(node) { | ||
| 2022 | |||
| 2023 | cachep->nodelists[node] = | ||
| 2024 | kmalloc_node(sizeof | ||
| 2025 | (struct kmem_list3), | ||
| 2026 | GFP_KERNEL, node); | ||
| 2027 | BUG_ON(!cachep->nodelists[node]); | ||
| 2028 | kmem_list3_init(cachep-> | ||
| 2029 | nodelists[node]); | ||
| 2030 | } | ||
| 2031 | } | ||
| 2032 | } | ||
| 2033 | cachep->nodelists[numa_node_id()]->next_reap = | ||
| 2034 | jiffies + REAPTIMEOUT_LIST3 + | ||
| 2035 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 2036 | |||
| 2037 | BUG_ON(!cpu_cache_get(cachep)); | ||
| 2038 | cpu_cache_get(cachep)->avail = 0; | ||
| 2039 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 2040 | cpu_cache_get(cachep)->batchcount = 1; | ||
| 2041 | cpu_cache_get(cachep)->touched = 0; | ||
| 2042 | cachep->batchcount = 1; | ||
| 2043 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 2044 | } | ||
| 2045 | 2079 | ||
| 2046 | /* cache setup completed, link it into the list */ | 2080 | /* cache setup completed, link it into the list */ |
| 2047 | list_add(&cachep->next, &cache_chain); | 2081 | list_add(&cachep->next, &cache_chain); |
| 2048 | oops: | 2082 | oops: |
| 2049 | if (!cachep && (flags & SLAB_PANIC)) | 2083 | if (!cachep && (flags & SLAB_PANIC)) |
| 2050 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2084 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
| 2051 | name); | 2085 | name); |
| @@ -2089,30 +2123,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | |||
| 2089 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2123 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
| 2090 | #endif | 2124 | #endif |
| 2091 | 2125 | ||
| 2092 | /* | 2126 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| 2093 | * Waits for all CPUs to execute func(). | 2127 | struct array_cache *ac, |
| 2094 | */ | 2128 | int force, int node); |
| 2095 | static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) | ||
| 2096 | { | ||
| 2097 | check_irq_on(); | ||
| 2098 | preempt_disable(); | ||
| 2099 | |||
| 2100 | local_irq_disable(); | ||
| 2101 | func(arg); | ||
| 2102 | local_irq_enable(); | ||
| 2103 | |||
| 2104 | if (smp_call_function(func, arg, 1, 1)) | ||
| 2105 | BUG(); | ||
| 2106 | |||
| 2107 | preempt_enable(); | ||
| 2108 | } | ||
| 2109 | |||
| 2110 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | ||
| 2111 | int force, int node); | ||
| 2112 | 2129 | ||
| 2113 | static void do_drain(void *arg) | 2130 | static void do_drain(void *arg) |
| 2114 | { | 2131 | { |
| 2115 | struct kmem_cache *cachep = (struct kmem_cache *) arg; | 2132 | struct kmem_cache *cachep = arg; |
| 2116 | struct array_cache *ac; | 2133 | struct array_cache *ac; |
| 2117 | int node = numa_node_id(); | 2134 | int node = numa_node_id(); |
| 2118 | 2135 | ||
| @@ -2129,14 +2146,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep) | |||
| 2129 | struct kmem_list3 *l3; | 2146 | struct kmem_list3 *l3; |
| 2130 | int node; | 2147 | int node; |
| 2131 | 2148 | ||
| 2132 | smp_call_function_all_cpus(do_drain, cachep); | 2149 | on_each_cpu(do_drain, cachep, 1, 1); |
| 2133 | check_irq_on(); | 2150 | check_irq_on(); |
| 2134 | for_each_online_node(node) { | 2151 | for_each_online_node(node) { |
| 2135 | l3 = cachep->nodelists[node]; | 2152 | l3 = cachep->nodelists[node]; |
| 2136 | if (l3) { | 2153 | if (l3) { |
| 2137 | spin_lock_irq(&l3->list_lock); | 2154 | drain_array(cachep, l3, l3->shared, 1, node); |
| 2138 | drain_array_locked(cachep, l3->shared, 1, node); | ||
| 2139 | spin_unlock_irq(&l3->list_lock); | ||
| 2140 | if (l3->alien) | 2155 | if (l3->alien) |
| 2141 | drain_alien_cache(cachep, l3->alien); | 2156 | drain_alien_cache(cachep, l3->alien); |
| 2142 | } | 2157 | } |
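
The open-coded smp_call_function_all_cpus() helper deleted above is replaced by the generic on_each_cpu(), which in this kernel generation does the same work: run the function on every other online CPU via IPI and on the local CPU with interrupts disabled. Both call sites in this patch follow the same pattern; the wrapper below is only illustrative:

static void drain_all_cpus(struct kmem_cache *cachep)
{
	/* retry = 1, wait = 1: block until every CPU has run do_drain() */
	on_each_cpu(do_drain, cachep, 1, 1);
}
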
| @@ -2260,16 +2275,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep) | |||
| 2260 | 2275 | ||
| 2261 | /* NUMA: free the list3 structures */ | 2276 | /* NUMA: free the list3 structures */ |
| 2262 | for_each_online_node(i) { | 2277 | for_each_online_node(i) { |
| 2263 | if ((l3 = cachep->nodelists[i])) { | 2278 | l3 = cachep->nodelists[i]; |
| 2279 | if (l3) { | ||
| 2264 | kfree(l3->shared); | 2280 | kfree(l3->shared); |
| 2265 | free_alien_cache(l3->alien); | 2281 | free_alien_cache(l3->alien); |
| 2266 | kfree(l3); | 2282 | kfree(l3); |
| 2267 | } | 2283 | } |
| 2268 | } | 2284 | } |
| 2269 | kmem_cache_free(&cache_cache, cachep); | 2285 | kmem_cache_free(&cache_cache, cachep); |
| 2270 | |||
| 2271 | unlock_cpu_hotplug(); | 2286 | unlock_cpu_hotplug(); |
| 2272 | |||
| 2273 | return 0; | 2287 | return 0; |
| 2274 | } | 2288 | } |
| 2275 | EXPORT_SYMBOL(kmem_cache_destroy); | 2289 | EXPORT_SYMBOL(kmem_cache_destroy); |
| @@ -2292,7 +2306,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
| 2292 | slabp->inuse = 0; | 2306 | slabp->inuse = 0; |
| 2293 | slabp->colouroff = colour_off; | 2307 | slabp->colouroff = colour_off; |
| 2294 | slabp->s_mem = objp + colour_off; | 2308 | slabp->s_mem = objp + colour_off; |
| 2295 | |||
| 2296 | return slabp; | 2309 | return slabp; |
| 2297 | } | 2310 | } |
| 2298 | 2311 | ||
| @@ -2307,7 +2320,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2307 | int i; | 2320 | int i; |
| 2308 | 2321 | ||
| 2309 | for (i = 0; i < cachep->num; i++) { | 2322 | for (i = 0; i < cachep->num; i++) { |
| 2310 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 2323 | void *objp = index_to_obj(cachep, slabp, i); |
| 2311 | #if DEBUG | 2324 | #if DEBUG |
| 2312 | /* need to poison the objs? */ | 2325 | /* need to poison the objs? */ |
| 2313 | if (cachep->flags & SLAB_POISON) | 2326 | if (cachep->flags & SLAB_POISON) |
| @@ -2320,9 +2333,9 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2320 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2333 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
| 2321 | } | 2334 | } |
| 2322 | /* | 2335 | /* |
| 2323 | * Constructors are not allowed to allocate memory from | 2336 | * Constructors are not allowed to allocate memory from the same |
| 2324 | * the same cache which they are a constructor for. | 2337 | * cache which they are a constructor for. Otherwise, deadlock. |
| 2325 | * Otherwise, deadlock. They must also be threaded. | 2338 | * They must also be threaded. |
| 2326 | */ | 2339 | */ |
| 2327 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2340 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
| 2328 | cachep->ctor(objp + obj_offset(cachep), cachep, | 2341 | cachep->ctor(objp + obj_offset(cachep), cachep, |
| @@ -2336,8 +2349,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2336 | slab_error(cachep, "constructor overwrote the" | 2349 | slab_error(cachep, "constructor overwrote the" |
| 2337 | " start of an object"); | 2350 | " start of an object"); |
| 2338 | } | 2351 | } |
| 2339 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) | 2352 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && |
| 2340 | && cachep->flags & SLAB_POISON) | 2353 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) |
| 2341 | kernel_map_pages(virt_to_page(objp), | 2354 | kernel_map_pages(virt_to_page(objp), |
| 2342 | cachep->buffer_size / PAGE_SIZE, 0); | 2355 | cachep->buffer_size / PAGE_SIZE, 0); |
| 2343 | #else | 2356 | #else |
| @@ -2352,18 +2365,16 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2352 | 2365 | ||
| 2353 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2366 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
| 2354 | { | 2367 | { |
| 2355 | if (flags & SLAB_DMA) { | 2368 | if (flags & SLAB_DMA) |
| 2356 | if (!(cachep->gfpflags & GFP_DMA)) | 2369 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); |
| 2357 | BUG(); | 2370 | else |
| 2358 | } else { | 2371 | BUG_ON(cachep->gfpflags & GFP_DMA); |
| 2359 | if (cachep->gfpflags & GFP_DMA) | ||
| 2360 | BUG(); | ||
| 2361 | } | ||
| 2362 | } | 2372 | } |
| 2363 | 2373 | ||
| 2364 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) | 2374 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, |
| 2375 | int nodeid) | ||
| 2365 | { | 2376 | { |
| 2366 | void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); | 2377 | void *objp = index_to_obj(cachep, slabp, slabp->free); |
| 2367 | kmem_bufctl_t next; | 2378 | kmem_bufctl_t next; |
| 2368 | 2379 | ||
| 2369 | slabp->inuse++; | 2380 | slabp->inuse++; |
| @@ -2377,10 +2388,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod | |||
| 2377 | return objp; | 2388 | return objp; |
| 2378 | } | 2389 | } |
| 2379 | 2390 | ||
| 2380 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, | 2391 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, |
| 2381 | int nodeid) | 2392 | void *objp, int nodeid) |
| 2382 | { | 2393 | { |
| 2383 | unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; | 2394 | unsigned int objnr = obj_to_index(cachep, slabp, objp); |
| 2384 | 2395 | ||
| 2385 | #if DEBUG | 2396 | #if DEBUG |
| 2386 | /* Verify that the slab belongs to the intended node */ | 2397 | /* Verify that the slab belongs to the intended node */ |
| @@ -2388,7 +2399,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
| 2388 | 2399 | ||
| 2389 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { | 2400 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { |
| 2390 | printk(KERN_ERR "slab: double free detected in cache " | 2401 | printk(KERN_ERR "slab: double free detected in cache " |
| 2391 | "'%s', objp %p\n", cachep->name, objp); | 2402 | "'%s', objp %p\n", cachep->name, objp); |
| 2392 | BUG(); | 2403 | BUG(); |
| 2393 | } | 2404 | } |
| 2394 | #endif | 2405 | #endif |
| @@ -2397,14 +2408,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
| 2397 | slabp->inuse--; | 2408 | slabp->inuse--; |
| 2398 | } | 2409 | } |
| 2399 | 2410 | ||
| 2400 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp) | 2411 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, |
| 2412 | void *objp) | ||
| 2401 | { | 2413 | { |
| 2402 | int i; | 2414 | int i; |
| 2403 | struct page *page; | 2415 | struct page *page; |
| 2404 | 2416 | ||
| 2405 | /* Nasty!!!!!! I hope this is OK. */ | 2417 | /* Nasty!!!!!! I hope this is OK. */ |
| 2406 | i = 1 << cachep->gfporder; | ||
| 2407 | page = virt_to_page(objp); | 2418 | page = virt_to_page(objp); |
| 2419 | |||
| 2420 | i = 1; | ||
| 2421 | if (likely(!PageCompound(page))) | ||
| 2422 | i <<= cachep->gfporder; | ||
| 2408 | do { | 2423 | do { |
| 2409 | page_set_cache(page, cachep); | 2424 | page_set_cache(page, cachep); |
| 2410 | page_set_slab(page, slabp); | 2425 | page_set_slab(page, slabp); |
| @@ -2425,8 +2440,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2425 | unsigned long ctor_flags; | 2440 | unsigned long ctor_flags; |
| 2426 | struct kmem_list3 *l3; | 2441 | struct kmem_list3 *l3; |
| 2427 | 2442 | ||
| 2428 | /* Be lazy and only check for valid flags here, | 2443 | /* |
| 2429 | * keeping it out of the critical path in kmem_cache_alloc(). | 2444 | * Be lazy and only check for valid flags here, keeping it out of the |
| 2445 | * critical path in kmem_cache_alloc(). | ||
| 2430 | */ | 2446 | */ |
| 2431 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) | 2447 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) |
| 2432 | BUG(); | 2448 | BUG(); |
| @@ -2467,14 +2483,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2467 | */ | 2483 | */ |
| 2468 | kmem_flagcheck(cachep, flags); | 2484 | kmem_flagcheck(cachep, flags); |
| 2469 | 2485 | ||
| 2470 | /* Get mem for the objs. | 2486 | /* |
| 2471 | * Attempt to allocate a physical page from 'nodeid', | 2487 | * Get mem for the objs. Attempt to allocate a physical page from |
| 2488 | * 'nodeid'. | ||
| 2472 | */ | 2489 | */ |
| 2473 | if (!(objp = kmem_getpages(cachep, flags, nodeid))) | 2490 | objp = kmem_getpages(cachep, flags, nodeid); |
| 2491 | if (!objp) | ||
| 2474 | goto failed; | 2492 | goto failed; |
| 2475 | 2493 | ||
| 2476 | /* Get slab management. */ | 2494 | /* Get slab management. */ |
| 2477 | if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags))) | 2495 | slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); |
| 2496 | if (!slabp) | ||
| 2478 | goto opps1; | 2497 | goto opps1; |
| 2479 | 2498 | ||
| 2480 | slabp->nodeid = nodeid; | 2499 | slabp->nodeid = nodeid; |
| @@ -2493,9 +2512,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2493 | l3->free_objects += cachep->num; | 2512 | l3->free_objects += cachep->num; |
| 2494 | spin_unlock(&l3->list_lock); | 2513 | spin_unlock(&l3->list_lock); |
| 2495 | return 1; | 2514 | return 1; |
| 2496 | opps1: | 2515 | opps1: |
| 2497 | kmem_freepages(cachep, objp); | 2516 | kmem_freepages(cachep, objp); |
| 2498 | failed: | 2517 | failed: |
| 2499 | if (local_flags & __GFP_WAIT) | 2518 | if (local_flags & __GFP_WAIT) |
| 2500 | local_irq_disable(); | 2519 | local_irq_disable(); |
| 2501 | return 0; | 2520 | return 0; |
| @@ -2538,8 +2557,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2538 | page = virt_to_page(objp); | 2557 | page = virt_to_page(objp); |
| 2539 | 2558 | ||
| 2540 | if (page_get_cache(page) != cachep) { | 2559 | if (page_get_cache(page) != cachep) { |
| 2541 | printk(KERN_ERR | 2560 | printk(KERN_ERR "mismatch in kmem_cache_free: expected " |
| 2542 | "mismatch in kmem_cache_free: expected cache %p, got %p\n", | 2561 | "cache %p, got %p\n", |
| 2543 | page_get_cache(page), cachep); | 2562 | page_get_cache(page), cachep); |
| 2544 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); | 2563 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); |
| 2545 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), | 2564 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), |
| @@ -2549,13 +2568,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2549 | slabp = page_get_slab(page); | 2568 | slabp = page_get_slab(page); |
| 2550 | 2569 | ||
| 2551 | if (cachep->flags & SLAB_RED_ZONE) { | 2570 | if (cachep->flags & SLAB_RED_ZONE) { |
| 2552 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE | 2571 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || |
| 2553 | || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { | 2572 | *dbg_redzone2(cachep, objp) != RED_ACTIVE) { |
| 2554 | slab_error(cachep, | 2573 | slab_error(cachep, "double free, or memory outside" |
| 2555 | "double free, or memory outside" | 2574 | " object was overwritten"); |
| 2556 | " object was overwritten"); | 2575 | printk(KERN_ERR "%p: redzone 1:0x%lx, " |
| 2557 | printk(KERN_ERR | 2576 | "redzone 2:0x%lx.\n", |
| 2558 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
| 2559 | objp, *dbg_redzone1(cachep, objp), | 2577 | objp, *dbg_redzone1(cachep, objp), |
| 2560 | *dbg_redzone2(cachep, objp)); | 2578 | *dbg_redzone2(cachep, objp)); |
| 2561 | } | 2579 | } |
| @@ -2565,15 +2583,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2565 | if (cachep->flags & SLAB_STORE_USER) | 2583 | if (cachep->flags & SLAB_STORE_USER) |
| 2566 | *dbg_userword(cachep, objp) = caller; | 2584 | *dbg_userword(cachep, objp) = caller; |
| 2567 | 2585 | ||
| 2568 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 2586 | objnr = obj_to_index(cachep, slabp, objp); |
| 2569 | 2587 | ||
| 2570 | BUG_ON(objnr >= cachep->num); | 2588 | BUG_ON(objnr >= cachep->num); |
| 2571 | BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); | 2589 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); |
| 2572 | 2590 | ||
| 2573 | if (cachep->flags & SLAB_DEBUG_INITIAL) { | 2591 | if (cachep->flags & SLAB_DEBUG_INITIAL) { |
| 2574 | /* Need to call the slab's constructor so the | 2592 | /* |
| 2575 | * caller can perform a verify of its state (debugging). | 2593 | * Need to call the slab's constructor so the caller can |
| 2576 | * Called without the cache-lock held. | 2594 | * perform a verify of its state (debugging). Called without |
| 2595 | * the cache-lock held. | ||
| 2577 | */ | 2596 | */ |
| 2578 | cachep->ctor(objp + obj_offset(cachep), | 2597 | cachep->ctor(objp + obj_offset(cachep), |
| 2579 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); | 2598 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); |
| @@ -2586,7 +2605,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2586 | } | 2605 | } |
| 2587 | if (cachep->flags & SLAB_POISON) { | 2606 | if (cachep->flags & SLAB_POISON) { |
| 2588 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2607 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 2589 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { | 2608 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
| 2590 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2609 | store_stackinfo(cachep, objp, (unsigned long)caller); |
| 2591 | kernel_map_pages(virt_to_page(objp), | 2610 | kernel_map_pages(virt_to_page(objp), |
| 2592 | cachep->buffer_size / PAGE_SIZE, 0); | 2611 | cachep->buffer_size / PAGE_SIZE, 0); |
| @@ -2612,14 +2631,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
| 2612 | goto bad; | 2631 | goto bad; |
| 2613 | } | 2632 | } |
| 2614 | if (entries != cachep->num - slabp->inuse) { | 2633 | if (entries != cachep->num - slabp->inuse) { |
| 2615 | bad: | 2634 | bad: |
| 2616 | printk(KERN_ERR | 2635 | printk(KERN_ERR "slab: Internal list corruption detected in " |
| 2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2636 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
| 2618 | cachep->name, cachep->num, slabp, slabp->inuse); | 2637 | cachep->name, cachep->num, slabp, slabp->inuse); |
| 2619 | for (i = 0; | 2638 | for (i = 0; |
| 2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2639 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
| 2621 | i++) { | 2640 | i++) { |
| 2622 | if ((i % 16) == 0) | 2641 | if (i % 16 == 0) |
| 2623 | printk("\n%03x:", i); | 2642 | printk("\n%03x:", i); |
| 2624 | printk(" %02x", ((unsigned char *)slabp)[i]); | 2643 | printk(" %02x", ((unsigned char *)slabp)[i]); |
| 2625 | } | 2644 | } |
| @@ -2641,12 +2660,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
| 2641 | 2660 | ||
| 2642 | check_irq_off(); | 2661 | check_irq_off(); |
| 2643 | ac = cpu_cache_get(cachep); | 2662 | ac = cpu_cache_get(cachep); |
| 2644 | retry: | 2663 | retry: |
| 2645 | batchcount = ac->batchcount; | 2664 | batchcount = ac->batchcount; |
| 2646 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2665 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
| 2647 | /* if there was little recent activity on this | 2666 | /* |
| 2648 | * cache, then perform only a partial refill. | 2667 | * If there was little recent activity on this cache, then |
| 2649 | * Otherwise we could generate refill bouncing. | 2668 | * perform only a partial refill. Otherwise we could generate |
| 2669 | * refill bouncing. | ||
| 2650 | */ | 2670 | */ |
| 2651 | batchcount = BATCHREFILL_LIMIT; | 2671 | batchcount = BATCHREFILL_LIMIT; |
| 2652 | } | 2672 | } |
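
The partial-refill logic at the top of cache_alloc_refill() clamps the batch size for caches that have seen no recent activity (ac->touched == 0); BATCHREFILL_LIMIT is 16 in this version of slab.c. Restated on its own (illustrative helper, not part of the patch):

static int refill_batchcount(const struct array_cache *ac)
{
	int batchcount = ac->batchcount;

	/* Idle cache: refill only a little, so large batches do not bounce
	 * between the per-cpu array and the node lists. */
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT)
		batchcount = BATCHREFILL_LIMIT;
	return batchcount;
}
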
| @@ -2702,29 +2722,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
| 2702 | list_add(&slabp->list, &l3->slabs_partial); | 2722 | list_add(&slabp->list, &l3->slabs_partial); |
| 2703 | } | 2723 | } |
| 2704 | 2724 | ||
| 2705 | must_grow: | 2725 | must_grow: |
| 2706 | l3->free_objects -= ac->avail; | 2726 | l3->free_objects -= ac->avail; |
| 2707 | alloc_done: | 2727 | alloc_done: |
| 2708 | spin_unlock(&l3->list_lock); | 2728 | spin_unlock(&l3->list_lock); |
| 2709 | 2729 | ||
| 2710 | if (unlikely(!ac->avail)) { | 2730 | if (unlikely(!ac->avail)) { |
| 2711 | int x; | 2731 | int x; |
| 2712 | x = cache_grow(cachep, flags, numa_node_id()); | 2732 | x = cache_grow(cachep, flags, numa_node_id()); |
| 2713 | 2733 | ||
| 2714 | // cache_grow can reenable interrupts, then ac could change. | 2734 | /* cache_grow can reenable interrupts, then ac could change. */ |
| 2715 | ac = cpu_cache_get(cachep); | 2735 | ac = cpu_cache_get(cachep); |
| 2716 | if (!x && ac->avail == 0) // no objects in sight? abort | 2736 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
| 2717 | return NULL; | 2737 | return NULL; |
| 2718 | 2738 | ||
| 2719 | if (!ac->avail) // objects refilled by interrupt? | 2739 | if (!ac->avail) /* objects refilled by interrupt? */ |
| 2720 | goto retry; | 2740 | goto retry; |
| 2721 | } | 2741 | } |
| 2722 | ac->touched = 1; | 2742 | ac->touched = 1; |
| 2723 | return ac->entry[--ac->avail]; | 2743 | return ac->entry[--ac->avail]; |
| 2724 | } | 2744 | } |
| 2725 | 2745 | ||
| 2726 | static inline void | 2746 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
| 2727 | cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | 2747 | gfp_t flags) |
| 2728 | { | 2748 | { |
| 2729 | might_sleep_if(flags & __GFP_WAIT); | 2749 | might_sleep_if(flags & __GFP_WAIT); |
| 2730 | #if DEBUG | 2750 | #if DEBUG |
| @@ -2733,8 +2753,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | |||
| 2733 | } | 2753 | } |
| 2734 | 2754 | ||
| 2735 | #if DEBUG | 2755 | #if DEBUG |
| 2736 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, | 2756 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
| 2737 | void *objp, void *caller) | 2757 | gfp_t flags, void *objp, void *caller) |
| 2738 | { | 2758 | { |
| 2739 | if (!objp) | 2759 | if (!objp) |
| 2740 | return objp; | 2760 | return objp; |
| @@ -2754,15 +2774,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags | |||
| 2754 | *dbg_userword(cachep, objp) = caller; | 2774 | *dbg_userword(cachep, objp) = caller; |
| 2755 | 2775 | ||
| 2756 | if (cachep->flags & SLAB_RED_ZONE) { | 2776 | if (cachep->flags & SLAB_RED_ZONE) { |
| 2757 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE | 2777 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || |
| 2758 | || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 2778 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
| 2759 | slab_error(cachep, | 2779 | slab_error(cachep, "double free, or memory outside" |
| 2760 | "double free, or memory outside" | 2780 | " object was overwritten"); |
| 2761 | " object was overwritten"); | ||
| 2762 | printk(KERN_ERR | 2781 | printk(KERN_ERR |
| 2763 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2782 | "%p: redzone 1:0x%lx, redzone 2:0x%lx\n", |
| 2764 | objp, *dbg_redzone1(cachep, objp), | 2783 | objp, *dbg_redzone1(cachep, objp), |
| 2765 | *dbg_redzone2(cachep, objp)); | 2784 | *dbg_redzone2(cachep, objp)); |
| 2766 | } | 2785 | } |
| 2767 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2786 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
| 2768 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2787 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
| @@ -2809,8 +2828,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 2809 | return objp; | 2828 | return objp; |
| 2810 | } | 2829 | } |
| 2811 | 2830 | ||
| 2812 | static __always_inline void * | 2831 | static __always_inline void *__cache_alloc(struct kmem_cache *cachep, |
| 2813 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 2832 | gfp_t flags, void *caller) |
| 2814 | { | 2833 | { |
| 2815 | unsigned long save_flags; | 2834 | unsigned long save_flags; |
| 2816 | void *objp; | 2835 | void *objp; |
| @@ -2830,7 +2849,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
| 2830 | /* | 2849 | /* |
| 2831 | * A interface to enable slab creation on nodeid | 2850 | * A interface to enable slab creation on nodeid |
| 2832 | */ | 2851 | */ |
| 2833 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 2852 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
| 2853 | int nodeid) | ||
| 2834 | { | 2854 | { |
| 2835 | struct list_head *entry; | 2855 | struct list_head *entry; |
| 2836 | struct slab *slabp; | 2856 | struct slab *slabp; |
| @@ -2841,7 +2861,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2841 | l3 = cachep->nodelists[nodeid]; | 2861 | l3 = cachep->nodelists[nodeid]; |
| 2842 | BUG_ON(!l3); | 2862 | BUG_ON(!l3); |
| 2843 | 2863 | ||
| 2844 | retry: | 2864 | retry: |
| 2845 | check_irq_off(); | 2865 | check_irq_off(); |
| 2846 | spin_lock(&l3->list_lock); | 2866 | spin_lock(&l3->list_lock); |
| 2847 | entry = l3->slabs_partial.next; | 2867 | entry = l3->slabs_partial.next; |
| @@ -2868,16 +2888,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2868 | /* move slabp to correct slabp list: */ | 2888 | /* move slabp to correct slabp list: */ |
| 2869 | list_del(&slabp->list); | 2889 | list_del(&slabp->list); |
| 2870 | 2890 | ||
| 2871 | if (slabp->free == BUFCTL_END) { | 2891 | if (slabp->free == BUFCTL_END) |
| 2872 | list_add(&slabp->list, &l3->slabs_full); | 2892 | list_add(&slabp->list, &l3->slabs_full); |
| 2873 | } else { | 2893 | else |
| 2874 | list_add(&slabp->list, &l3->slabs_partial); | 2894 | list_add(&slabp->list, &l3->slabs_partial); |
| 2875 | } | ||
| 2876 | 2895 | ||
| 2877 | spin_unlock(&l3->list_lock); | 2896 | spin_unlock(&l3->list_lock); |
| 2878 | goto done; | 2897 | goto done; |
| 2879 | 2898 | ||
| 2880 | must_grow: | 2899 | must_grow: |
| 2881 | spin_unlock(&l3->list_lock); | 2900 | spin_unlock(&l3->list_lock); |
| 2882 | x = cache_grow(cachep, flags, nodeid); | 2901 | x = cache_grow(cachep, flags, nodeid); |
| 2883 | 2902 | ||
| @@ -2885,7 +2904,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2885 | return NULL; | 2904 | return NULL; |
| 2886 | 2905 | ||
| 2887 | goto retry; | 2906 | goto retry; |
| 2888 | done: | 2907 | done: |
| 2889 | return obj; | 2908 | return obj; |
| 2890 | } | 2909 | } |
| 2891 | #endif | 2910 | #endif |
| @@ -2958,7 +2977,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
| 2958 | } | 2977 | } |
| 2959 | 2978 | ||
| 2960 | free_block(cachep, ac->entry, batchcount, node); | 2979 | free_block(cachep, ac->entry, batchcount, node); |
| 2961 | free_done: | 2980 | free_done: |
| 2962 | #if STATS | 2981 | #if STATS |
| 2963 | { | 2982 | { |
| 2964 | int i = 0; | 2983 | int i = 0; |
| @@ -2979,16 +2998,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
| 2979 | #endif | 2998 | #endif |
| 2980 | spin_unlock(&l3->list_lock); | 2999 | spin_unlock(&l3->list_lock); |
| 2981 | ac->avail -= batchcount; | 3000 | ac->avail -= batchcount; |
| 2982 | memmove(ac->entry, &(ac->entry[batchcount]), | 3001 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); |
| 2983 | sizeof(void *) * ac->avail); | ||
| 2984 | } | 3002 | } |
| 2985 | 3003 | ||
| 2986 | /* | 3004 | /* |
| 2987 | * __cache_free | 3005 | * Release an obj back to its cache. If the obj has a constructed state, it must |
| 2988 | * Release an obj back to its cache. If the obj has a constructed | 3006 | * be in this state _before_ it is released. Called with disabled ints. |
| 2989 | * state, it must be in this state _before_ it is released. | ||
| 2990 | * | ||
| 2991 | * Called with disabled ints. | ||
| 2992 | */ | 3007 | */ |
| 2993 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3008 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
| 2994 | { | 3009 | { |
| @@ -3007,9 +3022,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
| 3007 | if (unlikely(slabp->nodeid != numa_node_id())) { | 3022 | if (unlikely(slabp->nodeid != numa_node_id())) { |
| 3008 | struct array_cache *alien = NULL; | 3023 | struct array_cache *alien = NULL; |
| 3009 | int nodeid = slabp->nodeid; | 3024 | int nodeid = slabp->nodeid; |
| 3010 | struct kmem_list3 *l3 = | 3025 | struct kmem_list3 *l3; |
| 3011 | cachep->nodelists[numa_node_id()]; | ||
| 3012 | 3026 | ||
| 3027 | l3 = cachep->nodelists[numa_node_id()]; | ||
| 3013 | STATS_INC_NODEFREES(cachep); | 3028 | STATS_INC_NODEFREES(cachep); |
| 3014 | if (l3->alien && l3->alien[nodeid]) { | 3029 | if (l3->alien && l3->alien[nodeid]) { |
| 3015 | alien = l3->alien[nodeid]; | 3030 | alien = l3->alien[nodeid]; |
| @@ -3093,7 +3108,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) | |||
| 3093 | if (unlikely(page_get_cache(page) != cachep)) | 3108 | if (unlikely(page_get_cache(page) != cachep)) |
| 3094 | goto out; | 3109 | goto out; |
| 3095 | return 1; | 3110 | return 1; |
| 3096 | out: | 3111 | out: |
| 3097 | return 0; | 3112 | return 0; |
| 3098 | } | 3113 | } |
| 3099 | 3114 | ||
| @@ -3119,7 +3134,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 3119 | local_irq_save(save_flags); | 3134 | local_irq_save(save_flags); |
| 3120 | 3135 | ||
| 3121 | if (nodeid == -1 || nodeid == numa_node_id() || | 3136 | if (nodeid == -1 || nodeid == numa_node_id() || |
| 3122 | !cachep->nodelists[nodeid]) | 3137 | !cachep->nodelists[nodeid]) |
| 3123 | ptr = ____cache_alloc(cachep, flags); | 3138 | ptr = ____cache_alloc(cachep, flags); |
| 3124 | else | 3139 | else |
| 3125 | ptr = __cache_alloc_node(cachep, flags, nodeid); | 3140 | ptr = __cache_alloc_node(cachep, flags, nodeid); |
| @@ -3148,6 +3163,7 @@ EXPORT_SYMBOL(kmalloc_node); | |||
| 3148 | * kmalloc - allocate memory | 3163 | * kmalloc - allocate memory |
| 3149 | * @size: how many bytes of memory are required. | 3164 | * @size: how many bytes of memory are required. |
| 3150 | * @flags: the type of memory to allocate. | 3165 | * @flags: the type of memory to allocate. |
| 3166 | * @caller: function caller for debug tracking of the caller | ||
| 3151 | * | 3167 | * |
| 3152 | * kmalloc is the normal method of allocating memory | 3168 | * kmalloc is the normal method of allocating memory |
| 3153 | * in the kernel. | 3169 | * in the kernel. |
| @@ -3236,7 +3252,7 @@ void *__alloc_percpu(size_t size) | |||
| 3236 | /* Catch derefs w/o wrappers */ | 3252 | /* Catch derefs w/o wrappers */ |
| 3237 | return (void *)(~(unsigned long)pdata); | 3253 | return (void *)(~(unsigned long)pdata); |
| 3238 | 3254 | ||
| 3239 | unwind_oom: | 3255 | unwind_oom: |
| 3240 | while (--i >= 0) { | 3256 | while (--i >= 0) { |
| 3241 | if (!cpu_possible(i)) | 3257 | if (!cpu_possible(i)) |
| 3242 | continue; | 3258 | continue; |
| @@ -3339,18 +3355,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
| 3339 | struct array_cache *nc = NULL, *new; | 3355 | struct array_cache *nc = NULL, *new; |
| 3340 | struct array_cache **new_alien = NULL; | 3356 | struct array_cache **new_alien = NULL; |
| 3341 | #ifdef CONFIG_NUMA | 3357 | #ifdef CONFIG_NUMA |
| 3342 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) | 3358 | new_alien = alloc_alien_cache(node, cachep->limit); |
| 3359 | if (!new_alien) | ||
| 3343 | goto fail; | 3360 | goto fail; |
| 3344 | #endif | 3361 | #endif |
| 3345 | if (!(new = alloc_arraycache(node, (cachep->shared * | 3362 | new = alloc_arraycache(node, cachep->shared*cachep->batchcount, |
| 3346 | cachep->batchcount), | 3363 | 0xbaadf00d); |
| 3347 | 0xbaadf00d))) | 3364 | if (!new) |
| 3348 | goto fail; | 3365 | goto fail; |
| 3349 | if ((l3 = cachep->nodelists[node])) { | 3366 | l3 = cachep->nodelists[node]; |
| 3350 | 3367 | if (l3) { | |
| 3351 | spin_lock_irq(&l3->list_lock); | 3368 | spin_lock_irq(&l3->list_lock); |
| 3352 | 3369 | ||
| 3353 | if ((nc = cachep->nodelists[node]->shared)) | 3370 | nc = cachep->nodelists[node]->shared; |
| 3371 | if (nc) | ||
| 3354 | free_block(cachep, nc->entry, nc->avail, node); | 3372 | free_block(cachep, nc->entry, nc->avail, node); |
| 3355 | 3373 | ||
| 3356 | l3->shared = new; | 3374 | l3->shared = new; |
| @@ -3359,27 +3377,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
| 3359 | new_alien = NULL; | 3377 | new_alien = NULL; |
| 3360 | } | 3378 | } |
| 3361 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3379 | l3->free_limit = (1 + nr_cpus_node(node)) * |
| 3362 | cachep->batchcount + cachep->num; | 3380 | cachep->batchcount + cachep->num; |
| 3363 | spin_unlock_irq(&l3->list_lock); | 3381 | spin_unlock_irq(&l3->list_lock); |
| 3364 | kfree(nc); | 3382 | kfree(nc); |
| 3365 | free_alien_cache(new_alien); | 3383 | free_alien_cache(new_alien); |
| 3366 | continue; | 3384 | continue; |
| 3367 | } | 3385 | } |
| 3368 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), | 3386 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); |
| 3369 | GFP_KERNEL, node))) | 3387 | if (!l3) |
| 3370 | goto fail; | 3388 | goto fail; |
| 3371 | 3389 | ||
| 3372 | kmem_list3_init(l3); | 3390 | kmem_list3_init(l3); |
| 3373 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3391 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
| 3374 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3392 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
| 3375 | l3->shared = new; | 3393 | l3->shared = new; |
| 3376 | l3->alien = new_alien; | 3394 | l3->alien = new_alien; |
| 3377 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3395 | l3->free_limit = (1 + nr_cpus_node(node)) * |
| 3378 | cachep->batchcount + cachep->num; | 3396 | cachep->batchcount + cachep->num; |
| 3379 | cachep->nodelists[node] = l3; | 3397 | cachep->nodelists[node] = l3; |
| 3380 | } | 3398 | } |
| 3381 | return err; | 3399 | return err; |
| 3382 | fail: | 3400 | fail: |
| 3383 | err = -ENOMEM; | 3401 | err = -ENOMEM; |
| 3384 | return err; | 3402 | return err; |
| 3385 | } | 3403 | } |
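
Both branches of alloc_kmemlist() set the same per-node free_limit. The formula is taken straight from the hunk above; the helper and the worked numbers are only illustrative:

static int node_free_limit(int cpus_on_node, int batchcount, int objs_per_slab)
{
	/* e.g. 2 cpus on the node, batchcount 60, 30 objects per slab:
	 * (1 + 2) * 60 + 30 = 210 free objects may sit on the node lists
	 * before completely free slabs start being released. */
	return (1 + cpus_on_node) * batchcount + objs_per_slab;
}
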
| @@ -3391,7 +3409,7 @@ struct ccupdate_struct { | |||
| 3391 | 3409 | ||
| 3392 | static void do_ccupdate_local(void *info) | 3410 | static void do_ccupdate_local(void *info) |
| 3393 | { | 3411 | { |
| 3394 | struct ccupdate_struct *new = (struct ccupdate_struct *)info; | 3412 | struct ccupdate_struct *new = info; |
| 3395 | struct array_cache *old; | 3413 | struct array_cache *old; |
| 3396 | 3414 | ||
| 3397 | check_irq_off(); | 3415 | check_irq_off(); |
| @@ -3401,16 +3419,17 @@ static void do_ccupdate_local(void *info) | |||
| 3401 | new->new[smp_processor_id()] = old; | 3419 | new->new[smp_processor_id()] = old; |
| 3402 | } | 3420 | } |
| 3403 | 3421 | ||
| 3404 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, | 3422 | /* Always called with the cache_chain_mutex held */ |
| 3405 | int shared) | 3423 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| 3424 | int batchcount, int shared) | ||
| 3406 | { | 3425 | { |
| 3407 | struct ccupdate_struct new; | 3426 | struct ccupdate_struct new; |
| 3408 | int i, err; | 3427 | int i, err; |
| 3409 | 3428 | ||
| 3410 | memset(&new.new, 0, sizeof(new.new)); | 3429 | memset(&new.new, 0, sizeof(new.new)); |
| 3411 | for_each_online_cpu(i) { | 3430 | for_each_online_cpu(i) { |
| 3412 | new.new[i] = | 3431 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, |
| 3413 | alloc_arraycache(cpu_to_node(i), limit, batchcount); | 3432 | batchcount); |
| 3414 | if (!new.new[i]) { | 3433 | if (!new.new[i]) { |
| 3415 | for (i--; i >= 0; i--) | 3434 | for (i--; i >= 0; i--) |
| 3416 | kfree(new.new[i]); | 3435 | kfree(new.new[i]); |
| @@ -3419,14 +3438,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
| 3419 | } | 3438 | } |
| 3420 | new.cachep = cachep; | 3439 | new.cachep = cachep; |
| 3421 | 3440 | ||
| 3422 | smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); | 3441 | on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); |
| 3423 | 3442 | ||
| 3424 | check_irq_on(); | 3443 | check_irq_on(); |
| 3425 | spin_lock(&cachep->spinlock); | ||
| 3426 | cachep->batchcount = batchcount; | 3444 | cachep->batchcount = batchcount; |
| 3427 | cachep->limit = limit; | 3445 | cachep->limit = limit; |
| 3428 | cachep->shared = shared; | 3446 | cachep->shared = shared; |
| 3429 | spin_unlock(&cachep->spinlock); | ||
| 3430 | 3447 | ||
| 3431 | for_each_online_cpu(i) { | 3448 | for_each_online_cpu(i) { |
| 3432 | struct array_cache *ccold = new.new[i]; | 3449 | struct array_cache *ccold = new.new[i]; |
| @@ -3447,15 +3464,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
| 3447 | return 0; | 3464 | return 0; |
| 3448 | } | 3465 | } |
| 3449 | 3466 | ||
| 3467 | /* Called with cache_chain_mutex held always */ | ||
| 3450 | static void enable_cpucache(struct kmem_cache *cachep) | 3468 | static void enable_cpucache(struct kmem_cache *cachep) |
| 3451 | { | 3469 | { |
| 3452 | int err; | 3470 | int err; |
| 3453 | int limit, shared; | 3471 | int limit, shared; |
| 3454 | 3472 | ||
| 3455 | /* The head array serves three purposes: | 3473 | /* |
| 3474 | * The head array serves three purposes: | ||
| 3456 | * - create a LIFO ordering, i.e. return objects that are cache-warm | 3475 | * - create a LIFO ordering, i.e. return objects that are cache-warm |
| 3457 | * - reduce the number of spinlock operations. | 3476 | * - reduce the number of spinlock operations. |
| 3458 | * - reduce the number of linked list operations on the slab and | 3477 | * - reduce the number of linked list operations on the slab and |
| 3459 | * bufctl chains: array operations are cheaper. | 3478 | * bufctl chains: array operations are cheaper. |
| 3460 | * The numbers are guessed, we should auto-tune as described by | 3479 | * The numbers are guessed, we should auto-tune as described by |
| 3461 | * Bonwick. | 3480 | * Bonwick. |
| @@ -3471,7 +3490,8 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3471 | else | 3490 | else |
| 3472 | limit = 120; | 3491 | limit = 120; |
| 3473 | 3492 | ||
| 3474 | /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound | 3493 | /* |
| 3494 | * CPU bound tasks (e.g. network routing) can exhibit cpu bound | ||
| 3475 | * allocation behaviour: Most allocs on one cpu, most free operations | 3495 | * allocation behaviour: Most allocs on one cpu, most free operations |
| 3476 | * on another cpu. For these cases, an efficient object passing between | 3496 | * on another cpu. For these cases, an efficient object passing between |
| 3477 | * cpus is necessary. This is provided by a shared array. The array | 3497 | * cpus is necessary. This is provided by a shared array. The array |
| @@ -3486,9 +3506,9 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3486 | #endif | 3506 | #endif |
| 3487 | 3507 | ||
| 3488 | #if DEBUG | 3508 | #if DEBUG |
| 3489 | /* With debugging enabled, large batchcount lead to excessively | 3509 | /* |
| 3490 | * long periods with disabled local interrupts. Limit the | 3510 | * With debugging enabled, large batchcount lead to excessively long |
| 3491 | * batchcount | 3511 | * periods with disabled local interrupts. Limit the batchcount |
| 3492 | */ | 3512 | */ |
| 3493 | if (limit > 32) | 3513 | if (limit > 32) |
| 3494 | limit = 32; | 3514 | limit = 32; |
| @@ -3499,23 +3519,32 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3499 | cachep->name, -err); | 3519 | cachep->name, -err); |
| 3500 | } | 3520 | } |
| 3501 | 3521 | ||
| 3502 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | 3522 | /* |
| 3503 | int force, int node) | 3523 | * Drain an array if it contains any elements taking the l3 lock only if |
| 3524 | * necessary. Note that the l3 listlock also protects the array_cache | ||
| 3525 | * if drain_array() is used on the shared array. | ||
| 3526 | */ | ||
| 3527 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | ||
| 3528 | struct array_cache *ac, int force, int node) | ||
| 3504 | { | 3529 | { |
| 3505 | int tofree; | 3530 | int tofree; |
| 3506 | 3531 | ||
| 3507 | check_spinlock_acquired_node(cachep, node); | 3532 | if (!ac || !ac->avail) |
| 3533 | return; | ||
| 3508 | if (ac->touched && !force) { | 3534 | if (ac->touched && !force) { |
| 3509 | ac->touched = 0; | 3535 | ac->touched = 0; |
| 3510 | } else if (ac->avail) { | 3536 | } else { |
| 3511 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 3537 | spin_lock_irq(&l3->list_lock); |
| 3512 | if (tofree > ac->avail) { | 3538 | if (ac->avail) { |
| 3513 | tofree = (ac->avail + 1) / 2; | 3539 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
| 3540 | if (tofree > ac->avail) | ||
| 3541 | tofree = (ac->avail + 1) / 2; | ||
| 3542 | free_block(cachep, ac->entry, tofree, node); | ||
| 3543 | ac->avail -= tofree; | ||
| 3544 | memmove(ac->entry, &(ac->entry[tofree]), | ||
| 3545 | sizeof(void *) * ac->avail); | ||
| 3514 | } | 3546 | } |
| 3515 | free_block(cachep, ac->entry, tofree, node); | 3547 | spin_unlock_irq(&l3->list_lock); |
| 3516 | ac->avail -= tofree; | ||
| 3517 | memmove(ac->entry, &(ac->entry[tofree]), | ||
| 3518 | sizeof(void *) * ac->avail); | ||
| 3519 | } | 3548 | } |
| 3520 | } | 3549 | } |
| 3521 | 3550 | ||
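The new drain_array() above checks ac->avail without the lock and returns immediately for idle arrays; only when there is something to free does it take l3->list_lock. Per pass it frees roughly a fifth of the array's limit, capped at about half of what is currently available. The quota arithmetic on its own (illustrative helper, not kernel code):

	/* How many entries one non-forced drain pass would free. */
	static int drain_quota(int limit, int avail, int force)
	{
		int tofree = force ? avail : (limit + 4) / 5;	/* ~20% of limit */

		if (tofree > avail)
			tofree = (avail + 1) / 2;		/* at most ~half of avail */
		return tofree;
	}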
| @@ -3528,13 +3557,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac | |||
| 3528 | * - clear the per-cpu caches for this CPU. | 3557 | * - clear the per-cpu caches for this CPU. |
| 3529 | * - return freeable pages to the main free memory pool. | 3558 | * - return freeable pages to the main free memory pool. |
| 3530 | * | 3559 | * |
| 3531 | * If we cannot acquire the cache chain mutex then just give up - we'll | 3560 | * If we cannot acquire the cache chain mutex then just give up - we'll try |
| 3532 | * try again on the next iteration. | 3561 | * again on the next iteration. |
| 3533 | */ | 3562 | */ |
| 3534 | static void cache_reap(void *unused) | 3563 | static void cache_reap(void *unused) |
| 3535 | { | 3564 | { |
| 3536 | struct list_head *walk; | 3565 | struct list_head *walk; |
| 3537 | struct kmem_list3 *l3; | 3566 | struct kmem_list3 *l3; |
| 3567 | int node = numa_node_id(); | ||
| 3538 | 3568 | ||
| 3539 | if (!mutex_trylock(&cache_chain_mutex)) { | 3569 | if (!mutex_trylock(&cache_chain_mutex)) { |
| 3540 | /* Give up. Setup the next iteration. */ | 3570 | /* Give up. Setup the next iteration. */ |
| @@ -3550,65 +3580,72 @@ static void cache_reap(void *unused) | |||
| 3550 | struct slab *slabp; | 3580 | struct slab *slabp; |
| 3551 | 3581 | ||
| 3552 | searchp = list_entry(walk, struct kmem_cache, next); | 3582 | searchp = list_entry(walk, struct kmem_cache, next); |
| 3553 | |||
| 3554 | if (searchp->flags & SLAB_NO_REAP) | ||
| 3555 | goto next; | ||
| 3556 | |||
| 3557 | check_irq_on(); | 3583 | check_irq_on(); |
| 3558 | 3584 | ||
| 3559 | l3 = searchp->nodelists[numa_node_id()]; | 3585 | /* |
| 3586 | * We only take the l3 lock if absolutely necessary and we | ||
| 3587 | * have established with reasonable certainty that | ||
| 3588 | * we can do some work if the lock was obtained. | ||
| 3589 | */ | ||
| 3590 | l3 = searchp->nodelists[node]; | ||
| 3591 | |||
| 3560 | reap_alien(searchp, l3); | 3592 | reap_alien(searchp, l3); |
| 3561 | spin_lock_irq(&l3->list_lock); | ||
| 3562 | 3593 | ||
| 3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3594 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
| 3564 | numa_node_id()); | ||
| 3565 | 3595 | ||
| 3596 | /* | ||
| 3597 | * These are racy checks but it does not matter | ||
| 3598 | * if we skip one check or scan twice. | ||
| 3599 | */ | ||
| 3566 | if (time_after(l3->next_reap, jiffies)) | 3600 | if (time_after(l3->next_reap, jiffies)) |
| 3567 | goto next_unlock; | 3601 | goto next; |
| 3568 | 3602 | ||
| 3569 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 3603 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
| 3570 | 3604 | ||
| 3571 | if (l3->shared) | 3605 | drain_array(searchp, l3, l3->shared, 0, node); |
| 3572 | drain_array_locked(searchp, l3->shared, 0, | ||
| 3573 | numa_node_id()); | ||
| 3574 | 3606 | ||
| 3575 | if (l3->free_touched) { | 3607 | if (l3->free_touched) { |
| 3576 | l3->free_touched = 0; | 3608 | l3->free_touched = 0; |
| 3577 | goto next_unlock; | 3609 | goto next; |
| 3578 | } | 3610 | } |
| 3579 | 3611 | ||
| 3580 | tofree = | 3612 | tofree = (l3->free_limit + 5 * searchp->num - 1) / |
| 3581 | (l3->free_limit + 5 * searchp->num - | 3613 | (5 * searchp->num); |
| 3582 | 1) / (5 * searchp->num); | ||
| 3583 | do { | 3614 | do { |
| 3615 | /* | ||
| 3616 | * Do not lock if there are no free blocks. | ||
| 3617 | */ | ||
| 3618 | if (list_empty(&l3->slabs_free)) | ||
| 3619 | break; | ||
| 3620 | |||
| 3621 | spin_lock_irq(&l3->list_lock); | ||
| 3584 | p = l3->slabs_free.next; | 3622 | p = l3->slabs_free.next; |
| 3585 | if (p == &(l3->slabs_free)) | 3623 | if (p == &(l3->slabs_free)) { |
| 3624 | spin_unlock_irq(&l3->list_lock); | ||
| 3586 | break; | 3625 | break; |
| 3626 | } | ||
| 3587 | 3627 | ||
| 3588 | slabp = list_entry(p, struct slab, list); | 3628 | slabp = list_entry(p, struct slab, list); |
| 3589 | BUG_ON(slabp->inuse); | 3629 | BUG_ON(slabp->inuse); |
| 3590 | list_del(&slabp->list); | 3630 | list_del(&slabp->list); |
| 3591 | STATS_INC_REAPED(searchp); | 3631 | STATS_INC_REAPED(searchp); |
| 3592 | 3632 | ||
| 3593 | /* Safe to drop the lock. The slab is no longer | 3633 | /* |
| 3594 | * linked to the cache. | 3634 | * Safe to drop the lock. The slab is no longer linked |
| 3595 | * searchp cannot disappear, we hold | 3635 | * to the cache. searchp cannot disappear, we hold |
| 3596 | * cache_chain_lock | 3636 | * cache_chain_lock |
| 3597 | */ | 3637 | */ |
| 3598 | l3->free_objects -= searchp->num; | 3638 | l3->free_objects -= searchp->num; |
| 3599 | spin_unlock_irq(&l3->list_lock); | 3639 | spin_unlock_irq(&l3->list_lock); |
| 3600 | slab_destroy(searchp, slabp); | 3640 | slab_destroy(searchp, slabp); |
| 3601 | spin_lock_irq(&l3->list_lock); | ||
| 3602 | } while (--tofree > 0); | 3641 | } while (--tofree > 0); |
| 3603 | next_unlock: | 3642 | next: |
| 3604 | spin_unlock_irq(&l3->list_lock); | ||
| 3605 | next: | ||
| 3606 | cond_resched(); | 3643 | cond_resched(); |
| 3607 | } | 3644 | } |
| 3608 | check_irq_on(); | 3645 | check_irq_on(); |
| 3609 | mutex_unlock(&cache_chain_mutex); | 3646 | mutex_unlock(&cache_chain_mutex); |
| 3610 | next_reap_node(); | 3647 | next_reap_node(); |
| 3611 | /* Setup the next iteration */ | 3648 | /* Set up the next iteration */ |
| 3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3649 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
| 3613 | } | 3650 | } |
| 3614 | 3651 | ||
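In the reworked cache_reap() above, the number of free slabs torn down per run is (free_limit + 5 * num - 1) / (5 * num): a ceiling division that reaps about one fifth of the node's free limit, expressed in whole slabs and rounded up, and the l3 list lock is only taken once slabs_free is known to be non-empty. For example, with free_limit = 124 objects and num = 8 objects per slab, the quota is (124 + 39) / 40 = 4 slabs. As a stand-alone helper (illustrative):

	/* Free slabs to reap per pass: ceil(free_limit / (5 * num)). */
	static unsigned long reap_quota(unsigned long free_limit, unsigned long num)
	{
		return (free_limit + 5 * num - 1) / (5 * num);
	}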
| @@ -3658,8 +3695,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
| 3658 | { | 3695 | { |
| 3659 | struct kmem_cache *cachep = p; | 3696 | struct kmem_cache *cachep = p; |
| 3660 | ++*pos; | 3697 | ++*pos; |
| 3661 | return cachep->next.next == &cache_chain ? NULL | 3698 | return cachep->next.next == &cache_chain ? |
| 3662 | : list_entry(cachep->next.next, struct kmem_cache, next); | 3699 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); |
| 3663 | } | 3700 | } |
| 3664 | 3701 | ||
| 3665 | static void s_stop(struct seq_file *m, void *p) | 3702 | static void s_stop(struct seq_file *m, void *p) |
| @@ -3681,7 +3718,6 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3681 | int node; | 3718 | int node; |
| 3682 | struct kmem_list3 *l3; | 3719 | struct kmem_list3 *l3; |
| 3683 | 3720 | ||
| 3684 | spin_lock(&cachep->spinlock); | ||
| 3685 | active_objs = 0; | 3721 | active_objs = 0; |
| 3686 | num_slabs = 0; | 3722 | num_slabs = 0; |
| 3687 | for_each_online_node(node) { | 3723 | for_each_online_node(node) { |
| @@ -3748,7 +3784,9 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3748 | unsigned long node_frees = cachep->node_frees; | 3784 | unsigned long node_frees = cachep->node_frees; |
| 3749 | 3785 | ||
| 3750 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 3786 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
| 3751 | %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); | 3787 | %4lu %4lu %4lu %4lu", allocs, high, grown, |
| 3788 | reaped, errors, max_freeable, node_allocs, | ||
| 3789 | node_frees); | ||
| 3752 | } | 3790 | } |
| 3753 | /* cpu stats */ | 3791 | /* cpu stats */ |
| 3754 | { | 3792 | { |
| @@ -3762,7 +3800,6 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3762 | } | 3800 | } |
| 3763 | #endif | 3801 | #endif |
| 3764 | seq_putc(m, '\n'); | 3802 | seq_putc(m, '\n'); |
| 3765 | spin_unlock(&cachep->spinlock); | ||
| 3766 | return 0; | 3803 | return 0; |
| 3767 | } | 3804 | } |
| 3768 | 3805 | ||
| @@ -3820,13 +3857,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
| 3820 | mutex_lock(&cache_chain_mutex); | 3857 | mutex_lock(&cache_chain_mutex); |
| 3821 | res = -EINVAL; | 3858 | res = -EINVAL; |
| 3822 | list_for_each(p, &cache_chain) { | 3859 | list_for_each(p, &cache_chain) { |
| 3823 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, | 3860 | struct kmem_cache *cachep; |
| 3824 | next); | ||
| 3825 | 3861 | ||
| 3862 | cachep = list_entry(p, struct kmem_cache, next); | ||
| 3826 | if (!strcmp(cachep->name, kbuf)) { | 3863 | if (!strcmp(cachep->name, kbuf)) { |
| 3827 | if (limit < 1 || | 3864 | if (limit < 1 || batchcount < 1 || |
| 3828 | batchcount < 1 || | 3865 | batchcount > limit || shared < 0) { |
| 3829 | batchcount > limit || shared < 0) { | ||
| 3830 | res = 0; | 3866 | res = 0; |
| 3831 | } else { | 3867 | } else { |
| 3832 | res = do_tune_cpucache(cachep, limit, | 3868 | res = do_tune_cpucache(cachep, limit, |
| @@ -209,19 +209,18 @@ int lru_add_drain_all(void) | |||
| 209 | */ | 209 | */ |
| 210 | void fastcall __page_cache_release(struct page *page) | 210 | void fastcall __page_cache_release(struct page *page) |
| 211 | { | 211 | { |
| 212 | unsigned long flags; | 212 | if (PageLRU(page)) { |
| 213 | struct zone *zone = page_zone(page); | 213 | unsigned long flags; |
| 214 | struct zone *zone = page_zone(page); | ||
| 214 | 215 | ||
| 215 | spin_lock_irqsave(&zone->lru_lock, flags); | 216 | spin_lock_irqsave(&zone->lru_lock, flags); |
| 216 | if (TestClearPageLRU(page)) | 217 | BUG_ON(!PageLRU(page)); |
| 218 | __ClearPageLRU(page); | ||
| 217 | del_page_from_lru(zone, page); | 219 | del_page_from_lru(zone, page); |
| 218 | if (page_count(page) != 0) | 220 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
| 219 | page = NULL; | 221 | } |
| 220 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 222 | free_hot_page(page); |
| 221 | if (page) | ||
| 222 | free_hot_page(page); | ||
| 223 | } | 223 | } |
| 224 | |||
| 225 | EXPORT_SYMBOL(__page_cache_release); | 224 | EXPORT_SYMBOL(__page_cache_release); |
| 226 | 225 | ||
| 227 | /* | 226 | /* |
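__page_cache_release() now takes zone->lru_lock only when the page is actually on the LRU, asserts PageLRU under the lock, and frees the page unconditionally afterwards; the unlocked check is safe because the caller holds the final reference, so the LRU state cannot change underneath it. The shape of that pattern, reduced to a user-space toy (the single-owner assumption is exactly what makes the cheap check valid):

	#include <assert.h>
	#include <pthread.h>
	#include <stdbool.h>

	struct item {
		bool on_list;
		/* list linkage, payload ... */
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Caller holds the last reference, so it->on_list cannot change under us. */
	static void release_item(struct item *it)
	{
		if (it->on_list) {			/* cheap unlocked check */
			pthread_mutex_lock(&list_lock);
			assert(it->on_list);		/* still true under the lock */
			it->on_list = false;
			/* unlink from the list here */
			pthread_mutex_unlock(&list_lock);
		}
		/* hand the item back to the allocator unconditionally */
	}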
| @@ -245,7 +244,6 @@ void release_pages(struct page **pages, int nr, int cold) | |||
| 245 | pagevec_init(&pages_to_free, cold); | 244 | pagevec_init(&pages_to_free, cold); |
| 246 | for (i = 0; i < nr; i++) { | 245 | for (i = 0; i < nr; i++) { |
| 247 | struct page *page = pages[i]; | 246 | struct page *page = pages[i]; |
| 248 | struct zone *pagezone; | ||
| 249 | 247 | ||
| 250 | if (unlikely(PageCompound(page))) { | 248 | if (unlikely(PageCompound(page))) { |
| 251 | if (zone) { | 249 | if (zone) { |
| @@ -259,23 +257,27 @@ void release_pages(struct page **pages, int nr, int cold) | |||
| 259 | if (!put_page_testzero(page)) | 257 | if (!put_page_testzero(page)) |
| 260 | continue; | 258 | continue; |
| 261 | 259 | ||
| 262 | pagezone = page_zone(page); | 260 | if (PageLRU(page)) { |
| 263 | if (pagezone != zone) { | 261 | struct zone *pagezone = page_zone(page); |
| 264 | if (zone) | 262 | if (pagezone != zone) { |
| 265 | spin_unlock_irq(&zone->lru_lock); | 263 | if (zone) |
| 266 | zone = pagezone; | 264 | spin_unlock_irq(&zone->lru_lock); |
| 267 | spin_lock_irq(&zone->lru_lock); | 265 | zone = pagezone; |
| 268 | } | 266 | spin_lock_irq(&zone->lru_lock); |
| 269 | if (TestClearPageLRU(page)) | 267 | } |
| 268 | BUG_ON(!PageLRU(page)); | ||
| 269 | __ClearPageLRU(page); | ||
| 270 | del_page_from_lru(zone, page); | 270 | del_page_from_lru(zone, page); |
| 271 | if (page_count(page) == 0) { | 271 | } |
| 272 | if (!pagevec_add(&pages_to_free, page)) { | 272 | |
| 273 | if (!pagevec_add(&pages_to_free, page)) { | ||
| 274 | if (zone) { | ||
| 273 | spin_unlock_irq(&zone->lru_lock); | 275 | spin_unlock_irq(&zone->lru_lock); |
| 274 | __pagevec_free(&pages_to_free); | 276 | zone = NULL; |
| 275 | pagevec_reinit(&pages_to_free); | ||
| 276 | zone = NULL; /* No lock is held */ | ||
| 277 | } | 277 | } |
| 278 | } | 278 | __pagevec_free(&pages_to_free); |
| 279 | pagevec_reinit(&pages_to_free); | ||
| 280 | } | ||
| 279 | } | 281 | } |
| 280 | if (zone) | 282 | if (zone) |
| 281 | spin_unlock_irq(&zone->lru_lock); | 283 | spin_unlock_irq(&zone->lru_lock); |
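release_pages() above collects freeable pages in a pagevec and, once the vector fills, drops zone->lru_lock before flushing it, so pages are never handed back to the allocator with the LRU lock held. The batch-and-flush idiom on its own (plain C with made-up names; the kernel's pagevec_add() similarly reports how much room remains):

	#include <stddef.h>

	#define BATCH 16		/* arbitrary small batch for illustration */

	struct batch {
		size_t nr;
		void *slot[BATCH];
	};

	/* Add one entry; returns nonzero while room remains. A zero return
	 * means the caller must flush before adding anything else. */
	static int batch_add(struct batch *b, void *p)
	{
		b->slot[b->nr++] = p;
		return b->nr < BATCH;
	}

	/* Called with no locks held: release everything collected so far. */
	static void batch_flush(struct batch *b, void (*release)(void *))
	{
		for (size_t i = 0; i < b->nr; i++)
			release(b->slot[i]);
		b->nr = 0;
	}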
| @@ -343,8 +345,8 @@ void __pagevec_lru_add(struct pagevec *pvec) | |||
| 343 | zone = pagezone; | 345 | zone = pagezone; |
| 344 | spin_lock_irq(&zone->lru_lock); | 346 | spin_lock_irq(&zone->lru_lock); |
| 345 | } | 347 | } |
| 346 | if (TestSetPageLRU(page)) | 348 | BUG_ON(PageLRU(page)); |
| 347 | BUG(); | 349 | SetPageLRU(page); |
| 348 | add_page_to_inactive_list(zone, page); | 350 | add_page_to_inactive_list(zone, page); |
| 349 | } | 351 | } |
| 350 | if (zone) | 352 | if (zone) |
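A pattern repeated throughout the mm/swap.c and mm/vmscan.c hunks: "if (TestSetPageLRU(page)) BUG();" becomes "BUG_ON(PageLRU(page)); SetPageLRU(page);". The invariant being checked is unchanged, but the assertion is no longer folded into the flag update, so it reads as an assertion rather than a side effect inside a condition. A toy equivalent in C11 atomics (illustrative only, not the kernel's page-flag machinery):

	#include <assert.h>
	#include <stdatomic.h>

	static atomic_int on_lru;	/* stands in for the page's LRU bit */

	/* Old style: atomic test-and-set whose only job is to catch a bug. */
	static void add_old(void)
	{
		if (atomic_exchange(&on_lru, 1))
			assert(!"already on the LRU");
	}

	/* New style: assert the invariant, then set the flag. */
	static void add_new(void)
	{
		assert(!atomic_load(&on_lru));
		atomic_store(&on_lru, 1);
	}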
| @@ -370,10 +372,10 @@ void __pagevec_lru_add_active(struct pagevec *pvec) | |||
| 370 | zone = pagezone; | 372 | zone = pagezone; |
| 371 | spin_lock_irq(&zone->lru_lock); | 373 | spin_lock_irq(&zone->lru_lock); |
| 372 | } | 374 | } |
| 373 | if (TestSetPageLRU(page)) | 375 | BUG_ON(PageLRU(page)); |
| 374 | BUG(); | 376 | SetPageLRU(page); |
| 375 | if (TestSetPageActive(page)) | 377 | BUG_ON(PageActive(page)); |
| 376 | BUG(); | 378 | SetPageActive(page); |
| 377 | add_page_to_active_list(zone, page); | 379 | add_page_to_active_list(zone, page); |
| 378 | } | 380 | } |
| 379 | if (zone) | 381 | if (zone) |
diff --git a/mm/swap_state.c b/mm/swap_state.c index db8a3d3e1636..d7af296833fc 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
| 16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
| 17 | #include <linux/pagevec.h> | 17 | #include <linux/pagevec.h> |
| 18 | #include <linux/migrate.h> | ||
| 18 | 19 | ||
| 19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
| 20 | 21 | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f9cf0d073b8..365ed6ff182d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
| @@ -116,7 +116,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) | |||
| 116 | last_in_cluster = offset + SWAPFILE_CLUSTER; | 116 | last_in_cluster = offset + SWAPFILE_CLUSTER; |
| 117 | else if (offset == last_in_cluster) { | 117 | else if (offset == last_in_cluster) { |
| 118 | spin_lock(&swap_lock); | 118 | spin_lock(&swap_lock); |
| 119 | si->cluster_next = offset-SWAPFILE_CLUSTER-1; | 119 | si->cluster_next = offset-SWAPFILE_CLUSTER+1; |
| 120 | goto cluster; | 120 | goto cluster; |
| 121 | } | 121 | } |
| 122 | if (unlikely(--latency_ration < 0)) { | 122 | if (unlikely(--latency_ration < 0)) { |
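The single-character change in scan_swap_map() fixes where allocation resumes once the scan has found an empty cluster. Reading the surrounding loop, when offset reaches last_in_cluster the scan has just seen SWAPFILE_CLUSTER consecutive free slots ending at offset, i.e. the run [offset - SWAPFILE_CLUSTER + 1, offset]; cluster_next should therefore point at the first slot of that run, not at a slot two positions before it, which may well be in use. A worked example, taking SWAPFILE_CLUSTER as 256 for illustration:

	#include <stdio.h>

	#define SWAPFILE_CLUSTER 256UL	/* illustrative value */

	int main(void)
	{
		unsigned long offset = 1000;	/* scan just reached last_in_cluster */

		/* The free run ends at 'offset' and is SWAPFILE_CLUSTER slots
		 * long, so it spans [745, 1000] in this example. */
		printf("old: %lu\n", offset - SWAPFILE_CLUSTER - 1); /* 743, outside the run */
		printf("new: %lu\n", offset - SWAPFILE_CLUSTER + 1); /* 745, start of the run */
		return 0;
	}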
diff --git a/mm/vmscan.c b/mm/vmscan.c index 4fe7e3aa02e2..fd572bbdc9f5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -33,39 +33,21 @@ | |||
| 33 | #include <linux/cpuset.h> | 33 | #include <linux/cpuset.h> |
| 34 | #include <linux/notifier.h> | 34 | #include <linux/notifier.h> |
| 35 | #include <linux/rwsem.h> | 35 | #include <linux/rwsem.h> |
| 36 | #include <linux/delay.h> | ||
| 36 | 37 | ||
| 37 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
| 38 | #include <asm/div64.h> | 39 | #include <asm/div64.h> |
| 39 | 40 | ||
| 40 | #include <linux/swapops.h> | 41 | #include <linux/swapops.h> |
| 41 | 42 | ||
| 42 | /* possible outcome of pageout() */ | 43 | #include "internal.h" |
| 43 | typedef enum { | ||
| 44 | /* failed to write page out, page is locked */ | ||
| 45 | PAGE_KEEP, | ||
| 46 | /* move page to the active list, page is locked */ | ||
| 47 | PAGE_ACTIVATE, | ||
| 48 | /* page has been sent to the disk successfully, page is unlocked */ | ||
| 49 | PAGE_SUCCESS, | ||
| 50 | /* page is clean and locked */ | ||
| 51 | PAGE_CLEAN, | ||
| 52 | } pageout_t; | ||
| 53 | 44 | ||
| 54 | struct scan_control { | 45 | struct scan_control { |
| 55 | /* Ask refill_inactive_zone, or shrink_cache to scan this many pages */ | ||
| 56 | unsigned long nr_to_scan; | ||
| 57 | |||
| 58 | /* Incremented by the number of inactive pages that were scanned */ | 46 | /* Incremented by the number of inactive pages that were scanned */ |
| 59 | unsigned long nr_scanned; | 47 | unsigned long nr_scanned; |
| 60 | 48 | ||
| 61 | /* Incremented by the number of pages reclaimed */ | ||
| 62 | unsigned long nr_reclaimed; | ||
| 63 | |||
| 64 | unsigned long nr_mapped; /* From page_state */ | 49 | unsigned long nr_mapped; /* From page_state */ |
| 65 | 50 | ||
| 66 | /* Ask shrink_caches, or shrink_zone to scan at this priority */ | ||
| 67 | unsigned int priority; | ||
| 68 | |||
| 69 | /* This context's GFP mask */ | 51 | /* This context's GFP mask */ |
| 70 | gfp_t gfp_mask; | 52 | gfp_t gfp_mask; |
| 71 | 53 | ||
| @@ -183,10 +165,11 @@ EXPORT_SYMBOL(remove_shrinker); | |||
| 183 | * | 165 | * |
| 184 | * Returns the number of slab objects which we shrunk. | 166 | * Returns the number of slab objects which we shrunk. |
| 185 | */ | 167 | */ |
| 186 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) | 168 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, |
| 169 | unsigned long lru_pages) | ||
| 187 | { | 170 | { |
| 188 | struct shrinker *shrinker; | 171 | struct shrinker *shrinker; |
| 189 | int ret = 0; | 172 | unsigned long ret = 0; |
| 190 | 173 | ||
| 191 | if (scanned == 0) | 174 | if (scanned == 0) |
| 192 | scanned = SWAP_CLUSTER_MAX; | 175 | scanned = SWAP_CLUSTER_MAX; |
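shrink_slab(), like the other reclaim paths further down in this file, moves its object and page counts from int to unsigned long. Whatever it buys in consistency with its callers, the wider type also removes a hard ceiling: a signed 32-bit count of 4 KiB pages wraps at 2^31 pages, which is only 8 TiB of memory. The arithmetic, for the record (illustrative):

	#include <stdio.h>

	int main(void)
	{
		/* A signed 32-bit page count wraps at 2^31 pages of 4 KiB each. */
		unsigned long long bytes = (1ULL << 31) * 4096;

		printf("overflow point: %llu TiB\n", bytes >> 40);	/* prints 8 */
		return 0;
	}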
| @@ -306,9 +289,10 @@ static void handle_write_error(struct address_space *mapping, | |||
| 306 | } | 289 | } |
| 307 | 290 | ||
| 308 | /* | 291 | /* |
| 309 | * pageout is called by shrink_list() for each dirty page. Calls ->writepage(). | 292 | * pageout is called by shrink_page_list() for each dirty page. |
| 293 | * Calls ->writepage(). | ||
| 310 | */ | 294 | */ |
| 311 | static pageout_t pageout(struct page *page, struct address_space *mapping) | 295 | pageout_t pageout(struct page *page, struct address_space *mapping) |
| 312 | { | 296 | { |
| 313 | /* | 297 | /* |
| 314 | * If the page is dirty, only perform writeback if that write | 298 | * If the page is dirty, only perform writeback if that write |
| @@ -376,7 +360,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) | |||
| 376 | return PAGE_CLEAN; | 360 | return PAGE_CLEAN; |
| 377 | } | 361 | } |
| 378 | 362 | ||
| 379 | static int remove_mapping(struct address_space *mapping, struct page *page) | 363 | int remove_mapping(struct address_space *mapping, struct page *page) |
| 380 | { | 364 | { |
| 381 | if (!mapping) | 365 | if (!mapping) |
| 382 | return 0; /* truncate got there first */ | 366 | return 0; /* truncate got there first */ |
| @@ -414,14 +398,15 @@ cannot_free: | |||
| 414 | } | 398 | } |
| 415 | 399 | ||
| 416 | /* | 400 | /* |
| 417 | * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed | 401 | * shrink_page_list() returns the number of reclaimed pages |
| 418 | */ | 402 | */ |
| 419 | static int shrink_list(struct list_head *page_list, struct scan_control *sc) | 403 | static unsigned long shrink_page_list(struct list_head *page_list, |
| 404 | struct scan_control *sc) | ||
| 420 | { | 405 | { |
| 421 | LIST_HEAD(ret_pages); | 406 | LIST_HEAD(ret_pages); |
| 422 | struct pagevec freed_pvec; | 407 | struct pagevec freed_pvec; |
| 423 | int pgactivate = 0; | 408 | int pgactivate = 0; |
| 424 | int reclaimed = 0; | 409 | unsigned long nr_reclaimed = 0; |
| 425 | 410 | ||
| 426 | cond_resched(); | 411 | cond_resched(); |
| 427 | 412 | ||
| @@ -464,12 +449,9 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
| 464 | * Anonymous process memory has backing store? | 449 | * Anonymous process memory has backing store? |
| 465 | * Try to allocate it some swap space here. | 450 | * Try to allocate it some swap space here. |
| 466 | */ | 451 | */ |
| 467 | if (PageAnon(page) && !PageSwapCache(page)) { | 452 | if (PageAnon(page) && !PageSwapCache(page)) |
| 468 | if (!sc->may_swap) | ||
| 469 | goto keep_locked; | ||
| 470 | if (!add_to_swap(page, GFP_ATOMIC)) | 453 | if (!add_to_swap(page, GFP_ATOMIC)) |
| 471 | goto activate_locked; | 454 | goto activate_locked; |
| 472 | } | ||
| 473 | #endif /* CONFIG_SWAP */ | 455 | #endif /* CONFIG_SWAP */ |
| 474 | 456 | ||
| 475 | mapping = page_mapping(page); | 457 | mapping = page_mapping(page); |
| @@ -481,12 +463,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
| 481 | * processes. Try to unmap it here. | 463 | * processes. Try to unmap it here. |
| 482 | */ | 464 | */ |
| 483 | if (page_mapped(page) && mapping) { | 465 | if (page_mapped(page) && mapping) { |
| 484 | /* | ||
| 485 | * No unmapping if we do not swap | ||
| 486 | */ | ||
| 487 | if (!sc->may_swap) | ||
| 488 | goto keep_locked; | ||
| 489 | |||
| 490 | switch (try_to_unmap(page, 0)) { | 466 | switch (try_to_unmap(page, 0)) { |
| 491 | case SWAP_FAIL: | 467 | case SWAP_FAIL: |
| 492 | goto activate_locked; | 468 | goto activate_locked; |
| @@ -561,7 +537,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
| 561 | 537 | ||
| 562 | free_it: | 538 | free_it: |
| 563 | unlock_page(page); | 539 | unlock_page(page); |
| 564 | reclaimed++; | 540 | nr_reclaimed++; |
| 565 | if (!pagevec_add(&freed_pvec, page)) | 541 | if (!pagevec_add(&freed_pvec, page)) |
| 566 | __pagevec_release_nonlru(&freed_pvec); | 542 | __pagevec_release_nonlru(&freed_pvec); |
| 567 | continue; | 543 | continue; |
| @@ -579,483 +555,8 @@ keep: | |||
| 579 | if (pagevec_count(&freed_pvec)) | 555 | if (pagevec_count(&freed_pvec)) |
| 580 | __pagevec_release_nonlru(&freed_pvec); | 556 | __pagevec_release_nonlru(&freed_pvec); |
| 581 | mod_page_state(pgactivate, pgactivate); | 557 | mod_page_state(pgactivate, pgactivate); |
| 582 | sc->nr_reclaimed += reclaimed; | 558 | return nr_reclaimed; |
| 583 | return reclaimed; | ||
| 584 | } | ||
| 585 | |||
| 586 | #ifdef CONFIG_MIGRATION | ||
| 587 | static inline void move_to_lru(struct page *page) | ||
| 588 | { | ||
| 589 | list_del(&page->lru); | ||
| 590 | if (PageActive(page)) { | ||
| 591 | /* | ||
| 592 | * lru_cache_add_active checks that | ||
| 593 | * the PG_active bit is off. | ||
| 594 | */ | ||
| 595 | ClearPageActive(page); | ||
| 596 | lru_cache_add_active(page); | ||
| 597 | } else { | ||
| 598 | lru_cache_add(page); | ||
| 599 | } | ||
| 600 | put_page(page); | ||
| 601 | } | ||
| 602 | |||
| 603 | /* | ||
| 604 | * Add isolated pages on the list back to the LRU. | ||
| 605 | * | ||
| 606 | * returns the number of pages put back. | ||
| 607 | */ | ||
| 608 | int putback_lru_pages(struct list_head *l) | ||
| 609 | { | ||
| 610 | struct page *page; | ||
| 611 | struct page *page2; | ||
| 612 | int count = 0; | ||
| 613 | |||
| 614 | list_for_each_entry_safe(page, page2, l, lru) { | ||
| 615 | move_to_lru(page); | ||
| 616 | count++; | ||
| 617 | } | ||
| 618 | return count; | ||
| 619 | } | ||
| 620 | |||
| 621 | /* | ||
| 622 | * Non migratable page | ||
| 623 | */ | ||
| 624 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
| 625 | { | ||
| 626 | return -EIO; | ||
| 627 | } | ||
| 628 | EXPORT_SYMBOL(fail_migrate_page); | ||
| 629 | |||
| 630 | /* | ||
| 631 | * swapout a single page | ||
| 632 | * page is locked upon entry, unlocked on exit | ||
| 633 | */ | ||
| 634 | static int swap_page(struct page *page) | ||
| 635 | { | ||
| 636 | struct address_space *mapping = page_mapping(page); | ||
| 637 | |||
| 638 | if (page_mapped(page) && mapping) | ||
| 639 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | ||
| 640 | goto unlock_retry; | ||
| 641 | |||
| 642 | if (PageDirty(page)) { | ||
| 643 | /* Page is dirty, try to write it out here */ | ||
| 644 | switch(pageout(page, mapping)) { | ||
| 645 | case PAGE_KEEP: | ||
| 646 | case PAGE_ACTIVATE: | ||
| 647 | goto unlock_retry; | ||
| 648 | |||
| 649 | case PAGE_SUCCESS: | ||
| 650 | goto retry; | ||
| 651 | |||
| 652 | case PAGE_CLEAN: | ||
| 653 | ; /* try to free the page below */ | ||
| 654 | } | ||
| 655 | } | ||
| 656 | |||
| 657 | if (PagePrivate(page)) { | ||
| 658 | if (!try_to_release_page(page, GFP_KERNEL) || | ||
| 659 | (!mapping && page_count(page) == 1)) | ||
| 660 | goto unlock_retry; | ||
| 661 | } | ||
| 662 | |||
| 663 | if (remove_mapping(mapping, page)) { | ||
| 664 | /* Success */ | ||
| 665 | unlock_page(page); | ||
| 666 | return 0; | ||
| 667 | } | ||
| 668 | |||
| 669 | unlock_retry: | ||
| 670 | unlock_page(page); | ||
| 671 | |||
| 672 | retry: | ||
| 673 | return -EAGAIN; | ||
| 674 | } | ||
| 675 | EXPORT_SYMBOL(swap_page); | ||
| 676 | |||
| 677 | /* | ||
| 678 | * Page migration was first developed in the context of the memory hotplug | ||
| 679 | * project. The main authors of the migration code are: | ||
| 680 | * | ||
| 681 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | ||
| 682 | * Hirokazu Takahashi <taka@valinux.co.jp> | ||
| 683 | * Dave Hansen <haveblue@us.ibm.com> | ||
| 684 | * Christoph Lameter <clameter@sgi.com> | ||
| 685 | */ | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Remove references for a page and establish the new page with the correct | ||
| 689 | * basic settings to be able to stop accesses to the page. | ||
| 690 | */ | ||
| 691 | int migrate_page_remove_references(struct page *newpage, | ||
| 692 | struct page *page, int nr_refs) | ||
| 693 | { | ||
| 694 | struct address_space *mapping = page_mapping(page); | ||
| 695 | struct page **radix_pointer; | ||
| 696 | |||
| 697 | /* | ||
| 698 | * Avoid doing any of the following work if the page count | ||
| 699 | * indicates that the page is in use or truncate has removed | ||
| 700 | * the page. | ||
| 701 | */ | ||
| 702 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
| 703 | return -EAGAIN; | ||
| 704 | |||
| 705 | /* | ||
| 706 | * Establish swap ptes for anonymous pages or destroy pte | ||
| 707 | * maps for files. | ||
| 708 | * | ||
| 709 | * In order to reestablish file backed mappings the fault handlers | ||
| 710 | * will take the radix tree_lock which may then be used to stop | ||
| 711 | * processses from accessing this page until the new page is ready. | ||
| 712 | * | ||
| 713 | * A process accessing via a swap pte (an anonymous page) will take a | ||
| 714 | * page_lock on the old page which will block the process until the | ||
| 715 | * migration attempt is complete. At that time the PageSwapCache bit | ||
| 716 | * will be examined. If the page was migrated then the PageSwapCache | ||
| 717 | * bit will be clear and the operation to retrieve the page will be | ||
| 718 | * retried which will find the new page in the radix tree. Then a new | ||
| 719 | * direct mapping may be generated based on the radix tree contents. | ||
| 720 | * | ||
| 721 | * If the page was not migrated then the PageSwapCache bit | ||
| 722 | * is still set and the operation may continue. | ||
| 723 | */ | ||
| 724 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
| 725 | /* A vma has VM_LOCKED set -> Permanent failure */ | ||
| 726 | return -EPERM; | ||
| 727 | |||
| 728 | /* | ||
| 729 | * Give up if we were unable to remove all mappings. | ||
| 730 | */ | ||
| 731 | if (page_mapcount(page)) | ||
| 732 | return -EAGAIN; | ||
| 733 | |||
| 734 | write_lock_irq(&mapping->tree_lock); | ||
| 735 | |||
| 736 | radix_pointer = (struct page **)radix_tree_lookup_slot( | ||
| 737 | &mapping->page_tree, | ||
| 738 | page_index(page)); | ||
| 739 | |||
| 740 | if (!page_mapping(page) || page_count(page) != nr_refs || | ||
| 741 | *radix_pointer != page) { | ||
| 742 | write_unlock_irq(&mapping->tree_lock); | ||
| 743 | return -EAGAIN; | ||
| 744 | } | ||
| 745 | |||
| 746 | /* | ||
| 747 | * Now we know that no one else is looking at the page. | ||
| 748 | * | ||
| 749 | * Certain minimal information about a page must be available | ||
| 750 | * in order for other subsystems to properly handle the page if they | ||
| 751 | * find it through the radix tree update before we are finished | ||
| 752 | * copying the page. | ||
| 753 | */ | ||
| 754 | get_page(newpage); | ||
| 755 | newpage->index = page->index; | ||
| 756 | newpage->mapping = page->mapping; | ||
| 757 | if (PageSwapCache(page)) { | ||
| 758 | SetPageSwapCache(newpage); | ||
| 759 | set_page_private(newpage, page_private(page)); | ||
| 760 | } | ||
| 761 | |||
| 762 | *radix_pointer = newpage; | ||
| 763 | __put_page(page); | ||
| 764 | write_unlock_irq(&mapping->tree_lock); | ||
| 765 | |||
| 766 | return 0; | ||
| 767 | } | ||
| 768 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
| 769 | |||
| 770 | /* | ||
| 771 | * Copy the page to its new location | ||
| 772 | */ | ||
| 773 | void migrate_page_copy(struct page *newpage, struct page *page) | ||
| 774 | { | ||
| 775 | copy_highpage(newpage, page); | ||
| 776 | |||
| 777 | if (PageError(page)) | ||
| 778 | SetPageError(newpage); | ||
| 779 | if (PageReferenced(page)) | ||
| 780 | SetPageReferenced(newpage); | ||
| 781 | if (PageUptodate(page)) | ||
| 782 | SetPageUptodate(newpage); | ||
| 783 | if (PageActive(page)) | ||
| 784 | SetPageActive(newpage); | ||
| 785 | if (PageChecked(page)) | ||
| 786 | SetPageChecked(newpage); | ||
| 787 | if (PageMappedToDisk(page)) | ||
| 788 | SetPageMappedToDisk(newpage); | ||
| 789 | |||
| 790 | if (PageDirty(page)) { | ||
| 791 | clear_page_dirty_for_io(page); | ||
| 792 | set_page_dirty(newpage); | ||
| 793 | } | ||
| 794 | |||
| 795 | ClearPageSwapCache(page); | ||
| 796 | ClearPageActive(page); | ||
| 797 | ClearPagePrivate(page); | ||
| 798 | set_page_private(page, 0); | ||
| 799 | page->mapping = NULL; | ||
| 800 | |||
| 801 | /* | ||
| 802 | * If any waiters have accumulated on the new page then | ||
| 803 | * wake them up. | ||
| 804 | */ | ||
| 805 | if (PageWriteback(newpage)) | ||
| 806 | end_page_writeback(newpage); | ||
| 807 | } | ||
| 808 | EXPORT_SYMBOL(migrate_page_copy); | ||
| 809 | |||
| 810 | /* | ||
| 811 | * Common logic to directly migrate a single page suitable for | ||
| 812 | * pages that do not use PagePrivate. | ||
| 813 | * | ||
| 814 | * Pages are locked upon entry and exit. | ||
| 815 | */ | ||
| 816 | int migrate_page(struct page *newpage, struct page *page) | ||
| 817 | { | ||
| 818 | int rc; | ||
| 819 | |||
| 820 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | ||
| 821 | |||
| 822 | rc = migrate_page_remove_references(newpage, page, 2); | ||
| 823 | |||
| 824 | if (rc) | ||
| 825 | return rc; | ||
| 826 | |||
| 827 | migrate_page_copy(newpage, page); | ||
| 828 | |||
| 829 | /* | ||
| 830 | * Remove auxiliary swap entries and replace | ||
| 831 | * them with real ptes. | ||
| 832 | * | ||
| 833 | * Note that a real pte entry will allow processes that are not | ||
| 834 | * waiting on the page lock to use the new page via the page tables | ||
| 835 | * before the new page is unlocked. | ||
| 836 | */ | ||
| 837 | remove_from_swap(newpage); | ||
| 838 | return 0; | ||
| 839 | } | 559 | } |
| 840 | EXPORT_SYMBOL(migrate_page); | ||
| 841 | |||
| 842 | /* | ||
| 843 | * migrate_pages | ||
| 844 | * | ||
| 845 | * Two lists are passed to this function. The first list | ||
| 846 | * contains the pages isolated from the LRU to be migrated. | ||
| 847 | * The second list contains new pages that the pages isolated | ||
| 848 | * can be moved to. If the second list is NULL then all | ||
| 849 | * pages are swapped out. | ||
| 850 | * | ||
| 851 | * The function returns after 10 attempts or if no pages | ||
| 852 | * are movable anymore because to has become empty | ||
| 853 | * or no retryable pages exist anymore. | ||
| 854 | * | ||
| 855 | * Return: Number of pages not migrated when "to" ran empty. | ||
| 856 | */ | ||
| 857 | int migrate_pages(struct list_head *from, struct list_head *to, | ||
| 858 | struct list_head *moved, struct list_head *failed) | ||
| 859 | { | ||
| 860 | int retry; | ||
| 861 | int nr_failed = 0; | ||
| 862 | int pass = 0; | ||
| 863 | struct page *page; | ||
| 864 | struct page *page2; | ||
| 865 | int swapwrite = current->flags & PF_SWAPWRITE; | ||
| 866 | int rc; | ||
| 867 | |||
| 868 | if (!swapwrite) | ||
| 869 | current->flags |= PF_SWAPWRITE; | ||
| 870 | |||
| 871 | redo: | ||
| 872 | retry = 0; | ||
| 873 | |||
| 874 | list_for_each_entry_safe(page, page2, from, lru) { | ||
| 875 | struct page *newpage = NULL; | ||
| 876 | struct address_space *mapping; | ||
| 877 | |||
| 878 | cond_resched(); | ||
| 879 | |||
| 880 | rc = 0; | ||
| 881 | if (page_count(page) == 1) | ||
| 882 | /* page was freed from under us. So we are done. */ | ||
| 883 | goto next; | ||
| 884 | |||
| 885 | if (to && list_empty(to)) | ||
| 886 | break; | ||
| 887 | |||
| 888 | /* | ||
| 889 | * Skip locked pages during the first two passes to give the | ||
| 890 | * functions holding the lock time to release the page. Later we | ||
| 891 | * use lock_page() to have a higher chance of acquiring the | ||
| 892 | * lock. | ||
| 893 | */ | ||
| 894 | rc = -EAGAIN; | ||
| 895 | if (pass > 2) | ||
| 896 | lock_page(page); | ||
| 897 | else | ||
| 898 | if (TestSetPageLocked(page)) | ||
| 899 | goto next; | ||
| 900 | |||
| 901 | /* | ||
| 902 | * Only wait on writeback if we have already done a pass where | ||
| 903 | * we we may have triggered writeouts for lots of pages. | ||
| 904 | */ | ||
| 905 | if (pass > 0) { | ||
| 906 | wait_on_page_writeback(page); | ||
| 907 | } else { | ||
| 908 | if (PageWriteback(page)) | ||
| 909 | goto unlock_page; | ||
| 910 | } | ||
| 911 | |||
| 912 | /* | ||
| 913 | * Anonymous pages must have swap cache references otherwise | ||
| 914 | * the information contained in the page maps cannot be | ||
| 915 | * preserved. | ||
| 916 | */ | ||
| 917 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
| 918 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
| 919 | rc = -ENOMEM; | ||
| 920 | goto unlock_page; | ||
| 921 | } | ||
| 922 | } | ||
| 923 | |||
| 924 | if (!to) { | ||
| 925 | rc = swap_page(page); | ||
| 926 | goto next; | ||
| 927 | } | ||
| 928 | |||
| 929 | newpage = lru_to_page(to); | ||
| 930 | lock_page(newpage); | ||
| 931 | |||
| 932 | /* | ||
| 933 | * Pages are properly locked and writeback is complete. | ||
| 934 | * Try to migrate the page. | ||
| 935 | */ | ||
| 936 | mapping = page_mapping(page); | ||
| 937 | if (!mapping) | ||
| 938 | goto unlock_both; | ||
| 939 | |||
| 940 | if (mapping->a_ops->migratepage) { | ||
| 941 | /* | ||
| 942 | * Most pages have a mapping and most filesystems | ||
| 943 | * should provide a migration function. Anonymous | ||
| 944 | * pages are part of swap space which also has its | ||
| 945 | * own migration function. This is the most common | ||
| 946 | * path for page migration. | ||
| 947 | */ | ||
| 948 | rc = mapping->a_ops->migratepage(newpage, page); | ||
| 949 | goto unlock_both; | ||
| 950 | } | ||
| 951 | |||
| 952 | /* | ||
| 953 | * Default handling if a filesystem does not provide | ||
| 954 | * a migration function. We can only migrate clean | ||
| 955 | * pages so try to write out any dirty pages first. | ||
| 956 | */ | ||
| 957 | if (PageDirty(page)) { | ||
| 958 | switch (pageout(page, mapping)) { | ||
| 959 | case PAGE_KEEP: | ||
| 960 | case PAGE_ACTIVATE: | ||
| 961 | goto unlock_both; | ||
| 962 | |||
| 963 | case PAGE_SUCCESS: | ||
| 964 | unlock_page(newpage); | ||
| 965 | goto next; | ||
| 966 | |||
| 967 | case PAGE_CLEAN: | ||
| 968 | ; /* try to migrate the page below */ | ||
| 969 | } | ||
| 970 | } | ||
| 971 | |||
| 972 | /* | ||
| 973 | * Buffers are managed in a filesystem specific way. | ||
| 974 | * We must have no buffers or drop them. | ||
| 975 | */ | ||
| 976 | if (!page_has_buffers(page) || | ||
| 977 | try_to_release_page(page, GFP_KERNEL)) { | ||
| 978 | rc = migrate_page(newpage, page); | ||
| 979 | goto unlock_both; | ||
| 980 | } | ||
| 981 | |||
| 982 | /* | ||
| 983 | * On early passes with mapped pages simply | ||
| 984 | * retry. There may be a lock held for some | ||
| 985 | * buffers that may go away. Later | ||
| 986 | * swap them out. | ||
| 987 | */ | ||
| 988 | if (pass > 4) { | ||
| 989 | /* | ||
| 990 | * Persistently unable to drop buffers..... As a | ||
| 991 | * measure of last resort we fall back to | ||
| 992 | * swap_page(). | ||
| 993 | */ | ||
| 994 | unlock_page(newpage); | ||
| 995 | newpage = NULL; | ||
| 996 | rc = swap_page(page); | ||
| 997 | goto next; | ||
| 998 | } | ||
| 999 | |||
| 1000 | unlock_both: | ||
| 1001 | unlock_page(newpage); | ||
| 1002 | |||
| 1003 | unlock_page: | ||
| 1004 | unlock_page(page); | ||
| 1005 | |||
| 1006 | next: | ||
| 1007 | if (rc == -EAGAIN) { | ||
| 1008 | retry++; | ||
| 1009 | } else if (rc) { | ||
| 1010 | /* Permanent failure */ | ||
| 1011 | list_move(&page->lru, failed); | ||
| 1012 | nr_failed++; | ||
| 1013 | } else { | ||
| 1014 | if (newpage) { | ||
| 1015 | /* Successful migration. Return page to LRU */ | ||
| 1016 | move_to_lru(newpage); | ||
| 1017 | } | ||
| 1018 | list_move(&page->lru, moved); | ||
| 1019 | } | ||
| 1020 | } | ||
| 1021 | if (retry && pass++ < 10) | ||
| 1022 | goto redo; | ||
| 1023 | |||
| 1024 | if (!swapwrite) | ||
| 1025 | current->flags &= ~PF_SWAPWRITE; | ||
| 1026 | |||
| 1027 | return nr_failed + retry; | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | /* | ||
| 1031 | * Isolate one page from the LRU lists and put it on the | ||
| 1032 | * indicated list with elevated refcount. | ||
| 1033 | * | ||
| 1034 | * Result: | ||
| 1035 | * 0 = page not on LRU list | ||
| 1036 | * 1 = page removed from LRU list and added to the specified list. | ||
| 1037 | */ | ||
| 1038 | int isolate_lru_page(struct page *page) | ||
| 1039 | { | ||
| 1040 | int ret = 0; | ||
| 1041 | |||
| 1042 | if (PageLRU(page)) { | ||
| 1043 | struct zone *zone = page_zone(page); | ||
| 1044 | spin_lock_irq(&zone->lru_lock); | ||
| 1045 | if (TestClearPageLRU(page)) { | ||
| 1046 | ret = 1; | ||
| 1047 | get_page(page); | ||
| 1048 | if (PageActive(page)) | ||
| 1049 | del_page_from_active_list(zone, page); | ||
| 1050 | else | ||
| 1051 | del_page_from_inactive_list(zone, page); | ||
| 1052 | } | ||
| 1053 | spin_unlock_irq(&zone->lru_lock); | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | return ret; | ||
| 1057 | } | ||
| 1058 | #endif | ||
| 1059 | 560 | ||
| 1060 | /* | 561 | /* |
| 1061 | * zone->lru_lock is heavily contended. Some of the functions that | 562 | * zone->lru_lock is heavily contended. Some of the functions that |
| @@ -1074,32 +575,35 @@ int isolate_lru_page(struct page *page) | |||
| 1074 | * | 575 | * |
| 1075 | * returns how many pages were moved onto *@dst. | 576 | * returns how many pages were moved onto *@dst. |
| 1076 | */ | 577 | */ |
| 1077 | static int isolate_lru_pages(int nr_to_scan, struct list_head *src, | 578 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, |
| 1078 | struct list_head *dst, int *scanned) | 579 | struct list_head *src, struct list_head *dst, |
| 580 | unsigned long *scanned) | ||
| 1079 | { | 581 | { |
| 1080 | int nr_taken = 0; | 582 | unsigned long nr_taken = 0; |
| 1081 | struct page *page; | 583 | struct page *page; |
| 1082 | int scan = 0; | 584 | unsigned long scan; |
| 1083 | 585 | ||
| 1084 | while (scan++ < nr_to_scan && !list_empty(src)) { | 586 | for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { |
| 587 | struct list_head *target; | ||
| 1085 | page = lru_to_page(src); | 588 | page = lru_to_page(src); |
| 1086 | prefetchw_prev_lru_page(page, src, flags); | 589 | prefetchw_prev_lru_page(page, src, flags); |
| 1087 | 590 | ||
| 1088 | if (!TestClearPageLRU(page)) | 591 | BUG_ON(!PageLRU(page)); |
| 1089 | BUG(); | 592 | |
| 1090 | list_del(&page->lru); | 593 | list_del(&page->lru); |
| 1091 | if (get_page_testone(page)) { | 594 | target = src; |
| 595 | if (likely(get_page_unless_zero(page))) { | ||
| 1092 | /* | 596 | /* |
| 1093 | * It is being freed elsewhere | 597 | * Be careful not to clear PageLRU until after we're |
| 598 | * sure the page is not being freed elsewhere -- the | ||
| 599 | * page release code relies on it. | ||
| 1094 | */ | 600 | */ |
| 1095 | __put_page(page); | 601 | ClearPageLRU(page); |
| 1096 | SetPageLRU(page); | 602 | target = dst; |
| 1097 | list_add(&page->lru, src); | ||
| 1098 | continue; | ||
| 1099 | } else { | ||
| 1100 | list_add(&page->lru, dst); | ||
| 1101 | nr_taken++; | 603 | nr_taken++; |
| 1102 | } | 604 | } /* else it is being freed elsewhere */ |
| 605 | |||
| 606 | list_add(&page->lru, target); | ||
| 1103 | } | 607 | } |
| 1104 | 608 | ||
| 1105 | *scanned = scan; | 609 | *scanned = scan; |
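isolate_lru_pages() above reverses the old order of operations: instead of clearing PageLRU and then discovering, via the refcount, that the page was already being freed and having to undo everything, it first tries get_page_unless_zero() and only clears PageLRU once it holds a reference; pages already on their way to the allocator are simply left on the source list. The grab-a-reference-only-if-still-alive primitive, sketched with C11 atomics (user-space toy, not the kernel implementation):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct obj { atomic_int refcount; /* ... */ };

	/* Take a reference unless the count has already dropped to zero. */
	static bool get_unless_zero(struct obj *o)
	{
		int c = atomic_load(&o->refcount);

		while (c != 0) {
			if (atomic_compare_exchange_weak(&o->refcount, &c, c + 1))
				return true;	/* reference taken, safe to isolate */
		}
		return false;			/* being freed elsewhere, leave it alone */
	}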
| @@ -1107,23 +611,26 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, | |||
| 1107 | } | 611 | } |
| 1108 | 612 | ||
| 1109 | /* | 613 | /* |
| 1110 | * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed | 614 | * shrink_inactive_list() is a helper for shrink_zone(). It returns the number |
| 615 | * of reclaimed pages | ||
| 1111 | */ | 616 | */ |
| 1112 | static void shrink_cache(struct zone *zone, struct scan_control *sc) | 617 | static unsigned long shrink_inactive_list(unsigned long max_scan, |
| 618 | struct zone *zone, struct scan_control *sc) | ||
| 1113 | { | 619 | { |
| 1114 | LIST_HEAD(page_list); | 620 | LIST_HEAD(page_list); |
| 1115 | struct pagevec pvec; | 621 | struct pagevec pvec; |
| 1116 | int max_scan = sc->nr_to_scan; | 622 | unsigned long nr_scanned = 0; |
| 623 | unsigned long nr_reclaimed = 0; | ||
| 1117 | 624 | ||
| 1118 | pagevec_init(&pvec, 1); | 625 | pagevec_init(&pvec, 1); |
| 1119 | 626 | ||
| 1120 | lru_add_drain(); | 627 | lru_add_drain(); |
| 1121 | spin_lock_irq(&zone->lru_lock); | 628 | spin_lock_irq(&zone->lru_lock); |
| 1122 | while (max_scan > 0) { | 629 | do { |
| 1123 | struct page *page; | 630 | struct page *page; |
| 1124 | int nr_taken; | 631 | unsigned long nr_taken; |
| 1125 | int nr_scan; | 632 | unsigned long nr_scan; |
| 1126 | int nr_freed; | 633 | unsigned long nr_freed; |
| 1127 | 634 | ||
| 1128 | nr_taken = isolate_lru_pages(sc->swap_cluster_max, | 635 | nr_taken = isolate_lru_pages(sc->swap_cluster_max, |
| 1129 | &zone->inactive_list, | 636 | &zone->inactive_list, |
| @@ -1132,12 +639,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
| 1132 | zone->pages_scanned += nr_scan; | 639 | zone->pages_scanned += nr_scan; |
| 1133 | spin_unlock_irq(&zone->lru_lock); | 640 | spin_unlock_irq(&zone->lru_lock); |
| 1134 | 641 | ||
| 1135 | if (nr_taken == 0) | 642 | nr_scanned += nr_scan; |
| 1136 | goto done; | 643 | nr_freed = shrink_page_list(&page_list, sc); |
| 1137 | 644 | nr_reclaimed += nr_freed; | |
| 1138 | max_scan -= nr_scan; | ||
| 1139 | nr_freed = shrink_list(&page_list, sc); | ||
| 1140 | |||
| 1141 | local_irq_disable(); | 645 | local_irq_disable(); |
| 1142 | if (current_is_kswapd()) { | 646 | if (current_is_kswapd()) { |
| 1143 | __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); | 647 | __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); |
| @@ -1146,14 +650,17 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
| 1146 | __mod_page_state_zone(zone, pgscan_direct, nr_scan); | 650 | __mod_page_state_zone(zone, pgscan_direct, nr_scan); |
| 1147 | __mod_page_state_zone(zone, pgsteal, nr_freed); | 651 | __mod_page_state_zone(zone, pgsteal, nr_freed); |
| 1148 | 652 | ||
| 653 | if (nr_taken == 0) | ||
| 654 | goto done; | ||
| 655 | |||
| 1149 | spin_lock(&zone->lru_lock); | 656 | spin_lock(&zone->lru_lock); |
| 1150 | /* | 657 | /* |
| 1151 | * Put back any unfreeable pages. | 658 | * Put back any unfreeable pages. |
| 1152 | */ | 659 | */ |
| 1153 | while (!list_empty(&page_list)) { | 660 | while (!list_empty(&page_list)) { |
| 1154 | page = lru_to_page(&page_list); | 661 | page = lru_to_page(&page_list); |
| 1155 | if (TestSetPageLRU(page)) | 662 | BUG_ON(PageLRU(page)); |
| 1156 | BUG(); | 663 | SetPageLRU(page); |
| 1157 | list_del(&page->lru); | 664 | list_del(&page->lru); |
| 1158 | if (PageActive(page)) | 665 | if (PageActive(page)) |
| 1159 | add_page_to_active_list(zone, page); | 666 | add_page_to_active_list(zone, page); |
| @@ -1165,10 +672,12 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) | |||
| 1165 | spin_lock_irq(&zone->lru_lock); | 672 | spin_lock_irq(&zone->lru_lock); |
| 1166 | } | 673 | } |
| 1167 | } | 674 | } |
| 1168 | } | 675 | } while (nr_scanned < max_scan); |
| 1169 | spin_unlock_irq(&zone->lru_lock); | 676 | spin_unlock(&zone->lru_lock); |
| 1170 | done: | 677 | done: |
| 678 | local_irq_enable(); | ||
| 1171 | pagevec_release(&pvec); | 679 | pagevec_release(&pvec); |
| 680 | return nr_reclaimed; | ||
| 1172 | } | 681 | } |
| 1173 | 682 | ||
| 1174 | /* | 683 | /* |
| @@ -1188,13 +697,12 @@ done: | |||
| 1188 | * The downside is that we have to touch page->_count against each page. | 697 | * The downside is that we have to touch page->_count against each page. |
| 1189 | * But we had to alter page->flags anyway. | 698 | * But we had to alter page->flags anyway. |
| 1190 | */ | 699 | */ |
| 1191 | static void | 700 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, |
| 1192 | refill_inactive_zone(struct zone *zone, struct scan_control *sc) | 701 | struct scan_control *sc) |
| 1193 | { | 702 | { |
| 1194 | int pgmoved; | 703 | unsigned long pgmoved; |
| 1195 | int pgdeactivate = 0; | 704 | int pgdeactivate = 0; |
| 1196 | int pgscanned; | 705 | unsigned long pgscanned; |
| 1197 | int nr_pages = sc->nr_to_scan; | ||
| 1198 | LIST_HEAD(l_hold); /* The pages which were snipped off */ | 706 | LIST_HEAD(l_hold); /* The pages which were snipped off */ |
| 1199 | LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */ | 707 | LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */ |
| 1200 | LIST_HEAD(l_active); /* Pages to go onto the active_list */ | 708 | LIST_HEAD(l_active); /* Pages to go onto the active_list */ |
| @@ -1202,7 +710,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
| 1202 | struct pagevec pvec; | 710 | struct pagevec pvec; |
| 1203 | int reclaim_mapped = 0; | 711 | int reclaim_mapped = 0; |
| 1204 | 712 | ||
| 1205 | if (unlikely(sc->may_swap)) { | 713 | if (sc->may_swap) { |
| 1206 | long mapped_ratio; | 714 | long mapped_ratio; |
| 1207 | long distress; | 715 | long distress; |
| 1208 | long swap_tendency; | 716 | long swap_tendency; |
| @@ -1272,10 +780,11 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
| 1272 | while (!list_empty(&l_inactive)) { | 780 | while (!list_empty(&l_inactive)) { |
| 1273 | page = lru_to_page(&l_inactive); | 781 | page = lru_to_page(&l_inactive); |
| 1274 | prefetchw_prev_lru_page(page, &l_inactive, flags); | 782 | prefetchw_prev_lru_page(page, &l_inactive, flags); |
| 1275 | if (TestSetPageLRU(page)) | 783 | BUG_ON(PageLRU(page)); |
| 1276 | BUG(); | 784 | SetPageLRU(page); |
| 1277 | if (!TestClearPageActive(page)) | 785 | BUG_ON(!PageActive(page)); |
| 1278 | BUG(); | 786 | ClearPageActive(page); |
| 787 | |||
| 1279 | list_move(&page->lru, &zone->inactive_list); | 788 | list_move(&page->lru, &zone->inactive_list); |
| 1280 | pgmoved++; | 789 | pgmoved++; |
| 1281 | if (!pagevec_add(&pvec, page)) { | 790 | if (!pagevec_add(&pvec, page)) { |
| @@ -1301,8 +810,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
| 1301 | while (!list_empty(&l_active)) { | 810 | while (!list_empty(&l_active)) { |
| 1302 | page = lru_to_page(&l_active); | 811 | page = lru_to_page(&l_active); |
| 1303 | prefetchw_prev_lru_page(page, &l_active, flags); | 812 | prefetchw_prev_lru_page(page, &l_active, flags); |
| 1304 | if (TestSetPageLRU(page)) | 813 | BUG_ON(PageLRU(page)); |
| 1305 | BUG(); | 814 | SetPageLRU(page); |
| 1306 | BUG_ON(!PageActive(page)); | 815 | BUG_ON(!PageActive(page)); |
| 1307 | list_move(&page->lru, &zone->active_list); | 816 | list_move(&page->lru, &zone->active_list); |
| 1308 | pgmoved++; | 817 | pgmoved++; |
| @@ -1327,11 +836,13 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
| 1327 | /* | 836 | /* |
| 1328 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. | 837 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. |
| 1329 | */ | 838 | */ |
| 1330 | static void | 839 | static unsigned long shrink_zone(int priority, struct zone *zone, |
| 1331 | shrink_zone(struct zone *zone, struct scan_control *sc) | 840 | struct scan_control *sc) |
| 1332 | { | 841 | { |
| 1333 | unsigned long nr_active; | 842 | unsigned long nr_active; |
| 1334 | unsigned long nr_inactive; | 843 | unsigned long nr_inactive; |
| 844 | unsigned long nr_to_scan; | ||
| 845 | unsigned long nr_reclaimed = 0; | ||
| 1335 | 846 | ||
| 1336 | atomic_inc(&zone->reclaim_in_progress); | 847 | atomic_inc(&zone->reclaim_in_progress); |
| 1337 | 848 | ||
| @@ -1339,14 +850,14 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
| 1339 | * Add one to `nr_to_scan' just to make sure that the kernel will | 850 | * Add one to `nr_to_scan' just to make sure that the kernel will |
| 1340 | * slowly sift through the active list. | 851 | * slowly sift through the active list. |
| 1341 | */ | 852 | */ |
| 1342 | zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1; | 853 | zone->nr_scan_active += (zone->nr_active >> priority) + 1; |
| 1343 | nr_active = zone->nr_scan_active; | 854 | nr_active = zone->nr_scan_active; |
| 1344 | if (nr_active >= sc->swap_cluster_max) | 855 | if (nr_active >= sc->swap_cluster_max) |
| 1345 | zone->nr_scan_active = 0; | 856 | zone->nr_scan_active = 0; |
| 1346 | else | 857 | else |
| 1347 | nr_active = 0; | 858 | nr_active = 0; |
| 1348 | 859 | ||
| 1349 | zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1; | 860 | zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; |
| 1350 | nr_inactive = zone->nr_scan_inactive; | 861 | nr_inactive = zone->nr_scan_inactive; |
| 1351 | if (nr_inactive >= sc->swap_cluster_max) | 862 | if (nr_inactive >= sc->swap_cluster_max) |
| 1352 | zone->nr_scan_inactive = 0; | 863 | zone->nr_scan_inactive = 0; |
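shrink_zone() sizes its work by shifting the LRU list length right by the scan priority: each call adds (list_length >> priority) + 1 to the zone's deferred scan count, so at the default priority only a small slice of the list is queued, and every time reclaim has to retry at a lower priority the slice doubles, reaching the whole list at priority 0. A quick illustration of how the accumulator grows (DEF_PRIORITY of 12 matches the kernel's default of the era; the list length is made up):

	#include <stdio.h>

	#define DEF_PRIORITY 12

	int main(void)
	{
		unsigned long nr_inactive = 1UL << 20;	/* 1M pages on the list */
		unsigned long nr_scan = 0;
		int priority;

		for (priority = DEF_PRIORITY; priority >= 8; priority--) {
			nr_scan += (nr_inactive >> priority) + 1;
			printf("priority %2d: batch grows to %lu\n", priority, nr_scan);
		}
		return 0;
	}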
| @@ -1355,23 +866,25 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
| 1355 | 866 | ||
| 1356 | while (nr_active || nr_inactive) { | 867 | while (nr_active || nr_inactive) { |
| 1357 | if (nr_active) { | 868 | if (nr_active) { |
| 1358 | sc->nr_to_scan = min(nr_active, | 869 | nr_to_scan = min(nr_active, |
| 1359 | (unsigned long)sc->swap_cluster_max); | 870 | (unsigned long)sc->swap_cluster_max); |
| 1360 | nr_active -= sc->nr_to_scan; | 871 | nr_active -= nr_to_scan; |
| 1361 | refill_inactive_zone(zone, sc); | 872 | shrink_active_list(nr_to_scan, zone, sc); |
| 1362 | } | 873 | } |
| 1363 | 874 | ||
| 1364 | if (nr_inactive) { | 875 | if (nr_inactive) { |
| 1365 | sc->nr_to_scan = min(nr_inactive, | 876 | nr_to_scan = min(nr_inactive, |
| 1366 | (unsigned long)sc->swap_cluster_max); | 877 | (unsigned long)sc->swap_cluster_max); |
| 1367 | nr_inactive -= sc->nr_to_scan; | 878 | nr_inactive -= nr_to_scan; |
| 1368 | shrink_cache(zone, sc); | 879 | nr_reclaimed += shrink_inactive_list(nr_to_scan, zone, |
| 880 | sc); | ||
| 1369 | } | 881 | } |
| 1370 | } | 882 | } |
| 1371 | 883 | ||
| 1372 | throttle_vm_writeout(); | 884 | throttle_vm_writeout(); |
| 1373 | 885 | ||
| 1374 | atomic_dec(&zone->reclaim_in_progress); | 886 | atomic_dec(&zone->reclaim_in_progress); |
| 887 | return nr_reclaimed; | ||
| 1375 | } | 888 | } |
| 1376 | 889 | ||
| 1377 | /* | 890 | /* |
| @@ -1390,9 +903,10 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
| 1390 | * If a zone is deemed to be full of pinned pages then just give it a light | 903 | * If a zone is deemed to be full of pinned pages then just give it a light |
| 1391 | * scan then give up on it. | 904 | * scan then give up on it. |
| 1392 | */ | 905 | */ |
| 1393 | static void | 906 | static unsigned long shrink_zones(int priority, struct zone **zones, |
| 1394 | shrink_caches(struct zone **zones, struct scan_control *sc) | 907 | struct scan_control *sc) |
| 1395 | { | 908 | { |
| 909 | unsigned long nr_reclaimed = 0; | ||
| 1396 | int i; | 910 | int i; |
| 1397 | 911 | ||
| 1398 | for (i = 0; zones[i] != NULL; i++) { | 912 | for (i = 0; zones[i] != NULL; i++) { |
| @@ -1404,15 +918,16 @@ shrink_caches(struct zone **zones, struct scan_control *sc) | |||
| 1404 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 918 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
| 1405 | continue; | 919 | continue; |
| 1406 | 920 | ||
| 1407 | zone->temp_priority = sc->priority; | 921 | zone->temp_priority = priority; |
| 1408 | if (zone->prev_priority > sc->priority) | 922 | if (zone->prev_priority > priority) |
| 1409 | zone->prev_priority = sc->priority; | 923 | zone->prev_priority = priority; |
| 1410 | 924 | ||
| 1411 | if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY) | 925 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
| 1412 | continue; /* Let kswapd poll it */ | 926 | continue; /* Let kswapd poll it */ |
| 1413 | 927 | ||
| 1414 | shrink_zone(zone, sc); | 928 | nr_reclaimed += shrink_zone(priority, zone, sc); |
| 1415 | } | 929 | } |
| 930 | return nr_reclaimed; | ||
| 1416 | } | 931 | } |
| 1417 | 932 | ||
| 1418 | /* | 933 | /* |
| @@ -1428,19 +943,21 @@ shrink_caches(struct zone **zones, struct scan_control *sc) | |||
| 1428 | * holds filesystem locks which prevent writeout this might not work, and the | 943 | * holds filesystem locks which prevent writeout this might not work, and the |
| 1429 | * allocation attempt will fail. | 944 | * allocation attempt will fail. |
| 1430 | */ | 945 | */ |
| 1431 | int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | 946 | unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) |
| 1432 | { | 947 | { |
| 1433 | int priority; | 948 | int priority; |
| 1434 | int ret = 0; | 949 | int ret = 0; |
| 1435 | int total_scanned = 0, total_reclaimed = 0; | 950 | unsigned long total_scanned = 0; |
| 951 | unsigned long nr_reclaimed = 0; | ||
| 1436 | struct reclaim_state *reclaim_state = current->reclaim_state; | 952 | struct reclaim_state *reclaim_state = current->reclaim_state; |
| 1437 | struct scan_control sc; | ||
| 1438 | unsigned long lru_pages = 0; | 953 | unsigned long lru_pages = 0; |
| 1439 | int i; | 954 | int i; |
| 1440 | 955 | struct scan_control sc = { | |
| 1441 | sc.gfp_mask = gfp_mask; | 956 | .gfp_mask = gfp_mask, |
| 1442 | sc.may_writepage = !laptop_mode; | 957 | .may_writepage = !laptop_mode, |
| 1443 | sc.may_swap = 1; | 958 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
| 959 | .may_swap = 1, | ||
| 960 | }; | ||
| 1444 | 961 | ||
| 1445 | inc_page_state(allocstall); | 962 | inc_page_state(allocstall); |
| 1446 | 963 | ||
| @@ -1457,20 +974,16 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
| 1457 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 974 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
| 1458 | sc.nr_mapped = read_page_state(nr_mapped); | 975 | sc.nr_mapped = read_page_state(nr_mapped); |
| 1459 | sc.nr_scanned = 0; | 976 | sc.nr_scanned = 0; |
| 1460 | sc.nr_reclaimed = 0; | ||
| 1461 | sc.priority = priority; | ||
| 1462 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | ||
| 1463 | if (!priority) | 977 | if (!priority) |
| 1464 | disable_swap_token(); | 978 | disable_swap_token(); |
| 1465 | shrink_caches(zones, &sc); | 979 | nr_reclaimed += shrink_zones(priority, zones, &sc); |
| 1466 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); | 980 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); |
| 1467 | if (reclaim_state) { | 981 | if (reclaim_state) { |
| 1468 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | 982 | nr_reclaimed += reclaim_state->reclaimed_slab; |
| 1469 | reclaim_state->reclaimed_slab = 0; | 983 | reclaim_state->reclaimed_slab = 0; |
| 1470 | } | 984 | } |
| 1471 | total_scanned += sc.nr_scanned; | 985 | total_scanned += sc.nr_scanned; |
| 1472 | total_reclaimed += sc.nr_reclaimed; | 986 | if (nr_reclaimed >= sc.swap_cluster_max) { |
| 1473 | if (total_reclaimed >= sc.swap_cluster_max) { | ||
| 1474 | ret = 1; | 987 | ret = 1; |
| 1475 | goto out; | 988 | goto out; |
| 1476 | } | 989 | } |
| @@ -1482,7 +995,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
| 1482 | * that's undesirable in laptop mode, where we *want* lumpy | 995 | * that's undesirable in laptop mode, where we *want* lumpy |
| 1483 | * writeout. So in laptop mode, write out the whole world. | 996 | * writeout. So in laptop mode, write out the whole world. |
| 1484 | */ | 997 | */ |
| 1485 | if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) { | 998 | if (total_scanned > sc.swap_cluster_max + |
| 999 | sc.swap_cluster_max / 2) { | ||
| 1486 | wakeup_pdflush(laptop_mode ? 0 : total_scanned); | 1000 | wakeup_pdflush(laptop_mode ? 0 : total_scanned); |
| 1487 | sc.may_writepage = 1; | 1001 | sc.may_writepage = 1; |
| 1488 | } | 1002 | } |
| @@ -1528,22 +1042,26 @@ out: | |||
| 1528 | * the page allocator fallback scheme to ensure that aging of pages is balanced | 1042 | * the page allocator fallback scheme to ensure that aging of pages is balanced |
| 1529 | * across the zones. | 1043 | * across the zones. |
| 1530 | */ | 1044 | */ |
| 1531 | static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order) | 1045 | static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages, |
| 1046 | int order) | ||
| 1532 | { | 1047 | { |
| 1533 | int to_free = nr_pages; | 1048 | unsigned long to_free = nr_pages; |
| 1534 | int all_zones_ok; | 1049 | int all_zones_ok; |
| 1535 | int priority; | 1050 | int priority; |
| 1536 | int i; | 1051 | int i; |
| 1537 | int total_scanned, total_reclaimed; | 1052 | unsigned long total_scanned; |
| 1053 | unsigned long nr_reclaimed; | ||
| 1538 | struct reclaim_state *reclaim_state = current->reclaim_state; | 1054 | struct reclaim_state *reclaim_state = current->reclaim_state; |
| 1539 | struct scan_control sc; | 1055 | struct scan_control sc = { |
| 1056 | .gfp_mask = GFP_KERNEL, | ||
| 1057 | .may_swap = 1, | ||
| 1058 | .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX, | ||
| 1059 | }; | ||
| 1540 | 1060 | ||
| 1541 | loop_again: | 1061 | loop_again: |
| 1542 | total_scanned = 0; | 1062 | total_scanned = 0; |
| 1543 | total_reclaimed = 0; | 1063 | nr_reclaimed = 0; |
| 1544 | sc.gfp_mask = GFP_KERNEL; | 1064 | sc.may_writepage = !laptop_mode, |
| 1545 | sc.may_writepage = !laptop_mode; | ||
| 1546 | sc.may_swap = 1; | ||
| 1547 | sc.nr_mapped = read_page_state(nr_mapped); | 1065 | sc.nr_mapped = read_page_state(nr_mapped); |
| 1548 | 1066 | ||
| 1549 | inc_page_state(pageoutrun); | 1067 | inc_page_state(pageoutrun); |
| @@ -1624,15 +1142,11 @@ scan: | |||
| 1624 | if (zone->prev_priority > priority) | 1142 | if (zone->prev_priority > priority) |
| 1625 | zone->prev_priority = priority; | 1143 | zone->prev_priority = priority; |
| 1626 | sc.nr_scanned = 0; | 1144 | sc.nr_scanned = 0; |
| 1627 | sc.nr_reclaimed = 0; | 1145 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
| 1628 | sc.priority = priority; | ||
| 1629 | sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX; | ||
| 1630 | shrink_zone(zone, &sc); | ||
| 1631 | reclaim_state->reclaimed_slab = 0; | 1146 | reclaim_state->reclaimed_slab = 0; |
| 1632 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, | 1147 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, |
| 1633 | lru_pages); | 1148 | lru_pages); |
| 1634 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | 1149 | nr_reclaimed += reclaim_state->reclaimed_slab; |
| 1635 | total_reclaimed += sc.nr_reclaimed; | ||
| 1636 | total_scanned += sc.nr_scanned; | 1150 | total_scanned += sc.nr_scanned; |
| 1637 | if (zone->all_unreclaimable) | 1151 | if (zone->all_unreclaimable) |
| 1638 | continue; | 1152 | continue; |
| @@ -1645,10 +1159,10 @@ scan: | |||
| 1645 | * even in laptop mode | 1159 | * even in laptop mode |
| 1646 | */ | 1160 | */ |
| 1647 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && | 1161 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && |
| 1648 | total_scanned > total_reclaimed+total_reclaimed/2) | 1162 | total_scanned > nr_reclaimed + nr_reclaimed / 2) |
| 1649 | sc.may_writepage = 1; | 1163 | sc.may_writepage = 1; |
| 1650 | } | 1164 | } |
| 1651 | if (nr_pages && to_free > total_reclaimed) | 1165 | if (nr_pages && to_free > nr_reclaimed) |
| 1652 | continue; /* swsusp: need to do more work */ | 1166 | continue; /* swsusp: need to do more work */ |
| 1653 | if (all_zones_ok) | 1167 | if (all_zones_ok) |
| 1654 | break; /* kswapd: all done */ | 1168 | break; /* kswapd: all done */ |
| @@ -1665,7 +1179,7 @@ scan: | |||
| 1665 | * matches the direct reclaim path behaviour in terms of impact | 1179 | * matches the direct reclaim path behaviour in terms of impact |
| 1666 | * on zone->*_priority. | 1180 | * on zone->*_priority. |
| 1667 | */ | 1181 | */ |
| 1668 | if ((total_reclaimed >= SWAP_CLUSTER_MAX) && (!nr_pages)) | 1182 | if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages) |
| 1669 | break; | 1183 | break; |
| 1670 | } | 1184 | } |
| 1671 | out: | 1185 | out: |
| @@ -1679,7 +1193,7 @@ out: | |||
| 1679 | goto loop_again; | 1193 | goto loop_again; |
| 1680 | } | 1194 | } |
| 1681 | 1195 | ||
| 1682 | return total_reclaimed; | 1196 | return nr_reclaimed; |
| 1683 | } | 1197 | } |
| 1684 | 1198 | ||
| 1685 | /* | 1199 | /* |
| @@ -1779,24 +1293,31 @@ void wakeup_kswapd(struct zone *zone, int order) | |||
| 1779 | * Try to free `nr_pages' of memory, system-wide. Returns the number of freed | 1293 | * Try to free `nr_pages' of memory, system-wide. Returns the number of freed |
| 1780 | * pages. | 1294 | * pages. |
| 1781 | */ | 1295 | */ |
| 1782 | int shrink_all_memory(int nr_pages) | 1296 | unsigned long shrink_all_memory(unsigned long nr_pages) |
| 1783 | { | 1297 | { |
| 1784 | pg_data_t *pgdat; | 1298 | pg_data_t *pgdat; |
| 1785 | int nr_to_free = nr_pages; | 1299 | unsigned long nr_to_free = nr_pages; |
| 1786 | int ret = 0; | 1300 | unsigned long ret = 0; |
| 1301 | unsigned retry = 2; | ||
| 1787 | struct reclaim_state reclaim_state = { | 1302 | struct reclaim_state reclaim_state = { |
| 1788 | .reclaimed_slab = 0, | 1303 | .reclaimed_slab = 0, |
| 1789 | }; | 1304 | }; |
| 1790 | 1305 | ||
| 1791 | current->reclaim_state = &reclaim_state; | 1306 | current->reclaim_state = &reclaim_state; |
| 1307 | repeat: | ||
| 1792 | for_each_pgdat(pgdat) { | 1308 | for_each_pgdat(pgdat) { |
| 1793 | int freed; | 1309 | unsigned long freed; |
| 1310 | |||
| 1794 | freed = balance_pgdat(pgdat, nr_to_free, 0); | 1311 | freed = balance_pgdat(pgdat, nr_to_free, 0); |
| 1795 | ret += freed; | 1312 | ret += freed; |
| 1796 | nr_to_free -= freed; | 1313 | nr_to_free -= freed; |
| 1797 | if (nr_to_free <= 0) | 1314 | if ((long)nr_to_free <= 0) |
| 1798 | break; | 1315 | break; |
| 1799 | } | 1316 | } |
| 1317 | if (retry-- && ret < nr_pages) { | ||
| 1318 | blk_congestion_wait(WRITE, HZ/5); | ||
| 1319 | goto repeat; | ||
| 1320 | } | ||
| 1800 | current->reclaim_state = NULL; | 1321 | current->reclaim_state = NULL; |
| 1801 | return ret; | 1322 | return ret; |
| 1802 | } | 1323 | } |
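Note on the shrink_all_memory() hunk above: it adds a bounded retry, so that if the first sweep over the nodes frees fewer pages than requested, the function waits briefly for writeback congestion to clear (blk_congestion_wait(WRITE, HZ/5)) and sweeps again, at most twice. A rough standalone restatement of that control flow, with every name invented for illustration (this is not the kernel code):

#include <stdio.h>
#include <unistd.h>

/* stand-in for balance_pgdat(): pretend each pass frees half of the shortfall */
static unsigned long reclaim_pass(unsigned long wanted)
{
	return wanted / 2;
}

static unsigned long shrink_all(unsigned long nr_pages)
{
	unsigned long freed = 0;
	int retry = 2;                      /* same retry budget as the patch */

	for (;;) {
		freed += reclaim_pass(nr_pages - freed);
		if (freed >= nr_pages || retry-- == 0)
			break;
		usleep(200 * 1000);         /* stands in for blk_congestion_wait(WRITE, HZ/5) */
	}
	return freed;
}

int main(void)
{
	printf("freed %lu of 100 requested pages\n", shrink_all(100));
	return 0;
}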
| @@ -1808,8 +1329,7 @@ int shrink_all_memory(int nr_pages) | |||
| 1808 | away, we get changed to run anywhere: as the first one comes back, | 1329 | away, we get changed to run anywhere: as the first one comes back, |
| 1809 | restore their cpu bindings. */ | 1330 | restore their cpu bindings. */ |
| 1810 | static int __devinit cpu_callback(struct notifier_block *nfb, | 1331 | static int __devinit cpu_callback(struct notifier_block *nfb, |
| 1811 | unsigned long action, | 1332 | unsigned long action, void *hcpu) |
| 1812 | void *hcpu) | ||
| 1813 | { | 1333 | { |
| 1814 | pg_data_t *pgdat; | 1334 | pg_data_t *pgdat; |
| 1815 | cpumask_t mask; | 1335 | cpumask_t mask; |
| @@ -1829,10 +1349,15 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
| 1829 | static int __init kswapd_init(void) | 1349 | static int __init kswapd_init(void) |
| 1830 | { | 1350 | { |
| 1831 | pg_data_t *pgdat; | 1351 | pg_data_t *pgdat; |
| 1352 | |||
| 1832 | swap_setup(); | 1353 | swap_setup(); |
| 1833 | for_each_pgdat(pgdat) | 1354 | for_each_pgdat(pgdat) { |
| 1834 | pgdat->kswapd | 1355 | pid_t pid; |
| 1835 | = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); | 1356 | |
| 1357 | pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); | ||
| 1358 | BUG_ON(pid < 0); | ||
| 1359 | pgdat->kswapd = find_task_by_pid(pid); | ||
| 1360 | } | ||
| 1836 | total_memory = nr_free_pagecache_pages(); | 1361 | total_memory = nr_free_pagecache_pages(); |
| 1837 | hotcpu_notifier(cpu_callback, 0); | 1362 | hotcpu_notifier(cpu_callback, 0); |
| 1838 | return 0; | 1363 | return 0; |
| @@ -1874,46 +1399,24 @@ int zone_reclaim_interval __read_mostly = 30*HZ; | |||
| 1874 | /* | 1399 | /* |
| 1875 | * Try to free up some pages from this zone through reclaim. | 1400 | * Try to free up some pages from this zone through reclaim. |
| 1876 | */ | 1401 | */ |
| 1877 | int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | 1402 | static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) |
| 1878 | { | 1403 | { |
| 1879 | int nr_pages; | 1404 | /* Minimum pages needed in order to stay on node */ |
| 1405 | const unsigned long nr_pages = 1 << order; | ||
| 1880 | struct task_struct *p = current; | 1406 | struct task_struct *p = current; |
| 1881 | struct reclaim_state reclaim_state; | 1407 | struct reclaim_state reclaim_state; |
| 1882 | struct scan_control sc; | 1408 | int priority; |
| 1883 | cpumask_t mask; | 1409 | unsigned long nr_reclaimed = 0; |
| 1884 | int node_id; | 1410 | struct scan_control sc = { |
| 1885 | 1411 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), | |
| 1886 | if (time_before(jiffies, | 1412 | .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), |
| 1887 | zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) | 1413 | .nr_mapped = read_page_state(nr_mapped), |
| 1888 | return 0; | 1414 | .swap_cluster_max = max_t(unsigned long, nr_pages, |
| 1889 | 1415 | SWAP_CLUSTER_MAX), | |
| 1890 | if (!(gfp_mask & __GFP_WAIT) || | 1416 | .gfp_mask = gfp_mask, |
| 1891 | zone->all_unreclaimable || | 1417 | }; |
| 1892 | atomic_read(&zone->reclaim_in_progress) > 0 || | ||
| 1893 | (p->flags & PF_MEMALLOC)) | ||
| 1894 | return 0; | ||
| 1895 | |||
| 1896 | node_id = zone->zone_pgdat->node_id; | ||
| 1897 | mask = node_to_cpumask(node_id); | ||
| 1898 | if (!cpus_empty(mask) && node_id != numa_node_id()) | ||
| 1899 | return 0; | ||
| 1900 | |||
| 1901 | sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE); | ||
| 1902 | sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP); | ||
| 1903 | sc.nr_scanned = 0; | ||
| 1904 | sc.nr_reclaimed = 0; | ||
| 1905 | sc.priority = ZONE_RECLAIM_PRIORITY + 1; | ||
| 1906 | sc.nr_mapped = read_page_state(nr_mapped); | ||
| 1907 | sc.gfp_mask = gfp_mask; | ||
| 1908 | 1418 | ||
| 1909 | disable_swap_token(); | 1419 | disable_swap_token(); |
| 1910 | |||
| 1911 | nr_pages = 1 << order; | ||
| 1912 | if (nr_pages > SWAP_CLUSTER_MAX) | ||
| 1913 | sc.swap_cluster_max = nr_pages; | ||
| 1914 | else | ||
| 1915 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | ||
| 1916 | |||
| 1917 | cond_resched(); | 1420 | cond_resched(); |
| 1918 | /* | 1421 | /* |
| 1919 | * We need to be able to allocate from the reserves for RECLAIM_SWAP | 1422 | * We need to be able to allocate from the reserves for RECLAIM_SWAP |
| @@ -1928,17 +1431,20 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 1928 | * Free memory by calling shrink zone with increasing priorities | 1431 | * Free memory by calling shrink zone with increasing priorities |
| 1929 | * until we have enough memory freed. | 1432 | * until we have enough memory freed. |
| 1930 | */ | 1433 | */ |
| 1434 | priority = ZONE_RECLAIM_PRIORITY; | ||
| 1931 | do { | 1435 | do { |
| 1932 | sc.priority--; | 1436 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
| 1933 | shrink_zone(zone, &sc); | 1437 | priority--; |
| 1438 | } while (priority >= 0 && nr_reclaimed < nr_pages); | ||
| 1934 | 1439 | ||
| 1935 | } while (sc.nr_reclaimed < nr_pages && sc.priority > 0); | 1440 | if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { |
| 1936 | |||
| 1937 | if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { | ||
| 1938 | /* | 1441 | /* |
| 1939 | * shrink_slab does not currently allow us to determine | 1442 | * shrink_slab() does not currently allow us to determine how |
| 1940 | * how many pages were freed in the zone. So we just | 1443 | * many pages were freed in this zone. So we just shake the slab |
| 1941 | * shake the slab and then go offnode for a single allocation. | 1444 | * a bit and then go off node for this particular allocation |
| 1445 | * despite possibly having freed enough memory to allocate in | ||
| 1446 | * this zone. If we freed local memory then the next | ||
| 1447 | * allocations will be local again. | ||
| 1942 | * | 1448 | * |
| 1943 | * shrink_slab will free memory on all zones and may take | 1449 | * shrink_slab will free memory on all zones and may take |
| 1944 | * a long time. | 1450 | * a long time. |
| @@ -1949,10 +1455,54 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 1949 | p->reclaim_state = NULL; | 1455 | p->reclaim_state = NULL; |
| 1950 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); | 1456 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); |
| 1951 | 1457 | ||
| 1952 | if (sc.nr_reclaimed == 0) | 1458 | if (nr_reclaimed == 0) { |
| 1459 | /* | ||
| 1460 | * We were unable to reclaim enough pages to stay on node. We | ||
| 1461 | * now allow off node accesses for a certain time period before | ||
| 1462 | * trying again to reclaim pages from the local zone. | ||
| 1463 | */ | ||
| 1953 | zone->last_unsuccessful_zone_reclaim = jiffies; | 1464 | zone->last_unsuccessful_zone_reclaim = jiffies; |
| 1465 | } | ||
| 1954 | 1466 | ||
| 1955 | return sc.nr_reclaimed >= nr_pages; | 1467 | return nr_reclaimed >= nr_pages; |
| 1956 | } | 1468 | } |
| 1957 | #endif | ||
| 1958 | 1469 | ||
| 1470 | int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | ||
| 1471 | { | ||
| 1472 | cpumask_t mask; | ||
| 1473 | int node_id; | ||
| 1474 | |||
| 1475 | /* | ||
| 1476 | * Do not reclaim if there was a recent unsuccessful attempt at zone | ||
| 1477 | * reclaim. In that case we let allocations go off node for the | ||
| 1478 | * zone_reclaim_interval. Otherwise we would scan for each off-node | ||
| 1479 | * page allocation. | ||
| 1480 | */ | ||
| 1481 | if (time_before(jiffies, | ||
| 1482 | zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) | ||
| 1483 | return 0; | ||
| 1484 | |||
| 1485 | /* | ||
| 1486 | * Avoid concurrent zone reclaims, do not reclaim in a zone that does | ||
| 1487 | * not have reclaimable pages and if we should not delay the allocation | ||
| 1488 | * then do not scan. | ||
| 1489 | */ | ||
| 1490 | if (!(gfp_mask & __GFP_WAIT) || | ||
| 1491 | zone->all_unreclaimable || | ||
| 1492 | atomic_read(&zone->reclaim_in_progress) > 0 || | ||
| 1493 | (current->flags & PF_MEMALLOC)) | ||
| 1494 | return 0; | ||
| 1495 | |||
| 1496 | /* | ||
| 1497 | * Only run zone reclaim on the local zone or on zones that do not | ||
| 1498 | * have associated processors. This will favor the local processor | ||
| 1499 | * over remote processors and spread off node memory allocations | ||
| 1500 | * as wide as possible. | ||
| 1501 | */ | ||
| 1502 | node_id = zone->zone_pgdat->node_id; | ||
| 1503 | mask = node_to_cpumask(node_id); | ||
| 1504 | if (!cpus_empty(mask) && node_id != numa_node_id()) | ||
| 1505 | return 0; | ||
| 1506 | return __zone_reclaim(zone, gfp_mask, order); | ||
| 1507 | } | ||
| 1508 | #endif | ||
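Taken together, the mm/vmscan.c hunks above move the per-call reclaim state (priority, nr_to_scan, the reclaimed-page count) out of struct scan_control and into parameters, locals and return values, leaving the struct to carry only per-invocation settings built with designated initializers. A minimal standalone C sketch of that shape, with invented names and a trivial body standing in for the real reclaim work:

#include <stdio.h>

struct scan_control {                    /* per-invocation knobs only */
	unsigned long swap_cluster_max;
	int may_swap;
};

/* before: void shrink(struct scan_control *sc), reading sc->priority and
 * accumulating into sc->nr_reclaimed; after: priority is a parameter and
 * the page count is the return value */
static unsigned long shrink(int priority, const struct scan_control *sc)
{
	unsigned long nr_to_scan = (sc->swap_cluster_max >> priority) + 1;

	return nr_to_scan;               /* stand-in for the real reclaim work */
}

int main(void)
{
	struct scan_control sc = {       /* designated initializers, as in the patch */
		.swap_cluster_max = 32,
		.may_swap = 1,
	};
	unsigned long nr_reclaimed = 0;
	int priority;

	for (priority = 3; priority >= 0; priority--)
		nr_reclaimed += shrink(priority, &sc);

	printf("reclaimed %lu pages\n", nr_reclaimed);
	return 0;
}

Keeping the transient counters out of the shared struct makes each call's result explicit at the call site, which is what lets the callers above simply accumulate the return values.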
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 74cb79eb917e..f6940618e345 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c | |||
| @@ -16,11 +16,12 @@ | |||
| 16 | #include <linux/keyctl.h> | 16 | #include <linux/keyctl.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
| 19 | #include <linux/mutex.h> | ||
| 19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
| 20 | #include "internal.h" | 21 | #include "internal.h" |
| 21 | 22 | ||
| 22 | /* session keyring create vs join semaphore */ | 23 | /* session keyring create vs join semaphore */ |
| 23 | static DECLARE_MUTEX(key_session_sem); | 24 | static DEFINE_MUTEX(key_session_mutex); |
| 24 | 25 | ||
| 25 | /* the root user's tracking struct */ | 26 | /* the root user's tracking struct */ |
| 26 | struct key_user root_key_user = { | 27 | struct key_user root_key_user = { |
| @@ -711,7 +712,7 @@ long join_session_keyring(const char *name) | |||
| 711 | } | 712 | } |
| 712 | 713 | ||
| 713 | /* allow the user to join or create a named keyring */ | 714 | /* allow the user to join or create a named keyring */ |
| 714 | down(&key_session_sem); | 715 | mutex_lock(&key_session_mutex); |
| 715 | 716 | ||
| 716 | /* look for an existing keyring of this name */ | 717 | /* look for an existing keyring of this name */ |
| 717 | keyring = find_keyring_by_name(name, 0); | 718 | keyring = find_keyring_by_name(name, 0); |
| @@ -737,7 +738,7 @@ long join_session_keyring(const char *name) | |||
| 737 | key_put(keyring); | 738 | key_put(keyring); |
| 738 | 739 | ||
| 739 | error2: | 740 | error2: |
| 740 | up(&key_session_sem); | 741 | mutex_unlock(&key_session_mutex); |
| 741 | error: | 742 | error: |
| 742 | return ret; | 743 | return ret; |
| 743 | 744 | ||
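The process_keys.c change above is one instance of a pattern repeated through this patch set: a semaphore that was only ever used as a binary lock becomes a struct mutex. A schematic of the conversion with made-up identifiers (not the actual keyring code); the mutex calls shown are the ones the patch itself uses:

#include <linux/mutex.h>

/* was: static DECLARE_MUTEX(example_sem);  -- a semaphore initialised to 1 */
static DEFINE_MUTEX(example_mutex);

static void touch_shared_state(void)
{
	mutex_lock(&example_mutex);      /* was: down(&example_sem); */
	/* ... create or join the shared object ... */
	mutex_unlock(&example_mutex);    /* was: up(&example_sem);   */
}

The mutex provides the same exclusion but with stricter semantics (the task that locked must unlock) and better debug instrumentation than a counting semaphore pressed into service as a lock.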
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b16196f2823..ccaf988f3729 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -117,6 +117,8 @@ static struct security_operations *secondary_ops = NULL; | |||
| 117 | static LIST_HEAD(superblock_security_head); | 117 | static LIST_HEAD(superblock_security_head); |
| 118 | static DEFINE_SPINLOCK(sb_security_lock); | 118 | static DEFINE_SPINLOCK(sb_security_lock); |
| 119 | 119 | ||
| 120 | static kmem_cache_t *sel_inode_cache; | ||
| 121 | |||
| 120 | /* Allocate and free functions for each kind of security blob. */ | 122 | /* Allocate and free functions for each kind of security blob. */ |
| 121 | 123 | ||
| 122 | static int task_alloc_security(struct task_struct *task) | 124 | static int task_alloc_security(struct task_struct *task) |
| @@ -146,10 +148,11 @@ static int inode_alloc_security(struct inode *inode) | |||
| 146 | struct task_security_struct *tsec = current->security; | 148 | struct task_security_struct *tsec = current->security; |
| 147 | struct inode_security_struct *isec; | 149 | struct inode_security_struct *isec; |
| 148 | 150 | ||
| 149 | isec = kzalloc(sizeof(struct inode_security_struct), GFP_KERNEL); | 151 | isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL); |
| 150 | if (!isec) | 152 | if (!isec) |
| 151 | return -ENOMEM; | 153 | return -ENOMEM; |
| 152 | 154 | ||
| 155 | memset(isec, 0, sizeof(*isec)); | ||
| 153 | init_MUTEX(&isec->sem); | 156 | init_MUTEX(&isec->sem); |
| 154 | INIT_LIST_HEAD(&isec->list); | 157 | INIT_LIST_HEAD(&isec->list); |
| 155 | isec->inode = inode; | 158 | isec->inode = inode; |
| @@ -172,7 +175,7 @@ static void inode_free_security(struct inode *inode) | |||
| 172 | spin_unlock(&sbsec->isec_lock); | 175 | spin_unlock(&sbsec->isec_lock); |
| 173 | 176 | ||
| 174 | inode->i_security = NULL; | 177 | inode->i_security = NULL; |
| 175 | kfree(isec); | 178 | kmem_cache_free(sel_inode_cache, isec); |
| 176 | } | 179 | } |
| 177 | 180 | ||
| 178 | static int file_alloc_security(struct file *file) | 181 | static int file_alloc_security(struct file *file) |
| @@ -1929,7 +1932,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
| 1929 | struct task_security_struct *tsec; | 1932 | struct task_security_struct *tsec; |
| 1930 | struct inode_security_struct *dsec; | 1933 | struct inode_security_struct *dsec; |
| 1931 | struct superblock_security_struct *sbsec; | 1934 | struct superblock_security_struct *sbsec; |
| 1932 | struct inode_security_struct *isec; | ||
| 1933 | u32 newsid, clen; | 1935 | u32 newsid, clen; |
| 1934 | int rc; | 1936 | int rc; |
| 1935 | char *namep = NULL, *context; | 1937 | char *namep = NULL, *context; |
| @@ -1937,7 +1939,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
| 1937 | tsec = current->security; | 1939 | tsec = current->security; |
| 1938 | dsec = dir->i_security; | 1940 | dsec = dir->i_security; |
| 1939 | sbsec = dir->i_sb->s_security; | 1941 | sbsec = dir->i_sb->s_security; |
| 1940 | isec = inode->i_security; | ||
| 1941 | 1942 | ||
| 1942 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { | 1943 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { |
| 1943 | newsid = tsec->create_sid; | 1944 | newsid = tsec->create_sid; |
| @@ -1957,7 +1958,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | |||
| 1957 | 1958 | ||
| 1958 | inode_security_set_sid(inode, newsid); | 1959 | inode_security_set_sid(inode, newsid); |
| 1959 | 1960 | ||
| 1960 | if (sbsec->behavior == SECURITY_FS_USE_MNTPOINT) | 1961 | if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT) |
| 1961 | return -EOPNOTSUPP; | 1962 | return -EOPNOTSUPP; |
| 1962 | 1963 | ||
| 1963 | if (name) { | 1964 | if (name) { |
| @@ -4408,6 +4409,9 @@ static __init int selinux_init(void) | |||
| 4408 | tsec = current->security; | 4409 | tsec = current->security; |
| 4409 | tsec->osid = tsec->sid = SECINITSID_KERNEL; | 4410 | tsec->osid = tsec->sid = SECINITSID_KERNEL; |
| 4410 | 4411 | ||
| 4412 | sel_inode_cache = kmem_cache_create("selinux_inode_security", | ||
| 4413 | sizeof(struct inode_security_struct), | ||
| 4414 | 0, SLAB_PANIC, NULL, NULL); | ||
| 4411 | avc_init(); | 4415 | avc_init(); |
| 4412 | 4416 | ||
| 4413 | original_ops = secondary_ops = security_ops; | 4417 | original_ops = secondary_ops = security_ops; |
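The hooks.c hunks above replace kzalloc()/kfree() of inode security blobs with a dedicated slab cache created once at initialisation. Below is a schematic of that allocate-from-a-named-cache pattern, using the same era's six-argument kmem_cache_create() that appears in the diff; all identifiers are illustrative, not the SELinux ones:

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>

struct blob {
	int field;
};

static kmem_cache_t *blob_cache;

static int __init blob_cache_init(void)
{
	/* SLAB_PANIC: boot fails loudly instead of this returning NULL */
	blob_cache = kmem_cache_create("example_blob", sizeof(struct blob),
				       0, SLAB_PANIC, NULL, NULL);
	return 0;
}

static struct blob *blob_alloc(void)
{
	struct blob *b = kmem_cache_alloc(blob_cache, SLAB_KERNEL);

	if (!b)
		return NULL;
	memset(b, 0, sizeof(*b));        /* cache objects are not pre-zeroed */
	return b;
}

static void blob_free(struct blob *b)
{
	kmem_cache_free(blob_cache, b);
}

A named cache makes the per-object memory visible in /proc/slabinfo and avoids the size-class rounding of the generic kmalloc pools for a frequently allocated structure.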
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b5fa02d17b1e..f5d78365488f 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/mutex.h> | ||
| 18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
| 19 | #include <linux/string.h> | 20 | #include <linux/string.h> |
| 20 | #include <linux/security.h> | 21 | #include <linux/security.h> |
| @@ -44,7 +45,7 @@ static int __init checkreqprot_setup(char *str) | |||
| 44 | __setup("checkreqprot=", checkreqprot_setup); | 45 | __setup("checkreqprot=", checkreqprot_setup); |
| 45 | 46 | ||
| 46 | 47 | ||
| 47 | static DECLARE_MUTEX(sel_sem); | 48 | static DEFINE_MUTEX(sel_mutex); |
| 48 | 49 | ||
| 49 | /* global data for booleans */ | 50 | /* global data for booleans */ |
| 50 | static struct dentry *bool_dir = NULL; | 51 | static struct dentry *bool_dir = NULL; |
| @@ -230,7 +231,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf, | |||
| 230 | ssize_t length; | 231 | ssize_t length; |
| 231 | void *data = NULL; | 232 | void *data = NULL; |
| 232 | 233 | ||
| 233 | down(&sel_sem); | 234 | mutex_lock(&sel_mutex); |
| 234 | 235 | ||
| 235 | length = task_has_security(current, SECURITY__LOAD_POLICY); | 236 | length = task_has_security(current, SECURITY__LOAD_POLICY); |
| 236 | if (length) | 237 | if (length) |
| @@ -262,7 +263,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf, | |||
| 262 | else | 263 | else |
| 263 | length = count; | 264 | length = count; |
| 264 | out: | 265 | out: |
| 265 | up(&sel_sem); | 266 | mutex_unlock(&sel_mutex); |
| 266 | vfree(data); | 267 | vfree(data); |
| 267 | return length; | 268 | return length; |
| 268 | } | 269 | } |
| @@ -709,12 +710,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, | |||
| 709 | { | 710 | { |
| 710 | char *page = NULL; | 711 | char *page = NULL; |
| 711 | ssize_t length; | 712 | ssize_t length; |
| 712 | ssize_t end; | ||
| 713 | ssize_t ret; | 713 | ssize_t ret; |
| 714 | int cur_enforcing; | 714 | int cur_enforcing; |
| 715 | struct inode *inode; | 715 | struct inode *inode; |
| 716 | 716 | ||
| 717 | down(&sel_sem); | 717 | mutex_lock(&sel_mutex); |
| 718 | 718 | ||
| 719 | ret = -EFAULT; | 719 | ret = -EFAULT; |
| 720 | 720 | ||
| @@ -740,26 +740,9 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, | |||
| 740 | 740 | ||
| 741 | length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, | 741 | length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, |
| 742 | bool_pending_values[inode->i_ino - BOOL_INO_OFFSET]); | 742 | bool_pending_values[inode->i_ino - BOOL_INO_OFFSET]); |
| 743 | if (length < 0) { | 743 | ret = simple_read_from_buffer(buf, count, ppos, page, length); |
| 744 | ret = length; | ||
| 745 | goto out; | ||
| 746 | } | ||
| 747 | |||
| 748 | if (*ppos >= length) { | ||
| 749 | ret = 0; | ||
| 750 | goto out; | ||
| 751 | } | ||
| 752 | if (count + *ppos > length) | ||
| 753 | count = length - *ppos; | ||
| 754 | end = count + *ppos; | ||
| 755 | if (copy_to_user(buf, (char *) page + *ppos, count)) { | ||
| 756 | ret = -EFAULT; | ||
| 757 | goto out; | ||
| 758 | } | ||
| 759 | *ppos = end; | ||
| 760 | ret = count; | ||
| 761 | out: | 744 | out: |
| 762 | up(&sel_sem); | 745 | mutex_unlock(&sel_mutex); |
| 763 | if (page) | 746 | if (page) |
| 764 | free_page((unsigned long)page); | 747 | free_page((unsigned long)page); |
| 765 | return ret; | 748 | return ret; |
| @@ -773,7 +756,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, | |||
| 773 | int new_value; | 756 | int new_value; |
| 774 | struct inode *inode; | 757 | struct inode *inode; |
| 775 | 758 | ||
| 776 | down(&sel_sem); | 759 | mutex_lock(&sel_mutex); |
| 777 | 760 | ||
| 778 | length = task_has_security(current, SECURITY__SETBOOL); | 761 | length = task_has_security(current, SECURITY__SETBOOL); |
| 779 | if (length) | 762 | if (length) |
| @@ -812,7 +795,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, | |||
| 812 | length = count; | 795 | length = count; |
| 813 | 796 | ||
| 814 | out: | 797 | out: |
| 815 | up(&sel_sem); | 798 | mutex_unlock(&sel_mutex); |
| 816 | if (page) | 799 | if (page) |
| 817 | free_page((unsigned long) page); | 800 | free_page((unsigned long) page); |
| 818 | return length; | 801 | return length; |
| @@ -831,7 +814,7 @@ static ssize_t sel_commit_bools_write(struct file *filep, | |||
| 831 | ssize_t length = -EFAULT; | 814 | ssize_t length = -EFAULT; |
| 832 | int new_value; | 815 | int new_value; |
| 833 | 816 | ||
| 834 | down(&sel_sem); | 817 | mutex_lock(&sel_mutex); |
| 835 | 818 | ||
| 836 | length = task_has_security(current, SECURITY__SETBOOL); | 819 | length = task_has_security(current, SECURITY__SETBOOL); |
| 837 | if (length) | 820 | if (length) |
| @@ -869,7 +852,7 @@ static ssize_t sel_commit_bools_write(struct file *filep, | |||
| 869 | length = count; | 852 | length = count; |
| 870 | 853 | ||
| 871 | out: | 854 | out: |
| 872 | up(&sel_sem); | 855 | mutex_unlock(&sel_mutex); |
| 873 | if (page) | 856 | if (page) |
| 874 | free_page((unsigned long) page); | 857 | free_page((unsigned long) page); |
| 875 | return length; | 858 | return length; |
| @@ -987,7 +970,7 @@ out: | |||
| 987 | return ret; | 970 | return ret; |
| 988 | err: | 971 | err: |
| 989 | kfree(values); | 972 | kfree(values); |
| 990 | d_genocide(dir); | 973 | sel_remove_bools(dir); |
| 991 | ret = -ENOMEM; | 974 | ret = -ENOMEM; |
| 992 | goto out; | 975 | goto out; |
| 993 | } | 976 | } |
| @@ -1168,37 +1151,38 @@ static int sel_make_avc_files(struct dentry *dir) | |||
| 1168 | dentry = d_alloc_name(dir, files[i].name); | 1151 | dentry = d_alloc_name(dir, files[i].name); |
| 1169 | if (!dentry) { | 1152 | if (!dentry) { |
| 1170 | ret = -ENOMEM; | 1153 | ret = -ENOMEM; |
| 1171 | goto err; | 1154 | goto out; |
| 1172 | } | 1155 | } |
| 1173 | 1156 | ||
| 1174 | inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); | 1157 | inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); |
| 1175 | if (!inode) { | 1158 | if (!inode) { |
| 1176 | ret = -ENOMEM; | 1159 | ret = -ENOMEM; |
| 1177 | goto err; | 1160 | goto out; |
| 1178 | } | 1161 | } |
| 1179 | inode->i_fop = files[i].ops; | 1162 | inode->i_fop = files[i].ops; |
| 1180 | d_add(dentry, inode); | 1163 | d_add(dentry, inode); |
| 1181 | } | 1164 | } |
| 1182 | out: | 1165 | out: |
| 1183 | return ret; | 1166 | return ret; |
| 1184 | err: | ||
| 1185 | d_genocide(dir); | ||
| 1186 | goto out; | ||
| 1187 | } | 1167 | } |
| 1188 | 1168 | ||
| 1189 | static int sel_make_dir(struct super_block *sb, struct dentry *dentry) | 1169 | static int sel_make_dir(struct inode *dir, struct dentry *dentry) |
| 1190 | { | 1170 | { |
| 1191 | int ret = 0; | 1171 | int ret = 0; |
| 1192 | struct inode *inode; | 1172 | struct inode *inode; |
| 1193 | 1173 | ||
| 1194 | inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); | 1174 | inode = sel_make_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); |
| 1195 | if (!inode) { | 1175 | if (!inode) { |
| 1196 | ret = -ENOMEM; | 1176 | ret = -ENOMEM; |
| 1197 | goto out; | 1177 | goto out; |
| 1198 | } | 1178 | } |
| 1199 | inode->i_op = &simple_dir_inode_operations; | 1179 | inode->i_op = &simple_dir_inode_operations; |
| 1200 | inode->i_fop = &simple_dir_operations; | 1180 | inode->i_fop = &simple_dir_operations; |
| 1181 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | ||
| 1182 | inode->i_nlink++; | ||
| 1201 | d_add(dentry, inode); | 1183 | d_add(dentry, inode); |
| 1184 | /* bump link count on parent directory, too */ | ||
| 1185 | dir->i_nlink++; | ||
| 1202 | out: | 1186 | out: |
| 1203 | return ret; | 1187 | return ret; |
| 1204 | } | 1188 | } |
| @@ -1207,7 +1191,7 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
| 1207 | { | 1191 | { |
| 1208 | int ret; | 1192 | int ret; |
| 1209 | struct dentry *dentry; | 1193 | struct dentry *dentry; |
| 1210 | struct inode *inode; | 1194 | struct inode *inode, *root_inode; |
| 1211 | struct inode_security_struct *isec; | 1195 | struct inode_security_struct *isec; |
| 1212 | 1196 | ||
| 1213 | static struct tree_descr selinux_files[] = { | 1197 | static struct tree_descr selinux_files[] = { |
| @@ -1228,30 +1212,33 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
| 1228 | }; | 1212 | }; |
| 1229 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); | 1213 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); |
| 1230 | if (ret) | 1214 | if (ret) |
| 1231 | return ret; | 1215 | goto err; |
| 1216 | |||
| 1217 | root_inode = sb->s_root->d_inode; | ||
| 1232 | 1218 | ||
| 1233 | dentry = d_alloc_name(sb->s_root, BOOL_DIR_NAME); | 1219 | dentry = d_alloc_name(sb->s_root, BOOL_DIR_NAME); |
| 1234 | if (!dentry) | 1220 | if (!dentry) { |
| 1235 | return -ENOMEM; | 1221 | ret = -ENOMEM; |
| 1222 | goto err; | ||
| 1223 | } | ||
| 1236 | 1224 | ||
| 1237 | inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); | 1225 | ret = sel_make_dir(root_inode, dentry); |
| 1238 | if (!inode) | ||
| 1239 | goto out; | ||
| 1240 | inode->i_op = &simple_dir_inode_operations; | ||
| 1241 | inode->i_fop = &simple_dir_operations; | ||
| 1242 | d_add(dentry, inode); | ||
| 1243 | bool_dir = dentry; | ||
| 1244 | ret = sel_make_bools(); | ||
| 1245 | if (ret) | 1226 | if (ret) |
| 1246 | goto out; | 1227 | goto err; |
| 1228 | |||
| 1229 | bool_dir = dentry; | ||
| 1247 | 1230 | ||
| 1248 | dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME); | 1231 | dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME); |
| 1249 | if (!dentry) | 1232 | if (!dentry) { |
| 1250 | return -ENOMEM; | 1233 | ret = -ENOMEM; |
| 1234 | goto err; | ||
| 1235 | } | ||
| 1251 | 1236 | ||
| 1252 | inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO); | 1237 | inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO); |
| 1253 | if (!inode) | 1238 | if (!inode) { |
| 1254 | goto out; | 1239 | ret = -ENOMEM; |
| 1240 | goto err; | ||
| 1241 | } | ||
| 1255 | isec = (struct inode_security_struct*)inode->i_security; | 1242 | isec = (struct inode_security_struct*)inode->i_security; |
| 1256 | isec->sid = SECINITSID_DEVNULL; | 1243 | isec->sid = SECINITSID_DEVNULL; |
| 1257 | isec->sclass = SECCLASS_CHR_FILE; | 1244 | isec->sclass = SECCLASS_CHR_FILE; |
| @@ -1262,22 +1249,23 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent) | |||
| 1262 | selinux_null = dentry; | 1249 | selinux_null = dentry; |
| 1263 | 1250 | ||
| 1264 | dentry = d_alloc_name(sb->s_root, "avc"); | 1251 | dentry = d_alloc_name(sb->s_root, "avc"); |
| 1265 | if (!dentry) | 1252 | if (!dentry) { |
| 1266 | return -ENOMEM; | 1253 | ret = -ENOMEM; |
| 1254 | goto err; | ||
| 1255 | } | ||
| 1267 | 1256 | ||
| 1268 | ret = sel_make_dir(sb, dentry); | 1257 | ret = sel_make_dir(root_inode, dentry); |
| 1269 | if (ret) | 1258 | if (ret) |
| 1270 | goto out; | 1259 | goto err; |
| 1271 | 1260 | ||
| 1272 | ret = sel_make_avc_files(dentry); | 1261 | ret = sel_make_avc_files(dentry); |
| 1273 | if (ret) | 1262 | if (ret) |
| 1274 | goto out; | 1263 | goto err; |
| 1275 | |||
| 1276 | return 0; | ||
| 1277 | out: | 1264 | out: |
| 1278 | dput(dentry); | 1265 | return ret; |
| 1266 | err: | ||
| 1279 | printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__); | 1267 | printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__); |
| 1280 | return -ENOMEM; | 1268 | goto out; |
| 1281 | } | 1269 | } |
| 1282 | 1270 | ||
| 1283 | static struct super_block *sel_get_sb(struct file_system_type *fs_type, | 1271 | static struct super_block *sel_get_sb(struct file_system_type *fs_type, |
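The sel_read_bool() hunk above drops over a dozen lines of hand-rolled *ppos and copy_to_user() handling in favour of simple_read_from_buffer(), the libfs helper that performs the offset clamping, the user-space copy and the *ppos update in one call. A hypothetical read handler using it (identifiers invented; the buffer handling is simplified relative to the selinuxfs code, which formats into a full page):

#include <linux/fs.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>

static ssize_t example_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	char tmp[32];
	ssize_t len = scnprintf(tmp, sizeof(tmp), "%d %d\n", 1, 0);

	/* copies at most count bytes starting at *ppos and advances *ppos */
	return simple_read_from_buffer(buf, count, ppos, tmp, len);
}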
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8a764928ff4b..63e0b7f29cb5 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c | |||
| @@ -27,7 +27,8 @@ | |||
| 27 | #include <linux/in.h> | 27 | #include <linux/in.h> |
| 28 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
| 29 | #include <linux/audit.h> | 29 | #include <linux/audit.h> |
| 30 | #include <asm/semaphore.h> | 30 | #include <linux/mutex.h> |
| 31 | |||
| 31 | #include "flask.h" | 32 | #include "flask.h" |
| 32 | #include "avc.h" | 33 | #include "avc.h" |
| 33 | #include "avc_ss.h" | 34 | #include "avc_ss.h" |
| @@ -48,9 +49,9 @@ static DEFINE_RWLOCK(policy_rwlock); | |||
| 48 | #define POLICY_RDUNLOCK read_unlock(&policy_rwlock) | 49 | #define POLICY_RDUNLOCK read_unlock(&policy_rwlock) |
| 49 | #define POLICY_WRUNLOCK write_unlock_irq(&policy_rwlock) | 50 | #define POLICY_WRUNLOCK write_unlock_irq(&policy_rwlock) |
| 50 | 51 | ||
| 51 | static DECLARE_MUTEX(load_sem); | 52 | static DEFINE_MUTEX(load_mutex); |
| 52 | #define LOAD_LOCK down(&load_sem) | 53 | #define LOAD_LOCK mutex_lock(&load_mutex) |
| 53 | #define LOAD_UNLOCK up(&load_sem) | 54 | #define LOAD_UNLOCK mutex_unlock(&load_mutex) |
| 54 | 55 | ||
| 55 | static struct sidtab sidtab; | 56 | static struct sidtab sidtab; |
| 56 | struct policydb policydb; | 57 | struct policydb policydb; |
