diff options
83 files changed, 1944 insertions, 739 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17f1f91af35c..946b66e1b652 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -148,9 +148,9 @@ tcp_available_congestion_control - STRING | |||
148 | but not loaded. | 148 | but not loaded. |
149 | 149 | ||
150 | tcp_base_mss - INTEGER | 150 | tcp_base_mss - INTEGER |
151 | The initial value of search_low to be used by Packetization Layer | 151 | The initial value of search_low to be used by the packetization layer |
152 | Path MTU Discovery (MTU probing). If MTU probing is enabled, | 152 | Path MTU discovery (MTU probing). If MTU probing is enabled, |
153 | this is the inital MSS used by the connection. | 153 | this is the initial MSS used by the connection. |
154 | 154 | ||
155 | tcp_congestion_control - STRING | 155 | tcp_congestion_control - STRING |
156 | Set the congestion control algorithm to be used for new | 156 | Set the congestion control algorithm to be used for new |
@@ -185,10 +185,9 @@ tcp_frto - INTEGER | |||
185 | timeouts. It is particularly beneficial in wireless environments | 185 | timeouts. It is particularly beneficial in wireless environments |
186 | where packet loss is typically due to random radio interference | 186 | where packet loss is typically due to random radio interference |
187 | rather than intermediate router congestion. F-RTO is sender-side | 187 | rather than intermediate router congestion. F-RTO is sender-side |
188 | only modification. Therefore it does not require any support from | 188 | only modification. Therefore it does not require any support from |
189 | the peer, but in a typical case, however, where wireless link is | 189 | the peer. |
190 | the local access link and most of the data flows downlink, the | 190 | |
191 | faraway servers should have F-RTO enabled to take advantage of it. | ||
192 | If set to 1, basic version is enabled. 2 enables SACK enhanced | 191 | If set to 1, basic version is enabled. 2 enables SACK enhanced |
193 | F-RTO if flow uses SACK. The basic version can be used also when | 192 | F-RTO if flow uses SACK. The basic version can be used also when |
194 | SACK is in use though scenario(s) with it exists where F-RTO | 193 | SACK is in use though scenario(s) with it exists where F-RTO |
@@ -276,7 +275,7 @@ tcp_mem - vector of 3 INTEGERs: min, pressure, max | |||
276 | memory. | 275 | memory. |
277 | 276 | ||
278 | tcp_moderate_rcvbuf - BOOLEAN | 277 | tcp_moderate_rcvbuf - BOOLEAN |
279 | If set, TCP performs receive buffer autotuning, attempting to | 278 | If set, TCP performs receive buffer auto-tuning, attempting to |
280 | automatically size the buffer (no greater than tcp_rmem[2]) to | 279 | automatically size the buffer (no greater than tcp_rmem[2]) to |
281 | match the size required by the path for full throughput. Enabled by | 280 | match the size required by the path for full throughput. Enabled by |
282 | default. | 281 | default. |
@@ -336,7 +335,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
336 | pressure. | 335 | pressure. |
337 | Default: 8K | 336 | Default: 8K |
338 | 337 | ||
339 | default: default size of receive buffer used by TCP sockets. | 338 | default: initial size of receive buffer used by TCP sockets. |
340 | This value overrides net.core.rmem_default used by other protocols. | 339 | This value overrides net.core.rmem_default used by other protocols. |
341 | Default: 87380 bytes. This value results in window of 65535 with | 340 | Default: 87380 bytes. This value results in window of 65535 with |
342 | default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit | 341 | default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit |
@@ -344,8 +343,10 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
344 | 343 | ||
345 | max: maximal size of receive buffer allowed for automatically | 344 | max: maximal size of receive buffer allowed for automatically |
346 | selected receiver buffers for TCP socket. This value does not override | 345 | selected receiver buffers for TCP socket. This value does not override |
347 | net.core.rmem_max, "static" selection via SO_RCVBUF does not use this. | 346 | net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables |
348 | Default: 87380*2 bytes. | 347 | automatic tuning of that socket's receive buffer size, in which |
348 | case this value is ignored. | ||
349 | Default: between 87380B and 4MB, depending on RAM size. | ||
349 | 350 | ||
350 | tcp_sack - BOOLEAN | 351 | tcp_sack - BOOLEAN |
351 | Enable select acknowledgments (SACKS). | 352 | Enable select acknowledgments (SACKS). |
@@ -358,7 +359,7 @@ tcp_slow_start_after_idle - BOOLEAN | |||
358 | Default: 1 | 359 | Default: 1 |
359 | 360 | ||
360 | tcp_stdurg - BOOLEAN | 361 | tcp_stdurg - BOOLEAN |
361 | Use the Host requirements interpretation of the TCP urg pointer field. | 362 | Use the Host requirements interpretation of the TCP urgent pointer field. |
362 | Most hosts use the older BSD interpretation, so if you turn this on | 363 | Most hosts use the older BSD interpretation, so if you turn this on |
363 | Linux might not communicate correctly with them. | 364 | Linux might not communicate correctly with them. |
364 | Default: FALSE | 365 | Default: FALSE |
@@ -371,12 +372,12 @@ tcp_synack_retries - INTEGER | |||
371 | tcp_syncookies - BOOLEAN | 372 | tcp_syncookies - BOOLEAN |
372 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES | 373 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES |
373 | Send out syncookies when the syn backlog queue of a socket | 374 | Send out syncookies when the syn backlog queue of a socket |
374 | overflows. This is to prevent against the common 'syn flood attack' | 375 | overflows. This is to prevent against the common 'SYN flood attack' |
375 | Default: FALSE | 376 | Default: FALSE |
376 | 377 | ||
377 | Note, that syncookies is fallback facility. | 378 | Note, that syncookies is fallback facility. |
378 | It MUST NOT be used to help highly loaded servers to stand | 379 | It MUST NOT be used to help highly loaded servers to stand |
379 | against legal connection rate. If you see synflood warnings | 380 | against legal connection rate. If you see SYN flood warnings |
380 | in your logs, but investigation shows that they occur | 381 | in your logs, but investigation shows that they occur |
381 | because of overload with legal connections, you should tune | 382 | because of overload with legal connections, you should tune |
382 | another parameters until this warning disappear. | 383 | another parameters until this warning disappear. |
@@ -386,7 +387,7 @@ tcp_syncookies - BOOLEAN | |||
386 | to use TCP extensions, can result in serious degradation | 387 | to use TCP extensions, can result in serious degradation |
387 | of some services (f.e. SMTP relaying), visible not by you, | 388 | of some services (f.e. SMTP relaying), visible not by you, |
388 | but your clients and relays, contacting you. While you see | 389 | but your clients and relays, contacting you. While you see |
389 | synflood warnings in logs not being really flooded, your server | 390 | SYN flood warnings in logs not being really flooded, your server |
390 | is seriously misconfigured. | 391 | is seriously misconfigured. |
391 | 392 | ||
392 | tcp_syn_retries - INTEGER | 393 | tcp_syn_retries - INTEGER |
@@ -419,19 +420,21 @@ tcp_window_scaling - BOOLEAN | |||
419 | Enable window scaling as defined in RFC1323. | 420 | Enable window scaling as defined in RFC1323. |
420 | 421 | ||
421 | tcp_wmem - vector of 3 INTEGERs: min, default, max | 422 | tcp_wmem - vector of 3 INTEGERs: min, default, max |
422 | min: Amount of memory reserved for send buffers for TCP socket. | 423 | min: Amount of memory reserved for send buffers for TCP sockets. |
423 | Each TCP socket has rights to use it due to fact of its birth. | 424 | Each TCP socket has rights to use it due to fact of its birth. |
424 | Default: 4K | 425 | Default: 4K |
425 | 426 | ||
426 | default: Amount of memory allowed for send buffers for TCP socket | 427 | default: initial size of send buffer used by TCP sockets. This |
427 | by default. This value overrides net.core.wmem_default used | 428 | value overrides net.core.wmem_default used by other protocols. |
428 | by other protocols, it is usually lower than net.core.wmem_default. | 429 | It is usually lower than net.core.wmem_default. |
429 | Default: 16K | 430 | Default: 16K |
430 | 431 | ||
431 | max: Maximal amount of memory allowed for automatically selected | 432 | max: Maximal amount of memory allowed for automatically tuned |
432 | send buffers for TCP socket. This value does not override | 433 | send buffers for TCP sockets. This value does not override |
433 | net.core.wmem_max, "static" selection via SO_SNDBUF does not use this. | 434 | net.core.wmem_max. Calling setsockopt() with SO_SNDBUF disables |
434 | Default: 128K | 435 | automatic tuning of that socket's send buffer size, in which case |
436 | this value is ignored. | ||
437 | Default: between 64K and 4MB, depending on RAM size. | ||
435 | 438 | ||
436 | tcp_workaround_signed_windows - BOOLEAN | 439 | tcp_workaround_signed_windows - BOOLEAN |
437 | If set, assume no receipt of a window scaling option means the | 440 | If set, assume no receipt of a window scaling option means the |
@@ -1060,24 +1063,193 @@ bridge-nf-filter-pppoe-tagged - BOOLEAN | |||
1060 | Default: 1 | 1063 | Default: 1 |
1061 | 1064 | ||
1062 | 1065 | ||
1063 | UNDOCUMENTED: | 1066 | proc/sys/net/sctp/* Variables: |
1067 | |||
1068 | addip_enable - BOOLEAN | ||
1069 | Enable or disable extension of Dynamic Address Reconfiguration | ||
1070 | (ADD-IP) functionality specified in RFC5061. This extension provides | ||
1071 | the ability to dynamically add and remove new addresses for the SCTP | ||
1072 | associations. | ||
1073 | |||
1074 | 1: Enable extension. | ||
1075 | |||
1076 | 0: Disable extension. | ||
1077 | |||
1078 | Default: 0 | ||
1079 | |||
1080 | addip_noauth_enable - BOOLEAN | ||
1081 | Dynamic Address Reconfiguration (ADD-IP) requires the use of | ||
1082 | authentication to protect the operations of adding or removing new | ||
1083 | addresses. This requirement is mandated so that unauthorized hosts | ||
1084 | would not be able to hijack associations. However, older | ||
1085 | implementations may not have implemented this requirement while | ||
1086 | allowing the ADD-IP extension. For reasons of interoperability, | ||
1087 | we provide this variable to control the enforcement of the | ||
1088 | authentication requirement. | ||
1089 | |||
1090 | 1: Allow ADD-IP extension to be used without authentication. This | ||
1091 | should only be set in a closed environment for interoperability | ||
1092 | with older implementations. | ||
1093 | |||
1094 | 0: Enforce the authentication requirement | ||
1095 | |||
1096 | Default: 0 | ||
1097 | |||
1098 | auth_enable - BOOLEAN | ||
1099 | Enable or disable Authenticated Chunks extension. This extension | ||
1100 | provides the ability to send and receive authenticated chunks and is | ||
1101 | required for secure operation of Dynamic Address Reconfiguration | ||
1102 | (ADD-IP) extension. | ||
1103 | |||
1104 | 1: Enable this extension. | ||
1105 | 0: Disable this extension. | ||
1106 | |||
1107 | Default: 0 | ||
1108 | |||
1109 | prsctp_enable - BOOLEAN | ||
1110 | Enable or disable the Partial Reliability extension (RFC3758) which | ||
1111 | is used to notify peers that a given DATA should no longer be expected. | ||
1112 | |||
1113 | 1: Enable extension | ||
1114 | 0: Disable | ||
1115 | |||
1116 | Default: 1 | ||
1117 | |||
1118 | max_burst - INTEGER | ||
1119 | The limit of the number of new packets that can be initially sent. It | ||
1120 | controls how bursty the generated traffic can be. | ||
1121 | |||
1122 | Default: 4 | ||
1123 | |||
1124 | association_max_retrans - INTEGER | ||
1125 | Set the maximum number for retransmissions that an association can | ||
1126 | attempt before deciding that the remote end is unreachable. If this value | ||
1127 | is exceeded, the association is terminated. | ||
1128 | |||
1129 | Default: 10 | ||
1130 | |||
1131 | max_init_retransmits - INTEGER | ||
1132 | The maximum number of retransmissions of INIT and COOKIE-ECHO chunks | ||
1133 | that an association will attempt before declaring the destination | ||
1134 | unreachable and terminating. | ||
1135 | |||
1136 | Default: 8 | ||
1137 | |||
1138 | path_max_retrans - INTEGER | ||
1139 | The maximum number of retransmissions that will be attempted on a given | ||
1140 | path. Once this threshold is exceeded, the path is considered | ||
1141 | unreachable, and new traffic will use a different path when the | ||
1142 | association is multihomed. | ||
1143 | |||
1144 | Default: 5 | ||
1145 | |||
1146 | rto_initial - INTEGER | ||
1147 | The initial round trip timeout value in milliseconds that will be used | ||
1148 | in calculating round trip times. This is the initial time interval | ||
1149 | for retransmissions. | ||
1150 | |||
1151 | Default: 3000 | ||
1064 | 1152 | ||
1065 | dev_weight FIXME | 1153 | rto_max - INTEGER |
1066 | discovery_slots FIXME | 1154 | The maximum value (in milliseconds) of the round trip timeout. This |
1067 | discovery_timeout FIXME | 1155 | is the largest time interval that can elapse between retransmissions. |
1068 | fast_poll_increase FIXME | 1156 | |
1069 | ip6_queue_maxlen FIXME | 1157 | Default: 60000 |
1070 | lap_keepalive_time FIXME | 1158 | |
1071 | lo_cong FIXME | 1159 | rto_min - INTEGER |
1072 | max_baud_rate FIXME | 1160 | The minimum value (in milliseconds) of the round trip timeout. This |
1073 | max_dgram_qlen FIXME | 1161 | is the smallest time interval that can elapse between retransmissions. |
1074 | max_noreply_time FIXME | 1162 | |
1075 | max_tx_data_size FIXME | 1163 | Default: 1000 |
1076 | max_tx_window FIXME | 1164 | |
1077 | min_tx_turn_time FIXME | 1165 | hb_interval - INTEGER |
1078 | mod_cong FIXME | 1166 | The interval (in milliseconds) between HEARTBEAT chunks. These chunks |
1079 | no_cong FIXME | 1167 | are sent at the specified interval on idle paths to probe the state of |
1080 | no_cong_thresh FIXME | 1168 | a given path between 2 associations. |
1081 | slot_timeout FIXME | 1169 | |
1082 | warn_noreply_time FIXME | 1170 | Default: 30000 |
1171 | |||
1172 | sack_timeout - INTEGER | ||
1173 | The amount of time (in milliseconds) that the implementation will wait | ||
1174 | to send a SACK. | ||
1175 | |||
1176 | Default: 200 | ||
1177 | |||
1178 | valid_cookie_life - INTEGER | ||
1179 | The default lifetime of the SCTP cookie (in milliseconds). The cookie | ||
1180 | is used during association establishment. | ||
1181 | |||
1182 | Default: 60000 | ||
1183 | |||
1184 | cookie_preserve_enable - BOOLEAN | ||
1185 | Enable or disable the ability to extend the lifetime of the SCTP cookie | ||
1186 | that is used during the establishment phase of an SCTP association. | ||
1187 | |||
1188 | 1: Enable cookie lifetime extension. | ||
1189 | 0: Disable | ||
1190 | |||
1191 | Default: 1 | ||
1192 | |||
1193 | rcvbuf_policy - INTEGER | ||
1194 | Determines if the receive buffer is attributed to the socket or to | ||
1195 | association. SCTP supports the capability to create multiple | ||
1196 | associations on a single socket. When using this capability, it is | ||
1197 | possible that a single stalled association that's buffering a lot | ||
1198 | of data may block other associations from delivering their data by | ||
1199 | consuming all of the receive buffer space. To work around this, | ||
1200 | the rcvbuf_policy could be set to attribute the receiver buffer space | ||
1201 | to each association instead of the socket. This prevents the described | ||
1202 | blocking. | ||
1203 | |||
1204 | 1: rcvbuf space is per association | ||
1205 | 0: rcvbuf space is per socket | ||
1206 | |||
1207 | Default: 0 | ||
1208 | |||
1209 | sndbuf_policy - INTEGER | ||
1210 | Similar to rcvbuf_policy above, this applies to send buffer space. | ||
1211 | |||
1212 | 1: Send buffer is tracked per association | ||
1213 | 0: Send buffer is tracked per socket. | ||
1214 | |||
1215 | Default: 0 | ||
1216 | |||
1217 | sctp_mem - vector of 3 INTEGERs: min, pressure, max | ||
1218 | Number of pages allowed for queueing by all SCTP sockets. | ||
1219 | |||
1220 | min: Below this number of pages SCTP is not bothered about its | ||
1221 | memory appetite. When amount of memory allocated by SCTP exceeds | ||
1222 | this number, SCTP starts to moderate memory usage. | ||
1223 | |||
1224 | pressure: This value was introduced to follow format of tcp_mem. | ||
1225 | |||
1226 | max: Number of pages allowed for queueing by all SCTP sockets. | ||
1227 | |||
1228 | Default is calculated at boot time from amount of available memory. | ||
1229 | |||
1230 | sctp_rmem - vector of 3 INTEGERs: min, default, max | ||
1231 | See tcp_rmem for a description. | ||
1232 | |||
1233 | sctp_wmem - vector of 3 INTEGERs: min, default, max | ||
1234 | See tcp_wmem for a description. | ||
1235 | |||
1236 | UNDOCUMENTED: | ||
1083 | 1237 | ||
1238 | /proc/sys/net/core/* | ||
1239 | dev_weight FIXME | ||
1240 | |||
1241 | /proc/sys/net/unix/* | ||
1242 | max_dgram_qlen FIXME | ||
1243 | |||
1244 | /proc/sys/net/irda/* | ||
1245 | fast_poll_increase FIXME | ||
1246 | warn_noreply_time FIXME | ||
1247 | discovery_slots FIXME | ||
1248 | slot_timeout FIXME | ||
1249 | max_baud_rate FIXME | ||
1250 | discovery_timeout FIXME | ||
1251 | lap_keepalive_time FIXME | ||
1252 | max_noreply_time FIXME | ||
1253 | max_tx_data_size FIXME | ||
1254 | max_tx_window FIXME | ||
1255 | min_tx_turn_time FIXME | ||
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt index a9e990ab980f..373ceacc367e 100644 --- a/Documentation/scheduler/sched-domains.txt +++ b/Documentation/scheduler/sched-domains.txt | |||
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your | |||
61 | arch_init_sched_domains function. This function will attach domains to all | 61 | arch_init_sched_domains function. This function will attach domains to all |
62 | CPUs using cpu_attach_domain. | 62 | CPUs using cpu_attach_domain. |
63 | 63 | ||
64 | Implementors should change the line | 64 | The sched-domains debugging infrastructure can be enabled by enabling |
65 | #undef SCHED_DOMAIN_DEBUG | 65 | CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains |
66 | to | ||
67 | #define SCHED_DOMAIN_DEBUG | ||
68 | in kernel/sched.c as this enables an error checking parse of the sched domains | ||
69 | which should catch most possible errors (described above). It also prints out | 66 | which should catch most possible errors (described above). It also prints out |
70 | the domain structure in a visual format. | 67 | the domain structure in a visual format. |
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 14f901f639ee..3ef339f491e0 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt | |||
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s = | |||
51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time | 51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time |
52 | of 0.00015s. | 52 | of 0.00015s. |
53 | 53 | ||
54 | The remaining CPU time will be used for user input and other tass. Because | 54 | The remaining CPU time will be used for user input and other tasks. Because |
55 | realtime tasks have explicitly allocated the CPU time they need to perform | 55 | realtime tasks have explicitly allocated the CPU time they need to perform |
56 | their tasks, buffer underruns in the graphocs or audio can be eliminated. | 56 | their tasks, buffer underruns in the graphics or audio can be eliminated. |
57 | 57 | ||
58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still | 58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still |
59 | lack an EDF scheduler to make non-uniform periods usable. | 59 | lack an EDF scheduler to make non-uniform periods usable. |
diff --git a/MAINTAINERS b/MAINTAINERS index 6476125363e0..56a2f678019e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -3082,8 +3082,8 @@ L: linux-scsi@vger.kernel.org | |||
3082 | S: Maintained | 3082 | S: Maintained |
3083 | 3083 | ||
3084 | OPROFILE | 3084 | OPROFILE |
3085 | P: Philippe Elie | 3085 | P: Robert Richter |
3086 | M: phil.el@wanadoo.fr | 3086 | M: robert.richter@amd.com |
3087 | L: oprofile-list@lists.sf.net | 3087 | L: oprofile-list@lists.sf.net |
3088 | S: Maintained | 3088 | S: Maintained |
3089 | 3089 | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f9b683..d1b867101e5f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -300,6 +300,29 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) | |||
300 | } | 300 | } |
301 | EXPORT_SYMBOL(ioremap_cache); | 301 | EXPORT_SYMBOL(ioremap_cache); |
302 | 302 | ||
303 | static void __iomem *ioremap_default(resource_size_t phys_addr, | ||
304 | unsigned long size) | ||
305 | { | ||
306 | unsigned long flags; | ||
307 | void *ret; | ||
308 | int err; | ||
309 | |||
310 | /* | ||
311 | * - WB for WB-able memory and no other conflicting mappings | ||
312 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | ||
313 | * - Inherit from conflicting mappings otherwise | ||
314 | */ | ||
315 | err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags); | ||
316 | if (err < 0) | ||
317 | return NULL; | ||
318 | |||
319 | ret = (void *) __ioremap_caller(phys_addr, size, flags, | ||
320 | __builtin_return_address(0)); | ||
321 | |||
322 | free_memtype(phys_addr, phys_addr + size); | ||
323 | return (void __iomem *)ret; | ||
324 | } | ||
325 | |||
303 | /** | 326 | /** |
304 | * iounmap - Free a IO remapping | 327 | * iounmap - Free a IO remapping |
305 | * @addr: virtual address from ioremap_* | 328 | * @addr: virtual address from ioremap_* |
@@ -365,7 +388,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) | |||
365 | if (page_is_ram(start >> PAGE_SHIFT)) | 388 | if (page_is_ram(start >> PAGE_SHIFT)) |
366 | return __va(phys); | 389 | return __va(phys); |
367 | 390 | ||
368 | addr = (void *)ioremap(start, PAGE_SIZE); | 391 | addr = (void *)ioremap_default(start, PAGE_SIZE); |
369 | if (addr) | 392 | if (addr) |
370 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | 393 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); |
371 | 394 | ||
diff --git a/block/bsg.c b/block/bsg.c index f0b7cd343216..54d617f7df3e 100644 --- a/block/bsg.c +++ b/block/bsg.c | |||
@@ -709,11 +709,12 @@ static void bsg_kref_release_function(struct kref *kref) | |||
709 | { | 709 | { |
710 | struct bsg_class_device *bcd = | 710 | struct bsg_class_device *bcd = |
711 | container_of(kref, struct bsg_class_device, ref); | 711 | container_of(kref, struct bsg_class_device, ref); |
712 | struct device *parent = bcd->parent; | ||
712 | 713 | ||
713 | if (bcd->release) | 714 | if (bcd->release) |
714 | bcd->release(bcd->parent); | 715 | bcd->release(bcd->parent); |
715 | 716 | ||
716 | put_device(bcd->parent); | 717 | put_device(parent); |
717 | } | 718 | } |
718 | 719 | ||
719 | static int bsg_put_device(struct bsg_device *bd) | 720 | static int bsg_put_device(struct bsg_device *bd) |
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 3ff8b14420d9..9330b7922f62 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c | |||
@@ -29,14 +29,16 @@ | |||
29 | enum { | 29 | enum { |
30 | ATA_ACPI_FILTER_SETXFER = 1 << 0, | 30 | ATA_ACPI_FILTER_SETXFER = 1 << 0, |
31 | ATA_ACPI_FILTER_LOCK = 1 << 1, | 31 | ATA_ACPI_FILTER_LOCK = 1 << 1, |
32 | ATA_ACPI_FILTER_DIPM = 1 << 2, | ||
32 | 33 | ||
33 | ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | | 34 | ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | |
34 | ATA_ACPI_FILTER_LOCK, | 35 | ATA_ACPI_FILTER_LOCK | |
36 | ATA_ACPI_FILTER_DIPM, | ||
35 | }; | 37 | }; |
36 | 38 | ||
37 | static unsigned int ata_acpi_gtf_filter = ATA_ACPI_FILTER_DEFAULT; | 39 | static unsigned int ata_acpi_gtf_filter = ATA_ACPI_FILTER_DEFAULT; |
38 | module_param_named(acpi_gtf_filter, ata_acpi_gtf_filter, int, 0644); | 40 | module_param_named(acpi_gtf_filter, ata_acpi_gtf_filter, int, 0644); |
39 | MODULE_PARM_DESC(acpi_gtf_filter, "filter mask for ACPI _GTF commands, set to filter out (0x1=set xfermode, 0x2=lock/freeze lock)"); | 41 | MODULE_PARM_DESC(acpi_gtf_filter, "filter mask for ACPI _GTF commands, set to filter out (0x1=set xfermode, 0x2=lock/freeze lock, 0x4=DIPM)"); |
40 | 42 | ||
41 | #define NO_PORT_MULT 0xffff | 43 | #define NO_PORT_MULT 0xffff |
42 | #define SATA_ADR(root, pmp) (((root) << 16) | (pmp)) | 44 | #define SATA_ADR(root, pmp) (((root) << 16) | (pmp)) |
@@ -195,6 +197,10 @@ static void ata_acpi_handle_hotplug(struct ata_port *ap, struct ata_device *dev, | |||
195 | /* This device does not support hotplug */ | 197 | /* This device does not support hotplug */ |
196 | return; | 198 | return; |
197 | 199 | ||
200 | if (event == ACPI_NOTIFY_BUS_CHECK || | ||
201 | event == ACPI_NOTIFY_DEVICE_CHECK) | ||
202 | status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); | ||
203 | |||
198 | spin_lock_irqsave(ap->lock, flags); | 204 | spin_lock_irqsave(ap->lock, flags); |
199 | 205 | ||
200 | switch (event) { | 206 | switch (event) { |
@@ -202,7 +208,6 @@ static void ata_acpi_handle_hotplug(struct ata_port *ap, struct ata_device *dev, | |||
202 | case ACPI_NOTIFY_DEVICE_CHECK: | 208 | case ACPI_NOTIFY_DEVICE_CHECK: |
203 | ata_ehi_push_desc(ehi, "ACPI event"); | 209 | ata_ehi_push_desc(ehi, "ACPI event"); |
204 | 210 | ||
205 | status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); | ||
206 | if (ACPI_FAILURE(status)) { | 211 | if (ACPI_FAILURE(status)) { |
207 | ata_port_printk(ap, KERN_ERR, | 212 | ata_port_printk(ap, KERN_ERR, |
208 | "acpi: failed to determine bay status (0x%x)\n", | 213 | "acpi: failed to determine bay status (0x%x)\n", |
@@ -690,6 +695,14 @@ static int ata_acpi_filter_tf(const struct ata_taskfile *tf, | |||
690 | return 1; | 695 | return 1; |
691 | } | 696 | } |
692 | 697 | ||
698 | if (ata_acpi_gtf_filter & ATA_ACPI_FILTER_DIPM) { | ||
699 | /* inhibit enabling DIPM */ | ||
700 | if (tf->command == ATA_CMD_SET_FEATURES && | ||
701 | tf->feature == SETFEATURES_SATA_ENABLE && | ||
702 | tf->nsect == SATA_DIPM) | ||
703 | return 1; | ||
704 | } | ||
705 | |||
693 | return 0; | 706 | return 0; |
694 | } | 707 | } |
695 | 708 | ||
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c index e82c66e8d31b..26345d7b531c 100644 --- a/drivers/ata/pata_sis.c +++ b/drivers/ata/pata_sis.c | |||
@@ -56,6 +56,7 @@ static const struct sis_laptop sis_laptop[] = { | |||
56 | { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */ | 56 | { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */ |
57 | { 0x5513, 0x1734, 0x105F }, /* FSC Amilo A1630 */ | 57 | { 0x5513, 0x1734, 0x105F }, /* FSC Amilo A1630 */ |
58 | { 0x5513, 0x1071, 0x8640 }, /* EasyNote K5305 */ | 58 | { 0x5513, 0x1071, 0x8640 }, /* EasyNote K5305 */ |
59 | { 0x5513, 0x1039, 0x5513 }, /* Targa Visionary 1000 */ | ||
59 | /* end marker */ | 60 | /* end marker */ |
60 | { 0, } | 61 | { 0, } |
61 | }; | 62 | }; |
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 1b9a87047817..0e6df289cb46 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c | |||
@@ -755,9 +755,8 @@ static ssize_t ipmi_write(struct file *file, | |||
755 | rv = ipmi_heartbeat(); | 755 | rv = ipmi_heartbeat(); |
756 | if (rv) | 756 | if (rv) |
757 | return rv; | 757 | return rv; |
758 | return 1; | ||
759 | } | 758 | } |
760 | return 0; | 759 | return len; |
761 | } | 760 | } |
762 | 761 | ||
763 | static ssize_t ipmi_read(struct file *file, | 762 | static ssize_t ipmi_read(struct file *file, |
diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index ba6340ae98af..929101ecbae2 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c | |||
@@ -590,8 +590,10 @@ static struct ipw_rx_packet *pool_allocate(struct ipw_hardware *hw, | |||
590 | packet = kmalloc(sizeof(struct ipw_rx_packet) + | 590 | packet = kmalloc(sizeof(struct ipw_rx_packet) + |
591 | old_packet->length + minimum_free_space, | 591 | old_packet->length + minimum_free_space, |
592 | GFP_ATOMIC); | 592 | GFP_ATOMIC); |
593 | if (!packet) | 593 | if (!packet) { |
594 | kfree(old_packet); | ||
594 | return NULL; | 595 | return NULL; |
596 | } | ||
595 | memcpy(packet, old_packet, | 597 | memcpy(packet, old_packet, |
596 | sizeof(struct ipw_rx_packet) | 598 | sizeof(struct ipw_rx_packet) |
597 | + old_packet->length); | 599 | + old_packet->length); |
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 5f80a9dff573..909cac93fa2a 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c | |||
@@ -678,12 +678,13 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel) | |||
678 | if (arg != (1<<tmp)) | 678 | if (arg != (1<<tmp)) |
679 | return -EINVAL; | 679 | return -EINVAL; |
680 | 680 | ||
681 | rtc_freq = arg; | ||
682 | |||
681 | spin_lock_irqsave(&rtc_lock, flags); | 683 | spin_lock_irqsave(&rtc_lock, flags); |
682 | if (hpet_set_periodic_freq(arg)) { | 684 | if (hpet_set_periodic_freq(arg)) { |
683 | spin_unlock_irqrestore(&rtc_lock, flags); | 685 | spin_unlock_irqrestore(&rtc_lock, flags); |
684 | return 0; | 686 | return 0; |
685 | } | 687 | } |
686 | rtc_freq = arg; | ||
687 | 688 | ||
688 | val = CMOS_READ(RTC_FREQ_SELECT) & 0xf0; | 689 | val = CMOS_READ(RTC_FREQ_SELECT) & 0xf0; |
689 | val |= (16 - tmp); | 690 | val |= (16 - tmp); |
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 13a4bdd4e4d6..c7a977bc03e8 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c | |||
@@ -623,6 +623,7 @@ static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = { | |||
623 | {"IFX0102", 0}, /* Infineon */ | 623 | {"IFX0102", 0}, /* Infineon */ |
624 | {"BCM0101", 0}, /* Broadcom */ | 624 | {"BCM0101", 0}, /* Broadcom */ |
625 | {"NSC1200", 0}, /* National */ | 625 | {"NSC1200", 0}, /* National */ |
626 | {"ICO0102", 0}, /* Intel */ | ||
626 | /* Add new here */ | 627 | /* Add new here */ |
627 | {"", 0}, /* User Specified */ | 628 | {"", 0}, /* User Specified */ |
628 | {"", 0} /* Terminator */ | 629 | {"", 0} /* Terminator */ |
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 0f3c66de69bc..8d8c6b736167 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c | |||
@@ -1977,8 +1977,10 @@ isdn_writebuf_stub(int drvidx, int chan, const u_char __user * buf, int len) | |||
1977 | if (!skb) | 1977 | if (!skb) |
1978 | return -ENOMEM; | 1978 | return -ENOMEM; |
1979 | skb_reserve(skb, hl); | 1979 | skb_reserve(skb, hl); |
1980 | if (copy_from_user(skb_put(skb, len), buf, len)) | 1980 | if (copy_from_user(skb_put(skb, len), buf, len)) { |
1981 | dev_kfree_skb(skb); | ||
1981 | return -EFAULT; | 1982 | return -EFAULT; |
1983 | } | ||
1982 | ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, 1, skb); | 1984 | ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, 1, skb); |
1983 | if (ret <= 0) | 1985 | if (ret <= 0) |
1984 | dev_kfree_skb(skb); | 1986 | dev_kfree_skb(skb); |
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c index 2bc6bdc9c1f2..d7bfd30f74a9 100644 --- a/drivers/media/video/ov7670.c +++ b/drivers/media/video/ov7670.c | |||
@@ -406,8 +406,10 @@ static int ov7670_read(struct i2c_client *c, unsigned char reg, | |||
406 | int ret; | 406 | int ret; |
407 | 407 | ||
408 | ret = i2c_smbus_read_byte_data(c, reg); | 408 | ret = i2c_smbus_read_byte_data(c, reg); |
409 | if (ret >= 0) | 409 | if (ret >= 0) { |
410 | *value = (unsigned char) ret; | 410 | *value = (unsigned char) ret; |
411 | ret = 0; | ||
412 | } | ||
411 | return ret; | 413 | return ret; |
412 | } | 414 | } |
413 | 415 | ||
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index db3c892f87fb..d40d6d15ae20 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c | |||
@@ -1686,9 +1686,14 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) | |||
1686 | ioc->bus_type = SAS; | 1686 | ioc->bus_type = SAS; |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | if (ioc->bus_type == SAS && mpt_msi_enable == -1) | 1689 | if (mpt_msi_enable == -1) { |
1690 | ioc->msi_enable = 1; | 1690 | /* Enable on SAS, disable on FC and SPI */ |
1691 | else | 1691 | if (ioc->bus_type == SAS) |
1692 | ioc->msi_enable = 1; | ||
1693 | else | ||
1694 | ioc->msi_enable = 0; | ||
1695 | } else | ||
1696 | /* follow flag: 0 - disable; 1 - enable */ | ||
1692 | ioc->msi_enable = mpt_msi_enable; | 1697 | ioc->msi_enable = mpt_msi_enable; |
1693 | 1698 | ||
1694 | if (ioc->errata_flag_1064) | 1699 | if (ioc->errata_flag_1064) |
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 25bcfcf36f2e..1effca4e40e1 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c | |||
@@ -1266,13 +1266,18 @@ mptspi_dv_renegotiate(struct _MPT_SCSI_HOST *hd) | |||
1266 | static int | 1266 | static int |
1267 | mptspi_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) | 1267 | mptspi_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) |
1268 | { | 1268 | { |
1269 | struct _MPT_SCSI_HOST *hd = shost_priv(ioc->sh); | ||
1270 | int rc; | 1269 | int rc; |
1271 | 1270 | ||
1272 | rc = mptscsih_ioc_reset(ioc, reset_phase); | 1271 | rc = mptscsih_ioc_reset(ioc, reset_phase); |
1273 | 1272 | ||
1274 | if (reset_phase == MPT_IOC_POST_RESET) | 1273 | /* only try to do a renegotiation if we're properly set up |
1274 | * if we get an ioc fault on bringup, ioc->sh will be NULL */ | ||
1275 | if (reset_phase == MPT_IOC_POST_RESET && | ||
1276 | ioc->sh) { | ||
1277 | struct _MPT_SCSI_HOST *hd = shost_priv(ioc->sh); | ||
1278 | |||
1275 | mptspi_dv_renegotiate(hd); | 1279 | mptspi_dv_renegotiate(hd); |
1280 | } | ||
1276 | 1281 | ||
1277 | return rc; | 1282 | return rc; |
1278 | } | 1283 | } |
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index a7714da7c283..effc1ce8179a 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c | |||
@@ -152,6 +152,7 @@ static chipio_t pnp_info; | |||
152 | static const struct pnp_device_id nsc_ircc_pnp_table[] = { | 152 | static const struct pnp_device_id nsc_ircc_pnp_table[] = { |
153 | { .id = "NSC6001", .driver_data = 0 }, | 153 | { .id = "NSC6001", .driver_data = 0 }, |
154 | { .id = "IBM0071", .driver_data = 0 }, | 154 | { .id = "IBM0071", .driver_data = 0 }, |
155 | { .id = "HWPC224", .driver_data = 0 }, | ||
155 | { } | 156 | { } |
156 | }; | 157 | }; |
157 | 158 | ||
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c index 58e128784585..04ad3573b159 100644 --- a/drivers/net/irda/via-ircc.c +++ b/drivers/net/irda/via-ircc.c | |||
@@ -1546,6 +1546,7 @@ static int via_ircc_net_open(struct net_device *dev) | |||
1546 | IRDA_WARNING("%s, unable to allocate dma2=%d\n", | 1546 | IRDA_WARNING("%s, unable to allocate dma2=%d\n", |
1547 | driver_name, self->io.dma2); | 1547 | driver_name, self->io.dma2); |
1548 | free_irq(self->io.irq, self); | 1548 | free_irq(self->io.irq, self); |
1549 | free_dma(self->io.dma); | ||
1549 | return -EAGAIN; | 1550 | return -EAGAIN; |
1550 | } | 1551 | } |
1551 | } | 1552 | } |
@@ -1606,6 +1607,8 @@ static int via_ircc_net_close(struct net_device *dev) | |||
1606 | EnAllInt(iobase, OFF); | 1607 | EnAllInt(iobase, OFF); |
1607 | free_irq(self->io.irq, dev); | 1608 | free_irq(self->io.irq, dev); |
1608 | free_dma(self->io.dma); | 1609 | free_dma(self->io.dma); |
1610 | if (self->io.dma2 != self->io.dma) | ||
1611 | free_dma(self->io.dma2); | ||
1609 | 1612 | ||
1610 | return 0; | 1613 | return 0; |
1611 | } | 1614 | } |
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7ab94c825b57..b9018bfa0a97 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -602,6 +602,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) | |||
602 | tun->attached = 1; | 602 | tun->attached = 1; |
603 | get_net(dev_net(tun->dev)); | 603 | get_net(dev_net(tun->dev)); |
604 | 604 | ||
605 | /* Make sure persistent devices do not get stuck in | ||
606 | * xoff state. | ||
607 | */ | ||
608 | if (netif_running(tun->dev)) | ||
609 | netif_wake_queue(tun->dev); | ||
610 | |||
605 | strcpy(ifr->ifr_name, tun->dev->name); | 611 | strcpy(ifr->ifr_name, tun->dev->name); |
606 | return 0; | 612 | return 0; |
607 | 613 | ||
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index 80039a0ae027..3b4e55cf33cd 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c | |||
@@ -777,8 +777,10 @@ static int hostap_cs_suspend(struct pcmcia_device *link) | |||
777 | int dev_open = 0; | 777 | int dev_open = 0; |
778 | struct hostap_interface *iface = NULL; | 778 | struct hostap_interface *iface = NULL; |
779 | 779 | ||
780 | if (dev) | 780 | if (!dev) |
781 | iface = netdev_priv(dev); | 781 | return -ENODEV; |
782 | |||
783 | iface = netdev_priv(dev); | ||
782 | 784 | ||
783 | PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_SUSPEND\n", dev_info); | 785 | PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_SUSPEND\n", dev_info); |
784 | if (iface && iface->local) | 786 | if (iface && iface->local) |
@@ -798,8 +800,10 @@ static int hostap_cs_resume(struct pcmcia_device *link) | |||
798 | int dev_open = 0; | 800 | int dev_open = 0; |
799 | struct hostap_interface *iface = NULL; | 801 | struct hostap_interface *iface = NULL; |
800 | 802 | ||
801 | if (dev) | 803 | if (!dev) |
802 | iface = netdev_priv(dev); | 804 | return -ENODEV; |
805 | |||
806 | iface = netdev_priv(dev); | ||
803 | 807 | ||
804 | PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_RESUME\n", dev_info); | 808 | PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_RESUME\n", dev_info); |
805 | 809 | ||
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index f5387a7a76c0..55ac850744b3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c | |||
@@ -449,7 +449,7 @@ static void iwl3945_dbg_report_frame(struct iwl3945_priv *priv, | |||
449 | 449 | ||
450 | if (print_summary) { | 450 | if (print_summary) { |
451 | char *title; | 451 | char *title; |
452 | u32 rate; | 452 | int rate; |
453 | 453 | ||
454 | if (hundred) | 454 | if (hundred) |
455 | title = "100Frames"; | 455 | title = "100Frames"; |
@@ -487,7 +487,7 @@ static void iwl3945_dbg_report_frame(struct iwl3945_priv *priv, | |||
487 | * but you can hack it to show more, if you'd like to. */ | 487 | * but you can hack it to show more, if you'd like to. */ |
488 | if (dataframe) | 488 | if (dataframe) |
489 | IWL_DEBUG_RX("%s: mhd=0x%04x, dst=0x%02x, " | 489 | IWL_DEBUG_RX("%s: mhd=0x%04x, dst=0x%02x, " |
490 | "len=%u, rssi=%d, chnl=%d, rate=%u, \n", | 490 | "len=%u, rssi=%d, chnl=%d, rate=%d, \n", |
491 | title, fc, header->addr1[5], | 491 | title, fc, header->addr1[5], |
492 | length, rssi, channel, rate); | 492 | length, rssi, channel, rate); |
493 | else { | 493 | else { |
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index d448c9702a0f..387d4878af2f 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c | |||
@@ -567,11 +567,11 @@ static int lbs_process_bss(struct bss_descriptor *bss, | |||
567 | pos += 8; | 567 | pos += 8; |
568 | 568 | ||
569 | /* beacon interval is 2 bytes long */ | 569 | /* beacon interval is 2 bytes long */ |
570 | bss->beaconperiod = le16_to_cpup((void *) pos); | 570 | bss->beaconperiod = get_unaligned_le16(pos); |
571 | pos += 2; | 571 | pos += 2; |
572 | 572 | ||
573 | /* capability information is 2 bytes long */ | 573 | /* capability information is 2 bytes long */ |
574 | bss->capability = le16_to_cpup((void *) pos); | 574 | bss->capability = get_unaligned_le16(pos); |
575 | lbs_deb_scan("process_bss: capabilities 0x%04x\n", bss->capability); | 575 | lbs_deb_scan("process_bss: capabilities 0x%04x\n", bss->capability); |
576 | pos += 2; | 576 | pos += 2; |
577 | 577 | ||
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index 560b9c73c0b9..b36ed1c6c746 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c | |||
@@ -731,6 +731,17 @@ static int rt2400pci_init_registers(struct rt2x00_dev *rt2x00dev) | |||
731 | (rt2x00dev->rx->data_size / 128)); | 731 | (rt2x00dev->rx->data_size / 128)); |
732 | rt2x00pci_register_write(rt2x00dev, CSR9, reg); | 732 | rt2x00pci_register_write(rt2x00dev, CSR9, reg); |
733 | 733 | ||
734 | rt2x00pci_register_read(rt2x00dev, CSR14, &reg); | ||
735 | rt2x00_set_field32(&reg, CSR14_TSF_COUNT, 0); | ||
736 | rt2x00_set_field32(&reg, CSR14_TSF_SYNC, 0); | ||
737 | rt2x00_set_field32(&reg, CSR14_TBCN, 0); | ||
738 | rt2x00_set_field32(&reg, CSR14_TCFP, 0); | ||
739 | rt2x00_set_field32(&reg, CSR14_TATIMW, 0); | ||
740 | rt2x00_set_field32(&reg, CSR14_BEACON_GEN, 0); | ||
741 | rt2x00_set_field32(&reg, CSR14_CFP_COUNT_PRELOAD, 0); | ||
742 | rt2x00_set_field32(&reg, CSR14_TBCM_PRELOAD, 0); | ||
743 | rt2x00pci_register_write(rt2x00dev, CSR14, reg); | ||
744 | |||
734 | rt2x00pci_register_write(rt2x00dev, CNT3, 0x3f080000); | 745 | rt2x00pci_register_write(rt2x00dev, CNT3, 0x3f080000); |
735 | 746 | ||
736 | rt2x00pci_register_read(rt2x00dev, ARCSR0, &reg); | 747 | rt2x00pci_register_read(rt2x00dev, ARCSR0, &reg); |
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index a5ed54b69262..f7731fb82555 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c | |||
@@ -824,6 +824,17 @@ static int rt2500pci_init_registers(struct rt2x00_dev *rt2x00dev) | |||
824 | rt2x00_set_field32(&reg, CSR11_CW_SELECT, 0); | 824 | rt2x00_set_field32(&reg, CSR11_CW_SELECT, 0); |
825 | rt2x00pci_register_write(rt2x00dev, CSR11, reg); | 825 | rt2x00pci_register_write(rt2x00dev, CSR11, reg); |
826 | 826 | ||
827 | rt2x00pci_register_read(rt2x00dev, CSR14, &reg); | ||
828 | rt2x00_set_field32(&reg, CSR14_TSF_COUNT, 0); | ||
829 | rt2x00_set_field32(&reg, CSR14_TSF_SYNC, 0); | ||
830 | rt2x00_set_field32(&reg, CSR14_TBCN, 0); | ||
831 | rt2x00_set_field32(&reg, CSR14_TCFP, 0); | ||
832 | rt2x00_set_field32(&reg, CSR14_TATIMW, 0); | ||
833 | rt2x00_set_field32(&reg, CSR14_BEACON_GEN, 0); | ||
834 | rt2x00_set_field32(&reg, CSR14_CFP_COUNT_PRELOAD, 0); | ||
835 | rt2x00_set_field32(&reg, CSR14_TBCM_PRELOAD, 0); | ||
836 | rt2x00pci_register_write(rt2x00dev, CSR14, reg); | ||
837 | |||
827 | rt2x00pci_register_write(rt2x00dev, CNT3, 0); | 838 | rt2x00pci_register_write(rt2x00dev, CNT3, 0); |
828 | 839 | ||
829 | rt2x00pci_register_read(rt2x00dev, TXCSR8, &reg); | 840 | rt2x00pci_register_read(rt2x00dev, TXCSR8, &reg); |
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 61e59c17a60a..d90512f97b39 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c | |||
@@ -801,6 +801,13 @@ static int rt2500usb_init_registers(struct rt2x00_dev *rt2x00dev) | |||
801 | rt2x00_set_field16(&reg, TXRX_CSR8_BBP_ID1_VALID, 0); | 801 | rt2x00_set_field16(&reg, TXRX_CSR8_BBP_ID1_VALID, 0); |
802 | rt2500usb_register_write(rt2x00dev, TXRX_CSR8, reg); | 802 | rt2500usb_register_write(rt2x00dev, TXRX_CSR8, reg); |
803 | 803 | ||
804 | rt2500usb_register_read(rt2x00dev, TXRX_CSR19, &reg); | ||
805 | rt2x00_set_field16(&reg, TXRX_CSR19_TSF_COUNT, 0); | ||
806 | rt2x00_set_field16(&reg, TXRX_CSR19_TSF_SYNC, 0); | ||
807 | rt2x00_set_field16(&reg, TXRX_CSR19_TBCN, 0); | ||
808 | rt2x00_set_field16(&reg, TXRX_CSR19_BEACON_GEN, 0); | ||
809 | rt2500usb_register_write(rt2x00dev, TXRX_CSR19, reg); | ||
810 | |||
804 | rt2500usb_register_write(rt2x00dev, TXRX_CSR21, 0xe78f); | 811 | rt2500usb_register_write(rt2x00dev, TXRX_CSR21, 0xe78f); |
805 | rt2500usb_register_write(rt2x00dev, MAC_CSR9, 0xff1d); | 812 | rt2500usb_register_write(rt2x00dev, MAC_CSR9, 0xff1d); |
806 | 813 | ||
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 14bc7b281659..c3afb5cbe807 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c | |||
@@ -1201,6 +1201,15 @@ static int rt61pci_init_registers(struct rt2x00_dev *rt2x00dev) | |||
1201 | rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); | 1201 | rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); |
1202 | rt2x00pci_register_write(rt2x00dev, TXRX_CSR8, reg); | 1202 | rt2x00pci_register_write(rt2x00dev, TXRX_CSR8, reg); |
1203 | 1203 | ||
1204 | rt2x00pci_register_read(rt2x00dev, TXRX_CSR9, &reg); | ||
1205 | rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_INTERVAL, 0); | ||
1206 | rt2x00_set_field32(&reg, TXRX_CSR9_TSF_TICKING, 0); | ||
1207 | rt2x00_set_field32(&reg, TXRX_CSR9_TSF_SYNC, 0); | ||
1208 | rt2x00_set_field32(&reg, TXRX_CSR9_TBTT_ENABLE, 0); | ||
1209 | rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0); | ||
1210 | rt2x00_set_field32(&reg, TXRX_CSR9_TIMESTAMP_COMPENSATE, 0); | ||
1211 | rt2x00pci_register_write(rt2x00dev, TXRX_CSR9, reg); | ||
1212 | |||
1204 | rt2x00pci_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); | 1213 | rt2x00pci_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); |
1205 | 1214 | ||
1206 | rt2x00pci_register_write(rt2x00dev, MAC_CSR6, 0x00000fff); | 1215 | rt2x00pci_register_write(rt2x00dev, MAC_CSR6, 0x00000fff); |
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 83cc0147f698..46e9e081fbf1 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c | |||
@@ -1006,6 +1006,15 @@ static int rt73usb_init_registers(struct rt2x00_dev *rt2x00dev) | |||
1006 | rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); | 1006 | rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); |
1007 | rt73usb_register_write(rt2x00dev, TXRX_CSR8, reg); | 1007 | rt73usb_register_write(rt2x00dev, TXRX_CSR8, reg); |
1008 | 1008 | ||
1009 | rt73usb_register_read(rt2x00dev, TXRX_CSR9, &reg); | ||
1010 | rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_INTERVAL, 0); | ||
1011 | rt2x00_set_field32(&reg, TXRX_CSR9_TSF_TICKING, 0); | ||
1012 | rt2x00_set_field32(&reg, TXRX_CSR9_TSF_SYNC, 0); | ||
1013 | rt2x00_set_field32(&reg, TXRX_CSR9_TBTT_ENABLE, 0); | ||
1014 | rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0); | ||
1015 | rt2x00_set_field32(&reg, TXRX_CSR9_TIMESTAMP_COMPENSATE, 0); | ||
1016 | rt73usb_register_write(rt2x00dev, TXRX_CSR9, reg); | ||
1017 | |||
1009 | rt73usb_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); | 1018 | rt73usb_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); |
1010 | 1019 | ||
1011 | rt73usb_register_read(rt2x00dev, MAC_CSR6, &reg); | 1020 | rt73usb_register_read(rt2x00dev, MAC_CSR6, &reg); |
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 418606ac1c3b..694e95d35fd4 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c | |||
@@ -765,6 +765,7 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw, | |||
765 | { | 765 | { |
766 | struct zd_mac *mac = zd_hw_mac(hw); | 766 | struct zd_mac *mac = zd_hw_mac(hw); |
767 | mac->type = IEEE80211_IF_TYPE_INVALID; | 767 | mac->type = IEEE80211_IF_TYPE_INVALID; |
768 | zd_set_beacon_interval(&mac->chip, 0); | ||
768 | zd_write_mac_addr(&mac->chip, NULL); | 769 | zd_write_mac_addr(&mac->chip, NULL); |
769 | } | 770 | } |
770 | 771 | ||
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 8941f5eb96c2..6cdad9764604 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c | |||
@@ -64,6 +64,7 @@ static struct usb_device_id usb_ids[] = { | |||
64 | { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B }, | 64 | { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B }, |
65 | { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B }, | 65 | { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B }, |
66 | { USB_DEVICE(0x050d, 0x705c), .driver_info = DEVICE_ZD1211B }, | 66 | { USB_DEVICE(0x050d, 0x705c), .driver_info = DEVICE_ZD1211B }, |
67 | { USB_DEVICE(0x083a, 0xe506), .driver_info = DEVICE_ZD1211B }, | ||
67 | { USB_DEVICE(0x083a, 0x4505), .driver_info = DEVICE_ZD1211B }, | 68 | { USB_DEVICE(0x083a, 0x4505), .driver_info = DEVICE_ZD1211B }, |
68 | { USB_DEVICE(0x0471, 0x1236), .driver_info = DEVICE_ZD1211B }, | 69 | { USB_DEVICE(0x0471, 0x1236), .driver_info = DEVICE_ZD1211B }, |
69 | { USB_DEVICE(0x13b1, 0x0024), .driver_info = DEVICE_ZD1211B }, | 70 | { USB_DEVICE(0x13b1, 0x0024), .driver_info = DEVICE_ZD1211B }, |
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 3ce9f3defc12..956d3e79f6aa 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c | |||
@@ -101,8 +101,8 @@ static int rio_device_probe(struct device *dev) | |||
101 | if (error >= 0) { | 101 | if (error >= 0) { |
102 | rdev->driver = rdrv; | 102 | rdev->driver = rdrv; |
103 | error = 0; | 103 | error = 0; |
104 | } else | ||
104 | rio_dev_put(rdev); | 105 | rio_dev_put(rdev); |
105 | } | ||
106 | } | 106 | } |
107 | return error; | 107 | return error; |
108 | } | 108 | } |
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c index 11644c8fca82..abfdfcbaa059 100644 --- a/drivers/rtc/rtc-fm3130.c +++ b/drivers/rtc/rtc-fm3130.c | |||
@@ -55,7 +55,7 @@ struct fm3130 { | |||
55 | int alarm; | 55 | int alarm; |
56 | }; | 56 | }; |
57 | static const struct i2c_device_id fm3130_id[] = { | 57 | static const struct i2c_device_id fm3130_id[] = { |
58 | { "fm3130-rtc", 0 }, | 58 | { "fm3130", 0 }, |
59 | { } | 59 | { } |
60 | }; | 60 | }; |
61 | MODULE_DEVICE_TABLE(i2c, fm3130_id); | 61 | MODULE_DEVICE_TABLE(i2c, fm3130_id); |
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 0fc4c3630780..748a502a6355 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c | |||
@@ -302,6 +302,7 @@ static int pcf8563_remove(struct i2c_client *client) | |||
302 | 302 | ||
303 | static const struct i2c_device_id pcf8563_id[] = { | 303 | static const struct i2c_device_id pcf8563_id[] = { |
304 | { "pcf8563", 0 }, | 304 | { "pcf8563", 0 }, |
305 | { "rtc8564", 0 }, | ||
305 | { } | 306 | { } |
306 | }; | 307 | }; |
307 | MODULE_DEVICE_TABLE(i2c, pcf8563_id); | 308 | MODULE_DEVICE_TABLE(i2c, pcf8563_id); |
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 999e91ea7451..e7a3a6554425 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/module.h> | 71 | #include <linux/module.h> |
72 | #include <linux/moduleparam.h> | 72 | #include <linux/moduleparam.h> |
73 | #include <linux/libata.h> | 73 | #include <linux/libata.h> |
74 | #include <linux/hdreg.h> | ||
74 | #include <asm/io.h> | 75 | #include <asm/io.h> |
75 | #include <asm/irq.h> | 76 | #include <asm/irq.h> |
76 | #include <asm/processor.h> | 77 | #include <asm/processor.h> |
@@ -4913,8 +4914,11 @@ static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) | |||
4913 | struct ipr_resource_entry *res; | 4914 | struct ipr_resource_entry *res; |
4914 | 4915 | ||
4915 | res = (struct ipr_resource_entry *)sdev->hostdata; | 4916 | res = (struct ipr_resource_entry *)sdev->hostdata; |
4916 | if (res && ipr_is_gata(res)) | 4917 | if (res && ipr_is_gata(res)) { |
4918 | if (cmd == HDIO_GET_IDENTITY) | ||
4919 | return -ENOTTY; | ||
4917 | return ata_scsi_ioctl(sdev, cmd, arg); | 4920 | return ata_scsi_ioctl(sdev, cmd, arg); |
4921 | } | ||
4918 | 4922 | ||
4919 | return -EINVAL; | 4923 | return -EINVAL; |
4920 | } | 4924 | } |
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a82d2fe80fb5..cbf55d59a54c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c | |||
@@ -207,6 +207,15 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, | |||
207 | */ | 207 | */ |
208 | blk_execute_rq(req->q, NULL, req, 1); | 208 | blk_execute_rq(req->q, NULL, req, 1); |
209 | 209 | ||
210 | /* | ||
211 | * Some devices (USB mass-storage in particular) may transfer | ||
212 | * garbage data together with a residue indicating that the data | ||
213 | * is invalid. Prevent the garbage from being misinterpreted | ||
214 | * and prevent security leaks by zeroing out the excess data. | ||
215 | */ | ||
216 | if (unlikely(req->data_len > 0 && req->data_len <= bufflen)) | ||
217 | memset(buffer + (bufflen - req->data_len), 0, req->data_len); | ||
218 | |||
210 | ret = req->errors; | 219 | ret = req->errors; |
211 | out: | 220 | out: |
212 | blk_put_request(req); | 221 | blk_put_request(req); |
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 1bc00b721e9d..be95e55b228b 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c | |||
@@ -2623,6 +2623,9 @@ static struct console serial8250_console = { | |||
2623 | 2623 | ||
2624 | static int __init serial8250_console_init(void) | 2624 | static int __init serial8250_console_init(void) |
2625 | { | 2625 | { |
2626 | if (nr_uarts > UART_NR) | ||
2627 | nr_uarts = UART_NR; | ||
2628 | |||
2626 | serial8250_isa_init_ports(); | 2629 | serial8250_isa_init_ports(); |
2627 | register_console(&serial8250_console); | 2630 | register_console(&serial8250_console); |
2628 | return 0; | 2631 | return 0; |
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index d28c53868093..538c570df337 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c | |||
@@ -537,6 +537,13 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, | |||
537 | int err = 0; | 537 | int err = 0; |
538 | u32 tmp; | 538 | u32 tmp; |
539 | 539 | ||
540 | if (dev->bus->bustype != SSB_BUSTYPE_PCI) { | ||
541 | /* This SSB device is not on a PCI host-bus. So the IRQs are | ||
542 | * not routed through the PCI core. | ||
543 | * So we must not enable routing through the PCI core. */ | ||
544 | goto out; | ||
545 | } | ||
546 | |||
540 | if (!pdev) | 547 | if (!pdev) |
541 | goto out; | 548 | goto out; |
542 | bus = pdev->bus; | 549 | bus = pdev->bus; |
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c index f90fe0c7373f..68c17f5ea8ea 100644 --- a/drivers/usb/host/ohci-au1xxx.c +++ b/drivers/usb/host/ohci-au1xxx.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * Bus Glue for AMD Alchemy Au1xxx | 8 | * Bus Glue for AMD Alchemy Au1xxx |
9 | * | 9 | * |
10 | * Written by Christopher Hoover <ch@hpl.hp.com> | 10 | * Written by Christopher Hoover <ch@hpl.hp.com> |
11 | * Based on fragments of previous driver by Rusell King et al. | 11 | * Based on fragments of previous driver by Russell King et al. |
12 | * | 12 | * |
13 | * Modified for LH7A404 from ohci-sa1111.c | 13 | * Modified for LH7A404 from ohci-sa1111.c |
14 | * by Durgesh Pattamatta <pattamattad@sharpsec.com> | 14 | * by Durgesh Pattamatta <pattamattad@sharpsec.com> |
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c index 13c12ed22252..1ef5d482c145 100644 --- a/drivers/usb/host/ohci-lh7a404.c +++ b/drivers/usb/host/ohci-lh7a404.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * Bus Glue for Sharp LH7A404 | 8 | * Bus Glue for Sharp LH7A404 |
9 | * | 9 | * |
10 | * Written by Christopher Hoover <ch@hpl.hp.com> | 10 | * Written by Christopher Hoover <ch@hpl.hp.com> |
11 | * Based on fragments of previous driver by Rusell King et al. | 11 | * Based on fragments of previous driver by Russell King et al. |
12 | * | 12 | * |
13 | * Modified for LH7A404 from ohci-sa1111.c | 13 | * Modified for LH7A404 from ohci-sa1111.c |
14 | * by Durgesh Pattamatta <pattamattad@sharpsec.com> | 14 | * by Durgesh Pattamatta <pattamattad@sharpsec.com> |
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c index ead4772f0f27..3c7a740cfe0c 100644 --- a/drivers/usb/host/ohci-s3c2410.c +++ b/drivers/usb/host/ohci-s3c2410.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * USB Bus Glue for Samsung S3C2410 | 8 | * USB Bus Glue for Samsung S3C2410 |
9 | * | 9 | * |
10 | * Written by Christopher Hoover <ch@hpl.hp.com> | 10 | * Written by Christopher Hoover <ch@hpl.hp.com> |
11 | * Based on fragments of previous driver by Rusell King et al. | 11 | * Based on fragments of previous driver by Russell King et al. |
12 | * | 12 | * |
13 | * Modified for S3C2410 from ohci-sa1111.c, ohci-omap.c and ohci-lh7a40.c | 13 | * Modified for S3C2410 from ohci-sa1111.c, ohci-omap.c and ohci-lh7a40.c |
14 | * by Ben Dooks, <ben@simtec.co.uk> | 14 | * by Ben Dooks, <ben@simtec.co.uk> |
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c index 0f48f2d99226..2e9dceb9bb99 100644 --- a/drivers/usb/host/ohci-sa1111.c +++ b/drivers/usb/host/ohci-sa1111.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * SA1111 Bus Glue | 8 | * SA1111 Bus Glue |
9 | * | 9 | * |
10 | * Written by Christopher Hoover <ch@hpl.hp.com> | 10 | * Written by Christopher Hoover <ch@hpl.hp.com> |
11 | * Based on fragments of previous driver by Rusell King et al. | 11 | * Based on fragments of previous driver by Russell King et al. |
12 | * | 12 | * |
13 | * This file is licenced under the GPL. | 13 | * This file is licenced under the GPL. |
14 | */ | 14 | */ |
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 24843fdd5395..59df132cc375 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c | |||
@@ -74,6 +74,7 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, | |||
74 | { | 74 | { |
75 | struct fb_info *info = vma->vm_private_data; | 75 | struct fb_info *info = vma->vm_private_data; |
76 | struct fb_deferred_io *fbdefio = info->fbdefio; | 76 | struct fb_deferred_io *fbdefio = info->fbdefio; |
77 | struct page *cur; | ||
77 | 78 | ||
78 | /* this is a callback we get when userspace first tries to | 79 | /* this is a callback we get when userspace first tries to |
79 | write to the page. we schedule a workqueue. that workqueue | 80 | write to the page. we schedule a workqueue. that workqueue |
@@ -83,7 +84,24 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, | |||
83 | 84 | ||
84 | /* protect against the workqueue changing the page list */ | 85 | /* protect against the workqueue changing the page list */ |
85 | mutex_lock(&fbdefio->lock); | 86 | mutex_lock(&fbdefio->lock); |
86 | list_add(&page->lru, &fbdefio->pagelist); | 87 | |
88 | /* we loop through the pagelist before adding in order | ||
89 | to keep the pagelist sorted */ | ||
90 | list_for_each_entry(cur, &fbdefio->pagelist, lru) { | ||
91 | /* this check is to catch the case where a new | ||
92 | process could start writing to the same page | ||
93 | through a new pte. this new access can cause the | ||
94 | mkwrite even when the original ps's pte is marked | ||
95 | writable */ | ||
96 | if (unlikely(cur == page)) | ||
97 | goto page_already_added; | ||
98 | else if (cur->index > page->index) | ||
99 | break; | ||
100 | } | ||
101 | |||
102 | list_add_tail(&page->lru, &cur->lru); | ||
103 | |||
104 | page_already_added: | ||
87 | mutex_unlock(&fbdefio->lock); | 105 | mutex_unlock(&fbdefio->lock); |
88 | 106 | ||
89 | /* come back after delay to process the deferred IO */ | 107 | /* come back after delay to process the deferred IO */ |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 34902cff5400..0e9fc2ba90ee 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -34,11 +34,11 @@ | |||
34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { | 34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { |
35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, | 35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, |
36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, | 36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, |
37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, | 37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, |
38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, | 38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, |
39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"}, | 39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"}, |
40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"}, | 40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"}, |
41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} } | 41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} } |
42 | ; | 42 | ; |
43 | 43 | ||
44 | 44 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 722be543ceec..2e904bd111c8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, | 219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, |
220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
221 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 221 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
222 | if (rc) { | 222 | if (rc == -EREMOTE && !is_dfs_referral) { |
223 | if (rc == -EREMOTE && !is_dfs_referral) { | 223 | is_dfs_referral = true; |
224 | is_dfs_referral = true; | 224 | cFYI(DBG2, ("DFS ref")); |
225 | cFYI(DBG2, ("DFS ref")); | 225 | /* for DFS, server does not give us real inode data */ |
226 | /* for DFS, server does not give us real inode data */ | 226 | fill_fake_finddataunix(&find_data, sb); |
227 | fill_fake_finddataunix(&find_data, sb); | 227 | rc = 0; |
228 | rc = 0; | 228 | } else if (rc) |
229 | } | 229 | goto cgiiu_exit; |
230 | } | 230 | |
231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); | 231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); |
232 | end_of_file = le64_to_cpu(find_data.EndOfFile); | 232 | end_of_file = le64_to_cpu(find_data.EndOfFile); |
233 | 233 | ||
@@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
236 | *pinode = new_inode(sb); | 236 | *pinode = new_inode(sb); |
237 | if (*pinode == NULL) { | 237 | if (*pinode == NULL) { |
238 | rc = -ENOMEM; | 238 | rc = -ENOMEM; |
239 | goto cgiiu_exit; | 239 | goto cgiiu_exit; |
240 | } | 240 | } |
241 | /* Is an i_ino of zero legal? */ | 241 | /* Is an i_ino of zero legal? */ |
242 | /* note ino incremented to unique num in new_inode */ | 242 | /* note ino incremented to unique num in new_inode */ |
@@ -610,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
610 | bprm->exec -= stack_shift; | 610 | bprm->exec -= stack_shift; |
611 | 611 | ||
612 | down_write(&mm->mmap_sem); | 612 | down_write(&mm->mmap_sem); |
613 | vm_flags = vma->vm_flags; | 613 | vm_flags = VM_STACK_FLAGS; |
614 | 614 | ||
615 | /* | 615 | /* |
616 | * Adjust stack execute permissions; explicitly enable for | 616 | * Adjust stack execute permissions; explicitly enable for |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 394d25a131a5..80e20d9f2780 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -1554,8 +1554,8 @@ out: | |||
1554 | */ | 1554 | */ |
1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) |
1556 | { | 1556 | { |
1557 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | 1557 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
1558 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | 1558 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; |
1559 | unsigned long flags; | 1559 | unsigned long flags; |
1560 | struct ocfs2_file_private *fp = file->private_data; | 1560 | struct ocfs2_file_private *fp = file->private_data; |
1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
@@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1582 | * Get the lock at NLMODE to start - that way we | 1582 | * Get the lock at NLMODE to start - that way we |
1583 | * can cancel the upconvert request if need be. | 1583 | * can cancel the upconvert request if need be. |
1584 | */ | 1584 | */ |
1585 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | 1585 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); |
1586 | if (ret < 0) { | 1586 | if (ret < 0) { |
1587 | mlog_errno(ret); | 1587 | mlog_errno(ret); |
1588 | goto out; | 1588 | goto out; |
@@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1597 | } | 1597 | } |
1598 | 1598 | ||
1599 | lockres->l_action = OCFS2_AST_CONVERT; | 1599 | lockres->l_action = OCFS2_AST_CONVERT; |
1600 | lkm_flags |= LKM_CONVERT; | 1600 | lkm_flags |= DLM_LKF_CONVERT; |
1601 | lockres->l_requested = level; | 1601 | lockres->l_requested = level; |
1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
1603 | 1603 | ||
@@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file) | |||
1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
1665 | return; | 1665 | return; |
1666 | 1666 | ||
1667 | if (lockres->l_level == LKM_NLMODE) | 1667 | if (lockres->l_level == DLM_LOCK_NL) |
1668 | return; | 1668 | return; |
1669 | 1669 | ||
1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", |
@@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file) | |||
1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
1679 | lockres->l_blocking = DLM_LOCK_EX; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
1680 | 1680 | ||
1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); |
1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1684 | 1684 | ||
1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); |
1686 | if (ret) { | 1686 | if (ret) { |
1687 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
1688 | return; | 1688 | return; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index afaee301b0ee..ad3d26ddfe31 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -2427,13 +2427,20 @@ restart: | |||
2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { | 2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { |
2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); | 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
2429 | 2429 | ||
2430 | /* If I'm the only one writing to this iclog, sync it to disk */ | 2430 | /* |
2431 | if (atomic_read(&iclog->ic_refcnt) == 1) { | 2431 | * If I'm the only one writing to this iclog, sync it to disk. |
2432 | * We need to do an atomic compare and decrement here to avoid | ||
2433 | * racing with concurrent atomic_dec_and_lock() calls in | ||
2434 | * xlog_state_release_iclog() when there is more than one | ||
2435 | * reference to the iclog. | ||
2436 | */ | ||
2437 | if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { | ||
2438 | /* we are the only one */ | ||
2432 | spin_unlock(&log->l_icloglock); | 2439 | spin_unlock(&log->l_icloglock); |
2433 | if ((error = xlog_state_release_iclog(log, iclog))) | 2440 | error = xlog_state_release_iclog(log, iclog); |
2441 | if (error) | ||
2434 | return error; | 2442 | return error; |
2435 | } else { | 2443 | } else { |
2436 | atomic_dec(&iclog->ic_refcnt); | ||
2437 | spin_unlock(&log->l_icloglock); | 2444 | spin_unlock(&log->l_icloglock); |
2438 | } | 2445 | } |
2439 | goto restart; | 2446 | goto restart; |
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h index ea3070ff13a5..ff5b7cf6be4d 100644 --- a/include/asm-avr32/setup.h +++ b/include/asm-avr32/setup.h | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2004-2006 Atmel Corporation | 2 | * Copyright (C) 2004-2006 Atmel Corporation |
3 | * | 3 | * |
4 | * Based on linux/include/asm-arm/setup.h | 4 | * Based on linux/include/asm-arm/setup.h |
5 | * Copyright (C) 1997-1999 Russel King | 5 | * Copyright (C) 1997-1999 Russell King |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h index d3a12a9079f7..7742ec000cc4 100644 --- a/include/asm-frv/system.h +++ b/include/asm-frv/system.h | |||
@@ -87,7 +87,7 @@ do { \ | |||
87 | } while(0) | 87 | } while(0) |
88 | 88 | ||
89 | #define irqs_disabled() \ | 89 | #define irqs_disabled() \ |
90 | ({unsigned long flags; local_save_flags(flags); flags; }) | 90 | ({unsigned long flags; local_save_flags(flags); !!flags; }) |
91 | 91 | ||
92 | #define local_irq_save(flags) \ | 92 | #define local_irq_save(flags) \ |
93 | do { \ | 93 | do { \ |
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 268a012bcd79..28bddbcb38be 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h | |||
@@ -192,8 +192,8 @@ static inline void native_set_ldt(const void *addr, unsigned int entries) | |||
192 | unsigned cpu = smp_processor_id(); | 192 | unsigned cpu = smp_processor_id(); |
193 | ldt_desc ldt; | 193 | ldt_desc ldt; |
194 | 194 | ||
195 | set_tssldt_descriptor(&ldt, (unsigned long)addr, | 195 | set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT, |
196 | DESC_LDT, entries * sizeof(ldt) - 1); | 196 | entries * LDT_ENTRY_SIZE - 1); |
197 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, | 197 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, |
198 | &ldt, DESC_LDT); | 198 | &ldt, DESC_LDT); |
199 | asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | 199 | asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 33a8f42041fa..f6cd60f2de63 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void); | |||
134 | extern unsigned long nr_uninterruptible(void); | 134 | extern unsigned long nr_uninterruptible(void); |
135 | extern unsigned long nr_active(void); | 135 | extern unsigned long nr_active(void); |
136 | extern unsigned long nr_iowait(void); | 136 | extern unsigned long nr_iowait(void); |
137 | extern unsigned long weighted_cpuload(const int cpu); | ||
138 | 137 | ||
139 | struct seq_file; | 138 | struct seq_file; |
140 | struct cfs_rq; | 139 | struct cfs_rq; |
@@ -784,6 +783,8 @@ struct sched_domain { | |||
784 | unsigned int balance_interval; /* initialise to 1. units in ms. */ | 783 | unsigned int balance_interval; /* initialise to 1. units in ms. */ |
785 | unsigned int nr_balance_failed; /* initialise to 0 */ | 784 | unsigned int nr_balance_failed; /* initialise to 0 */ |
786 | 785 | ||
786 | u64 last_update; | ||
787 | |||
787 | #ifdef CONFIG_SCHEDSTATS | 788 | #ifdef CONFIG_SCHEDSTATS |
788 | /* load_balance() stats */ | 789 | /* load_balance() stats */ |
789 | unsigned int lb_count[CPU_MAX_IDLE_TYPES]; | 790 | unsigned int lb_count[CPU_MAX_IDLE_TYPES]; |
@@ -823,23 +824,6 @@ extern int arch_reinit_sched_domains(void); | |||
823 | 824 | ||
824 | #endif /* CONFIG_SMP */ | 825 | #endif /* CONFIG_SMP */ |
825 | 826 | ||
826 | /* | ||
827 | * A runqueue laden with a single nice 0 task scores a weighted_cpuload of | ||
828 | * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a | ||
829 | * task of nice 0 or enough lower priority tasks to bring up the | ||
830 | * weighted_cpuload | ||
831 | */ | ||
832 | static inline int above_background_load(void) | ||
833 | { | ||
834 | unsigned long cpu; | ||
835 | |||
836 | for_each_online_cpu(cpu) { | ||
837 | if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) | ||
838 | return 1; | ||
839 | } | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | struct io_context; /* See blkdev.h */ | 827 | struct io_context; /* See blkdev.h */ |
844 | #define NGROUPS_SMALL 32 | 828 | #define NGROUPS_SMALL 32 |
845 | #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) | 829 | #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) |
@@ -921,8 +905,8 @@ struct sched_class { | |||
921 | void (*set_cpus_allowed)(struct task_struct *p, | 905 | void (*set_cpus_allowed)(struct task_struct *p, |
922 | const cpumask_t *newmask); | 906 | const cpumask_t *newmask); |
923 | 907 | ||
924 | void (*join_domain)(struct rq *rq); | 908 | void (*rq_online)(struct rq *rq); |
925 | void (*leave_domain)(struct rq *rq); | 909 | void (*rq_offline)(struct rq *rq); |
926 | 910 | ||
927 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, | 911 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, |
928 | int running); | 912 | int running); |
@@ -1039,6 +1023,7 @@ struct task_struct { | |||
1039 | #endif | 1023 | #endif |
1040 | 1024 | ||
1041 | int prio, static_prio, normal_prio; | 1025 | int prio, static_prio, normal_prio; |
1026 | unsigned int rt_priority; | ||
1042 | const struct sched_class *sched_class; | 1027 | const struct sched_class *sched_class; |
1043 | struct sched_entity se; | 1028 | struct sched_entity se; |
1044 | struct sched_rt_entity rt; | 1029 | struct sched_rt_entity rt; |
@@ -1122,7 +1107,6 @@ struct task_struct { | |||
1122 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ | 1107 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ |
1123 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ | 1108 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ |
1124 | 1109 | ||
1125 | unsigned int rt_priority; | ||
1126 | cputime_t utime, stime, utimescaled, stimescaled; | 1110 | cputime_t utime, stime, utimescaled, stimescaled; |
1127 | cputime_t gtime; | 1111 | cputime_t gtime; |
1128 | cputime_t prev_utime, prev_stime; | 1112 | cputime_t prev_utime, prev_stime; |
@@ -1141,12 +1125,12 @@ struct task_struct { | |||
1141 | gid_t gid,egid,sgid,fsgid; | 1125 | gid_t gid,egid,sgid,fsgid; |
1142 | struct group_info *group_info; | 1126 | struct group_info *group_info; |
1143 | kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; | 1127 | kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; |
1144 | unsigned securebits; | ||
1145 | struct user_struct *user; | 1128 | struct user_struct *user; |
1129 | unsigned securebits; | ||
1146 | #ifdef CONFIG_KEYS | 1130 | #ifdef CONFIG_KEYS |
1131 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ | ||
1147 | struct key *request_key_auth; /* assumed request_key authority */ | 1132 | struct key *request_key_auth; /* assumed request_key authority */ |
1148 | struct key *thread_keyring; /* keyring private to this thread */ | 1133 | struct key *thread_keyring; /* keyring private to this thread */ |
1149 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ | ||
1150 | #endif | 1134 | #endif |
1151 | char comm[TASK_COMM_LEN]; /* executable name excluding path | 1135 | char comm[TASK_COMM_LEN]; /* executable name excluding path |
1152 | - access with [gs]et_task_comm (which lock | 1136 | - access with [gs]et_task_comm (which lock |
@@ -1233,8 +1217,8 @@ struct task_struct { | |||
1233 | # define MAX_LOCK_DEPTH 48UL | 1217 | # define MAX_LOCK_DEPTH 48UL |
1234 | u64 curr_chain_key; | 1218 | u64 curr_chain_key; |
1235 | int lockdep_depth; | 1219 | int lockdep_depth; |
1236 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | ||
1237 | unsigned int lockdep_recursion; | 1220 | unsigned int lockdep_recursion; |
1221 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | ||
1238 | #endif | 1222 | #endif |
1239 | 1223 | ||
1240 | /* journalling filesystem info */ | 1224 | /* journalling filesystem info */ |
@@ -1262,10 +1246,6 @@ struct task_struct { | |||
1262 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 1246 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
1263 | cputime_t acct_stimexpd;/* stime since last update */ | 1247 | cputime_t acct_stimexpd;/* stime since last update */ |
1264 | #endif | 1248 | #endif |
1265 | #ifdef CONFIG_NUMA | ||
1266 | struct mempolicy *mempolicy; | ||
1267 | short il_next; | ||
1268 | #endif | ||
1269 | #ifdef CONFIG_CPUSETS | 1249 | #ifdef CONFIG_CPUSETS |
1270 | nodemask_t mems_allowed; | 1250 | nodemask_t mems_allowed; |
1271 | int cpuset_mems_generation; | 1251 | int cpuset_mems_generation; |
@@ -1285,6 +1265,10 @@ struct task_struct { | |||
1285 | struct list_head pi_state_list; | 1265 | struct list_head pi_state_list; |
1286 | struct futex_pi_state *pi_state_cache; | 1266 | struct futex_pi_state *pi_state_cache; |
1287 | #endif | 1267 | #endif |
1268 | #ifdef CONFIG_NUMA | ||
1269 | struct mempolicy *mempolicy; | ||
1270 | short il_next; | ||
1271 | #endif | ||
1288 | atomic_t fs_excl; /* holding fs exclusive resources */ | 1272 | atomic_t fs_excl; /* holding fs exclusive resources */ |
1289 | struct rcu_head rcu; | 1273 | struct rcu_head rcu; |
1290 | 1274 | ||
@@ -1504,6 +1488,7 @@ static inline void put_task_struct(struct task_struct *t) | |||
1504 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ | 1488 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
1505 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ | 1489 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
1506 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ | 1490 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
1491 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ | ||
1507 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | 1492 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
1508 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ | 1493 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
1509 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ | 1494 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ |
@@ -1637,6 +1622,7 @@ extern unsigned int sysctl_sched_child_runs_first; | |||
1637 | extern unsigned int sysctl_sched_features; | 1622 | extern unsigned int sysctl_sched_features; |
1638 | extern unsigned int sysctl_sched_migration_cost; | 1623 | extern unsigned int sysctl_sched_migration_cost; |
1639 | extern unsigned int sysctl_sched_nr_migrate; | 1624 | extern unsigned int sysctl_sched_nr_migrate; |
1625 | extern unsigned int sysctl_sched_shares_ratelimit; | ||
1640 | 1626 | ||
1641 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 1627 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
1642 | struct file *file, void __user *buffer, size_t *length, | 1628 | struct file *file, void __user *buffer, size_t *length, |
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 2ca6bae88721..fb0c215a3051 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h | |||
@@ -339,6 +339,7 @@ struct xfrm_usersa_info { | |||
339 | #define XFRM_STATE_NOPMTUDISC 4 | 339 | #define XFRM_STATE_NOPMTUDISC 4 |
340 | #define XFRM_STATE_WILDRECV 8 | 340 | #define XFRM_STATE_WILDRECV 8 |
341 | #define XFRM_STATE_ICMP 16 | 341 | #define XFRM_STATE_ICMP 16 |
342 | #define XFRM_STATE_AF_UNSPEC 32 | ||
342 | }; | 343 | }; |
343 | 344 | ||
344 | struct xfrm_usersa_id { | 345 | struct xfrm_usersa_id { |
diff --git a/kernel/Makefile b/kernel/Makefile index 1c9938addb9d..6c55301112e0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ |
6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |||
27 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 27 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
28 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o | 28 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o |
29 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o | 29 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o |
30 | obj-$(CONFIG_SMP) += cpu.o spinlock.o | 30 | obj-$(CONFIG_SMP) += spinlock.o |
31 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 31 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
32 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 32 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
33 | obj-$(CONFIG_UID16) += uid16.o | 33 | obj-$(CONFIG_UID16) += uid16.o |
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | |||
69 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 69 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
70 | obj-$(CONFIG_MARKERS) += marker.o | 70 | obj-$(CONFIG_MARKERS) += marker.o |
71 | obj-$(CONFIG_LATENCYTOP) += latencytop.o | 71 | obj-$(CONFIG_LATENCYTOP) += latencytop.o |
72 | obj-$(CONFIG_SMP) += sched_cpupri.o | ||
72 | 73 | ||
73 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 74 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
74 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 75 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/cpu.c b/kernel/cpu.c index c77bc3a1c722..b11f06dc149a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -15,6 +15,28 @@ | |||
15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | 17 | ||
18 | /* | ||
19 | * Represents all cpu's present in the system | ||
20 | * In systems capable of hotplug, this map could dynamically grow | ||
21 | * as new cpu's are detected in the system via any platform specific | ||
22 | * method, such as ACPI for e.g. | ||
23 | */ | ||
24 | cpumask_t cpu_present_map __read_mostly; | ||
25 | EXPORT_SYMBOL(cpu_present_map); | ||
26 | |||
27 | #ifndef CONFIG_SMP | ||
28 | |||
29 | /* | ||
30 | * Represents all cpu's that are currently online. | ||
31 | */ | ||
32 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | ||
33 | EXPORT_SYMBOL(cpu_online_map); | ||
34 | |||
35 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | ||
36 | EXPORT_SYMBOL(cpu_possible_map); | ||
37 | |||
38 | #else /* CONFIG_SMP */ | ||
39 | |||
18 | /* Serializes the updates to cpu_online_map, cpu_present_map */ | 40 | /* Serializes the updates to cpu_online_map, cpu_present_map */ |
19 | static DEFINE_MUTEX(cpu_add_remove_lock); | 41 | static DEFINE_MUTEX(cpu_add_remove_lock); |
20 | 42 | ||
@@ -403,3 +425,5 @@ out: | |||
403 | cpu_maps_update_done(); | 425 | cpu_maps_update_done(); |
404 | } | 426 | } |
405 | #endif /* CONFIG_PM_SLEEP_SMP */ | 427 | #endif /* CONFIG_PM_SLEEP_SMP */ |
428 | |||
429 | #endif /* CONFIG_SMP */ | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fceb97e989c..64a05da9bc4c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, | |||
1194 | 1194 | ||
1195 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1195 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
1196 | return -ENOSPC; | 1196 | return -ENOSPC; |
1197 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1198 | cpumask_t mask; | ||
1199 | |||
1200 | mutex_lock(&callback_mutex); | ||
1201 | mask = cs->cpus_allowed; | ||
1202 | mutex_unlock(&callback_mutex); | ||
1203 | if (!cpus_equal(tsk->cpus_allowed, mask)) | ||
1204 | return -EINVAL; | ||
1205 | } | ||
1197 | 1206 | ||
1198 | return security_task_setscheduler(tsk, 0, NULL); | 1207 | return security_task_setscheduler(tsk, 0, NULL); |
1199 | } | 1208 | } |
@@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1207 | struct mm_struct *mm; | 1216 | struct mm_struct *mm; |
1208 | struct cpuset *cs = cgroup_cs(cont); | 1217 | struct cpuset *cs = cgroup_cs(cont); |
1209 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1218 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1219 | int err; | ||
1210 | 1220 | ||
1211 | mutex_lock(&callback_mutex); | 1221 | mutex_lock(&callback_mutex); |
1212 | guarantee_online_cpus(cs, &cpus); | 1222 | guarantee_online_cpus(cs, &cpus); |
1213 | set_cpus_allowed_ptr(tsk, &cpus); | 1223 | err = set_cpus_allowed_ptr(tsk, &cpus); |
1214 | mutex_unlock(&callback_mutex); | 1224 | mutex_unlock(&callback_mutex); |
1225 | if (err) | ||
1226 | return; | ||
1215 | 1227 | ||
1216 | from = oldcs->mems_allowed; | 1228 | from = oldcs->mems_allowed; |
1217 | to = cs->mems_allowed; | 1229 | to = cs->mems_allowed; |
diff --git a/kernel/kthread.c b/kernel/kthread.c index bd1b9ea024e1..97747cdd37c9 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) | |||
180 | set_task_cpu(k, cpu); | 180 | set_task_cpu(k, cpu); |
181 | k->cpus_allowed = cpumask_of_cpu(cpu); | 181 | k->cpus_allowed = cpumask_of_cpu(cpu); |
182 | k->rt.nr_cpus_allowed = 1; | 182 | k->rt.nr_cpus_allowed = 1; |
183 | k->flags |= PF_THREAD_BOUND; | ||
183 | } | 184 | } |
184 | EXPORT_SYMBOL(kthread_bind); | 185 | EXPORT_SYMBOL(kthread_bind); |
185 | 186 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 94ead43eda62..d16c8d9fbd8b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -74,6 +74,8 @@ | |||
74 | #include <asm/tlb.h> | 74 | #include <asm/tlb.h> |
75 | #include <asm/irq_regs.h> | 75 | #include <asm/irq_regs.h> |
76 | 76 | ||
77 | #include "sched_cpupri.h" | ||
78 | |||
77 | /* | 79 | /* |
78 | * Convert user-nice values [ -20 ... 0 ... 19 ] | 80 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
79 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | 81 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], |
@@ -289,15 +291,15 @@ struct task_group root_task_group; | |||
289 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 291 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
290 | /* Default task group's cfs_rq on each cpu */ | 292 | /* Default task group's cfs_rq on each cpu */ |
291 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 293 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
292 | #endif | 294 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
293 | 295 | ||
294 | #ifdef CONFIG_RT_GROUP_SCHED | 296 | #ifdef CONFIG_RT_GROUP_SCHED |
295 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 297 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
296 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | 298 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; |
297 | #endif | 299 | #endif /* CONFIG_RT_GROUP_SCHED */ |
298 | #else | 300 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
299 | #define root_task_group init_task_group | 301 | #define root_task_group init_task_group |
300 | #endif | 302 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
301 | 303 | ||
302 | /* task_group_lock serializes add/remove of task groups and also changes to | 304 | /* task_group_lock serializes add/remove of task groups and also changes to |
303 | * a task group's cpu shares. | 305 | * a task group's cpu shares. |
@@ -307,9 +309,9 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
307 | #ifdef CONFIG_FAIR_GROUP_SCHED | 309 | #ifdef CONFIG_FAIR_GROUP_SCHED |
308 | #ifdef CONFIG_USER_SCHED | 310 | #ifdef CONFIG_USER_SCHED |
309 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 311 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
310 | #else | 312 | #else /* !CONFIG_USER_SCHED */ |
311 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 313 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
312 | #endif | 314 | #endif /* CONFIG_USER_SCHED */ |
313 | 315 | ||
314 | /* | 316 | /* |
315 | * A weight of 0 or 1 can cause arithmetics problems. | 317 | * A weight of 0 or 1 can cause arithmetics problems. |
@@ -363,6 +365,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
363 | #else | 365 | #else |
364 | 366 | ||
365 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 367 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
368 | static inline struct task_group *task_group(struct task_struct *p) | ||
369 | { | ||
370 | return NULL; | ||
371 | } | ||
366 | 372 | ||
367 | #endif /* CONFIG_GROUP_SCHED */ | 373 | #endif /* CONFIG_GROUP_SCHED */ |
368 | 374 | ||
@@ -373,6 +379,7 @@ struct cfs_rq { | |||
373 | 379 | ||
374 | u64 exec_clock; | 380 | u64 exec_clock; |
375 | u64 min_vruntime; | 381 | u64 min_vruntime; |
382 | u64 pair_start; | ||
376 | 383 | ||
377 | struct rb_root tasks_timeline; | 384 | struct rb_root tasks_timeline; |
378 | struct rb_node *rb_leftmost; | 385 | struct rb_node *rb_leftmost; |
@@ -401,6 +408,31 @@ struct cfs_rq { | |||
401 | */ | 408 | */ |
402 | struct list_head leaf_cfs_rq_list; | 409 | struct list_head leaf_cfs_rq_list; |
403 | struct task_group *tg; /* group that "owns" this runqueue */ | 410 | struct task_group *tg; /* group that "owns" this runqueue */ |
411 | |||
412 | #ifdef CONFIG_SMP | ||
413 | /* | ||
414 | * the part of load.weight contributed by tasks | ||
415 | */ | ||
416 | unsigned long task_weight; | ||
417 | |||
418 | /* | ||
419 | * h_load = weight * f(tg) | ||
420 | * | ||
421 | * Where f(tg) is the recursive weight fraction assigned to | ||
422 | * this group. | ||
423 | */ | ||
424 | unsigned long h_load; | ||
425 | |||
426 | /* | ||
427 | * this cpu's part of tg->shares | ||
428 | */ | ||
429 | unsigned long shares; | ||
430 | |||
431 | /* | ||
432 | * load.weight at the time we set shares | ||
433 | */ | ||
434 | unsigned long rq_weight; | ||
435 | #endif | ||
404 | #endif | 436 | #endif |
405 | }; | 437 | }; |
406 | 438 | ||
@@ -452,6 +484,9 @@ struct root_domain { | |||
452 | */ | 484 | */ |
453 | cpumask_t rto_mask; | 485 | cpumask_t rto_mask; |
454 | atomic_t rto_count; | 486 | atomic_t rto_count; |
487 | #ifdef CONFIG_SMP | ||
488 | struct cpupri cpupri; | ||
489 | #endif | ||
455 | }; | 490 | }; |
456 | 491 | ||
457 | /* | 492 | /* |
@@ -526,6 +561,9 @@ struct rq { | |||
526 | int push_cpu; | 561 | int push_cpu; |
527 | /* cpu of this runqueue: */ | 562 | /* cpu of this runqueue: */ |
528 | int cpu; | 563 | int cpu; |
564 | int online; | ||
565 | |||
566 | unsigned long avg_load_per_task; | ||
529 | 567 | ||
530 | struct task_struct *migration_thread; | 568 | struct task_struct *migration_thread; |
531 | struct list_head migration_queue; | 569 | struct list_head migration_queue; |
@@ -749,6 +787,12 @@ late_initcall(sched_init_debug); | |||
749 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 787 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
750 | 788 | ||
751 | /* | 789 | /* |
790 | * ratelimit for updating the group shares. | ||
791 | * default: 0.5ms | ||
792 | */ | ||
793 | const_debug unsigned int sysctl_sched_shares_ratelimit = 500000; | ||
794 | |||
795 | /* | ||
752 | * period over which we measure -rt task cpu usage in us. | 796 | * period over which we measure -rt task cpu usage in us. |
753 | * default: 1s | 797 | * default: 1s |
754 | */ | 798 | */ |
@@ -775,82 +819,6 @@ static inline u64 global_rt_runtime(void) | |||
775 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | 819 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; |
776 | } | 820 | } |
777 | 821 | ||
778 | unsigned long long time_sync_thresh = 100000; | ||
779 | |||
780 | static DEFINE_PER_CPU(unsigned long long, time_offset); | ||
781 | static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); | ||
782 | |||
783 | /* | ||
784 | * Global lock which we take every now and then to synchronize | ||
785 | * the CPUs time. This method is not warp-safe, but it's good | ||
786 | * enough to synchronize slowly diverging time sources and thus | ||
787 | * it's good enough for tracing: | ||
788 | */ | ||
789 | static DEFINE_SPINLOCK(time_sync_lock); | ||
790 | static unsigned long long prev_global_time; | ||
791 | |||
792 | static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu) | ||
793 | { | ||
794 | /* | ||
795 | * We want this inlined, to not get tracer function calls | ||
796 | * in this critical section: | ||
797 | */ | ||
798 | spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_); | ||
799 | __raw_spin_lock(&time_sync_lock.raw_lock); | ||
800 | |||
801 | if (time < prev_global_time) { | ||
802 | per_cpu(time_offset, cpu) += prev_global_time - time; | ||
803 | time = prev_global_time; | ||
804 | } else { | ||
805 | prev_global_time = time; | ||
806 | } | ||
807 | |||
808 | __raw_spin_unlock(&time_sync_lock.raw_lock); | ||
809 | spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_); | ||
810 | |||
811 | return time; | ||
812 | } | ||
813 | |||
814 | static unsigned long long __cpu_clock(int cpu) | ||
815 | { | ||
816 | unsigned long long now; | ||
817 | |||
818 | /* | ||
819 | * Only call sched_clock() if the scheduler has already been | ||
820 | * initialized (some code might call cpu_clock() very early): | ||
821 | */ | ||
822 | if (unlikely(!scheduler_running)) | ||
823 | return 0; | ||
824 | |||
825 | now = sched_clock_cpu(cpu); | ||
826 | |||
827 | return now; | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | ||
832 | * clock constructed from sched_clock(): | ||
833 | */ | ||
834 | unsigned long long cpu_clock(int cpu) | ||
835 | { | ||
836 | unsigned long long prev_cpu_time, time, delta_time; | ||
837 | unsigned long flags; | ||
838 | |||
839 | local_irq_save(flags); | ||
840 | prev_cpu_time = per_cpu(prev_cpu_time, cpu); | ||
841 | time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); | ||
842 | delta_time = time-prev_cpu_time; | ||
843 | |||
844 | if (unlikely(delta_time > time_sync_thresh)) { | ||
845 | time = __sync_cpu_clock(time, cpu); | ||
846 | per_cpu(prev_cpu_time, cpu) = time; | ||
847 | } | ||
848 | local_irq_restore(flags); | ||
849 | |||
850 | return time; | ||
851 | } | ||
852 | EXPORT_SYMBOL_GPL(cpu_clock); | ||
853 | |||
854 | #ifndef prepare_arch_switch | 822 | #ifndef prepare_arch_switch |
855 | # define prepare_arch_switch(next) do { } while (0) | 823 | # define prepare_arch_switch(next) do { } while (0) |
856 | #endif | 824 | #endif |
@@ -1313,15 +1281,15 @@ void wake_up_idle_cpu(int cpu) | |||
1313 | if (!tsk_is_polling(rq->idle)) | 1281 | if (!tsk_is_polling(rq->idle)) |
1314 | smp_send_reschedule(cpu); | 1282 | smp_send_reschedule(cpu); |
1315 | } | 1283 | } |
1316 | #endif | 1284 | #endif /* CONFIG_NO_HZ */ |
1317 | 1285 | ||
1318 | #else | 1286 | #else /* !CONFIG_SMP */ |
1319 | static void __resched_task(struct task_struct *p, int tif_bit) | 1287 | static void __resched_task(struct task_struct *p, int tif_bit) |
1320 | { | 1288 | { |
1321 | assert_spin_locked(&task_rq(p)->lock); | 1289 | assert_spin_locked(&task_rq(p)->lock); |
1322 | set_tsk_thread_flag(p, tif_bit); | 1290 | set_tsk_thread_flag(p, tif_bit); |
1323 | } | 1291 | } |
1324 | #endif | 1292 | #endif /* CONFIG_SMP */ |
1325 | 1293 | ||
1326 | #if BITS_PER_LONG == 32 | 1294 | #if BITS_PER_LONG == 32 |
1327 | # define WMULT_CONST (~0UL) | 1295 | # define WMULT_CONST (~0UL) |
@@ -1336,6 +1304,9 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1336 | */ | 1304 | */ |
1337 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1305 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1338 | 1306 | ||
1307 | /* | ||
1308 | * delta *= weight / lw | ||
1309 | */ | ||
1339 | static unsigned long | 1310 | static unsigned long |
1340 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1311 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1341 | struct load_weight *lw) | 1312 | struct load_weight *lw) |
@@ -1363,12 +1334,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1363 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1334 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1364 | } | 1335 | } |
1365 | 1336 | ||
1366 | static inline unsigned long | ||
1367 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1368 | { | ||
1369 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1370 | } | ||
1371 | |||
1372 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1337 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1373 | { | 1338 | { |
1374 | lw->weight += inc; | 1339 | lw->weight += inc; |
@@ -1479,17 +1444,211 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) | |||
1479 | #ifdef CONFIG_SMP | 1444 | #ifdef CONFIG_SMP |
1480 | static unsigned long source_load(int cpu, int type); | 1445 | static unsigned long source_load(int cpu, int type); |
1481 | static unsigned long target_load(int cpu, int type); | 1446 | static unsigned long target_load(int cpu, int type); |
1482 | static unsigned long cpu_avg_load_per_task(int cpu); | ||
1483 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1447 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1484 | #else /* CONFIG_SMP */ | 1448 | |
1449 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
1450 | { | ||
1451 | struct rq *rq = cpu_rq(cpu); | ||
1452 | |||
1453 | if (rq->nr_running) | ||
1454 | rq->avg_load_per_task = rq->load.weight / rq->nr_running; | ||
1455 | |||
1456 | return rq->avg_load_per_task; | ||
1457 | } | ||
1485 | 1458 | ||
1486 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1459 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1487 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | 1460 | |
1461 | typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); | ||
1462 | |||
1463 | /* | ||
1464 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1465 | * leaving it for the final time. | ||
1466 | */ | ||
1467 | static void | ||
1468 | walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) | ||
1488 | { | 1469 | { |
1470 | struct task_group *parent, *child; | ||
1471 | |||
1472 | rcu_read_lock(); | ||
1473 | parent = &root_task_group; | ||
1474 | down: | ||
1475 | (*down)(parent, cpu, sd); | ||
1476 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
1477 | parent = child; | ||
1478 | goto down; | ||
1479 | |||
1480 | up: | ||
1481 | continue; | ||
1482 | } | ||
1483 | (*up)(parent, cpu, sd); | ||
1484 | |||
1485 | child = parent; | ||
1486 | parent = parent->parent; | ||
1487 | if (parent) | ||
1488 | goto up; | ||
1489 | rcu_read_unlock(); | ||
1490 | } | ||
1491 | |||
1492 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1493 | |||
1494 | /* | ||
1495 | * Calculate and set the cpu's group shares. | ||
1496 | */ | ||
1497 | static void | ||
1498 | __update_group_shares_cpu(struct task_group *tg, int cpu, | ||
1499 | unsigned long sd_shares, unsigned long sd_rq_weight) | ||
1500 | { | ||
1501 | int boost = 0; | ||
1502 | unsigned long shares; | ||
1503 | unsigned long rq_weight; | ||
1504 | |||
1505 | if (!tg->se[cpu]) | ||
1506 | return; | ||
1507 | |||
1508 | rq_weight = tg->cfs_rq[cpu]->load.weight; | ||
1509 | |||
1510 | /* | ||
1511 | * If there are currently no tasks on the cpu pretend there is one of | ||
1512 | * average load so that when a new task gets to run here it will not | ||
1513 | * get delayed by group starvation. | ||
1514 | */ | ||
1515 | if (!rq_weight) { | ||
1516 | boost = 1; | ||
1517 | rq_weight = NICE_0_LOAD; | ||
1518 | } | ||
1519 | |||
1520 | if (unlikely(rq_weight > sd_rq_weight)) | ||
1521 | rq_weight = sd_rq_weight; | ||
1522 | |||
1523 | /* | ||
1524 | * \Sum shares * rq_weight | ||
1525 | * shares = ----------------------- | ||
1526 | * \Sum rq_weight | ||
1527 | * | ||
1528 | */ | ||
1529 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | ||
1530 | |||
1531 | /* | ||
1532 | * record the actual number of shares, not the boosted amount. | ||
1533 | */ | ||
1534 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1535 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1536 | |||
1537 | if (shares < MIN_SHARES) | ||
1538 | shares = MIN_SHARES; | ||
1539 | else if (shares > MAX_SHARES) | ||
1540 | shares = MAX_SHARES; | ||
1541 | |||
1542 | __set_se_shares(tg->se[cpu], shares); | ||
1543 | } | ||
1544 | |||
1545 | /* | ||
1546 | * Re-compute the task group's per-cpu shares over the given domain. | ||
1547 | * This needs to be done in a bottom-up fashion because the rq weight of a | ||
1548 | * parent group depends on the shares of its child groups. | ||
1549 | */ | ||
1550 | static void | ||
1551 | tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1552 | { | ||
1553 | unsigned long rq_weight = 0; | ||
1554 | unsigned long shares = 0; | ||
1555 | int i; | ||
1556 | |||
1557 | for_each_cpu_mask(i, sd->span) { | ||
1558 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
1559 | shares += tg->cfs_rq[i]->shares; | ||
1560 | } | ||
1561 | |||
1562 | if ((!shares && rq_weight) || shares > tg->shares) | ||
1563 | shares = tg->shares; | ||
1564 | |||
1565 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | ||
1566 | shares = tg->shares; | ||
1567 | |||
1568 | if (!rq_weight) | ||
1569 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | ||
1570 | |||
1571 | for_each_cpu_mask(i, sd->span) { | ||
1572 | struct rq *rq = cpu_rq(i); | ||
1573 | unsigned long flags; | ||
1574 | |||
1575 | spin_lock_irqsave(&rq->lock, flags); | ||
1576 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
1577 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1578 | } | ||
1579 | } | ||
1580 | |||
1581 | /* | ||
1582 | * Compute the cpu's hierarchical load factor for each task group. | ||
1583 | * This needs to be done in a top-down fashion because the load of a child | ||
1584 | * group is a fraction of its parent's load. | ||
1585 | */ | ||
1586 | static void | ||
1587 | tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1588 | { | ||
1589 | unsigned long load; | ||
1590 | |||
1591 | if (!tg->parent) { | ||
1592 | load = cpu_rq(cpu)->load.weight; | ||
1593 | } else { | ||
1594 | load = tg->parent->cfs_rq[cpu]->h_load; | ||
1595 | load *= tg->cfs_rq[cpu]->shares; | ||
1596 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | ||
1597 | } | ||
1598 | |||
1599 | tg->cfs_rq[cpu]->h_load = load; | ||
1600 | } | ||
1601 | |||
1602 | static void | ||
1603 | tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1604 | { | ||
1605 | } | ||
1606 | |||
1607 | static void update_shares(struct sched_domain *sd) | ||
1608 | { | ||
1609 | u64 now = cpu_clock(raw_smp_processor_id()); | ||
1610 | s64 elapsed = now - sd->last_update; | ||
1611 | |||
1612 | if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { | ||
1613 | sd->last_update = now; | ||
1614 | walk_tg_tree(tg_nop, tg_shares_up, 0, sd); | ||
1615 | } | ||
1489 | } | 1616 | } |
1617 | |||
1618 | static void update_shares_locked(struct rq *rq, struct sched_domain *sd) | ||
1619 | { | ||
1620 | spin_unlock(&rq->lock); | ||
1621 | update_shares(sd); | ||
1622 | spin_lock(&rq->lock); | ||
1623 | } | ||
1624 | |||
1625 | static void update_h_load(int cpu) | ||
1626 | { | ||
1627 | walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); | ||
1628 | } | ||
1629 | |||
1630 | #else | ||
1631 | |||
1632 | static inline void update_shares(struct sched_domain *sd) | ||
1633 | { | ||
1634 | } | ||
1635 | |||
1636 | static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | ||
1637 | { | ||
1638 | } | ||
1639 | |||
1490 | #endif | 1640 | #endif |
1491 | 1641 | ||
1492 | #endif /* CONFIG_SMP */ | 1642 | #endif |
1643 | |||
1644 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1645 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1646 | { | ||
1647 | #ifdef CONFIG_SMP | ||
1648 | cfs_rq->shares = shares; | ||
1649 | #endif | ||
1650 | } | ||
1651 | #endif | ||
1493 | 1652 | ||
1494 | #include "sched_stats.h" | 1653 | #include "sched_stats.h" |
1495 | #include "sched_idletask.c" | 1654 | #include "sched_idletask.c" |
@@ -1500,27 +1659,17 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1500 | #endif | 1659 | #endif |
1501 | 1660 | ||
1502 | #define sched_class_highest (&rt_sched_class) | 1661 | #define sched_class_highest (&rt_sched_class) |
1662 | #define for_each_class(class) \ | ||
1663 | for (class = sched_class_highest; class; class = class->next) | ||
1503 | 1664 | ||
1504 | static inline void inc_load(struct rq *rq, const struct task_struct *p) | 1665 | static void inc_nr_running(struct rq *rq) |
1505 | { | ||
1506 | update_load_add(&rq->load, p->se.load.weight); | ||
1507 | } | ||
1508 | |||
1509 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
1510 | { | ||
1511 | update_load_sub(&rq->load, p->se.load.weight); | ||
1512 | } | ||
1513 | |||
1514 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
1515 | { | 1666 | { |
1516 | rq->nr_running++; | 1667 | rq->nr_running++; |
1517 | inc_load(rq, p); | ||
1518 | } | 1668 | } |
1519 | 1669 | ||
1520 | static void dec_nr_running(struct task_struct *p, struct rq *rq) | 1670 | static void dec_nr_running(struct rq *rq) |
1521 | { | 1671 | { |
1522 | rq->nr_running--; | 1672 | rq->nr_running--; |
1523 | dec_load(rq, p); | ||
1524 | } | 1673 | } |
1525 | 1674 | ||
1526 | static void set_load_weight(struct task_struct *p) | 1675 | static void set_load_weight(struct task_struct *p) |
@@ -1544,6 +1693,12 @@ static void set_load_weight(struct task_struct *p) | |||
1544 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; | 1693 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; |
1545 | } | 1694 | } |
1546 | 1695 | ||
1696 | static void update_avg(u64 *avg, u64 sample) | ||
1697 | { | ||
1698 | s64 diff = sample - *avg; | ||
1699 | *avg += diff >> 3; | ||
1700 | } | ||
1701 | |||
1547 | static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) | 1702 | static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) |
1548 | { | 1703 | { |
1549 | sched_info_queued(p); | 1704 | sched_info_queued(p); |
@@ -1553,6 +1708,13 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1553 | 1708 | ||
1554 | static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) | 1709 | static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) |
1555 | { | 1710 | { |
1711 | if (sleep && p->se.last_wakeup) { | ||
1712 | update_avg(&p->se.avg_overlap, | ||
1713 | p->se.sum_exec_runtime - p->se.last_wakeup); | ||
1714 | p->se.last_wakeup = 0; | ||
1715 | } | ||
1716 | |||
1717 | sched_info_dequeued(p); | ||
1556 | p->sched_class->dequeue_task(rq, p, sleep); | 1718 | p->sched_class->dequeue_task(rq, p, sleep); |
1557 | p->se.on_rq = 0; | 1719 | p->se.on_rq = 0; |
1558 | } | 1720 | } |
@@ -1612,7 +1774,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1612 | rq->nr_uninterruptible--; | 1774 | rq->nr_uninterruptible--; |
1613 | 1775 | ||
1614 | enqueue_task(rq, p, wakeup); | 1776 | enqueue_task(rq, p, wakeup); |
1615 | inc_nr_running(p, rq); | 1777 | inc_nr_running(rq); |
1616 | } | 1778 | } |
1617 | 1779 | ||
1618 | /* | 1780 | /* |
@@ -1624,7 +1786,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1624 | rq->nr_uninterruptible++; | 1786 | rq->nr_uninterruptible++; |
1625 | 1787 | ||
1626 | dequeue_task(rq, p, sleep); | 1788 | dequeue_task(rq, p, sleep); |
1627 | dec_nr_running(p, rq); | 1789 | dec_nr_running(rq); |
1628 | } | 1790 | } |
1629 | 1791 | ||
1630 | /** | 1792 | /** |
@@ -1636,12 +1798,6 @@ inline int task_curr(const struct task_struct *p) | |||
1636 | return cpu_curr(task_cpu(p)) == p; | 1798 | return cpu_curr(task_cpu(p)) == p; |
1637 | } | 1799 | } |
1638 | 1800 | ||
1639 | /* Used instead of source_load when we know the type == 0 */ | ||
1640 | unsigned long weighted_cpuload(const int cpu) | ||
1641 | { | ||
1642 | return cpu_rq(cpu)->load.weight; | ||
1643 | } | ||
1644 | |||
1645 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | 1801 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) |
1646 | { | 1802 | { |
1647 | set_task_rq(p, cpu); | 1803 | set_task_rq(p, cpu); |
@@ -1670,6 +1826,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1670 | 1826 | ||
1671 | #ifdef CONFIG_SMP | 1827 | #ifdef CONFIG_SMP |
1672 | 1828 | ||
1829 | /* Used instead of source_load when we know the type == 0 */ | ||
1830 | static unsigned long weighted_cpuload(const int cpu) | ||
1831 | { | ||
1832 | return cpu_rq(cpu)->load.weight; | ||
1833 | } | ||
1834 | |||
1673 | /* | 1835 | /* |
1674 | * Is this task likely cache-hot: | 1836 | * Is this task likely cache-hot: |
1675 | */ | 1837 | */ |
@@ -1880,7 +2042,7 @@ static unsigned long source_load(int cpu, int type) | |||
1880 | struct rq *rq = cpu_rq(cpu); | 2042 | struct rq *rq = cpu_rq(cpu); |
1881 | unsigned long total = weighted_cpuload(cpu); | 2043 | unsigned long total = weighted_cpuload(cpu); |
1882 | 2044 | ||
1883 | if (type == 0) | 2045 | if (type == 0 || !sched_feat(LB_BIAS)) |
1884 | return total; | 2046 | return total; |
1885 | 2047 | ||
1886 | return min(rq->cpu_load[type-1], total); | 2048 | return min(rq->cpu_load[type-1], total); |
@@ -1895,25 +2057,13 @@ static unsigned long target_load(int cpu, int type) | |||
1895 | struct rq *rq = cpu_rq(cpu); | 2057 | struct rq *rq = cpu_rq(cpu); |
1896 | unsigned long total = weighted_cpuload(cpu); | 2058 | unsigned long total = weighted_cpuload(cpu); |
1897 | 2059 | ||
1898 | if (type == 0) | 2060 | if (type == 0 || !sched_feat(LB_BIAS)) |
1899 | return total; | 2061 | return total; |
1900 | 2062 | ||
1901 | return max(rq->cpu_load[type-1], total); | 2063 | return max(rq->cpu_load[type-1], total); |
1902 | } | 2064 | } |
1903 | 2065 | ||
1904 | /* | 2066 | /* |
1905 | * Return the average load per task on the cpu's run queue | ||
1906 | */ | ||
1907 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
1908 | { | ||
1909 | struct rq *rq = cpu_rq(cpu); | ||
1910 | unsigned long total = weighted_cpuload(cpu); | ||
1911 | unsigned long n = rq->nr_running; | ||
1912 | |||
1913 | return n ? total / n : SCHED_LOAD_SCALE; | ||
1914 | } | ||
1915 | |||
1916 | /* | ||
1917 | * find_idlest_group finds and returns the least busy CPU group within the | 2067 | * find_idlest_group finds and returns the least busy CPU group within the |
1918 | * domain. | 2068 | * domain. |
1919 | */ | 2069 | */ |
@@ -2019,6 +2169,9 @@ static int sched_balance_self(int cpu, int flag) | |||
2019 | sd = tmp; | 2169 | sd = tmp; |
2020 | } | 2170 | } |
2021 | 2171 | ||
2172 | if (sd) | ||
2173 | update_shares(sd); | ||
2174 | |||
2022 | while (sd) { | 2175 | while (sd) { |
2023 | cpumask_t span, tmpmask; | 2176 | cpumask_t span, tmpmask; |
2024 | struct sched_group *group; | 2177 | struct sched_group *group; |
@@ -2085,6 +2238,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2085 | if (!sched_feat(SYNC_WAKEUPS)) | 2238 | if (!sched_feat(SYNC_WAKEUPS)) |
2086 | sync = 0; | 2239 | sync = 0; |
2087 | 2240 | ||
2241 | #ifdef CONFIG_SMP | ||
2242 | if (sched_feat(LB_WAKEUP_UPDATE)) { | ||
2243 | struct sched_domain *sd; | ||
2244 | |||
2245 | this_cpu = raw_smp_processor_id(); | ||
2246 | cpu = task_cpu(p); | ||
2247 | |||
2248 | for_each_domain(this_cpu, sd) { | ||
2249 | if (cpu_isset(cpu, sd->span)) { | ||
2250 | update_shares(sd); | ||
2251 | break; | ||
2252 | } | ||
2253 | } | ||
2254 | } | ||
2255 | #endif | ||
2256 | |||
2088 | smp_wmb(); | 2257 | smp_wmb(); |
2089 | rq = task_rq_lock(p, &flags); | 2258 | rq = task_rq_lock(p, &flags); |
2090 | old_state = p->state; | 2259 | old_state = p->state; |
@@ -2131,7 +2300,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2131 | } | 2300 | } |
2132 | } | 2301 | } |
2133 | } | 2302 | } |
2134 | #endif | 2303 | #endif /* CONFIG_SCHEDSTATS */ |
2135 | 2304 | ||
2136 | out_activate: | 2305 | out_activate: |
2137 | #endif /* CONFIG_SMP */ | 2306 | #endif /* CONFIG_SMP */ |
@@ -2157,6 +2326,8 @@ out_running: | |||
2157 | p->sched_class->task_wake_up(rq, p); | 2326 | p->sched_class->task_wake_up(rq, p); |
2158 | #endif | 2327 | #endif |
2159 | out: | 2328 | out: |
2329 | current->se.last_wakeup = current->se.sum_exec_runtime; | ||
2330 | |||
2160 | task_rq_unlock(rq, &flags); | 2331 | task_rq_unlock(rq, &flags); |
2161 | 2332 | ||
2162 | return success; | 2333 | return success; |
@@ -2277,7 +2448,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2277 | * management (if any): | 2448 | * management (if any): |
2278 | */ | 2449 | */ |
2279 | p->sched_class->task_new(rq, p); | 2450 | p->sched_class->task_new(rq, p); |
2280 | inc_nr_running(p, rq); | 2451 | inc_nr_running(rq); |
2281 | } | 2452 | } |
2282 | check_preempt_curr(rq, p); | 2453 | check_preempt_curr(rq, p); |
2283 | #ifdef CONFIG_SMP | 2454 | #ifdef CONFIG_SMP |
@@ -2331,7 +2502,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2331 | notifier->ops->sched_out(notifier, next); | 2502 | notifier->ops->sched_out(notifier, next); |
2332 | } | 2503 | } |
2333 | 2504 | ||
2334 | #else | 2505 | #else /* !CONFIG_PREEMPT_NOTIFIERS */ |
2335 | 2506 | ||
2336 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 2507 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
2337 | { | 2508 | { |
@@ -2343,7 +2514,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2343 | { | 2514 | { |
2344 | } | 2515 | } |
2345 | 2516 | ||
2346 | #endif | 2517 | #endif /* CONFIG_PREEMPT_NOTIFIERS */ |
2347 | 2518 | ||
2348 | /** | 2519 | /** |
2349 | * prepare_task_switch - prepare to switch tasks | 2520 | * prepare_task_switch - prepare to switch tasks |
@@ -2785,7 +2956,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2785 | enum cpu_idle_type idle, int *all_pinned, | 2956 | enum cpu_idle_type idle, int *all_pinned, |
2786 | int *this_best_prio, struct rq_iterator *iterator) | 2957 | int *this_best_prio, struct rq_iterator *iterator) |
2787 | { | 2958 | { |
2788 | int loops = 0, pulled = 0, pinned = 0, skip_for_load; | 2959 | int loops = 0, pulled = 0, pinned = 0; |
2789 | struct task_struct *p; | 2960 | struct task_struct *p; |
2790 | long rem_load_move = max_load_move; | 2961 | long rem_load_move = max_load_move; |
2791 | 2962 | ||
@@ -2801,14 +2972,8 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2801 | next: | 2972 | next: |
2802 | if (!p || loops++ > sysctl_sched_nr_migrate) | 2973 | if (!p || loops++ > sysctl_sched_nr_migrate) |
2803 | goto out; | 2974 | goto out; |
2804 | /* | 2975 | |
2805 | * To help distribute high priority tasks across CPUs we don't | 2976 | if ((p->se.load.weight >> 1) > rem_load_move || |
2806 | * skip a task if it will be the highest priority task (i.e. smallest | ||
2807 | * prio value) on its new queue regardless of its load weight | ||
2808 | */ | ||
2809 | skip_for_load = (p->se.load.weight >> 1) > rem_load_move + | ||
2810 | SCHED_LOAD_SCALE_FUZZ; | ||
2811 | if ((skip_for_load && p->prio >= *this_best_prio) || | ||
2812 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { | 2977 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { |
2813 | p = iterator->next(iterator->arg); | 2978 | p = iterator->next(iterator->arg); |
2814 | goto next; | 2979 | goto next; |
@@ -2863,6 +3028,10 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2863 | max_load_move - total_load_moved, | 3028 | max_load_move - total_load_moved, |
2864 | sd, idle, all_pinned, &this_best_prio); | 3029 | sd, idle, all_pinned, &this_best_prio); |
2865 | class = class->next; | 3030 | class = class->next; |
3031 | |||
3032 | if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) | ||
3033 | break; | ||
3034 | |||
2866 | } while (class && max_load_move > total_load_moved); | 3035 | } while (class && max_load_move > total_load_moved); |
2867 | 3036 | ||
2868 | return total_load_moved > 0; | 3037 | return total_load_moved > 0; |
@@ -2939,6 +3108,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2939 | max_load = this_load = total_load = total_pwr = 0; | 3108 | max_load = this_load = total_load = total_pwr = 0; |
2940 | busiest_load_per_task = busiest_nr_running = 0; | 3109 | busiest_load_per_task = busiest_nr_running = 0; |
2941 | this_load_per_task = this_nr_running = 0; | 3110 | this_load_per_task = this_nr_running = 0; |
3111 | |||
2942 | if (idle == CPU_NOT_IDLE) | 3112 | if (idle == CPU_NOT_IDLE) |
2943 | load_idx = sd->busy_idx; | 3113 | load_idx = sd->busy_idx; |
2944 | else if (idle == CPU_NEWLY_IDLE) | 3114 | else if (idle == CPU_NEWLY_IDLE) |
@@ -2953,6 +3123,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2953 | int __group_imb = 0; | 3123 | int __group_imb = 0; |
2954 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | 3124 | unsigned int balance_cpu = -1, first_idle_cpu = 0; |
2955 | unsigned long sum_nr_running, sum_weighted_load; | 3125 | unsigned long sum_nr_running, sum_weighted_load; |
3126 | unsigned long sum_avg_load_per_task; | ||
3127 | unsigned long avg_load_per_task; | ||
2956 | 3128 | ||
2957 | local_group = cpu_isset(this_cpu, group->cpumask); | 3129 | local_group = cpu_isset(this_cpu, group->cpumask); |
2958 | 3130 | ||
@@ -2961,6 +3133,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2961 | 3133 | ||
2962 | /* Tally up the load of all CPUs in the group */ | 3134 | /* Tally up the load of all CPUs in the group */ |
2963 | sum_weighted_load = sum_nr_running = avg_load = 0; | 3135 | sum_weighted_load = sum_nr_running = avg_load = 0; |
3136 | sum_avg_load_per_task = avg_load_per_task = 0; | ||
3137 | |||
2964 | max_cpu_load = 0; | 3138 | max_cpu_load = 0; |
2965 | min_cpu_load = ~0UL; | 3139 | min_cpu_load = ~0UL; |
2966 | 3140 | ||
@@ -2994,6 +3168,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2994 | avg_load += load; | 3168 | avg_load += load; |
2995 | sum_nr_running += rq->nr_running; | 3169 | sum_nr_running += rq->nr_running; |
2996 | sum_weighted_load += weighted_cpuload(i); | 3170 | sum_weighted_load += weighted_cpuload(i); |
3171 | |||
3172 | sum_avg_load_per_task += cpu_avg_load_per_task(i); | ||
2997 | } | 3173 | } |
2998 | 3174 | ||
2999 | /* | 3175 | /* |
@@ -3015,7 +3191,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3015 | avg_load = sg_div_cpu_power(group, | 3191 | avg_load = sg_div_cpu_power(group, |
3016 | avg_load * SCHED_LOAD_SCALE); | 3192 | avg_load * SCHED_LOAD_SCALE); |
3017 | 3193 | ||
3018 | if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE) | 3194 | |
3195 | /* | ||
3196 | * Consider the group unbalanced when the imbalance is larger | ||
3197 | * than the average weight of two tasks. | ||
3198 | * | ||
3199 | * APZ: with cgroup the avg task weight can vary wildly and | ||
3200 | * might not be a suitable number - should we keep a | ||
3201 | * normalized nr_running number somewhere that negates | ||
3202 | * the hierarchy? | ||
3203 | */ | ||
3204 | avg_load_per_task = sg_div_cpu_power(group, | ||
3205 | sum_avg_load_per_task * SCHED_LOAD_SCALE); | ||
3206 | |||
3207 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) | ||
3019 | __group_imb = 1; | 3208 | __group_imb = 1; |
3020 | 3209 | ||
3021 | group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; | 3210 | group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; |
@@ -3156,9 +3345,9 @@ small_imbalance: | |||
3156 | if (busiest_load_per_task > this_load_per_task) | 3345 | if (busiest_load_per_task > this_load_per_task) |
3157 | imbn = 1; | 3346 | imbn = 1; |
3158 | } else | 3347 | } else |
3159 | this_load_per_task = SCHED_LOAD_SCALE; | 3348 | this_load_per_task = cpu_avg_load_per_task(this_cpu); |
3160 | 3349 | ||
3161 | if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >= | 3350 | if (max_load - this_load + 2*busiest_load_per_task >= |
3162 | busiest_load_per_task * imbn) { | 3351 | busiest_load_per_task * imbn) { |
3163 | *imbalance = busiest_load_per_task; | 3352 | *imbalance = busiest_load_per_task; |
3164 | return busiest; | 3353 | return busiest; |
@@ -3284,6 +3473,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3284 | schedstat_inc(sd, lb_count[idle]); | 3473 | schedstat_inc(sd, lb_count[idle]); |
3285 | 3474 | ||
3286 | redo: | 3475 | redo: |
3476 | update_shares(sd); | ||
3287 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3477 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
3288 | cpus, balance); | 3478 | cpus, balance); |
3289 | 3479 | ||
@@ -3386,8 +3576,9 @@ redo: | |||
3386 | 3576 | ||
3387 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3577 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3388 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3578 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3389 | return -1; | 3579 | ld_moved = -1; |
3390 | return ld_moved; | 3580 | |
3581 | goto out; | ||
3391 | 3582 | ||
3392 | out_balanced: | 3583 | out_balanced: |
3393 | schedstat_inc(sd, lb_balanced[idle]); | 3584 | schedstat_inc(sd, lb_balanced[idle]); |
@@ -3402,8 +3593,13 @@ out_one_pinned: | |||
3402 | 3593 | ||
3403 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3594 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3404 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3595 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3405 | return -1; | 3596 | ld_moved = -1; |
3406 | return 0; | 3597 | else |
3598 | ld_moved = 0; | ||
3599 | out: | ||
3600 | if (ld_moved) | ||
3601 | update_shares(sd); | ||
3602 | return ld_moved; | ||
3407 | } | 3603 | } |
3408 | 3604 | ||
3409 | /* | 3605 | /* |
@@ -3438,6 +3634,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, | |||
3438 | 3634 | ||
3439 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); | 3635 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); |
3440 | redo: | 3636 | redo: |
3637 | update_shares_locked(this_rq, sd); | ||
3441 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, | 3638 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, |
3442 | &sd_idle, cpus, NULL); | 3639 | &sd_idle, cpus, NULL); |
3443 | if (!group) { | 3640 | if (!group) { |
@@ -3481,6 +3678,7 @@ redo: | |||
3481 | } else | 3678 | } else |
3482 | sd->nr_balance_failed = 0; | 3679 | sd->nr_balance_failed = 0; |
3483 | 3680 | ||
3681 | update_shares_locked(this_rq, sd); | ||
3484 | return ld_moved; | 3682 | return ld_moved; |
3485 | 3683 | ||
3486 | out_balanced: | 3684 | out_balanced: |
@@ -3672,6 +3870,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3672 | /* Earliest time when we have to do rebalance again */ | 3870 | /* Earliest time when we have to do rebalance again */ |
3673 | unsigned long next_balance = jiffies + 60*HZ; | 3871 | unsigned long next_balance = jiffies + 60*HZ; |
3674 | int update_next_balance = 0; | 3872 | int update_next_balance = 0; |
3873 | int need_serialize; | ||
3675 | cpumask_t tmp; | 3874 | cpumask_t tmp; |
3676 | 3875 | ||
3677 | for_each_domain(cpu, sd) { | 3876 | for_each_domain(cpu, sd) { |
@@ -3689,8 +3888,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3689 | if (interval > HZ*NR_CPUS/10) | 3888 | if (interval > HZ*NR_CPUS/10) |
3690 | interval = HZ*NR_CPUS/10; | 3889 | interval = HZ*NR_CPUS/10; |
3691 | 3890 | ||
3891 | need_serialize = sd->flags & SD_SERIALIZE; | ||
3692 | 3892 | ||
3693 | if (sd->flags & SD_SERIALIZE) { | 3893 | if (need_serialize) { |
3694 | if (!spin_trylock(&balancing)) | 3894 | if (!spin_trylock(&balancing)) |
3695 | goto out; | 3895 | goto out; |
3696 | } | 3896 | } |
@@ -3706,7 +3906,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3706 | } | 3906 | } |
3707 | sd->last_balance = jiffies; | 3907 | sd->last_balance = jiffies; |
3708 | } | 3908 | } |
3709 | if (sd->flags & SD_SERIALIZE) | 3909 | if (need_serialize) |
3710 | spin_unlock(&balancing); | 3910 | spin_unlock(&balancing); |
3711 | out: | 3911 | out: |
3712 | if (time_after(next_balance, sd->last_balance + interval)) { | 3912 | if (time_after(next_balance, sd->last_balance + interval)) { |
@@ -4070,6 +4270,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
4070 | prev->comm, prev->pid, preempt_count()); | 4270 | prev->comm, prev->pid, preempt_count()); |
4071 | 4271 | ||
4072 | debug_show_held_locks(prev); | 4272 | debug_show_held_locks(prev); |
4273 | print_modules(); | ||
4073 | if (irqs_disabled()) | 4274 | if (irqs_disabled()) |
4074 | print_irqtrace_events(prev); | 4275 | print_irqtrace_events(prev); |
4075 | 4276 | ||
@@ -4143,7 +4344,7 @@ asmlinkage void __sched schedule(void) | |||
4143 | struct task_struct *prev, *next; | 4344 | struct task_struct *prev, *next; |
4144 | unsigned long *switch_count; | 4345 | unsigned long *switch_count; |
4145 | struct rq *rq; | 4346 | struct rq *rq; |
4146 | int cpu; | 4347 | int cpu, hrtick = sched_feat(HRTICK); |
4147 | 4348 | ||
4148 | need_resched: | 4349 | need_resched: |
4149 | preempt_disable(); | 4350 | preempt_disable(); |
@@ -4158,7 +4359,8 @@ need_resched_nonpreemptible: | |||
4158 | 4359 | ||
4159 | schedule_debug(prev); | 4360 | schedule_debug(prev); |
4160 | 4361 | ||
4161 | hrtick_clear(rq); | 4362 | if (hrtick) |
4363 | hrtick_clear(rq); | ||
4162 | 4364 | ||
4163 | /* | 4365 | /* |
4164 | * Do the rq-clock update outside the rq lock: | 4366 | * Do the rq-clock update outside the rq lock: |
@@ -4204,7 +4406,8 @@ need_resched_nonpreemptible: | |||
4204 | } else | 4406 | } else |
4205 | spin_unlock_irq(&rq->lock); | 4407 | spin_unlock_irq(&rq->lock); |
4206 | 4408 | ||
4207 | hrtick_set(rq); | 4409 | if (hrtick) |
4410 | hrtick_set(rq); | ||
4208 | 4411 | ||
4209 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 4412 | if (unlikely(reacquire_kernel_lock(current) < 0)) |
4210 | goto need_resched_nonpreemptible; | 4413 | goto need_resched_nonpreemptible; |
@@ -4586,10 +4789,8 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4586 | goto out_unlock; | 4789 | goto out_unlock; |
4587 | } | 4790 | } |
4588 | on_rq = p->se.on_rq; | 4791 | on_rq = p->se.on_rq; |
4589 | if (on_rq) { | 4792 | if (on_rq) |
4590 | dequeue_task(rq, p, 0); | 4793 | dequeue_task(rq, p, 0); |
4591 | dec_load(rq, p); | ||
4592 | } | ||
4593 | 4794 | ||
4594 | p->static_prio = NICE_TO_PRIO(nice); | 4795 | p->static_prio = NICE_TO_PRIO(nice); |
4595 | set_load_weight(p); | 4796 | set_load_weight(p); |
@@ -4599,7 +4800,6 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4599 | 4800 | ||
4600 | if (on_rq) { | 4801 | if (on_rq) { |
4601 | enqueue_task(rq, p, 0); | 4802 | enqueue_task(rq, p, 0); |
4602 | inc_load(rq, p); | ||
4603 | /* | 4803 | /* |
4604 | * If the task increased its priority or is running and | 4804 | * If the task increased its priority or is running and |
4605 | * lowered its priority, then reschedule its CPU: | 4805 | * lowered its priority, then reschedule its CPU: |
@@ -5070,24 +5270,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, | |||
5070 | return sched_setaffinity(pid, &new_mask); | 5270 | return sched_setaffinity(pid, &new_mask); |
5071 | } | 5271 | } |
5072 | 5272 | ||
5073 | /* | ||
5074 | * Represents all cpu's present in the system | ||
5075 | * In systems capable of hotplug, this map could dynamically grow | ||
5076 | * as new cpu's are detected in the system via any platform specific | ||
5077 | * method, such as ACPI for e.g. | ||
5078 | */ | ||
5079 | |||
5080 | cpumask_t cpu_present_map __read_mostly; | ||
5081 | EXPORT_SYMBOL(cpu_present_map); | ||
5082 | |||
5083 | #ifndef CONFIG_SMP | ||
5084 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | ||
5085 | EXPORT_SYMBOL(cpu_online_map); | ||
5086 | |||
5087 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | ||
5088 | EXPORT_SYMBOL(cpu_possible_map); | ||
5089 | #endif | ||
5090 | |||
5091 | long sched_getaffinity(pid_t pid, cpumask_t *mask) | 5273 | long sched_getaffinity(pid_t pid, cpumask_t *mask) |
5092 | { | 5274 | { |
5093 | struct task_struct *p; | 5275 | struct task_struct *p; |
@@ -5571,6 +5753,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) | |||
5571 | goto out; | 5753 | goto out; |
5572 | } | 5754 | } |
5573 | 5755 | ||
5756 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && | ||
5757 | !cpus_equal(p->cpus_allowed, *new_mask))) { | ||
5758 | ret = -EINVAL; | ||
5759 | goto out; | ||
5760 | } | ||
5761 | |||
5574 | if (p->sched_class->set_cpus_allowed) | 5762 | if (p->sched_class->set_cpus_allowed) |
5575 | p->sched_class->set_cpus_allowed(p, new_mask); | 5763 | p->sched_class->set_cpus_allowed(p, new_mask); |
5576 | else { | 5764 | else { |
@@ -5622,10 +5810,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5622 | double_rq_lock(rq_src, rq_dest); | 5810 | double_rq_lock(rq_src, rq_dest); |
5623 | /* Already moved. */ | 5811 | /* Already moved. */ |
5624 | if (task_cpu(p) != src_cpu) | 5812 | if (task_cpu(p) != src_cpu) |
5625 | goto out; | 5813 | goto done; |
5626 | /* Affinity changed (again). */ | 5814 | /* Affinity changed (again). */ |
5627 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) | 5815 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) |
5628 | goto out; | 5816 | goto fail; |
5629 | 5817 | ||
5630 | on_rq = p->se.on_rq; | 5818 | on_rq = p->se.on_rq; |
5631 | if (on_rq) | 5819 | if (on_rq) |
@@ -5636,8 +5824,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5636 | activate_task(rq_dest, p, 0); | 5824 | activate_task(rq_dest, p, 0); |
5637 | check_preempt_curr(rq_dest, p); | 5825 | check_preempt_curr(rq_dest, p); |
5638 | } | 5826 | } |
5827 | done: | ||
5639 | ret = 1; | 5828 | ret = 1; |
5640 | out: | 5829 | fail: |
5641 | double_rq_unlock(rq_src, rq_dest); | 5830 | double_rq_unlock(rq_src, rq_dest); |
5642 | return ret; | 5831 | return ret; |
5643 | } | 5832 | } |
@@ -6059,6 +6248,36 @@ static void unregister_sched_domain_sysctl(void) | |||
6059 | } | 6248 | } |
6060 | #endif | 6249 | #endif |
6061 | 6250 | ||
6251 | static void set_rq_online(struct rq *rq) | ||
6252 | { | ||
6253 | if (!rq->online) { | ||
6254 | const struct sched_class *class; | ||
6255 | |||
6256 | cpu_set(rq->cpu, rq->rd->online); | ||
6257 | rq->online = 1; | ||
6258 | |||
6259 | for_each_class(class) { | ||
6260 | if (class->rq_online) | ||
6261 | class->rq_online(rq); | ||
6262 | } | ||
6263 | } | ||
6264 | } | ||
6265 | |||
6266 | static void set_rq_offline(struct rq *rq) | ||
6267 | { | ||
6268 | if (rq->online) { | ||
6269 | const struct sched_class *class; | ||
6270 | |||
6271 | for_each_class(class) { | ||
6272 | if (class->rq_offline) | ||
6273 | class->rq_offline(rq); | ||
6274 | } | ||
6275 | |||
6276 | cpu_clear(rq->cpu, rq->rd->online); | ||
6277 | rq->online = 0; | ||
6278 | } | ||
6279 | } | ||
6280 | |||
6062 | /* | 6281 | /* |
6063 | * migration_call - callback that gets triggered when a CPU is added. | 6282 | * migration_call - callback that gets triggered when a CPU is added. |
6064 | * Here we can start up the necessary migration thread for the new CPU. | 6283 | * Here we can start up the necessary migration thread for the new CPU. |
@@ -6096,7 +6315,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6096 | spin_lock_irqsave(&rq->lock, flags); | 6315 | spin_lock_irqsave(&rq->lock, flags); |
6097 | if (rq->rd) { | 6316 | if (rq->rd) { |
6098 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6317 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6099 | cpu_set(cpu, rq->rd->online); | 6318 | |
6319 | set_rq_online(rq); | ||
6100 | } | 6320 | } |
6101 | spin_unlock_irqrestore(&rq->lock, flags); | 6321 | spin_unlock_irqrestore(&rq->lock, flags); |
6102 | break; | 6322 | break; |
@@ -6157,7 +6377,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6157 | spin_lock_irqsave(&rq->lock, flags); | 6377 | spin_lock_irqsave(&rq->lock, flags); |
6158 | if (rq->rd) { | 6378 | if (rq->rd) { |
6159 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6379 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6160 | cpu_clear(cpu, rq->rd->online); | 6380 | set_rq_offline(rq); |
6161 | } | 6381 | } |
6162 | spin_unlock_irqrestore(&rq->lock, flags); | 6382 | spin_unlock_irqrestore(&rq->lock, flags); |
6163 | break; | 6383 | break; |
@@ -6191,6 +6411,28 @@ void __init migration_init(void) | |||
6191 | 6411 | ||
6192 | #ifdef CONFIG_SCHED_DEBUG | 6412 | #ifdef CONFIG_SCHED_DEBUG |
6193 | 6413 | ||
6414 | static inline const char *sd_level_to_string(enum sched_domain_level lvl) | ||
6415 | { | ||
6416 | switch (lvl) { | ||
6417 | case SD_LV_NONE: | ||
6418 | return "NONE"; | ||
6419 | case SD_LV_SIBLING: | ||
6420 | return "SIBLING"; | ||
6421 | case SD_LV_MC: | ||
6422 | return "MC"; | ||
6423 | case SD_LV_CPU: | ||
6424 | return "CPU"; | ||
6425 | case SD_LV_NODE: | ||
6426 | return "NODE"; | ||
6427 | case SD_LV_ALLNODES: | ||
6428 | return "ALLNODES"; | ||
6429 | case SD_LV_MAX: | ||
6430 | return "MAX"; | ||
6431 | |||
6432 | } | ||
6433 | return "MAX"; | ||
6434 | } | ||
6435 | |||
6194 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 6436 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
6195 | cpumask_t *groupmask) | 6437 | cpumask_t *groupmask) |
6196 | { | 6438 | { |
@@ -6210,7 +6452,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
6210 | return -1; | 6452 | return -1; |
6211 | } | 6453 | } |
6212 | 6454 | ||
6213 | printk(KERN_CONT "span %s\n", str); | 6455 | printk(KERN_CONT "span %s level %s\n", |
6456 | str, sd_level_to_string(sd->level)); | ||
6214 | 6457 | ||
6215 | if (!cpu_isset(cpu, sd->span)) { | 6458 | if (!cpu_isset(cpu, sd->span)) { |
6216 | printk(KERN_ERR "ERROR: domain->span does not contain " | 6459 | printk(KERN_ERR "ERROR: domain->span does not contain " |
@@ -6294,9 +6537,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
6294 | } | 6537 | } |
6295 | kfree(groupmask); | 6538 | kfree(groupmask); |
6296 | } | 6539 | } |
6297 | #else | 6540 | #else /* !CONFIG_SCHED_DEBUG */ |
6298 | # define sched_domain_debug(sd, cpu) do { } while (0) | 6541 | # define sched_domain_debug(sd, cpu) do { } while (0) |
6299 | #endif | 6542 | #endif /* CONFIG_SCHED_DEBUG */ |
6300 | 6543 | ||
6301 | static int sd_degenerate(struct sched_domain *sd) | 6544 | static int sd_degenerate(struct sched_domain *sd) |
6302 | { | 6545 | { |
@@ -6356,20 +6599,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
6356 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) | 6599 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) |
6357 | { | 6600 | { |
6358 | unsigned long flags; | 6601 | unsigned long flags; |
6359 | const struct sched_class *class; | ||
6360 | 6602 | ||
6361 | spin_lock_irqsave(&rq->lock, flags); | 6603 | spin_lock_irqsave(&rq->lock, flags); |
6362 | 6604 | ||
6363 | if (rq->rd) { | 6605 | if (rq->rd) { |
6364 | struct root_domain *old_rd = rq->rd; | 6606 | struct root_domain *old_rd = rq->rd; |
6365 | 6607 | ||
6366 | for (class = sched_class_highest; class; class = class->next) { | 6608 | if (cpu_isset(rq->cpu, old_rd->online)) |
6367 | if (class->leave_domain) | 6609 | set_rq_offline(rq); |
6368 | class->leave_domain(rq); | ||
6369 | } | ||
6370 | 6610 | ||
6371 | cpu_clear(rq->cpu, old_rd->span); | 6611 | cpu_clear(rq->cpu, old_rd->span); |
6372 | cpu_clear(rq->cpu, old_rd->online); | ||
6373 | 6612 | ||
6374 | if (atomic_dec_and_test(&old_rd->refcount)) | 6613 | if (atomic_dec_and_test(&old_rd->refcount)) |
6375 | kfree(old_rd); | 6614 | kfree(old_rd); |
@@ -6380,12 +6619,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6380 | 6619 | ||
6381 | cpu_set(rq->cpu, rd->span); | 6620 | cpu_set(rq->cpu, rd->span); |
6382 | if (cpu_isset(rq->cpu, cpu_online_map)) | 6621 | if (cpu_isset(rq->cpu, cpu_online_map)) |
6383 | cpu_set(rq->cpu, rd->online); | 6622 | set_rq_online(rq); |
6384 | |||
6385 | for (class = sched_class_highest; class; class = class->next) { | ||
6386 | if (class->join_domain) | ||
6387 | class->join_domain(rq); | ||
6388 | } | ||
6389 | 6623 | ||
6390 | spin_unlock_irqrestore(&rq->lock, flags); | 6624 | spin_unlock_irqrestore(&rq->lock, flags); |
6391 | } | 6625 | } |
@@ -6396,6 +6630,8 @@ static void init_rootdomain(struct root_domain *rd) | |||
6396 | 6630 | ||
6397 | cpus_clear(rd->span); | 6631 | cpus_clear(rd->span); |
6398 | cpus_clear(rd->online); | 6632 | cpus_clear(rd->online); |
6633 | |||
6634 | cpupri_init(&rd->cpupri); | ||
6399 | } | 6635 | } |
6400 | 6636 | ||
6401 | static void init_defrootdomain(void) | 6637 | static void init_defrootdomain(void) |
@@ -6590,7 +6826,7 @@ static void sched_domain_node_span(int node, cpumask_t *span) | |||
6590 | cpus_or(*span, *span, *nodemask); | 6826 | cpus_or(*span, *span, *nodemask); |
6591 | } | 6827 | } |
6592 | } | 6828 | } |
6593 | #endif | 6829 | #endif /* CONFIG_NUMA */ |
6594 | 6830 | ||
6595 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | 6831 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; |
6596 | 6832 | ||
@@ -6609,7 +6845,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6609 | *sg = &per_cpu(sched_group_cpus, cpu); | 6845 | *sg = &per_cpu(sched_group_cpus, cpu); |
6610 | return cpu; | 6846 | return cpu; |
6611 | } | 6847 | } |
6612 | #endif | 6848 | #endif /* CONFIG_SCHED_SMT */ |
6613 | 6849 | ||
6614 | /* | 6850 | /* |
6615 | * multi-core sched-domains: | 6851 | * multi-core sched-domains: |
@@ -6617,7 +6853,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6617 | #ifdef CONFIG_SCHED_MC | 6853 | #ifdef CONFIG_SCHED_MC |
6618 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6854 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
6619 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); | 6855 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); |
6620 | #endif | 6856 | #endif /* CONFIG_SCHED_MC */ |
6621 | 6857 | ||
6622 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6858 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6623 | static int | 6859 | static int |
@@ -6719,7 +6955,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
6719 | sg = sg->next; | 6955 | sg = sg->next; |
6720 | } while (sg != group_head); | 6956 | } while (sg != group_head); |
6721 | } | 6957 | } |
6722 | #endif | 6958 | #endif /* CONFIG_NUMA */ |
6723 | 6959 | ||
6724 | #ifdef CONFIG_NUMA | 6960 | #ifdef CONFIG_NUMA |
6725 | /* Free memory allocated for various sched_group structures */ | 6961 | /* Free memory allocated for various sched_group structures */ |
@@ -6756,11 +6992,11 @@ next_sg: | |||
6756 | sched_group_nodes_bycpu[cpu] = NULL; | 6992 | sched_group_nodes_bycpu[cpu] = NULL; |
6757 | } | 6993 | } |
6758 | } | 6994 | } |
6759 | #else | 6995 | #else /* !CONFIG_NUMA */ |
6760 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) | 6996 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) |
6761 | { | 6997 | { |
6762 | } | 6998 | } |
6763 | #endif | 6999 | #endif /* CONFIG_NUMA */ |
6764 | 7000 | ||
6765 | /* | 7001 | /* |
6766 | * Initialize sched groups cpu_power. | 7002 | * Initialize sched groups cpu_power. |
@@ -7469,7 +7705,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
7469 | #endif | 7705 | #endif |
7470 | return err; | 7706 | return err; |
7471 | } | 7707 | } |
7472 | #endif | 7708 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
7473 | 7709 | ||
7474 | /* | 7710 | /* |
7475 | * Force a reinitialization of the sched domains hierarchy. The domains | 7711 | * Force a reinitialization of the sched domains hierarchy. The domains |
@@ -7480,21 +7716,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
7480 | static int update_sched_domains(struct notifier_block *nfb, | 7716 | static int update_sched_domains(struct notifier_block *nfb, |
7481 | unsigned long action, void *hcpu) | 7717 | unsigned long action, void *hcpu) |
7482 | { | 7718 | { |
7719 | int cpu = (int)(long)hcpu; | ||
7720 | |||
7483 | switch (action) { | 7721 | switch (action) { |
7484 | case CPU_UP_PREPARE: | ||
7485 | case CPU_UP_PREPARE_FROZEN: | ||
7486 | case CPU_DOWN_PREPARE: | 7722 | case CPU_DOWN_PREPARE: |
7487 | case CPU_DOWN_PREPARE_FROZEN: | 7723 | case CPU_DOWN_PREPARE_FROZEN: |
7724 | disable_runtime(cpu_rq(cpu)); | ||
7725 | /* fall-through */ | ||
7726 | case CPU_UP_PREPARE: | ||
7727 | case CPU_UP_PREPARE_FROZEN: | ||
7488 | detach_destroy_domains(&cpu_online_map); | 7728 | detach_destroy_domains(&cpu_online_map); |
7489 | free_sched_domains(); | 7729 | free_sched_domains(); |
7490 | return NOTIFY_OK; | 7730 | return NOTIFY_OK; |
7491 | 7731 | ||
7492 | case CPU_UP_CANCELED: | 7732 | |
7493 | case CPU_UP_CANCELED_FROZEN: | ||
7494 | case CPU_DOWN_FAILED: | 7733 | case CPU_DOWN_FAILED: |
7495 | case CPU_DOWN_FAILED_FROZEN: | 7734 | case CPU_DOWN_FAILED_FROZEN: |
7496 | case CPU_ONLINE: | 7735 | case CPU_ONLINE: |
7497 | case CPU_ONLINE_FROZEN: | 7736 | case CPU_ONLINE_FROZEN: |
7737 | enable_runtime(cpu_rq(cpu)); | ||
7738 | /* fall-through */ | ||
7739 | case CPU_UP_CANCELED: | ||
7740 | case CPU_UP_CANCELED_FROZEN: | ||
7498 | case CPU_DEAD: | 7741 | case CPU_DEAD: |
7499 | case CPU_DEAD_FROZEN: | 7742 | case CPU_DEAD_FROZEN: |
7500 | /* | 7743 | /* |
@@ -7694,8 +7937,8 @@ void __init sched_init(void) | |||
7694 | 7937 | ||
7695 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; | 7938 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
7696 | ptr += nr_cpu_ids * sizeof(void **); | 7939 | ptr += nr_cpu_ids * sizeof(void **); |
7697 | #endif | 7940 | #endif /* CONFIG_USER_SCHED */ |
7698 | #endif | 7941 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7699 | #ifdef CONFIG_RT_GROUP_SCHED | 7942 | #ifdef CONFIG_RT_GROUP_SCHED |
7700 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; | 7943 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; |
7701 | ptr += nr_cpu_ids * sizeof(void **); | 7944 | ptr += nr_cpu_ids * sizeof(void **); |
@@ -7709,8 +7952,8 @@ void __init sched_init(void) | |||
7709 | 7952 | ||
7710 | root_task_group.rt_rq = (struct rt_rq **)ptr; | 7953 | root_task_group.rt_rq = (struct rt_rq **)ptr; |
7711 | ptr += nr_cpu_ids * sizeof(void **); | 7954 | ptr += nr_cpu_ids * sizeof(void **); |
7712 | #endif | 7955 | #endif /* CONFIG_USER_SCHED */ |
7713 | #endif | 7956 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7714 | } | 7957 | } |
7715 | 7958 | ||
7716 | #ifdef CONFIG_SMP | 7959 | #ifdef CONFIG_SMP |
@@ -7726,8 +7969,8 @@ void __init sched_init(void) | |||
7726 | #ifdef CONFIG_USER_SCHED | 7969 | #ifdef CONFIG_USER_SCHED |
7727 | init_rt_bandwidth(&root_task_group.rt_bandwidth, | 7970 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
7728 | global_rt_period(), RUNTIME_INF); | 7971 | global_rt_period(), RUNTIME_INF); |
7729 | #endif | 7972 | #endif /* CONFIG_USER_SCHED */ |
7730 | #endif | 7973 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7731 | 7974 | ||
7732 | #ifdef CONFIG_GROUP_SCHED | 7975 | #ifdef CONFIG_GROUP_SCHED |
7733 | list_add(&init_task_group.list, &task_groups); | 7976 | list_add(&init_task_group.list, &task_groups); |
@@ -7737,8 +7980,8 @@ void __init sched_init(void) | |||
7737 | INIT_LIST_HEAD(&root_task_group.children); | 7980 | INIT_LIST_HEAD(&root_task_group.children); |
7738 | init_task_group.parent = &root_task_group; | 7981 | init_task_group.parent = &root_task_group; |
7739 | list_add(&init_task_group.siblings, &root_task_group.children); | 7982 | list_add(&init_task_group.siblings, &root_task_group.children); |
7740 | #endif | 7983 | #endif /* CONFIG_USER_SCHED */ |
7741 | #endif | 7984 | #endif /* CONFIG_GROUP_SCHED */ |
7742 | 7985 | ||
7743 | for_each_possible_cpu(i) { | 7986 | for_each_possible_cpu(i) { |
7744 | struct rq *rq; | 7987 | struct rq *rq; |
@@ -7818,6 +8061,7 @@ void __init sched_init(void) | |||
7818 | rq->next_balance = jiffies; | 8061 | rq->next_balance = jiffies; |
7819 | rq->push_cpu = 0; | 8062 | rq->push_cpu = 0; |
7820 | rq->cpu = i; | 8063 | rq->cpu = i; |
8064 | rq->online = 0; | ||
7821 | rq->migration_thread = NULL; | 8065 | rq->migration_thread = NULL; |
7822 | INIT_LIST_HEAD(&rq->migration_queue); | 8066 | INIT_LIST_HEAD(&rq->migration_queue); |
7823 | rq_attach_root(rq, &def_root_domain); | 8067 | rq_attach_root(rq, &def_root_domain); |
@@ -8057,7 +8301,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | |||
8057 | { | 8301 | { |
8058 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | 8302 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); |
8059 | } | 8303 | } |
8060 | #else | 8304 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
8061 | static inline void free_fair_sched_group(struct task_group *tg) | 8305 | static inline void free_fair_sched_group(struct task_group *tg) |
8062 | { | 8306 | { |
8063 | } | 8307 | } |
@@ -8075,7 +8319,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu) | |||
8075 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8319 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8076 | { | 8320 | { |
8077 | } | 8321 | } |
8078 | #endif | 8322 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8079 | 8323 | ||
8080 | #ifdef CONFIG_RT_GROUP_SCHED | 8324 | #ifdef CONFIG_RT_GROUP_SCHED |
8081 | static void free_rt_sched_group(struct task_group *tg) | 8325 | static void free_rt_sched_group(struct task_group *tg) |
@@ -8146,7 +8390,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | |||
8146 | { | 8390 | { |
8147 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | 8391 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); |
8148 | } | 8392 | } |
8149 | #else | 8393 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8150 | static inline void free_rt_sched_group(struct task_group *tg) | 8394 | static inline void free_rt_sched_group(struct task_group *tg) |
8151 | { | 8395 | { |
8152 | } | 8396 | } |
@@ -8164,7 +8408,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu) | |||
8164 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | 8408 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) |
8165 | { | 8409 | { |
8166 | } | 8410 | } |
8167 | #endif | 8411 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8168 | 8412 | ||
8169 | #ifdef CONFIG_GROUP_SCHED | 8413 | #ifdef CONFIG_GROUP_SCHED |
8170 | static void free_sched_group(struct task_group *tg) | 8414 | static void free_sched_group(struct task_group *tg) |
@@ -8275,17 +8519,14 @@ void sched_move_task(struct task_struct *tsk) | |||
8275 | 8519 | ||
8276 | task_rq_unlock(rq, &flags); | 8520 | task_rq_unlock(rq, &flags); |
8277 | } | 8521 | } |
8278 | #endif | 8522 | #endif /* CONFIG_GROUP_SCHED */ |
8279 | 8523 | ||
8280 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8524 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8281 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8525 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) |
8282 | { | 8526 | { |
8283 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8527 | struct cfs_rq *cfs_rq = se->cfs_rq; |
8284 | struct rq *rq = cfs_rq->rq; | ||
8285 | int on_rq; | 8528 | int on_rq; |
8286 | 8529 | ||
8287 | spin_lock_irq(&rq->lock); | ||
8288 | |||
8289 | on_rq = se->on_rq; | 8530 | on_rq = se->on_rq; |
8290 | if (on_rq) | 8531 | if (on_rq) |
8291 | dequeue_entity(cfs_rq, se, 0); | 8532 | dequeue_entity(cfs_rq, se, 0); |
@@ -8295,8 +8536,17 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
8295 | 8536 | ||
8296 | if (on_rq) | 8537 | if (on_rq) |
8297 | enqueue_entity(cfs_rq, se, 0); | 8538 | enqueue_entity(cfs_rq, se, 0); |
8539 | } | ||
8298 | 8540 | ||
8299 | spin_unlock_irq(&rq->lock); | 8541 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
8542 | { | ||
8543 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8544 | struct rq *rq = cfs_rq->rq; | ||
8545 | unsigned long flags; | ||
8546 | |||
8547 | spin_lock_irqsave(&rq->lock, flags); | ||
8548 | __set_se_shares(se, shares); | ||
8549 | spin_unlock_irqrestore(&rq->lock, flags); | ||
8300 | } | 8550 | } |
8301 | 8551 | ||
8302 | static DEFINE_MUTEX(shares_mutex); | 8552 | static DEFINE_MUTEX(shares_mutex); |
@@ -8335,8 +8585,13 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8335 | * w/o tripping rebalance_share or load_balance_fair. | 8585 | * w/o tripping rebalance_share or load_balance_fair. |
8336 | */ | 8586 | */ |
8337 | tg->shares = shares; | 8587 | tg->shares = shares; |
8338 | for_each_possible_cpu(i) | 8588 | for_each_possible_cpu(i) { |
8589 | /* | ||
8590 | * force a rebalance | ||
8591 | */ | ||
8592 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
8339 | set_se_shares(tg->se[i], shares); | 8593 | set_se_shares(tg->se[i], shares); |
8594 | } | ||
8340 | 8595 | ||
8341 | /* | 8596 | /* |
8342 | * Enable load balance activity on this group, by inserting it back on | 8597 | * Enable load balance activity on this group, by inserting it back on |
@@ -8375,7 +8630,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8375 | #ifdef CONFIG_CGROUP_SCHED | 8630 | #ifdef CONFIG_CGROUP_SCHED |
8376 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8631 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
8377 | { | 8632 | { |
8378 | struct task_group *tgi, *parent = tg ? tg->parent : NULL; | 8633 | struct task_group *tgi, *parent = tg->parent; |
8379 | unsigned long total = 0; | 8634 | unsigned long total = 0; |
8380 | 8635 | ||
8381 | if (!parent) { | 8636 | if (!parent) { |
@@ -8399,7 +8654,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
8399 | } | 8654 | } |
8400 | rcu_read_unlock(); | 8655 | rcu_read_unlock(); |
8401 | 8656 | ||
8402 | return total + to_ratio(period, runtime) < | 8657 | return total + to_ratio(period, runtime) <= |
8403 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), | 8658 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), |
8404 | parent->rt_bandwidth.rt_runtime); | 8659 | parent->rt_bandwidth.rt_runtime); |
8405 | } | 8660 | } |
@@ -8519,16 +8774,21 @@ long sched_group_rt_period(struct task_group *tg) | |||
8519 | 8774 | ||
8520 | static int sched_rt_global_constraints(void) | 8775 | static int sched_rt_global_constraints(void) |
8521 | { | 8776 | { |
8777 | struct task_group *tg = &root_task_group; | ||
8778 | u64 rt_runtime, rt_period; | ||
8522 | int ret = 0; | 8779 | int ret = 0; |
8523 | 8780 | ||
8781 | rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); | ||
8782 | rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
8783 | |||
8524 | mutex_lock(&rt_constraints_mutex); | 8784 | mutex_lock(&rt_constraints_mutex); |
8525 | if (!__rt_schedulable(NULL, 1, 0)) | 8785 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) |
8526 | ret = -EINVAL; | 8786 | ret = -EINVAL; |
8527 | mutex_unlock(&rt_constraints_mutex); | 8787 | mutex_unlock(&rt_constraints_mutex); |
8528 | 8788 | ||
8529 | return ret; | 8789 | return ret; |
8530 | } | 8790 | } |
8531 | #else | 8791 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8532 | static int sched_rt_global_constraints(void) | 8792 | static int sched_rt_global_constraints(void) |
8533 | { | 8793 | { |
8534 | unsigned long flags; | 8794 | unsigned long flags; |
@@ -8546,7 +8806,7 @@ static int sched_rt_global_constraints(void) | |||
8546 | 8806 | ||
8547 | return 0; | 8807 | return 0; |
8548 | } | 8808 | } |
8549 | #endif | 8809 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8550 | 8810 | ||
8551 | int sched_rt_handler(struct ctl_table *table, int write, | 8811 | int sched_rt_handler(struct ctl_table *table, int write, |
8552 | struct file *filp, void __user *buffer, size_t *lenp, | 8812 | struct file *filp, void __user *buffer, size_t *lenp, |
@@ -8654,7 +8914,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
8654 | 8914 | ||
8655 | return (u64) tg->shares; | 8915 | return (u64) tg->shares; |
8656 | } | 8916 | } |
8657 | #endif | 8917 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8658 | 8918 | ||
8659 | #ifdef CONFIG_RT_GROUP_SCHED | 8919 | #ifdef CONFIG_RT_GROUP_SCHED |
8660 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 8920 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
@@ -8678,7 +8938,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
8678 | { | 8938 | { |
8679 | return sched_group_rt_period(cgroup_tg(cgrp)); | 8939 | return sched_group_rt_period(cgroup_tg(cgrp)); |
8680 | } | 8940 | } |
8681 | #endif | 8941 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8682 | 8942 | ||
8683 | static struct cftype cpu_files[] = { | 8943 | static struct cftype cpu_files[] = { |
8684 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8944 | #ifdef CONFIG_FAIR_GROUP_SCHED |
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 8affbfd0cdb0..22ed55d1167f 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -330,3 +330,16 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
330 | { | 330 | { |
331 | return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); | 331 | return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); |
332 | } | 332 | } |
333 | |||
334 | unsigned long long cpu_clock(int cpu) | ||
335 | { | ||
336 | unsigned long long clock; | ||
337 | unsigned long flags; | ||
338 | |||
339 | local_irq_save(flags); | ||
340 | clock = sched_clock_cpu(cpu); | ||
341 | local_irq_restore(flags); | ||
342 | |||
343 | return clock; | ||
344 | } | ||
345 | EXPORT_SYMBOL_GPL(cpu_clock); | ||
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c new file mode 100644 index 000000000000..52154fefab7e --- /dev/null +++ b/kernel/sched_cpupri.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * kernel/sched_cpupri.c | ||
3 | * | ||
4 | * CPU priority management | ||
5 | * | ||
6 | * Copyright (C) 2007-2008 Novell | ||
7 | * | ||
8 | * Author: Gregory Haskins <ghaskins@novell.com> | ||
9 | * | ||
10 | * This code tracks the priority of each CPU so that global migration | ||
11 | * decisions are easy to calculate. Each CPU can be in a state as follows: | ||
12 | * | ||
13 | * (INVALID), IDLE, NORMAL, RT1, ... RT99 | ||
14 | * | ||
15 | * going from the lowest priority to the highest. CPUs in the INVALID state | ||
16 | * are not eligible for routing. The system maintains this state with | ||
17 | * a 2 dimensional bitmap (the first for priority class, the second for cpus | ||
18 | * in that class). Therefore a typical application without affinity | ||
19 | * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit | ||
20 | * searches). For tasks with affinity restrictions, the algorithm has a | ||
21 | * worst case complexity of O(min(102, nr_domcpus)), though the scenario that | ||
22 | * yields the worst case search is fairly contrived. | ||
23 | * | ||
24 | * This program is free software; you can redistribute it and/or | ||
25 | * modify it under the terms of the GNU General Public License | ||
26 | * as published by the Free Software Foundation; version 2 | ||
27 | * of the License. | ||
28 | */ | ||
29 | |||
30 | #include "sched_cpupri.h" | ||
31 | |||
32 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | ||
33 | static int convert_prio(int prio) | ||
34 | { | ||
35 | int cpupri; | ||
36 | |||
37 | if (prio == CPUPRI_INVALID) | ||
38 | cpupri = CPUPRI_INVALID; | ||
39 | else if (prio == MAX_PRIO) | ||
40 | cpupri = CPUPRI_IDLE; | ||
41 | else if (prio >= MAX_RT_PRIO) | ||
42 | cpupri = CPUPRI_NORMAL; | ||
43 | else | ||
44 | cpupri = MAX_RT_PRIO - prio + 1; | ||
45 | |||
46 | return cpupri; | ||
47 | } | ||
48 | |||
49 | #define for_each_cpupri_active(array, idx) \ | ||
50 | for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \ | ||
51 | idx < CPUPRI_NR_PRIORITIES; \ | ||
52 | idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1)) | ||
53 | |||
54 | /** | ||
55 | * cpupri_find - find the best (lowest-pri) CPU in the system | ||
56 | * @cp: The cpupri context | ||
57 | * @p: The task | ||
58 | * @lowest_mask: A mask to fill in with selected CPUs | ||
59 | * | ||
60 | * Note: This function returns the recommended CPUs as calculated during the | ||
61 | * current invocation. By the time the call returns, the CPUs may have in | |
62 | * fact changed priorities any number of times. While not ideal, it is not | ||
63 | * an issue of correctness since the normal rebalancer logic will correct | ||
64 | * any discrepancies created by racing against the uncertainty of the current | ||
65 | * priority configuration. | ||
66 | * | ||
67 | * Returns: (int)bool - CPUs were found | ||
68 | */ | ||
69 | int cpupri_find(struct cpupri *cp, struct task_struct *p, | ||
70 | cpumask_t *lowest_mask) | ||
71 | { | ||
72 | int idx = 0; | ||
73 | int task_pri = convert_prio(p->prio); | ||
74 | |||
75 | for_each_cpupri_active(cp->pri_active, idx) { | ||
76 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | ||
77 | cpumask_t mask; | ||
78 | |||
79 | if (idx >= task_pri) | ||
80 | break; | ||
81 | |||
82 | cpus_and(mask, p->cpus_allowed, vec->mask); | ||
83 | |||
84 | if (cpus_empty(mask)) | ||
85 | continue; | ||
86 | |||
87 | *lowest_mask = mask; | ||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | * cpupri_set - update the cpu priority setting | ||
96 | * @cp: The cpupri context | ||
97 | * @cpu: The target cpu | ||
98 | * @newpri: The priority (INVALID-RT99) to assign to this CPU | |
99 | * | ||
100 | * Note: Assumes cpu_rq(cpu)->lock is locked | ||
101 | * | ||
102 | * Returns: (void) | ||
103 | */ | ||
104 | void cpupri_set(struct cpupri *cp, int cpu, int newpri) | ||
105 | { | ||
106 | int *currpri = &cp->cpu_to_pri[cpu]; | ||
107 | int oldpri = *currpri; | ||
108 | unsigned long flags; | ||
109 | |||
110 | newpri = convert_prio(newpri); | ||
111 | |||
112 | BUG_ON(newpri >= CPUPRI_NR_PRIORITIES); | ||
113 | |||
114 | if (newpri == oldpri) | ||
115 | return; | ||
116 | |||
117 | /* | ||
118 | * If the cpu was currently mapped to a different value, we | ||
119 | * first need to unmap the old value | ||
120 | */ | ||
121 | if (likely(oldpri != CPUPRI_INVALID)) { | ||
122 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; | ||
123 | |||
124 | spin_lock_irqsave(&vec->lock, flags); | ||
125 | |||
126 | vec->count--; | ||
127 | if (!vec->count) | ||
128 | clear_bit(oldpri, cp->pri_active); | ||
129 | cpu_clear(cpu, vec->mask); | ||
130 | |||
131 | spin_unlock_irqrestore(&vec->lock, flags); | ||
132 | } | ||
133 | |||
134 | if (likely(newpri != CPUPRI_INVALID)) { | ||
135 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; | ||
136 | |||
137 | spin_lock_irqsave(&vec->lock, flags); | ||
138 | |||
139 | cpu_set(cpu, vec->mask); | ||
140 | vec->count++; | ||
141 | if (vec->count == 1) | ||
142 | set_bit(newpri, cp->pri_active); | ||
143 | |||
144 | spin_unlock_irqrestore(&vec->lock, flags); | ||
145 | } | ||
146 | |||
147 | *currpri = newpri; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * cpupri_init - initialize the cpupri structure | ||
152 | * @cp: The cpupri context | ||
153 | * | ||
154 | * Returns: (void) | ||
155 | */ | ||
156 | void cpupri_init(struct cpupri *cp) | ||
157 | { | ||
158 | int i; | ||
159 | |||
160 | memset(cp, 0, sizeof(*cp)); | ||
161 | |||
162 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | ||
163 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; | ||
164 | |||
165 | spin_lock_init(&vec->lock); | ||
166 | vec->count = 0; | ||
167 | cpus_clear(vec->mask); | ||
168 | } | ||
169 | |||
170 | for_each_possible_cpu(i) | ||
171 | cp->cpu_to_pri[i] = CPUPRI_INVALID; | ||
172 | } | ||
173 | |||
174 | |||
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h new file mode 100644 index 000000000000..f25811b0f931 --- /dev/null +++ b/kernel/sched_cpupri.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #ifndef _LINUX_CPUPRI_H | ||
2 | #define _LINUX_CPUPRI_H | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | |||
6 | #define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2) | ||
7 | #define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES) | ||
8 | |||
9 | #define CPUPRI_INVALID -1 | ||
10 | #define CPUPRI_IDLE 0 | ||
11 | #define CPUPRI_NORMAL 1 | ||
12 | /* values 2-101 are RT priorities 0-99 */ | ||
13 | |||
14 | struct cpupri_vec { | ||
15 | spinlock_t lock; | ||
16 | int count; | ||
17 | cpumask_t mask; | ||
18 | }; | ||
19 | |||
20 | struct cpupri { | ||
21 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; | ||
22 | long pri_active[CPUPRI_NR_PRI_WORDS]; | ||
23 | int cpu_to_pri[NR_CPUS]; | ||
24 | }; | ||
25 | |||
26 | #ifdef CONFIG_SMP | ||
27 | int cpupri_find(struct cpupri *cp, | ||
28 | struct task_struct *p, cpumask_t *lowest_mask); | ||
29 | void cpupri_set(struct cpupri *cp, int cpu, int pri); | ||
30 | void cpupri_init(struct cpupri *cp); | ||
31 | #else | ||
32 | #define cpupri_set(cp, cpu, pri) do { } while (0) | ||
33 | #define cpupri_init() do { } while (0) | ||
34 | #endif | ||
35 | |||
36 | #endif /* _LINUX_CPUPRI_H */ | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 8bb713040ac9..bbe6b31c3c56 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -119,9 +119,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
119 | struct sched_entity *last; | 119 | struct sched_entity *last; |
120 | unsigned long flags; | 120 | unsigned long flags; |
121 | 121 | ||
122 | #if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED) | 122 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) |
123 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | ||
124 | #else | ||
125 | char path[128] = ""; | 123 | char path[128] = ""; |
126 | struct cgroup *cgroup = NULL; | 124 | struct cgroup *cgroup = NULL; |
127 | struct task_group *tg = cfs_rq->tg; | 125 | struct task_group *tg = cfs_rq->tg; |
@@ -133,6 +131,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
133 | cgroup_path(cgroup, path, sizeof(path)); | 131 | cgroup_path(cgroup, path, sizeof(path)); |
134 | 132 | ||
135 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); | 133 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); |
134 | #else | ||
135 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | ||
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 138 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
@@ -162,11 +162,64 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
162 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); | 162 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); |
163 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); | 163 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); |
164 | #ifdef CONFIG_SCHEDSTATS | 164 | #ifdef CONFIG_SCHEDSTATS |
165 | SEQ_printf(m, " .%-30s: %d\n", "bkl_count", | 165 | #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); |
166 | rq->bkl_count); | 166 | |
167 | P(yld_exp_empty); | ||
168 | P(yld_act_empty); | ||
169 | P(yld_both_empty); | ||
170 | P(yld_count); | ||
171 | |||
172 | P(sched_switch); | ||
173 | P(sched_count); | ||
174 | P(sched_goidle); | ||
175 | |||
176 | P(ttwu_count); | ||
177 | P(ttwu_local); | ||
178 | |||
179 | P(bkl_count); | ||
180 | |||
181 | #undef P | ||
167 | #endif | 182 | #endif |
168 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", | 183 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", |
169 | cfs_rq->nr_spread_over); | 184 | cfs_rq->nr_spread_over); |
185 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
186 | #ifdef CONFIG_SMP | ||
187 | SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); | ||
188 | #endif | ||
189 | #endif | ||
190 | } | ||
191 | |||
192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | ||
193 | { | ||
194 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) | ||
195 | char path[128] = ""; | ||
196 | struct cgroup *cgroup = NULL; | ||
197 | struct task_group *tg = rt_rq->tg; | ||
198 | |||
199 | if (tg) | ||
200 | cgroup = tg->css.cgroup; | ||
201 | |||
202 | if (cgroup) | ||
203 | cgroup_path(cgroup, path, sizeof(path)); | ||
204 | |||
205 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); | ||
206 | #else | ||
207 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | ||
208 | #endif | ||
209 | |||
210 | |||
211 | #define P(x) \ | ||
212 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | ||
213 | #define PN(x) \ | ||
214 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) | ||
215 | |||
216 | P(rt_nr_running); | ||
217 | P(rt_throttled); | ||
218 | PN(rt_time); | ||
219 | PN(rt_runtime); | ||
220 | |||
221 | #undef PN | ||
222 | #undef P | ||
170 | } | 223 | } |
171 | 224 | ||
172 | static void print_cpu(struct seq_file *m, int cpu) | 225 | static void print_cpu(struct seq_file *m, int cpu) |
@@ -208,6 +261,7 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
208 | #undef PN | 261 | #undef PN |
209 | 262 | ||
210 | print_cfs_stats(m, cpu); | 263 | print_cfs_stats(m, cpu); |
264 | print_rt_stats(m, cpu); | ||
211 | 265 | ||
212 | print_rq(m, rq, cpu); | 266 | print_rq(m, rq, cpu); |
213 | } | 267 | } |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 08ae848b71d4..f2aa987027d6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -63,13 +63,13 @@ unsigned int __read_mostly sysctl_sched_compat_yield; | |||
63 | 63 | ||
64 | /* | 64 | /* |
65 | * SCHED_OTHER wake-up granularity. | 65 | * SCHED_OTHER wake-up granularity. |
66 | * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds) | 66 | * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds) |
67 | * | 67 | * |
68 | * This option delays the preemption effects of decoupled workloads | 68 | * This option delays the preemption effects of decoupled workloads |
69 | * and reduces their over-scheduling. Synchronous workloads will still | 69 | * and reduces their over-scheduling. Synchronous workloads will still |
70 | * have immediate wakeup/sleep latencies. | 70 | * have immediate wakeup/sleep latencies. |
71 | */ | 71 | */ |
72 | unsigned int sysctl_sched_wakeup_granularity = 10000000UL; | 72 | unsigned int sysctl_sched_wakeup_granularity = 5000000UL; |
73 | 73 | ||
74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
75 | 75 | ||
@@ -334,6 +334,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * delta *= w / rw | ||
338 | */ | ||
339 | static inline unsigned long | ||
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
341 | { | ||
342 | for_each_sched_entity(se) { | ||
343 | delta = calc_delta_mine(delta, | ||
344 | se->load.weight, &cfs_rq_of(se)->load); | ||
345 | } | ||
346 | |||
347 | return delta; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * delta *= rw / w | ||
352 | */ | ||
353 | static inline unsigned long | ||
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
355 | { | ||
356 | for_each_sched_entity(se) { | ||
357 | delta = calc_delta_mine(delta, | ||
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | |||
361 | return delta; | ||
362 | } | ||
363 | |||
364 | /* | ||
337 | * The idea is to set a period in which each task runs once. | 365 | * The idea is to set a period in which each task runs once. |
338 | * | 366 | * |
339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
@@ -362,47 +390,80 @@ static u64 __sched_period(unsigned long nr_running) | |||
362 | */ | 390 | */ |
363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
364 | { | 392 | { |
365 | u64 slice = __sched_period(cfs_rq->nr_running); | 393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); |
366 | |||
367 | for_each_sched_entity(se) { | ||
368 | cfs_rq = cfs_rq_of(se); | ||
369 | |||
370 | slice *= se->load.weight; | ||
371 | do_div(slice, cfs_rq->load.weight); | ||
372 | } | ||
373 | |||
374 | |||
375 | return slice; | ||
376 | } | 394 | } |
377 | 395 | ||
378 | /* | 396 | /* |
379 | * We calculate the vruntime slice of a to-be-inserted task | 397 | * We calculate the vruntime slice of a to-be-inserted task |
380 | * | 398 | * |
381 | * vs = s/w = p/rw | 399 | * vs = s*rw/w = p |
382 | */ | 400 | */ |
383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
384 | { | 402 | { |
385 | unsigned long nr_running = cfs_rq->nr_running; | 403 | unsigned long nr_running = cfs_rq->nr_running; |
386 | unsigned long weight; | ||
387 | u64 vslice; | ||
388 | 404 | ||
389 | if (!se->on_rq) | 405 | if (!se->on_rq) |
390 | nr_running++; | 406 | nr_running++; |
391 | 407 | ||
392 | vslice = __sched_period(nr_running); | 408 | return __sched_period(nr_running); |
409 | } | ||
410 | |||
411 | /* | ||
412 | * The goal of calc_delta_asym() is to be asymmetric around NICE_0_LOAD, in | ||
413 | * that it favours >=0 over <0. | ||
414 | * | ||
415 | * -20 | | ||
416 | * | | ||
417 | * 0 --------+------- | ||
418 | * .' | ||
419 | * 19 .' | ||
420 | * | ||
421 | */ | ||
422 | static unsigned long | ||
423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
424 | { | ||
425 | struct load_weight lw = { | ||
426 | .weight = NICE_0_LOAD, | ||
427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
428 | }; | ||
393 | 429 | ||
394 | for_each_sched_entity(se) { | 430 | for_each_sched_entity(se) { |
395 | cfs_rq = cfs_rq_of(se); | 431 | struct load_weight *se_lw = &se->load; |
432 | unsigned long rw = cfs_rq_of(se)->load.weight; | ||
433 | |||
434 | #ifdef CONFIG_FAIR_SCHED_GROUP | ||
435 | struct cfs_rq *cfs_rq = se->my_q; | ||
436 | struct task_group *tg = NULL | ||
437 | |||
438 | if (cfs_rq) | ||
439 | tg = cfs_rq->tg; | ||
440 | |||
441 | if (tg && tg->shares < NICE_0_LOAD) { | ||
442 | /* | ||
443 | * scale shares to what it would have been had | ||
444 | * tg->weight been NICE_0_LOAD: | ||
445 | * | ||
446 | * weight = 1024 * shares / tg->weight | ||
447 | */ | ||
448 | lw.weight *= se->load.weight; | ||
449 | lw.weight /= tg->shares; | ||
450 | |||
451 | lw.inv_weight = 0; | ||
452 | |||
453 | se_lw = &lw; | ||
454 | rw += lw.weight - se->load.weight; | ||
455 | } else | ||
456 | #endif | ||
396 | 457 | ||
397 | weight = cfs_rq->load.weight; | 458 | if (se->load.weight < NICE_0_LOAD) { |
398 | if (!se->on_rq) | 459 | se_lw = &lw; |
399 | weight += se->load.weight; | 460 | rw += NICE_0_LOAD - se->load.weight; |
461 | } | ||
400 | 462 | ||
401 | vslice *= NICE_0_LOAD; | 463 | delta = calc_delta_mine(delta, rw, se_lw); |
402 | do_div(vslice, weight); | ||
403 | } | 464 | } |
404 | 465 | ||
405 | return vslice; | 466 | return delta; |
406 | } | 467 | } |
407 | 468 | ||
408 | /* | 469 | /* |
@@ -419,11 +480,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
419 | 480 | ||
420 | curr->sum_exec_runtime += delta_exec; | 481 | curr->sum_exec_runtime += delta_exec; |
421 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 482 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
422 | delta_exec_weighted = delta_exec; | 483 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); |
423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
425 | &curr->load); | ||
426 | } | ||
427 | curr->vruntime += delta_exec_weighted; | 484 | curr->vruntime += delta_exec_weighted; |
428 | } | 485 | } |
429 | 486 | ||
@@ -510,10 +567,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
510 | * Scheduling class queueing methods: | 567 | * Scheduling class queueing methods: |
511 | */ | 568 | */ |
512 | 569 | ||
570 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
571 | static void | ||
572 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
573 | { | ||
574 | cfs_rq->task_weight += weight; | ||
575 | } | ||
576 | #else | ||
577 | static inline void | ||
578 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
579 | { | ||
580 | } | ||
581 | #endif | ||
582 | |||
513 | static void | 583 | static void |
514 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 584 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
515 | { | 585 | { |
516 | update_load_add(&cfs_rq->load, se->load.weight); | 586 | update_load_add(&cfs_rq->load, se->load.weight); |
587 | if (!parent_entity(se)) | ||
588 | inc_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
589 | if (entity_is_task(se)) | ||
590 | add_cfs_task_weight(cfs_rq, se->load.weight); | ||
517 | cfs_rq->nr_running++; | 591 | cfs_rq->nr_running++; |
518 | se->on_rq = 1; | 592 | se->on_rq = 1; |
519 | list_add(&se->group_node, &cfs_rq->tasks); | 593 | list_add(&se->group_node, &cfs_rq->tasks); |
@@ -523,6 +597,10 @@ static void | |||
523 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 597 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
524 | { | 598 | { |
525 | update_load_sub(&cfs_rq->load, se->load.weight); | 599 | update_load_sub(&cfs_rq->load, se->load.weight); |
600 | if (!parent_entity(se)) | ||
601 | dec_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
602 | if (entity_is_task(se)) | ||
603 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
526 | cfs_rq->nr_running--; | 604 | cfs_rq->nr_running--; |
527 | se->on_rq = 0; | 605 | se->on_rq = 0; |
528 | list_del_init(&se->group_node); | 606 | list_del_init(&se->group_node); |
@@ -609,8 +687,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
609 | 687 | ||
610 | if (!initial) { | 688 | if (!initial) { |
611 | /* sleeps up to a single latency don't count. */ | 689 | /* sleeps up to a single latency don't count. */ |
612 | if (sched_feat(NEW_FAIR_SLEEPERS)) | 690 | if (sched_feat(NEW_FAIR_SLEEPERS)) { |
613 | vruntime -= sysctl_sched_latency; | 691 | unsigned long thresh = sysctl_sched_latency; |
692 | |||
693 | /* | ||
694 | * convert the sleeper threshold into virtual time | ||
695 | */ | ||
696 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
697 | thresh = calc_delta_fair(thresh, se); | ||
698 | |||
699 | vruntime -= thresh; | ||
700 | } | ||
614 | 701 | ||
615 | /* ensure we never gain time by being placed backwards. */ | 702 | /* ensure we never gain time by being placed backwards. */ |
616 | vruntime = max_vruntime(se->vruntime, vruntime); | 703 | vruntime = max_vruntime(se->vruntime, vruntime); |
@@ -639,21 +726,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | |||
639 | __enqueue_entity(cfs_rq, se); | 726 | __enqueue_entity(cfs_rq, se); |
640 | } | 727 | } |
641 | 728 | ||
642 | static void update_avg(u64 *avg, u64 sample) | ||
643 | { | ||
644 | s64 diff = sample - *avg; | ||
645 | *avg += diff >> 3; | ||
646 | } | ||
647 | |||
648 | static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
649 | { | ||
650 | if (!se->last_wakeup) | ||
651 | return; | ||
652 | |||
653 | update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup); | ||
654 | se->last_wakeup = 0; | ||
655 | } | ||
656 | |||
657 | static void | 729 | static void |
658 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | 730 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) |
659 | { | 731 | { |
@@ -664,7 +736,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
664 | 736 | ||
665 | update_stats_dequeue(cfs_rq, se); | 737 | update_stats_dequeue(cfs_rq, se); |
666 | if (sleep) { | 738 | if (sleep) { |
667 | update_avg_stats(cfs_rq, se); | ||
668 | #ifdef CONFIG_SCHEDSTATS | 739 | #ifdef CONFIG_SCHEDSTATS |
669 | if (entity_is_task(se)) { | 740 | if (entity_is_task(se)) { |
670 | struct task_struct *tsk = task_of(se); | 741 | struct task_struct *tsk = task_of(se); |
@@ -726,17 +797,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
726 | se->prev_sum_exec_runtime = se->sum_exec_runtime; | 797 | se->prev_sum_exec_runtime = se->sum_exec_runtime; |
727 | } | 798 | } |
728 | 799 | ||
729 | static int | ||
730 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | ||
731 | |||
732 | static struct sched_entity * | 800 | static struct sched_entity * |
733 | pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | 801 | pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) |
734 | { | 802 | { |
735 | if (!cfs_rq->next) | 803 | struct rq *rq = rq_of(cfs_rq); |
736 | return se; | 804 | u64 pair_slice = rq->clock - cfs_rq->pair_start; |
737 | 805 | ||
738 | if (wakeup_preempt_entity(cfs_rq->next, se) != 0) | 806 | if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) { |
807 | cfs_rq->pair_start = rq->clock; | ||
739 | return se; | 808 | return se; |
809 | } | ||
740 | 810 | ||
741 | return cfs_rq->next; | 811 | return cfs_rq->next; |
742 | } | 812 | } |
@@ -835,7 +905,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
835 | hrtick_start(rq, delta, requeue); | 905 | hrtick_start(rq, delta, requeue); |
836 | } | 906 | } |
837 | } | 907 | } |
838 | #else | 908 | #else /* !CONFIG_SCHED_HRTICK */ |
839 | static inline void | 909 | static inline void |
840 | hrtick_start_fair(struct rq *rq, struct task_struct *p) | 910 | hrtick_start_fair(struct rq *rq, struct task_struct *p) |
841 | { | 911 | { |
@@ -976,7 +1046,7 @@ static int wake_idle(int cpu, struct task_struct *p) | |||
976 | } | 1046 | } |
977 | return cpu; | 1047 | return cpu; |
978 | } | 1048 | } |
979 | #else | 1049 | #else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ |
980 | static inline int wake_idle(int cpu, struct task_struct *p) | 1050 | static inline int wake_idle(int cpu, struct task_struct *p) |
981 | { | 1051 | { |
982 | return cpu; | 1052 | return cpu; |
@@ -987,6 +1057,89 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
987 | 1057 | ||
988 | static const struct sched_class fair_sched_class; | 1058 | static const struct sched_class fair_sched_class; |
989 | 1059 | ||
1060 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1061 | /* | ||
1062 | * effective_load() calculates the load change as seen from the root_task_group | ||
1063 | * | ||
1064 | * Adding load to a group doesn't make a group heavier, but can cause movement | ||
1065 | * of group shares between cpus. Assuming the shares were perfectly aligned one | ||
1066 | * can calculate the shift in shares. | ||
1067 | * | ||
1068 | * The problem is that perfectly aligning the shares is rather expensive, hence | ||
1069 | * we try to avoid doing that too often - see update_shares(), which ratelimits | ||
1070 | * this change. | ||
1071 | * | ||
1072 | * We compensate for this by not only taking the current delta into account, but | ||
1073 | * also considering the delta between when the shares were last adjusted and | ||
1074 | * now. | ||
1075 | * | ||
1076 | * We still saw a performance dip; tracing showed that, when balancing between | ||
1077 | * cgroup:/ and cgroup:/foo, the number of affine wakeups increased | ||
1078 | * significantly. Therefore try to bias the error in the direction of failing | ||
1079 | * the affine wakeup. | ||
1080 | * | ||
1081 | */ | ||
1082 | static long effective_load(struct task_group *tg, int cpu, | ||
1083 | long wl, long wg) | ||
1084 | { | ||
1085 | struct sched_entity *se = tg->se[cpu]; | ||
1086 | long more_w; | ||
1087 | |||
1088 | if (!tg->parent) | ||
1089 | return wl; | ||
1090 | |||
1091 | /* | ||
1092 | * By not taking the decrease of shares on the other cpu into | ||
1093 | * account our error leans towards reducing the affine wakeups. | ||
1094 | */ | ||
1095 | if (!wl && sched_feat(ASYM_EFF_LOAD)) | ||
1096 | return wl; | ||
1097 | |||
1098 | /* | ||
1099 | * Instead of using this increment, also add the difference | ||
1100 | * between when the shares were last updated and now. | ||
1101 | */ | ||
1102 | more_w = se->my_q->load.weight - se->my_q->rq_weight; | ||
1103 | wl += more_w; | ||
1104 | wg += more_w; | ||
1105 | |||
1106 | for_each_sched_entity(se) { | ||
1107 | #define D(n) (likely(n) ? (n) : 1) | ||
1108 | |||
1109 | long S, rw, s, a, b; | ||
1110 | |||
1111 | S = se->my_q->tg->shares; | ||
1112 | s = se->my_q->shares; | ||
1113 | rw = se->my_q->rq_weight; | ||
1114 | |||
1115 | a = S*(rw + wl); | ||
1116 | b = S*rw + s*wg; | ||
1117 | |||
1118 | wl = s*(a-b)/D(b); | ||
1119 | /* | ||
1120 | * Assume the group is already running and will | ||
1121 | * thus already be accounted for in the weight. | ||
1122 | * | ||
1123 | * That is, moving shares between CPUs, does not | ||
1124 | * alter the group weight. | ||
1125 | */ | ||
1126 | wg = 0; | ||
1127 | #undef D | ||
1128 | } | ||
1129 | |||
1130 | return wl; | ||
1131 | } | ||
1132 | |||
1133 | #else | ||
1134 | |||
1135 | static inline unsigned long effective_load(struct task_group *tg, int cpu, | ||
1136 | unsigned long wl, unsigned long wg) | ||
1137 | { | ||
1138 | return wl; | ||
1139 | } | ||
1140 | |||
1141 | #endif | ||
1142 | |||
990 | static int | 1143 | static int |
991 | wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | 1144 | wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, |
992 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | 1145 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, |
@@ -994,8 +1147,10 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
994 | unsigned int imbalance) | 1147 | unsigned int imbalance) |
995 | { | 1148 | { |
996 | struct task_struct *curr = this_rq->curr; | 1149 | struct task_struct *curr = this_rq->curr; |
1150 | struct task_group *tg; | ||
997 | unsigned long tl = this_load; | 1151 | unsigned long tl = this_load; |
998 | unsigned long tl_per_task; | 1152 | unsigned long tl_per_task; |
1153 | unsigned long weight; | ||
999 | int balanced; | 1154 | int balanced; |
1000 | 1155 | ||
1001 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1156 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
@@ -1006,19 +1161,28 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
1006 | * effect of the currently running task from the load | 1161 | * effect of the currently running task from the load |
1007 | * of the current CPU: | 1162 | * of the current CPU: |
1008 | */ | 1163 | */ |
1009 | if (sync) | 1164 | if (sync) { |
1010 | tl -= current->se.load.weight; | 1165 | tg = task_group(current); |
1166 | weight = current->se.load.weight; | ||
1167 | |||
1168 | tl += effective_load(tg, this_cpu, -weight, -weight); | ||
1169 | load += effective_load(tg, prev_cpu, 0, -weight); | ||
1170 | } | ||
1011 | 1171 | ||
1012 | balanced = 100*(tl + p->se.load.weight) <= imbalance*load; | 1172 | tg = task_group(p); |
1173 | weight = p->se.load.weight; | ||
1174 | |||
1175 | balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= | ||
1176 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | ||
1013 | 1177 | ||
1014 | /* | 1178 | /* |
1015 | * If the currently running task will sleep within | 1179 | * If the currently running task will sleep within |
1016 | * a reasonable amount of time then attract this newly | 1180 | * a reasonable amount of time then attract this newly |
1017 | * woken task: | 1181 | * woken task: |
1018 | */ | 1182 | */ |
1019 | if (sync && balanced && curr->sched_class == &fair_sched_class) { | 1183 | if (sync && balanced) { |
1020 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && | 1184 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && |
1021 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1185 | p->se.avg_overlap < sysctl_sched_migration_cost) |
1022 | return 1; | 1186 | return 1; |
1023 | } | 1187 | } |
1024 | 1188 | ||
@@ -1111,11 +1275,13 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1111 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1275 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1112 | 1276 | ||
1113 | /* | 1277 | /* |
1114 | * More easily preempt - nice tasks, while not making | 1278 | * More easily preempt - nice tasks, while not making it harder for |
1115 | * it harder for + nice tasks. | 1279 | * + nice tasks. |
1116 | */ | 1280 | */ |
1117 | if (unlikely(se->load.weight > NICE_0_LOAD)) | 1281 | if (sched_feat(ASYM_GRAN)) |
1118 | gran = calc_delta_fair(gran, &se->load); | 1282 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); |
1283 | else | ||
1284 | gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); | ||
1119 | 1285 | ||
1120 | return gran; | 1286 | return gran; |
1121 | } | 1287 | } |
@@ -1177,7 +1343,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | |||
1177 | return; | 1343 | return; |
1178 | } | 1344 | } |
1179 | 1345 | ||
1180 | se->last_wakeup = se->sum_exec_runtime; | ||
1181 | if (unlikely(se == pse)) | 1346 | if (unlikely(se == pse)) |
1182 | return; | 1347 | return; |
1183 | 1348 | ||
@@ -1275,23 +1440,18 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next) | |||
1275 | struct task_struct *p = NULL; | 1440 | struct task_struct *p = NULL; |
1276 | struct sched_entity *se; | 1441 | struct sched_entity *se; |
1277 | 1442 | ||
1278 | if (next == &cfs_rq->tasks) | 1443 | while (next != &cfs_rq->tasks) { |
1279 | return NULL; | ||
1280 | |||
1281 | /* Skip over entities that are not tasks */ | ||
1282 | do { | ||
1283 | se = list_entry(next, struct sched_entity, group_node); | 1444 | se = list_entry(next, struct sched_entity, group_node); |
1284 | next = next->next; | 1445 | next = next->next; |
1285 | } while (next != &cfs_rq->tasks && !entity_is_task(se)); | ||
1286 | 1446 | ||
1287 | if (next == &cfs_rq->tasks) | 1447 | /* Skip over entities that are not tasks */ |
1288 | return NULL; | 1448 | if (entity_is_task(se)) { |
1449 | p = task_of(se); | ||
1450 | break; | ||
1451 | } | ||
1452 | } | ||
1289 | 1453 | ||
1290 | cfs_rq->balance_iterator = next; | 1454 | cfs_rq->balance_iterator = next; |
1291 | |||
1292 | if (entity_is_task(se)) | ||
1293 | p = task_of(se); | ||
1294 | |||
1295 | return p; | 1455 | return p; |
1296 | } | 1456 | } |
1297 | 1457 | ||
@@ -1309,75 +1469,82 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
1309 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); | 1469 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); |
1310 | } | 1470 | } |
1311 | 1471 | ||
1312 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1472 | static unsigned long |
1313 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | 1473 | __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1474 | unsigned long max_load_move, struct sched_domain *sd, | ||
1475 | enum cpu_idle_type idle, int *all_pinned, int *this_best_prio, | ||
1476 | struct cfs_rq *cfs_rq) | ||
1314 | { | 1477 | { |
1315 | struct sched_entity *curr; | 1478 | struct rq_iterator cfs_rq_iterator; |
1316 | struct task_struct *p; | ||
1317 | |||
1318 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) | ||
1319 | return MAX_PRIO; | ||
1320 | |||
1321 | curr = cfs_rq->curr; | ||
1322 | if (!curr) | ||
1323 | curr = __pick_next_entity(cfs_rq); | ||
1324 | 1479 | ||
1325 | p = task_of(curr); | 1480 | cfs_rq_iterator.start = load_balance_start_fair; |
1481 | cfs_rq_iterator.next = load_balance_next_fair; | ||
1482 | cfs_rq_iterator.arg = cfs_rq; | ||
1326 | 1483 | ||
1327 | return p->prio; | 1484 | return balance_tasks(this_rq, this_cpu, busiest, |
1485 | max_load_move, sd, idle, all_pinned, | ||
1486 | this_best_prio, &cfs_rq_iterator); | ||
1328 | } | 1487 | } |
1329 | #endif | ||
1330 | 1488 | ||
1489 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1331 | static unsigned long | 1490 | static unsigned long |
1332 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1491 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1333 | unsigned long max_load_move, | 1492 | unsigned long max_load_move, |
1334 | struct sched_domain *sd, enum cpu_idle_type idle, | 1493 | struct sched_domain *sd, enum cpu_idle_type idle, |
1335 | int *all_pinned, int *this_best_prio) | 1494 | int *all_pinned, int *this_best_prio) |
1336 | { | 1495 | { |
1337 | struct cfs_rq *busy_cfs_rq; | ||
1338 | long rem_load_move = max_load_move; | 1496 | long rem_load_move = max_load_move; |
1339 | struct rq_iterator cfs_rq_iterator; | 1497 | int busiest_cpu = cpu_of(busiest); |
1340 | 1498 | struct task_group *tg; | |
1341 | cfs_rq_iterator.start = load_balance_start_fair; | ||
1342 | cfs_rq_iterator.next = load_balance_next_fair; | ||
1343 | 1499 | ||
1344 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | 1500 | rcu_read_lock(); |
1345 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1501 | update_h_load(busiest_cpu); |
1346 | struct cfs_rq *this_cfs_rq; | ||
1347 | long imbalance; | ||
1348 | unsigned long maxload; | ||
1349 | 1502 | ||
1350 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); | 1503 | list_for_each_entry(tg, &task_groups, list) { |
1504 | struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu]; | ||
1505 | unsigned long busiest_h_load = busiest_cfs_rq->h_load; | ||
1506 | unsigned long busiest_weight = busiest_cfs_rq->load.weight; | ||
1507 | u64 rem_load, moved_load; | ||
1351 | 1508 | ||
1352 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; | 1509 | /* |
1353 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | 1510 | * empty group |
1354 | if (imbalance <= 0) | 1511 | */ |
1512 | if (!busiest_cfs_rq->task_weight) | ||
1355 | continue; | 1513 | continue; |
1356 | 1514 | ||
1357 | /* Don't pull more than imbalance/2 */ | 1515 | rem_load = (u64)rem_load_move * busiest_weight; |
1358 | imbalance /= 2; | 1516 | rem_load = div_u64(rem_load, busiest_h_load + 1); |
1359 | maxload = min(rem_load_move, imbalance); | ||
1360 | 1517 | ||
1361 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); | 1518 | moved_load = __load_balance_fair(this_rq, this_cpu, busiest, |
1362 | #else | 1519 | rem_load, sd, idle, all_pinned, this_best_prio, |
1363 | # define maxload rem_load_move | 1520 | tg->cfs_rq[busiest_cpu]); |
1364 | #endif | 1521 | |
1365 | /* | 1522 | if (!moved_load) |
1366 | * pass busy_cfs_rq argument into | 1523 | continue; |
1367 | * load_balance_[start|next]_fair iterators | 1524 | |
1368 | */ | 1525 | moved_load *= busiest_h_load; |
1369 | cfs_rq_iterator.arg = busy_cfs_rq; | 1526 | moved_load = div_u64(moved_load, busiest_weight + 1); |
1370 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, | ||
1371 | maxload, sd, idle, all_pinned, | ||
1372 | this_best_prio, | ||
1373 | &cfs_rq_iterator); | ||
1374 | 1527 | ||
1375 | if (rem_load_move <= 0) | 1528 | rem_load_move -= moved_load; |
1529 | if (rem_load_move < 0) | ||
1376 | break; | 1530 | break; |
1377 | } | 1531 | } |
1532 | rcu_read_unlock(); | ||
1378 | 1533 | ||
1379 | return max_load_move - rem_load_move; | 1534 | return max_load_move - rem_load_move; |
1380 | } | 1535 | } |
1536 | #else | ||
1537 | static unsigned long | ||
1538 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
1539 | unsigned long max_load_move, | ||
1540 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
1541 | int *all_pinned, int *this_best_prio) | ||
1542 | { | ||
1543 | return __load_balance_fair(this_rq, this_cpu, busiest, | ||
1544 | max_load_move, sd, idle, all_pinned, | ||
1545 | this_best_prio, &busiest->cfs); | ||
1546 | } | ||
1547 | #endif | ||
1381 | 1548 | ||
1382 | static int | 1549 | static int |
1383 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1550 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
@@ -1402,7 +1569,7 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1402 | 1569 | ||
1403 | return 0; | 1570 | return 0; |
1404 | } | 1571 | } |
1405 | #endif | 1572 | #endif /* CONFIG_SMP */ |
1406 | 1573 | ||
1407 | /* | 1574 | /* |
1408 | * scheduler tick hitting a task of our scheduling class: | 1575 | * scheduler tick hitting a task of our scheduling class: |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 1c7283cb9581..862b06bd560a 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -1,4 +1,5 @@ | |||
1 | SCHED_FEAT(NEW_FAIR_SLEEPERS, 1) | 1 | SCHED_FEAT(NEW_FAIR_SLEEPERS, 1) |
2 | SCHED_FEAT(NORMALIZED_SLEEPER, 1) | ||
2 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | 3 | SCHED_FEAT(WAKEUP_PREEMPT, 1) |
3 | SCHED_FEAT(START_DEBIT, 1) | 4 | SCHED_FEAT(START_DEBIT, 1) |
4 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 5 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
@@ -6,5 +7,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, 1) | |||
6 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 7 | SCHED_FEAT(SYNC_WAKEUPS, 1) |
7 | SCHED_FEAT(HRTICK, 1) | 8 | SCHED_FEAT(HRTICK, 1) |
8 | SCHED_FEAT(DOUBLE_TICK, 0) | 9 | SCHED_FEAT(DOUBLE_TICK, 0) |
9 | SCHED_FEAT(NORMALIZED_SLEEPER, 1) | 10 | SCHED_FEAT(ASYM_GRAN, 1) |
10 | SCHED_FEAT(DEADLINE, 1) | 11 | SCHED_FEAT(LB_BIAS, 0) |
12 | SCHED_FEAT(LB_WAKEUP_UPDATE, 1) | ||
13 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 0f3c19197fa4..47ceac9e8552 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq) | |||
12 | 12 | ||
13 | static inline void rt_set_overload(struct rq *rq) | 13 | static inline void rt_set_overload(struct rq *rq) |
14 | { | 14 | { |
15 | if (!rq->online) | ||
16 | return; | ||
17 | |||
15 | cpu_set(rq->cpu, rq->rd->rto_mask); | 18 | cpu_set(rq->cpu, rq->rd->rto_mask); |
16 | /* | 19 | /* |
17 | * Make sure the mask is visible before we set | 20 | * Make sure the mask is visible before we set |
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq) | |||
26 | 29 | ||
27 | static inline void rt_clear_overload(struct rq *rq) | 30 | static inline void rt_clear_overload(struct rq *rq) |
28 | { | 31 | { |
32 | if (!rq->online) | ||
33 | return; | ||
34 | |||
29 | /* the order here really doesn't matter */ | 35 | /* the order here really doesn't matter */ |
30 | atomic_dec(&rq->rd->rto_count); | 36 | atomic_dec(&rq->rd->rto_count); |
31 | cpu_clear(rq->cpu, rq->rd->rto_mask); | 37 | cpu_clear(rq->cpu, rq->rd->rto_mask); |
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
155 | return &rt_rq->tg->rt_bandwidth; | 161 | return &rt_rq->tg->rt_bandwidth; |
156 | } | 162 | } |
157 | 163 | ||
158 | #else | 164 | #else /* !CONFIG_RT_GROUP_SCHED */ |
159 | 165 | ||
160 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 166 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
161 | { | 167 | { |
@@ -220,49 +226,10 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
220 | return &def_rt_bandwidth; | 226 | return &def_rt_bandwidth; |
221 | } | 227 | } |
222 | 228 | ||
223 | #endif | 229 | #endif /* CONFIG_RT_GROUP_SCHED */ |
224 | |||
225 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
226 | { | ||
227 | int i, idle = 1; | ||
228 | cpumask_t span; | ||
229 | |||
230 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
231 | return 1; | ||
232 | |||
233 | span = sched_rt_period_mask(); | ||
234 | for_each_cpu_mask(i, span) { | ||
235 | int enqueue = 0; | ||
236 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
237 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
238 | |||
239 | spin_lock(&rq->lock); | ||
240 | if (rt_rq->rt_time) { | ||
241 | u64 runtime; | ||
242 | |||
243 | spin_lock(&rt_rq->rt_runtime_lock); | ||
244 | runtime = rt_rq->rt_runtime; | ||
245 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
246 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
247 | rt_rq->rt_throttled = 0; | ||
248 | enqueue = 1; | ||
249 | } | ||
250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
251 | idle = 0; | ||
252 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
253 | } else if (rt_rq->rt_nr_running) | ||
254 | idle = 0; | ||
255 | |||
256 | if (enqueue) | ||
257 | sched_rt_rq_enqueue(rt_rq); | ||
258 | spin_unlock(&rq->lock); | ||
259 | } | ||
260 | |||
261 | return idle; | ||
262 | } | ||
263 | 230 | ||
264 | #ifdef CONFIG_SMP | 231 | #ifdef CONFIG_SMP |
265 | static int balance_runtime(struct rt_rq *rt_rq) | 232 | static int do_balance_runtime(struct rt_rq *rt_rq) |
266 | { | 233 | { |
267 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 234 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
268 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | 235 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; |
@@ -281,6 +248,9 @@ static int balance_runtime(struct rt_rq *rt_rq) | |||
281 | continue; | 248 | continue; |
282 | 249 | ||
283 | spin_lock(&iter->rt_runtime_lock); | 250 | spin_lock(&iter->rt_runtime_lock); |
251 | if (iter->rt_runtime == RUNTIME_INF) | ||
252 | goto next; | ||
253 | |||
284 | diff = iter->rt_runtime - iter->rt_time; | 254 | diff = iter->rt_runtime - iter->rt_time; |
285 | if (diff > 0) { | 255 | if (diff > 0) { |
286 | do_div(diff, weight); | 256 | do_div(diff, weight); |
@@ -294,13 +264,163 @@ static int balance_runtime(struct rt_rq *rt_rq) | |||
294 | break; | 264 | break; |
295 | } | 265 | } |
296 | } | 266 | } |
267 | next: | ||
297 | spin_unlock(&iter->rt_runtime_lock); | 268 | spin_unlock(&iter->rt_runtime_lock); |
298 | } | 269 | } |
299 | spin_unlock(&rt_b->rt_runtime_lock); | 270 | spin_unlock(&rt_b->rt_runtime_lock); |
300 | 271 | ||
301 | return more; | 272 | return more; |
302 | } | 273 | } |
303 | #endif | 274 | |
275 | static void __disable_runtime(struct rq *rq) | ||
276 | { | ||
277 | struct root_domain *rd = rq->rd; | ||
278 | struct rt_rq *rt_rq; | ||
279 | |||
280 | if (unlikely(!scheduler_running)) | ||
281 | return; | ||
282 | |||
283 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
284 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
285 | s64 want; | ||
286 | int i; | ||
287 | |||
288 | spin_lock(&rt_b->rt_runtime_lock); | ||
289 | spin_lock(&rt_rq->rt_runtime_lock); | ||
290 | if (rt_rq->rt_runtime == RUNTIME_INF || | ||
291 | rt_rq->rt_runtime == rt_b->rt_runtime) | ||
292 | goto balanced; | ||
293 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
294 | |||
295 | want = rt_b->rt_runtime - rt_rq->rt_runtime; | ||
296 | |||
297 | for_each_cpu_mask(i, rd->span) { | ||
298 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
299 | s64 diff; | ||
300 | |||
301 | if (iter == rt_rq) | ||
302 | continue; | ||
303 | |||
304 | spin_lock(&iter->rt_runtime_lock); | ||
305 | if (want > 0) { | ||
306 | diff = min_t(s64, iter->rt_runtime, want); | ||
307 | iter->rt_runtime -= diff; | ||
308 | want -= diff; | ||
309 | } else { | ||
310 | iter->rt_runtime -= want; | ||
311 | want -= want; | ||
312 | } | ||
313 | spin_unlock(&iter->rt_runtime_lock); | ||
314 | |||
315 | if (!want) | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | spin_lock(&rt_rq->rt_runtime_lock); | ||
320 | BUG_ON(want); | ||
321 | balanced: | ||
322 | rt_rq->rt_runtime = RUNTIME_INF; | ||
323 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
324 | spin_unlock(&rt_b->rt_runtime_lock); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | static void disable_runtime(struct rq *rq) | ||
329 | { | ||
330 | unsigned long flags; | ||
331 | |||
332 | spin_lock_irqsave(&rq->lock, flags); | ||
333 | __disable_runtime(rq); | ||
334 | spin_unlock_irqrestore(&rq->lock, flags); | ||
335 | } | ||
336 | |||
337 | static void __enable_runtime(struct rq *rq) | ||
338 | { | ||
339 | struct rt_rq *rt_rq; | ||
340 | |||
341 | if (unlikely(!scheduler_running)) | ||
342 | return; | ||
343 | |||
344 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
345 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
346 | |||
347 | spin_lock(&rt_b->rt_runtime_lock); | ||
348 | spin_lock(&rt_rq->rt_runtime_lock); | ||
349 | rt_rq->rt_runtime = rt_b->rt_runtime; | ||
350 | rt_rq->rt_time = 0; | ||
351 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
352 | spin_unlock(&rt_b->rt_runtime_lock); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static void enable_runtime(struct rq *rq) | ||
357 | { | ||
358 | unsigned long flags; | ||
359 | |||
360 | spin_lock_irqsave(&rq->lock, flags); | ||
361 | __enable_runtime(rq); | ||
362 | spin_unlock_irqrestore(&rq->lock, flags); | ||
363 | } | ||
364 | |||
365 | static int balance_runtime(struct rt_rq *rt_rq) | ||
366 | { | ||
367 | int more = 0; | ||
368 | |||
369 | if (rt_rq->rt_time > rt_rq->rt_runtime) { | ||
370 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
371 | more = do_balance_runtime(rt_rq); | ||
372 | spin_lock(&rt_rq->rt_runtime_lock); | ||
373 | } | ||
374 | |||
375 | return more; | ||
376 | } | ||
377 | #else /* !CONFIG_SMP */ | ||
378 | static inline int balance_runtime(struct rt_rq *rt_rq) | ||
379 | { | ||
380 | return 0; | ||
381 | } | ||
382 | #endif /* CONFIG_SMP */ | ||
383 | |||
384 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
385 | { | ||
386 | int i, idle = 1; | ||
387 | cpumask_t span; | ||
388 | |||
389 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
390 | return 1; | ||
391 | |||
392 | span = sched_rt_period_mask(); | ||
393 | for_each_cpu_mask(i, span) { | ||
394 | int enqueue = 0; | ||
395 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
396 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
397 | |||
398 | spin_lock(&rq->lock); | ||
399 | if (rt_rq->rt_time) { | ||
400 | u64 runtime; | ||
401 | |||
402 | spin_lock(&rt_rq->rt_runtime_lock); | ||
403 | if (rt_rq->rt_throttled) | ||
404 | balance_runtime(rt_rq); | ||
405 | runtime = rt_rq->rt_runtime; | ||
406 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
407 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
408 | rt_rq->rt_throttled = 0; | ||
409 | enqueue = 1; | ||
410 | } | ||
411 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
412 | idle = 0; | ||
413 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
414 | } else if (rt_rq->rt_nr_running) | ||
415 | idle = 0; | ||
416 | |||
417 | if (enqueue) | ||
418 | sched_rt_rq_enqueue(rt_rq); | ||
419 | spin_unlock(&rq->lock); | ||
420 | } | ||
421 | |||
422 | return idle; | ||
423 | } | ||
304 | 424 | ||
305 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 425 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
306 | { | 426 | { |
@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
327 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | 447 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) |
328 | return 0; | 448 | return 0; |
329 | 449 | ||
330 | #ifdef CONFIG_SMP | 450 | balance_runtime(rt_rq); |
331 | if (rt_rq->rt_time > runtime) { | 451 | runtime = sched_rt_runtime(rt_rq); |
332 | int more; | 452 | if (runtime == RUNTIME_INF) |
333 | 453 | return 0; | |
334 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
335 | more = balance_runtime(rt_rq); | ||
336 | spin_lock(&rt_rq->rt_runtime_lock); | ||
337 | |||
338 | if (more) | ||
339 | runtime = sched_rt_runtime(rt_rq); | ||
340 | } | ||
341 | #endif | ||
342 | 454 | ||
343 | if (rt_rq->rt_time > runtime) { | 455 | if (rt_rq->rt_time > runtime) { |
344 | rt_rq->rt_throttled = 1; | 456 | rt_rq->rt_throttled = 1; |
@@ -392,12 +504,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
392 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 504 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
393 | rt_rq->rt_nr_running++; | 505 | rt_rq->rt_nr_running++; |
394 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 506 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
395 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 507 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) { |
508 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
509 | |||
396 | rt_rq->highest_prio = rt_se_prio(rt_se); | 510 | rt_rq->highest_prio = rt_se_prio(rt_se); |
511 | #ifdef CONFIG_SMP | ||
512 | if (rq->online) | ||
513 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
514 | rt_se_prio(rt_se)); | ||
515 | #endif | ||
516 | } | ||
397 | #endif | 517 | #endif |
398 | #ifdef CONFIG_SMP | 518 | #ifdef CONFIG_SMP |
399 | if (rt_se->nr_cpus_allowed > 1) { | 519 | if (rt_se->nr_cpus_allowed > 1) { |
400 | struct rq *rq = rq_of_rt_rq(rt_rq); | 520 | struct rq *rq = rq_of_rt_rq(rt_rq); |
521 | |||
401 | rq->rt.rt_nr_migratory++; | 522 | rq->rt.rt_nr_migratory++; |
402 | } | 523 | } |
403 | 524 | ||
@@ -417,6 +538,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
417 | static inline | 538 | static inline |
418 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 539 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
419 | { | 540 | { |
541 | #ifdef CONFIG_SMP | ||
542 | int highest_prio = rt_rq->highest_prio; | ||
543 | #endif | ||
544 | |||
420 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 545 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
421 | WARN_ON(!rt_rq->rt_nr_running); | 546 | WARN_ON(!rt_rq->rt_nr_running); |
422 | rt_rq->rt_nr_running--; | 547 | rt_rq->rt_nr_running--; |
@@ -440,6 +565,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
440 | rq->rt.rt_nr_migratory--; | 565 | rq->rt.rt_nr_migratory--; |
441 | } | 566 | } |
442 | 567 | ||
568 | if (rt_rq->highest_prio != highest_prio) { | ||
569 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
570 | |||
571 | if (rq->online) | ||
572 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
573 | rt_rq->highest_prio); | ||
574 | } | ||
575 | |||
443 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 576 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
444 | #endif /* CONFIG_SMP */ | 577 | #endif /* CONFIG_SMP */ |
445 | #ifdef CONFIG_RT_GROUP_SCHED | 578 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -455,6 +588,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
455 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 588 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
456 | struct rt_prio_array *array = &rt_rq->active; | 589 | struct rt_prio_array *array = &rt_rq->active; |
457 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 590 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
591 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
458 | 592 | ||
459 | /* | 593 | /* |
460 | * Don't enqueue the group if its throttled, or when empty. | 594 | * Don't enqueue the group if its throttled, or when empty. |
@@ -465,7 +599,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
465 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 599 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
466 | return; | 600 | return; |
467 | 601 | ||
468 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 602 | if (rt_se->nr_cpus_allowed == 1) |
603 | list_add(&rt_se->run_list, queue); | ||
604 | else | ||
605 | list_add_tail(&rt_se->run_list, queue); | ||
606 | |||
469 | __set_bit(rt_se_prio(rt_se), array->bitmap); | 607 | __set_bit(rt_se_prio(rt_se), array->bitmap); |
470 | 608 | ||
471 | inc_rt_tasks(rt_se, rt_rq); | 609 | inc_rt_tasks(rt_se, rt_rq); |
@@ -532,6 +670,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
532 | rt_se->timeout = 0; | 670 | rt_se->timeout = 0; |
533 | 671 | ||
534 | enqueue_rt_entity(rt_se); | 672 | enqueue_rt_entity(rt_se); |
673 | |||
674 | inc_cpu_load(rq, p->se.load.weight); | ||
535 | } | 675 | } |
536 | 676 | ||
537 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 677 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
@@ -540,6 +680,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
540 | 680 | ||
541 | update_curr_rt(rq); | 681 | update_curr_rt(rq); |
542 | dequeue_rt_entity(rt_se); | 682 | dequeue_rt_entity(rt_se); |
683 | |||
684 | dec_cpu_load(rq, p->se.load.weight); | ||
543 | } | 685 | } |
544 | 686 | ||
545 | /* | 687 | /* |
@@ -550,10 +692,12 @@ static | |||
550 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) | 692 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) |
551 | { | 693 | { |
552 | struct rt_prio_array *array = &rt_rq->active; | 694 | struct rt_prio_array *array = &rt_rq->active; |
553 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
554 | 695 | ||
555 | if (on_rt_rq(rt_se)) | 696 | if (on_rt_rq(rt_se)) { |
556 | list_move_tail(&rt_se->run_list, queue); | 697 | list_del_init(&rt_se->run_list); |
698 | list_add_tail(&rt_se->run_list, | ||
699 | array->queue + rt_se_prio(rt_se)); | ||
700 | } | ||
557 | } | 701 | } |
558 | 702 | ||
559 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | 703 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) |
@@ -616,8 +760,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync) | |||
616 | */ | 760 | */ |
617 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | 761 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) |
618 | { | 762 | { |
619 | if (p->prio < rq->curr->prio) | 763 | if (p->prio < rq->curr->prio) { |
620 | resched_task(rq->curr); | 764 | resched_task(rq->curr); |
765 | return; | ||
766 | } | ||
767 | |||
768 | #ifdef CONFIG_SMP | ||
769 | /* | ||
770 | * If: | ||
771 | * | ||
772 | * - the newly woken task is of equal priority to the current task | ||
773 | * - the newly woken task is non-migratable while current is migratable | ||
774 | * - current will be preempted on the next reschedule | ||
775 | * | ||
776 | * we should check to see if current can readily move to a different | ||
777 | * cpu. If so, we will reschedule to allow the push logic to try | ||
778 | * to move current somewhere else, making room for our non-migratable | ||
779 | * task. | ||
780 | */ | ||
781 | if((p->prio == rq->curr->prio) | ||
782 | && p->rt.nr_cpus_allowed == 1 | ||
783 | && rq->curr->rt.nr_cpus_allowed != 1) { | ||
784 | cpumask_t mask; | ||
785 | |||
786 | if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) | ||
787 | /* | ||
788 | * There appears to be other cpus that can accept | ||
789 | * current, so lets reschedule to try and push it away | ||
790 | */ | ||
791 | resched_task(rq->curr); | ||
792 | } | ||
793 | #endif | ||
621 | } | 794 | } |
622 | 795 | ||
623 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, | 796 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, |
@@ -720,73 +893,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
720 | 893 | ||
721 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); | 894 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); |
722 | 895 | ||
723 | static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask) | ||
724 | { | ||
725 | int lowest_prio = -1; | ||
726 | int lowest_cpu = -1; | ||
727 | int count = 0; | ||
728 | int cpu; | ||
729 | |||
730 | cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed); | ||
731 | |||
732 | /* | ||
733 | * Scan each rq for the lowest prio. | ||
734 | */ | ||
735 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
736 | struct rq *rq = cpu_rq(cpu); | ||
737 | |||
738 | /* We look for lowest RT prio or non-rt CPU */ | ||
739 | if (rq->rt.highest_prio >= MAX_RT_PRIO) { | ||
740 | /* | ||
741 | * if we already found a low RT queue | ||
742 | * and now we found this non-rt queue | ||
743 | * clear the mask and set our bit. | ||
744 | * Otherwise just return the queue as is | ||
745 | * and the count==1 will cause the algorithm | ||
746 | * to use the first bit found. | ||
747 | */ | ||
748 | if (lowest_cpu != -1) { | ||
749 | cpus_clear(*lowest_mask); | ||
750 | cpu_set(rq->cpu, *lowest_mask); | ||
751 | } | ||
752 | return 1; | ||
753 | } | ||
754 | |||
755 | /* no locking for now */ | ||
756 | if ((rq->rt.highest_prio > task->prio) | ||
757 | && (rq->rt.highest_prio >= lowest_prio)) { | ||
758 | if (rq->rt.highest_prio > lowest_prio) { | ||
759 | /* new low - clear old data */ | ||
760 | lowest_prio = rq->rt.highest_prio; | ||
761 | lowest_cpu = cpu; | ||
762 | count = 0; | ||
763 | } | ||
764 | count++; | ||
765 | } else | ||
766 | cpu_clear(cpu, *lowest_mask); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Clear out all the set bits that represent | ||
771 | * runqueues that were of higher prio than | ||
772 | * the lowest_prio. | ||
773 | */ | ||
774 | if (lowest_cpu > 0) { | ||
775 | /* | ||
776 | * Perhaps we could add another cpumask op to | ||
777 | * zero out bits. Like cpu_zero_bits(cpumask, nrbits); | ||
778 | * Then that could be optimized to use memset and such. | ||
779 | */ | ||
780 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
781 | if (cpu >= lowest_cpu) | ||
782 | break; | ||
783 | cpu_clear(cpu, *lowest_mask); | ||
784 | } | ||
785 | } | ||
786 | |||
787 | return count; | ||
788 | } | ||
789 | |||
790 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) | 896 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) |
791 | { | 897 | { |
792 | int first; | 898 | int first; |
@@ -808,17 +914,12 @@ static int find_lowest_rq(struct task_struct *task) | |||
808 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); | 914 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); |
809 | int this_cpu = smp_processor_id(); | 915 | int this_cpu = smp_processor_id(); |
810 | int cpu = task_cpu(task); | 916 | int cpu = task_cpu(task); |
811 | int count = find_lowest_cpus(task, lowest_mask); | ||
812 | 917 | ||
813 | if (!count) | 918 | if (task->rt.nr_cpus_allowed == 1) |
814 | return -1; /* No targets found */ | 919 | return -1; /* No other targets possible */ |
815 | 920 | ||
816 | /* | 921 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) |
817 | * There is no sense in performing an optimal search if only one | 922 | return -1; /* No targets found */ |
818 | * target is found. | ||
819 | */ | ||
820 | if (count == 1) | ||
821 | return first_cpu(*lowest_mask); | ||
822 | 923 | ||
823 | /* | 924 | /* |
824 | * At this point we have built a mask of cpus representing the | 925 | * At this point we have built a mask of cpus representing the |
@@ -1163,17 +1264,25 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1163 | } | 1264 | } |
1164 | 1265 | ||
1165 | /* Assumes rq->lock is held */ | 1266 | /* Assumes rq->lock is held */ |
1166 | static void join_domain_rt(struct rq *rq) | 1267 | static void rq_online_rt(struct rq *rq) |
1167 | { | 1268 | { |
1168 | if (rq->rt.overloaded) | 1269 | if (rq->rt.overloaded) |
1169 | rt_set_overload(rq); | 1270 | rt_set_overload(rq); |
1271 | |||
1272 | __enable_runtime(rq); | ||
1273 | |||
1274 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); | ||
1170 | } | 1275 | } |
1171 | 1276 | ||
1172 | /* Assumes rq->lock is held */ | 1277 | /* Assumes rq->lock is held */ |
1173 | static void leave_domain_rt(struct rq *rq) | 1278 | static void rq_offline_rt(struct rq *rq) |
1174 | { | 1279 | { |
1175 | if (rq->rt.overloaded) | 1280 | if (rq->rt.overloaded) |
1176 | rt_clear_overload(rq); | 1281 | rt_clear_overload(rq); |
1282 | |||
1283 | __disable_runtime(rq); | ||
1284 | |||
1285 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); | ||
1177 | } | 1286 | } |
1178 | 1287 | ||
1179 | /* | 1288 | /* |
@@ -1336,8 +1445,8 @@ static const struct sched_class rt_sched_class = { | |||
1336 | .load_balance = load_balance_rt, | 1445 | .load_balance = load_balance_rt, |
1337 | .move_one_task = move_one_task_rt, | 1446 | .move_one_task = move_one_task_rt, |
1338 | .set_cpus_allowed = set_cpus_allowed_rt, | 1447 | .set_cpus_allowed = set_cpus_allowed_rt, |
1339 | .join_domain = join_domain_rt, | 1448 | .rq_online = rq_online_rt, |
1340 | .leave_domain = leave_domain_rt, | 1449 | .rq_offline = rq_offline_rt, |
1341 | .pre_schedule = pre_schedule_rt, | 1450 | .pre_schedule = pre_schedule_rt, |
1342 | .post_schedule = post_schedule_rt, | 1451 | .post_schedule = post_schedule_rt, |
1343 | .task_wake_up = task_wake_up_rt, | 1452 | .task_wake_up = task_wake_up_rt, |
@@ -1350,3 +1459,17 @@ static const struct sched_class rt_sched_class = { | |||
1350 | .prio_changed = prio_changed_rt, | 1459 | .prio_changed = prio_changed_rt, |
1351 | .switched_to = switched_to_rt, | 1460 | .switched_to = switched_to_rt, |
1352 | }; | 1461 | }; |
1462 | |||
1463 | #ifdef CONFIG_SCHED_DEBUG | ||
1464 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | ||
1465 | |||
1466 | static void print_rt_stats(struct seq_file *m, int cpu) | ||
1467 | { | ||
1468 | struct rt_rq *rt_rq; | ||
1469 | |||
1470 | rcu_read_lock(); | ||
1471 | for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) | ||
1472 | print_rt_rq(m, cpu, rt_rq); | ||
1473 | rcu_read_unlock(); | ||
1474 | } | ||
1475 | #endif /* CONFIG_SCHED_DEBUG */ | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 80179ef7450e..8385d43987e2 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -118,6 +118,13 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) | |||
118 | if (rq) | 118 | if (rq) |
119 | rq->rq_sched_info.cpu_time += delta; | 119 | rq->rq_sched_info.cpu_time += delta; |
120 | } | 120 | } |
121 | |||
122 | static inline void | ||
123 | rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) | ||
124 | { | ||
125 | if (rq) | ||
126 | rq->rq_sched_info.run_delay += delta; | ||
127 | } | ||
121 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | 128 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) |
122 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | 129 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) |
123 | # define schedstat_set(var, val) do { var = (val); } while (0) | 130 | # define schedstat_set(var, val) do { var = (val); } while (0) |
@@ -126,6 +133,9 @@ static inline void | |||
126 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | 133 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) |
127 | {} | 134 | {} |
128 | static inline void | 135 | static inline void |
136 | rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) | ||
137 | {} | ||
138 | static inline void | ||
129 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | 139 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) |
130 | {} | 140 | {} |
131 | # define schedstat_inc(rq, field) do { } while (0) | 141 | # define schedstat_inc(rq, field) do { } while (0) |
@@ -134,6 +144,11 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) | |||
134 | #endif | 144 | #endif |
135 | 145 | ||
136 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 146 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
147 | static inline void sched_info_reset_dequeued(struct task_struct *t) | ||
148 | { | ||
149 | t->sched_info.last_queued = 0; | ||
150 | } | ||
151 | |||
137 | /* | 152 | /* |
138 | * Called when a process is dequeued from the active array and given | 153 | * Called when a process is dequeued from the active array and given |
139 | * the cpu. We should note that with the exception of interactive | 154 | * the cpu. We should note that with the exception of interactive |
@@ -143,15 +158,22 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) | |||
143 | * active queue, thus delaying tasks in the expired queue from running; | 158 | * active queue, thus delaying tasks in the expired queue from running; |
144 | * see scheduler_tick()). | 159 | * see scheduler_tick()). |
145 | * | 160 | * |
146 | * This function is only called from sched_info_arrive(), rather than | 161 | * Though we are interested in knowing how long it was from the *first* time a |
147 | * dequeue_task(). Even though a task may be queued and dequeued multiple | 162 | * task was queued to the time that it finally hit a cpu, we call this routine |
148 | * times as it is shuffled about, we're really interested in knowing how | 163 | * from dequeue_task() to account for possible rq->clock skew across cpus. The |
149 | * long it was from the *first* time it was queued to the time that it | 164 | * delta taken on each cpu would annul the skew. |
150 | * finally hit a cpu. | ||
151 | */ | 165 | */ |
152 | static inline void sched_info_dequeued(struct task_struct *t) | 166 | static inline void sched_info_dequeued(struct task_struct *t) |
153 | { | 167 | { |
154 | t->sched_info.last_queued = 0; | 168 | unsigned long long now = task_rq(t)->clock, delta = 0; |
169 | |||
170 | if (unlikely(sched_info_on())) | ||
171 | if (t->sched_info.last_queued) | ||
172 | delta = now - t->sched_info.last_queued; | ||
173 | sched_info_reset_dequeued(t); | ||
174 | t->sched_info.run_delay += delta; | ||
175 | |||
176 | rq_sched_info_dequeued(task_rq(t), delta); | ||
155 | } | 177 | } |
156 | 178 | ||
157 | /* | 179 | /* |
@@ -165,7 +187,7 @@ static void sched_info_arrive(struct task_struct *t) | |||
165 | 187 | ||
166 | if (t->sched_info.last_queued) | 188 | if (t->sched_info.last_queued) |
167 | delta = now - t->sched_info.last_queued; | 189 | delta = now - t->sched_info.last_queued; |
168 | sched_info_dequeued(t); | 190 | sched_info_reset_dequeued(t); |
169 | t->sched_info.run_delay += delta; | 191 | t->sched_info.run_delay += delta; |
170 | t->sched_info.last_arrival = now; | 192 | t->sched_info.last_arrival = now; |
171 | t->sched_info.pcount++; | 193 | t->sched_info.pcount++; |
@@ -242,7 +264,9 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
242 | __sched_info_switch(prev, next); | 264 | __sched_info_switch(prev, next); |
243 | } | 265 | } |
244 | #else | 266 | #else |
245 | #define sched_info_queued(t) do { } while (0) | 267 | #define sched_info_queued(t) do { } while (0) |
246 | #define sched_info_switch(t, next) do { } while (0) | 268 | #define sched_info_reset_dequeued(t) do { } while (0) |
269 | #define sched_info_dequeued(t) do { } while (0) | ||
270 | #define sched_info_switch(t, next) do { } while (0) | ||
247 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | 271 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
248 | 272 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca8..fe8cdc80ff02 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -266,6 +266,14 @@ static struct ctl_table kern_table[] = { | |||
266 | }, | 266 | }, |
267 | { | 267 | { |
268 | .ctl_name = CTL_UNNUMBERED, | 268 | .ctl_name = CTL_UNNUMBERED, |
269 | .procname = "sched_shares_ratelimit", | ||
270 | .data = &sysctl_sched_shares_ratelimit, | ||
271 | .maxlen = sizeof(unsigned int), | ||
272 | .mode = 0644, | ||
273 | .proc_handler = &proc_dointvec, | ||
274 | }, | ||
275 | { | ||
276 | .ctl_name = CTL_UNNUMBERED, | ||
269 | .procname = "sched_child_runs_first", | 277 | .procname = "sched_child_runs_first", |
270 | .data = &sysctl_sched_child_runs_first, | 278 | .data = &sysctl_sched_child_runs_first, |
271 | .maxlen = sizeof(unsigned int), | 279 | .maxlen = sizeof(unsigned int), |
@@ -1628,9 +1628,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1628 | void **object; | 1628 | void **object; |
1629 | struct kmem_cache_cpu *c; | 1629 | struct kmem_cache_cpu *c; |
1630 | unsigned long flags; | 1630 | unsigned long flags; |
1631 | unsigned int objsize; | ||
1631 | 1632 | ||
1632 | local_irq_save(flags); | 1633 | local_irq_save(flags); |
1633 | c = get_cpu_slab(s, smp_processor_id()); | 1634 | c = get_cpu_slab(s, smp_processor_id()); |
1635 | objsize = c->objsize; | ||
1634 | if (unlikely(!c->freelist || !node_match(c, node))) | 1636 | if (unlikely(!c->freelist || !node_match(c, node))) |
1635 | 1637 | ||
1636 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1638 | object = __slab_alloc(s, gfpflags, node, addr, c); |
@@ -1643,7 +1645,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1643 | local_irq_restore(flags); | 1645 | local_irq_restore(flags); |
1644 | 1646 | ||
1645 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1647 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
1646 | memset(object, 0, c->objsize); | 1648 | memset(object, 0, objsize); |
1647 | 1649 | ||
1648 | return object; | 1650 | return object; |
1649 | } | 1651 | } |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4b02d14e7ab9..e1600ad8fb0e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1359,17 +1359,17 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1359 | t->stats.semantic_match_miss++; | 1359 | t->stats.semantic_match_miss++; |
1360 | #endif | 1360 | #endif |
1361 | if (err <= 0) | 1361 | if (err <= 0) |
1362 | return plen; | 1362 | return err; |
1363 | } | 1363 | } |
1364 | 1364 | ||
1365 | return -1; | 1365 | return 1; |
1366 | } | 1366 | } |
1367 | 1367 | ||
1368 | static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, | 1368 | static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, |
1369 | struct fib_result *res) | 1369 | struct fib_result *res) |
1370 | { | 1370 | { |
1371 | struct trie *t = (struct trie *) tb->tb_data; | 1371 | struct trie *t = (struct trie *) tb->tb_data; |
1372 | int plen, ret = 0; | 1372 | int ret; |
1373 | struct node *n; | 1373 | struct node *n; |
1374 | struct tnode *pn; | 1374 | struct tnode *pn; |
1375 | int pos, bits; | 1375 | int pos, bits; |
@@ -1393,10 +1393,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1393 | 1393 | ||
1394 | /* Just a leaf? */ | 1394 | /* Just a leaf? */ |
1395 | if (IS_LEAF(n)) { | 1395 | if (IS_LEAF(n)) { |
1396 | plen = check_leaf(t, (struct leaf *)n, key, flp, res); | 1396 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); |
1397 | if (plen < 0) | ||
1398 | goto failed; | ||
1399 | ret = 0; | ||
1400 | goto found; | 1397 | goto found; |
1401 | } | 1398 | } |
1402 | 1399 | ||
@@ -1421,11 +1418,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1421 | } | 1418 | } |
1422 | 1419 | ||
1423 | if (IS_LEAF(n)) { | 1420 | if (IS_LEAF(n)) { |
1424 | plen = check_leaf(t, (struct leaf *)n, key, flp, res); | 1421 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); |
1425 | if (plen < 0) | 1422 | if (ret > 0) |
1426 | goto backtrace; | 1423 | goto backtrace; |
1427 | |||
1428 | ret = 0; | ||
1429 | goto found; | 1424 | goto found; |
1430 | } | 1425 | } |
1431 | 1426 | ||
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 7750c97fde7b..ffeaffc3fffe 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, | |||
439 | unsigned int *len) | 439 | unsigned int *len) |
440 | { | 440 | { |
441 | unsigned long subid; | 441 | unsigned long subid; |
442 | unsigned int size; | ||
443 | unsigned long *optr; | 442 | unsigned long *optr; |
443 | size_t size; | ||
444 | 444 | ||
445 | size = eoc - ctx->pointer + 1; | 445 | size = eoc - ctx->pointer + 1; |
446 | 446 | ||
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 5ff0ce6e9d39..7ddc30f0744f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -224,7 +224,7 @@ static __init int tcpprobe_init(void) | |||
224 | if (bufsize < 0) | 224 | if (bufsize < 0) |
225 | return -EINVAL; | 225 | return -EINVAL; |
226 | 226 | ||
227 | tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); | 227 | tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); |
228 | if (!tcp_probe.log) | 228 | if (!tcp_probe.log) |
229 | goto err0; | 229 | goto err0; |
230 | 230 | ||
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 147588f4c7c0..ff61a5cdb0b3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -749,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) | |||
749 | } | 749 | } |
750 | write_unlock_bh(&idev->lock); | 750 | write_unlock_bh(&idev->lock); |
751 | 751 | ||
752 | addrconf_del_timer(ifp); | ||
753 | |||
752 | ipv6_ifa_notify(RTM_DELADDR, ifp); | 754 | ipv6_ifa_notify(RTM_DELADDR, ifp); |
753 | 755 | ||
754 | atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); | 756 | atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); |
755 | 757 | ||
756 | addrconf_del_timer(ifp); | ||
757 | |||
758 | /* | 758 | /* |
759 | * Purge or update corresponding prefix | 759 | * Purge or update corresponding prefix |
760 | * | 760 | * |
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c993d52b..dcf94fdfb863 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c | |||
@@ -445,7 +445,7 @@ looped_back: | |||
445 | kfree_skb(skb); | 445 | kfree_skb(skb); |
446 | return -1; | 446 | return -1; |
447 | } | 447 | } |
448 | if (!ipv6_chk_home_addr(&init_net, addr)) { | 448 | if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { |
449 | IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), | 449 | IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), |
450 | IPSTATS_MIB_INADDRERRORS); | 450 | IPSTATS_MIB_INADDRERRORS); |
451 | kfree_skb(skb); | 451 | kfree_skb(skb); |
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 9e1fb82e3220..2f05ec1037ab 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c | |||
@@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) | |||
101 | 101 | ||
102 | hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, | 102 | hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, |
103 | &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); | 103 | &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); |
104 | if (IS_ERR(hdr)) { | 104 | if (hdr == NULL) { |
105 | ret = PTR_ERR(hdr); | 105 | ret = -EMSGSIZE; |
106 | goto err_out; | 106 | goto err_out; |
107 | } | 107 | } |
108 | 108 | ||
diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98c0b5e56ecc..df0836ff1a20 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c | |||
@@ -530,8 +530,6 @@ static int ieee80211_stop(struct net_device *dev) | |||
530 | local->sta_hw_scanning = 0; | 530 | local->sta_hw_scanning = 0; |
531 | } | 531 | } |
532 | 532 | ||
533 | flush_workqueue(local->hw.workqueue); | ||
534 | |||
535 | sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; | 533 | sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; |
536 | kfree(sdata->u.sta.extra_ie); | 534 | kfree(sdata->u.sta.extra_ie); |
537 | sdata->u.sta.extra_ie = NULL; | 535 | sdata->u.sta.extra_ie = NULL; |
@@ -555,6 +553,8 @@ static int ieee80211_stop(struct net_device *dev) | |||
555 | 553 | ||
556 | ieee80211_led_radio(local, 0); | 554 | ieee80211_led_radio(local, 0); |
557 | 555 | ||
556 | flush_workqueue(local->hw.workqueue); | ||
557 | |||
558 | tasklet_disable(&local->tx_pending_tasklet); | 558 | tasklet_disable(&local->tx_pending_tasklet); |
559 | tasklet_disable(&local->tasklet); | 559 | tasklet_disable(&local->tasklet); |
560 | } | 560 | } |
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4d2b582dd055..b404537c0bcd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c | |||
@@ -547,15 +547,14 @@ static void ieee80211_set_associated(struct net_device *dev, | |||
547 | sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; | 547 | sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; |
548 | } | 548 | } |
549 | 549 | ||
550 | netif_carrier_on(dev); | ||
551 | ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; | 550 | ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; |
552 | memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); | 551 | memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); |
553 | memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); | 552 | memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); |
554 | ieee80211_sta_send_associnfo(dev, ifsta); | 553 | ieee80211_sta_send_associnfo(dev, ifsta); |
555 | } else { | 554 | } else { |
555 | netif_carrier_off(dev); | ||
556 | ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); | 556 | ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); |
557 | ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; | 557 | ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; |
558 | netif_carrier_off(dev); | ||
559 | ieee80211_reset_erp_info(dev); | 558 | ieee80211_reset_erp_info(dev); |
560 | 559 | ||
561 | sdata->bss_conf.assoc_ht = 0; | 560 | sdata->bss_conf.assoc_ht = 0; |
@@ -569,6 +568,10 @@ static void ieee80211_set_associated(struct net_device *dev, | |||
569 | 568 | ||
570 | sdata->bss_conf.assoc = assoc; | 569 | sdata->bss_conf.assoc = assoc; |
571 | ieee80211_bss_info_change_notify(sdata, changed); | 570 | ieee80211_bss_info_change_notify(sdata, changed); |
571 | |||
572 | if (assoc) | ||
573 | netif_carrier_on(dev); | ||
574 | |||
572 | wrqu.ap_addr.sa_family = ARPHRD_ETHER; | 575 | wrqu.ap_addr.sa_family = ARPHRD_ETHER; |
573 | wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); | 576 | wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); |
574 | } | 577 | } |
@@ -3611,8 +3614,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, | |||
3611 | spin_unlock_bh(&local->sta_bss_lock); | 3614 | spin_unlock_bh(&local->sta_bss_lock); |
3612 | 3615 | ||
3613 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | 3616 | #ifdef CONFIG_MAC80211_IBSS_DEBUG |
3614 | printk(KERN_DEBUG " sta_find_ibss: selected %s current " | 3617 | if (found) |
3615 | "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); | 3618 | printk(KERN_DEBUG " sta_find_ibss: selected %s current " |
3619 | "%s\n", print_mac(mac, bssid), | ||
3620 | print_mac(mac2, ifsta->bssid)); | ||
3616 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | 3621 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ |
3617 | if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && | 3622 | if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && |
3618 | (bss = ieee80211_rx_bss_get(dev, bssid, | 3623 | (bss = ieee80211_rx_bss_get(dev, bssid, |
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 04afc13ed825..4ea7b97d1af1 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h | |||
@@ -141,7 +141,6 @@ struct rc_pid_events_file_info { | |||
141 | * rate behaviour values (lower means we should trust more what we learnt | 141 | * rate behaviour values (lower means we should trust more what we learnt |
142 | * about behaviour of rates, higher means we should trust more the natural | 142 | * about behaviour of rates, higher means we should trust more the natural |
143 | * ordering of rates) | 143 | * ordering of rates) |
144 | * @fast_start: if Y, push high rates right after initialization | ||
145 | */ | 144 | */ |
146 | struct rc_pid_debugfs_entries { | 145 | struct rc_pid_debugfs_entries { |
147 | struct dentry *dir; | 146 | struct dentry *dir; |
@@ -154,7 +153,6 @@ struct rc_pid_debugfs_entries { | |||
154 | struct dentry *sharpen_factor; | 153 | struct dentry *sharpen_factor; |
155 | struct dentry *sharpen_duration; | 154 | struct dentry *sharpen_duration; |
156 | struct dentry *norm_offset; | 155 | struct dentry *norm_offset; |
157 | struct dentry *fast_start; | ||
158 | }; | 156 | }; |
159 | 157 | ||
160 | void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, | 158 | void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, |
@@ -267,9 +265,6 @@ struct rc_pid_info { | |||
267 | /* Normalization offset. */ | 265 | /* Normalization offset. */ |
268 | unsigned int norm_offset; | 266 | unsigned int norm_offset; |
269 | 267 | ||
270 | /* Fast starst parameter. */ | ||
271 | unsigned int fast_start; | ||
272 | |||
273 | /* Rates information. */ | 268 | /* Rates information. */ |
274 | struct rc_pid_rateinfo *rinfo; | 269 | struct rc_pid_rateinfo *rinfo; |
275 | 270 | ||
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a849b745bdb5..bcd27c1d7594 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c | |||
@@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) | |||
398 | return NULL; | 398 | return NULL; |
399 | } | 399 | } |
400 | 400 | ||
401 | pinfo->target = RC_PID_TARGET_PF; | ||
402 | pinfo->sampling_period = RC_PID_INTERVAL; | ||
403 | pinfo->coeff_p = RC_PID_COEFF_P; | ||
404 | pinfo->coeff_i = RC_PID_COEFF_I; | ||
405 | pinfo->coeff_d = RC_PID_COEFF_D; | ||
406 | pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; | ||
407 | pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; | ||
408 | pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; | ||
409 | pinfo->norm_offset = RC_PID_NORM_OFFSET; | ||
410 | pinfo->rinfo = rinfo; | ||
411 | pinfo->oldrate = 0; | ||
412 | |||
401 | /* Sort the rates. This is optimized for the most common case (i.e. | 413 | /* Sort the rates. This is optimized for the most common case (i.e. |
402 | * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed | 414 | * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed |
403 | * mapping too. */ | 415 | * mapping too. */ |
404 | for (i = 0; i < sband->n_bitrates; i++) { | 416 | for (i = 0; i < sband->n_bitrates; i++) { |
405 | rinfo[i].index = i; | 417 | rinfo[i].index = i; |
406 | rinfo[i].rev_index = i; | 418 | rinfo[i].rev_index = i; |
407 | if (pinfo->fast_start) | 419 | if (RC_PID_FAST_START) |
408 | rinfo[i].diff = 0; | 420 | rinfo[i].diff = 0; |
409 | else | 421 | else |
410 | rinfo[i].diff = i * pinfo->norm_offset; | 422 | rinfo[i].diff = i * pinfo->norm_offset; |
@@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) | |||
425 | break; | 437 | break; |
426 | } | 438 | } |
427 | 439 | ||
428 | pinfo->target = RC_PID_TARGET_PF; | ||
429 | pinfo->sampling_period = RC_PID_INTERVAL; | ||
430 | pinfo->coeff_p = RC_PID_COEFF_P; | ||
431 | pinfo->coeff_i = RC_PID_COEFF_I; | ||
432 | pinfo->coeff_d = RC_PID_COEFF_D; | ||
433 | pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; | ||
434 | pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; | ||
435 | pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; | ||
436 | pinfo->norm_offset = RC_PID_NORM_OFFSET; | ||
437 | pinfo->fast_start = RC_PID_FAST_START; | ||
438 | pinfo->rinfo = rinfo; | ||
439 | pinfo->oldrate = 0; | ||
440 | |||
441 | #ifdef CONFIG_MAC80211_DEBUGFS | 440 | #ifdef CONFIG_MAC80211_DEBUGFS |
442 | de = &pinfo->dentries; | 441 | de = &pinfo->dentries; |
443 | de->dir = debugfs_create_dir("rc80211_pid", | 442 | de->dir = debugfs_create_dir("rc80211_pid", |
@@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) | |||
465 | de->norm_offset = debugfs_create_u32("norm_offset", | 464 | de->norm_offset = debugfs_create_u32("norm_offset", |
466 | S_IRUSR | S_IWUSR, de->dir, | 465 | S_IRUSR | S_IWUSR, de->dir, |
467 | &pinfo->norm_offset); | 466 | &pinfo->norm_offset); |
468 | de->fast_start = debugfs_create_bool("fast_start", | ||
469 | S_IRUSR | S_IWUSR, de->dir, | ||
470 | &pinfo->fast_start); | ||
471 | #endif | 467 | #endif |
472 | 468 | ||
473 | return pinfo; | 469 | return pinfo; |
@@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv) | |||
479 | #ifdef CONFIG_MAC80211_DEBUGFS | 475 | #ifdef CONFIG_MAC80211_DEBUGFS |
480 | struct rc_pid_debugfs_entries *de = &pinfo->dentries; | 476 | struct rc_pid_debugfs_entries *de = &pinfo->dentries; |
481 | 477 | ||
482 | debugfs_remove(de->fast_start); | ||
483 | debugfs_remove(de->norm_offset); | 478 | debugfs_remove(de->norm_offset); |
484 | debugfs_remove(de->sharpen_duration); | 479 | debugfs_remove(de->sharpen_duration); |
485 | debugfs_remove(de->sharpen_factor); | 480 | debugfs_remove(de->sharpen_factor); |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 271cd01d57ae..dd28fb239a60 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -844,9 +844,15 @@ static int tcp_packet(struct nf_conn *ct, | |||
844 | /* Attempt to reopen a closed/aborted connection. | 844 | /* Attempt to reopen a closed/aborted connection. |
845 | * Delete this connection and look up again. */ | 845 | * Delete this connection and look up again. */ |
846 | write_unlock_bh(&tcp_lock); | 846 | write_unlock_bh(&tcp_lock); |
847 | if (del_timer(&ct->timeout)) | 847 | /* Only repeat if we can actually remove the timer. |
848 | * Destruction may already be in progress in process | ||
849 | * context and we must give it a chance to terminate. | ||
850 | */ | ||
851 | if (del_timer(&ct->timeout)) { | ||
848 | ct->timeout.function((unsigned long)ct); | 852 | ct->timeout.function((unsigned long)ct); |
849 | return -NF_REPEAT; | 853 | return -NF_REPEAT; |
854 | } | ||
855 | return -NF_DROP; | ||
850 | } | 856 | } |
851 | /* Fall through */ | 857 | /* Fall through */ |
852 | case TCP_CONNTRACK_IGNORE: | 858 | case TCP_CONNTRACK_IGNORE: |
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index fdc14a0d21af..9080c61b71a5 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c | |||
@@ -584,12 +584,7 @@ list_start: | |||
584 | rcu_read_unlock(); | 584 | rcu_read_unlock(); |
585 | 585 | ||
586 | genlmsg_end(ans_skb, data); | 586 | genlmsg_end(ans_skb, data); |
587 | 587 | return genlmsg_reply(ans_skb, info); | |
588 | ret_val = genlmsg_reply(ans_skb, info); | ||
589 | if (ret_val != 0) | ||
590 | goto list_failure; | ||
591 | |||
592 | return 0; | ||
593 | 588 | ||
594 | list_retry: | 589 | list_retry: |
595 | /* XXX - this limit is a guesstimate */ | 590 | /* XXX - this limit is a guesstimate */ |
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 22c191267808..44be5d5261f4 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c | |||
@@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) | |||
386 | rcu_read_unlock(); | 386 | rcu_read_unlock(); |
387 | 387 | ||
388 | genlmsg_end(ans_skb, data); | 388 | genlmsg_end(ans_skb, data); |
389 | 389 | return genlmsg_reply(ans_skb, info); | |
390 | ret_val = genlmsg_reply(ans_skb, info); | ||
391 | if (ret_val != 0) | ||
392 | goto listdef_failure; | ||
393 | return 0; | ||
394 | 390 | ||
395 | listdef_failure_lock: | 391 | listdef_failure_lock: |
396 | rcu_read_unlock(); | 392 | rcu_read_unlock(); |
@@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) | |||
501 | goto version_failure; | 497 | goto version_failure; |
502 | 498 | ||
503 | genlmsg_end(ans_skb, data); | 499 | genlmsg_end(ans_skb, data); |
504 | 500 | return genlmsg_reply(ans_skb, info); | |
505 | ret_val = genlmsg_reply(ans_skb, info); | ||
506 | if (ret_val != 0) | ||
507 | goto version_failure; | ||
508 | return 0; | ||
509 | 501 | ||
510 | version_failure: | 502 | version_failure: |
511 | kfree_skb(ans_skb); | 503 | kfree_skb(ans_skb); |
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 52b2611a6eb6..56f80872924e 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c | |||
@@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) | |||
1107 | goto list_failure; | 1107 | goto list_failure; |
1108 | 1108 | ||
1109 | genlmsg_end(ans_skb, data); | 1109 | genlmsg_end(ans_skb, data); |
1110 | 1110 | return genlmsg_reply(ans_skb, info); | |
1111 | ret_val = genlmsg_reply(ans_skb, info); | ||
1112 | if (ret_val != 0) | ||
1113 | goto list_failure; | ||
1114 | return 0; | ||
1115 | 1111 | ||
1116 | list_failure: | 1112 | list_failure: |
1117 | kfree_skb(ans_skb); | 1113 | kfree_skb(ans_skb); |
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0c9d5a6950fe..fcdb45d1071b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c | |||
@@ -5899,12 +5899,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, | |||
5899 | return SCTP_IERROR_NO_DATA; | 5899 | return SCTP_IERROR_NO_DATA; |
5900 | } | 5900 | } |
5901 | 5901 | ||
5902 | /* If definately accepting the DATA chunk, record its TSN, otherwise | ||
5903 | * wait for renege processing. | ||
5904 | */ | ||
5905 | if (SCTP_CMD_CHUNK_ULP == deliver) | ||
5906 | sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); | ||
5907 | |||
5908 | chunk->data_accepted = 1; | 5902 | chunk->data_accepted = 1; |
5909 | 5903 | ||
5910 | /* Note: Some chunks may get overcounted (if we drop) or overcounted | 5904 | /* Note: Some chunks may get overcounted (if we drop) or overcounted |
@@ -5924,6 +5918,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, | |||
5924 | * and discard the DATA chunk. | 5918 | * and discard the DATA chunk. |
5925 | */ | 5919 | */ |
5926 | if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { | 5920 | if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { |
5921 | /* Mark tsn as received even though we drop it */ | ||
5922 | sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); | ||
5923 | |||
5927 | err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, | 5924 | err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, |
5928 | &data_hdr->stream, | 5925 | &data_hdr->stream, |
5929 | sizeof(data_hdr->stream)); | 5926 | sizeof(data_hdr->stream)); |
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ce6cda6b6994..a1f654aea268 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c | |||
@@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, | |||
710 | if (!skb) | 710 | if (!skb) |
711 | goto fail; | 711 | goto fail; |
712 | 712 | ||
713 | /* Now that all memory allocations for this chunk succeeded, we | ||
714 | * can mark it as received so the tsn_map is updated correctly. | ||
715 | */ | ||
716 | sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); | ||
717 | |||
713 | /* First calculate the padding, so we don't inadvertently | 718 | /* First calculate the padding, so we don't inadvertently |
714 | * pass up the wrong length to the user. | 719 | * pass up the wrong length to the user. |
715 | * | 720 | * |
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b976d9ed10e4..04c41504f84c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -277,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * | |||
277 | memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); | 277 | memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); |
278 | x->props.flags = p->flags; | 278 | x->props.flags = p->flags; |
279 | 279 | ||
280 | if (!x->sel.family) | 280 | if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) |
281 | x->sel.family = p->family; | 281 | x->sel.family = p->family; |
282 | |||
283 | } | 282 | } |
284 | 283 | ||
285 | /* | 284 | /* |