-rw-r--r--  Documentation/networking/ip-sysctl.txt | 256
-rw-r--r--  Documentation/scheduler/sched-domains.txt | 7
-rw-r--r--  Documentation/scheduler/sched-rt-group.txt | 4
-rw-r--r--  MAINTAINERS | 4
-rw-r--r--  arch/x86/mm/ioremap.c | 25
-rw-r--r--  block/bsg.c | 3
-rw-r--r--  drivers/ata/libata-acpi.c | 19
-rw-r--r--  drivers/ata/pata_sis.c | 1
-rw-r--r--  drivers/char/ipmi/ipmi_watchdog.c | 3
-rw-r--r--  drivers/char/pcmcia/ipwireless/hardware.c | 4
-rw-r--r--  drivers/char/rtc.c | 3
-rw-r--r--  drivers/char/tpm/tpm_tis.c | 1
-rw-r--r--  drivers/isdn/i4l/isdn_common.c | 4
-rw-r--r--  drivers/media/video/ov7670.c | 4
-rw-r--r--  drivers/message/fusion/mptbase.c | 11
-rw-r--r--  drivers/message/fusion/mptspi.c | 9
-rw-r--r--  drivers/net/irda/nsc-ircc.c | 1
-rw-r--r--  drivers/net/irda/via-ircc.c | 3
-rw-r--r--  drivers/net/tun.c | 6
-rw-r--r--  drivers/net/wireless/hostap/hostap_cs.c | 12
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-3945.c | 4
-rw-r--r--  drivers/net/wireless/libertas/scan.c | 4
-rw-r--r--  drivers/net/wireless/rt2x00/rt2400pci.c | 11
-rw-r--r--  drivers/net/wireless/rt2x00/rt2500pci.c | 11
-rw-r--r--  drivers/net/wireless/rt2x00/rt2500usb.c | 7
-rw-r--r--  drivers/net/wireless/rt2x00/rt61pci.c | 9
-rw-r--r--  drivers/net/wireless/rt2x00/rt73usb.c | 9
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_mac.c | 1
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_usb.c | 1
-rw-r--r--  drivers/rapidio/rio-driver.c | 2
-rw-r--r--  drivers/rtc/rtc-fm3130.c | 2
-rw-r--r--  drivers/rtc/rtc-pcf8563.c | 1
-rw-r--r--  drivers/scsi/ipr.c | 6
-rw-r--r--  drivers/scsi/scsi_lib.c | 9
-rw-r--r--  drivers/serial/8250.c | 3
-rw-r--r--  drivers/ssb/driver_pcicore.c | 7
-rw-r--r--  drivers/usb/host/ohci-au1xxx.c | 2
-rw-r--r--  drivers/usb/host/ohci-lh7a404.c | 2
-rw-r--r--  drivers/usb/host/ohci-s3c2410.c | 2
-rw-r--r--  drivers/usb/host/ohci-sa1111.c | 2
-rw-r--r--  drivers/video/fb_defio.c | 20
-rw-r--r--  fs/cifs/cifsacl.c | 10
-rw-r--r--  fs/cifs/inode.c | 20
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/ocfs2/dlmglue.c | 14
-rw-r--r--  fs/xfs/xfs_log.c | 15
-rw-r--r--  include/asm-avr32/setup.h | 2
-rw-r--r--  include/asm-frv/system.h | 2
-rw-r--r--  include/asm-x86/desc.h | 4
-rw-r--r--  include/linux/sched.h | 42
-rw-r--r--  include/linux/xfrm.h | 1
-rw-r--r--  kernel/Makefile | 5
-rw-r--r--  kernel/cpu.c | 24
-rw-r--r--  kernel/cpuset.c | 14
-rw-r--r--  kernel/kthread.c | 1
-rw-r--r--  kernel/sched.c | 730
-rw-r--r--  kernel/sched_clock.c | 13
-rw-r--r--  kernel/sched_cpupri.c | 174
-rw-r--r--  kernel/sched_cpupri.h | 36
-rw-r--r--  kernel/sched_debug.c | 64
-rw-r--r--  kernel/sched_fair.c | 413
-rw-r--r--  kernel/sched_features.h | 7
-rw-r--r--  kernel/sched_rt.c | 405
-rw-r--r--  kernel/sched_stats.h | 42
-rw-r--r--  kernel/sysctl.c | 8
-rw-r--r--  mm/slub.c | 4
-rw-r--r--  net/ipv4/fib_trie.c | 17
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c | 2
-rw-r--r--  net/ipv4/tcp_probe.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 4
-rw-r--r--  net/ipv6/exthdrs.c | 2
-rw-r--r--  net/irda/irnetlink.c | 4
-rw-r--r--  net/mac80211/main.c | 4
-rw-r--r--  net/mac80211/mlme.c | 13
-rw-r--r--  net/mac80211/rc80211_pid.h | 5
-rw-r--r--  net/mac80211/rc80211_pid_algo.c | 31
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 10
-rw-r--r--  net/netlabel/netlabel_cipso_v4.c | 7
-rw-r--r--  net/netlabel/netlabel_mgmt.c | 12
-rw-r--r--  net/netlabel/netlabel_unlabeled.c | 6
-rw-r--r--  net/sctp/sm_statefuns.c | 9
-rw-r--r--  net/sctp/ulpevent.c | 5
-rw-r--r--  net/xfrm/xfrm_user.c | 3
83 files changed, 1944 insertions, 739 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 17f1f91af35c..946b66e1b652 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -148,9 +148,9 @@ tcp_available_congestion_control - STRING
148 but not loaded. 148 but not loaded.
149 149
150tcp_base_mss - INTEGER 150tcp_base_mss - INTEGER
151 The initial value of search_low to be used by Packetization Layer 151 The initial value of search_low to be used by the packetization layer
152 Path MTU Discovery (MTU probing). If MTU probing is enabled, 152 Path MTU discovery (MTU probing). If MTU probing is enabled,
153 this is the inital MSS used by the connection. 153 this is the initial MSS used by the connection.
154 154
155tcp_congestion_control - STRING 155tcp_congestion_control - STRING
156 Set the congestion control algorithm to be used for new 156 Set the congestion control algorithm to be used for new
@@ -185,10 +185,9 @@ tcp_frto - INTEGER
185 timeouts. It is particularly beneficial in wireless environments 185 timeouts. It is particularly beneficial in wireless environments
186 where packet loss is typically due to random radio interference 186 where packet loss is typically due to random radio interference
187 rather than intermediate router congestion. F-RTO is sender-side 187 rather than intermediate router congestion. F-RTO is sender-side
188 only modification. Therefore it does not require any support from 188 only modification. Therefore it does not require any support from
189 the peer, but in a typical case, however, where wireless link is 189 the peer.
190 the local access link and most of the data flows downlink, the 190
191 faraway servers should have F-RTO enabled to take advantage of it.
192 If set to 1, basic version is enabled. 2 enables SACK enhanced 191 If set to 1, basic version is enabled. 2 enables SACK enhanced
193 F-RTO if flow uses SACK. The basic version can be used also when 192 F-RTO if flow uses SACK. The basic version can be used also when
194 SACK is in use though scenario(s) with it exists where F-RTO 193 SACK is in use though scenario(s) with it exists where F-RTO
@@ -276,7 +275,7 @@ tcp_mem - vector of 3 INTEGERs: min, pressure, max
276 memory. 275 memory.
277 276
278tcp_moderate_rcvbuf - BOOLEAN 277tcp_moderate_rcvbuf - BOOLEAN
279 If set, TCP performs receive buffer autotuning, attempting to 278 If set, TCP performs receive buffer auto-tuning, attempting to
280 automatically size the buffer (no greater than tcp_rmem[2]) to 279 automatically size the buffer (no greater than tcp_rmem[2]) to
281 match the size required by the path for full throughput. Enabled by 280 match the size required by the path for full throughput. Enabled by
282 default. 281 default.
@@ -336,7 +335,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
336 pressure. 335 pressure.
337 Default: 8K 336 Default: 8K
338 337
339 default: default size of receive buffer used by TCP sockets. 338 default: initial size of receive buffer used by TCP sockets.
340 This value overrides net.core.rmem_default used by other protocols. 339 This value overrides net.core.rmem_default used by other protocols.
341 Default: 87380 bytes. This value results in window of 65535 with 340 Default: 87380 bytes. This value results in window of 65535 with
342 default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit 341 default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
@@ -344,8 +343,10 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
344 343
345 max: maximal size of receive buffer allowed for automatically 344 max: maximal size of receive buffer allowed for automatically
346 selected receiver buffers for TCP socket. This value does not override 345 selected receiver buffers for TCP socket. This value does not override
347 net.core.rmem_max, "static" selection via SO_RCVBUF does not use this. 346 net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables
348 Default: 87380*2 bytes. 347 automatic tuning of that socket's receive buffer size, in which
348 case this value is ignored.
349 Default: between 87380B and 4MB, depending on RAM size.
349 350
350tcp_sack - BOOLEAN 351tcp_sack - BOOLEAN
351 Enable select acknowledgments (SACKS). 352 Enable select acknowledgments (SACKS).
@@ -358,7 +359,7 @@ tcp_slow_start_after_idle - BOOLEAN
358 Default: 1 359 Default: 1
359 360
360tcp_stdurg - BOOLEAN 361tcp_stdurg - BOOLEAN
361 Use the Host requirements interpretation of the TCP urg pointer field. 362 Use the Host requirements interpretation of the TCP urgent pointer field.
362 Most hosts use the older BSD interpretation, so if you turn this on 363 Most hosts use the older BSD interpretation, so if you turn this on
363 Linux might not communicate correctly with them. 364 Linux might not communicate correctly with them.
364 Default: FALSE 365 Default: FALSE
@@ -371,12 +372,12 @@ tcp_synack_retries - INTEGER
371tcp_syncookies - BOOLEAN 372tcp_syncookies - BOOLEAN
372 Only valid when the kernel was compiled with CONFIG_SYNCOOKIES 373 Only valid when the kernel was compiled with CONFIG_SYNCOOKIES
373 Send out syncookies when the syn backlog queue of a socket 374 Send out syncookies when the syn backlog queue of a socket
374 overflows. This is to prevent against the common 'syn flood attack' 375 overflows. This is to prevent against the common 'SYN flood attack'
375 Default: FALSE 376 Default: FALSE
376 377
377 Note, that syncookies is fallback facility. 378 Note, that syncookies is fallback facility.
378 It MUST NOT be used to help highly loaded servers to stand 379 It MUST NOT be used to help highly loaded servers to stand
379 against legal connection rate. If you see synflood warnings 380 against legal connection rate. If you see SYN flood warnings
380 in your logs, but investigation shows that they occur 381 in your logs, but investigation shows that they occur
381 because of overload with legal connections, you should tune 382 because of overload with legal connections, you should tune
382 another parameters until this warning disappear. 383 another parameters until this warning disappear.
@@ -386,7 +387,7 @@ tcp_syncookies - BOOLEAN
386 to use TCP extensions, can result in serious degradation 387 to use TCP extensions, can result in serious degradation
387 of some services (f.e. SMTP relaying), visible not by you, 388 of some services (f.e. SMTP relaying), visible not by you,
388 but your clients and relays, contacting you. While you see 389 but your clients and relays, contacting you. While you see
389 synflood warnings in logs not being really flooded, your server 390 SYN flood warnings in logs not being really flooded, your server
390 is seriously misconfigured. 391 is seriously misconfigured.
391 392
392tcp_syn_retries - INTEGER 393tcp_syn_retries - INTEGER
@@ -419,19 +420,21 @@ tcp_window_scaling - BOOLEAN
419 Enable window scaling as defined in RFC1323. 420 Enable window scaling as defined in RFC1323.
420 421
421tcp_wmem - vector of 3 INTEGERs: min, default, max 422tcp_wmem - vector of 3 INTEGERs: min, default, max
422 min: Amount of memory reserved for send buffers for TCP socket. 423 min: Amount of memory reserved for send buffers for TCP sockets.
423 Each TCP socket has rights to use it due to fact of its birth. 424 Each TCP socket has rights to use it due to fact of its birth.
424 Default: 4K 425 Default: 4K
425 426
426 default: Amount of memory allowed for send buffers for TCP socket 427 default: initial size of send buffer used by TCP sockets. This
427 by default. This value overrides net.core.wmem_default used 428 value overrides net.core.wmem_default used by other protocols.
428 by other protocols, it is usually lower than net.core.wmem_default. 429 It is usually lower than net.core.wmem_default.
429 Default: 16K 430 Default: 16K
430 431
431 max: Maximal amount of memory allowed for automatically selected 432 max: Maximal amount of memory allowed for automatically tuned
432 send buffers for TCP socket. This value does not override 433 send buffers for TCP sockets. This value does not override
433 net.core.wmem_max, "static" selection via SO_SNDBUF does not use this. 434 net.core.wmem_max. Calling setsockopt() with SO_SNDBUF disables
434 Default: 128K 435 automatic tuning of that socket's send buffer size, in which case
436 this value is ignored.
437 Default: between 64K and 4MB, depending on RAM size.
435 438
436tcp_workaround_signed_windows - BOOLEAN 439tcp_workaround_signed_windows - BOOLEAN
437 If set, assume no receipt of a window scaling option means the 440 If set, assume no receipt of a window scaling option means the
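The tcp_rmem/tcp_wmem entries above note that calling setsockopt() with SO_RCVBUF or SO_SNDBUF switches off receive/send buffer auto-tuning for that socket. A minimal userspace sketch of that trade-off (illustrative only, not part of this patch; the 256 KB size is an arbitrary example):

    /* Pinning SO_RCVBUF opts this socket out of tcp_rmem auto-tuning. */
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            int rcvbuf = 256 * 1024;        /* arbitrary example size */

            if (fd < 0) {
                    perror("socket");
                    return 1;
            }
            /* From here on the kernel no longer grows the buffer
             * automatically; tcp_rmem[2] is ignored for this socket. */
            if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
                           sizeof(rcvbuf)) < 0)
                    perror("setsockopt(SO_RCVBUF)");
            return 0;
    }
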
@@ -1060,24 +1063,193 @@ bridge-nf-filter-pppoe-tagged - BOOLEAN
1060 Default: 1 1063 Default: 1
1061 1064
1062 1065
1063UNDOCUMENTED: 1066proc/sys/net/sctp/* Variables:
1067
1068addip_enable - BOOLEAN
1069 Enable or disable extension of Dynamic Address Reconfiguration
1070 (ADD-IP) functionality specified in RFC5061. This extension provides
1071 the ability to dynamically add and remove new addresses for the SCTP
1072 associations.
1073
1074 1: Enable extension.
1075
1076 0: Disable extension.
1077
1078 Default: 0
1079
1080addip_noauth_enable - BOOLEAN
1081 Dynamic Address Reconfiguration (ADD-IP) requires the use of
1082 authentication to protect the operations of adding or removing new
1083 addresses. This requirement is mandated so that unauthorized hosts
1084 would not be able to hijack associations. However, older
1085 implementations may not have implemented this requirement while
1086 allowing the ADD-IP extension. For reasons of interoperability,
1087 we provide this variable to control the enforcement of the
1088 authentication requirement.
1089
1090 1: Allow ADD-IP extension to be used without authentication. This
1091 should only be set in a closed environment for interoperability
1092 with older implementations.
1093
1094 0: Enforce the authentication requirement
1095
1096 Default: 0
1097
1098auth_enable - BOOLEAN
1099 Enable or disable Authenticated Chunks extension. This extension
1100 provides the ability to send and receive authenticated chunks and is
1101 required for secure operation of Dynamic Address Reconfiguration
1102 (ADD-IP) extension.
1103
1104 1: Enable this extension.
1105 0: Disable this extension.
1106
1107 Default: 0
1108
1109prsctp_enable - BOOLEAN
1110 Enable or disable the Partial Reliability extension (RFC3758) which
1111 is used to notify peers that a given DATA should no longer be expected.
1112
1113 1: Enable extension
1114 0: Disable
1115
1116 Default: 1
1117
1118max_burst - INTEGER
1119 The limit of the number of new packets that can be initially sent. It
1120 controls how bursty the generated traffic can be.
1121
1122 Default: 4
1123
1124association_max_retrans - INTEGER
1125 Set the maximum number for retransmissions that an association can
1126 attempt deciding that the remote end is unreachable. If this value
1127 is exceeded, the association is terminated.
1128
1129 Default: 10
1130
1131max_init_retransmits - INTEGER
1132 The maximum number of retransmissions of INIT and COOKIE-ECHO chunks
1133 that an association will attempt before declaring the destination
1134 unreachable and terminating.
1135
1136 Default: 8
1137
1138path_max_retrans - INTEGER
1139 The maximum number of retransmissions that will be attempted on a given
1140 path. Once this threshold is exceeded, the path is considered
1141 unreachable, and new traffic will use a different path when the
1142 association is multihomed.
1143
1144 Default: 5
1145
1146rto_initial - INTEGER
1147 The initial round trip timeout value in milliseconds that will be used
1148 in calculating round trip times. This is the initial time interval
1149 for retransmissions.
1150
1151 Default: 3000
1064 1152
1065dev_weight FIXME 1153rto_max - INTEGER
1066discovery_slots FIXME 1154 The maximum value (in milliseconds) of the round trip timeout. This
1067discovery_timeout FIXME 1155 is the largest time interval that can elapse between retransmissions.
1068fast_poll_increase FIXME 1156
1069ip6_queue_maxlen FIXME 1157 Default: 60000
1070lap_keepalive_time FIXME 1158
1071lo_cong FIXME 1159rto_min - INTEGER
1072max_baud_rate FIXME 1160 The minimum value (in milliseconds) of the round trip timeout. This
1073max_dgram_qlen FIXME 1161 is the smallest time interval the can elapse between retransmissions.
1074max_noreply_time FIXME 1162
1075max_tx_data_size FIXME 1163 Default: 1000
1076max_tx_window FIXME 1164
1077min_tx_turn_time FIXME 1165hb_interval - INTEGER
1078mod_cong FIXME 1166 The interval (in milliseconds) between HEARTBEAT chunks. These chunks
1079no_cong FIXME 1167 are sent at the specified interval on idle paths to probe the state of
1080no_cong_thresh FIXME 1168 a given path between 2 associations.
1081slot_timeout FIXME 1169
1082warn_noreply_time FIXME 1170 Default: 30000
1171
1172sack_timeout - INTEGER
1173 The amount of time (in milliseconds) that the implementation will wait
1174 to send a SACK.
1175
1176 Default: 200
1177
1178valid_cookie_life - INTEGER
1179 The default lifetime of the SCTP cookie (in milliseconds). The cookie
1180 is used during association establishment.
1181
1182 Default: 60000
1183
1184cookie_preserve_enable - BOOLEAN
1185 Enable or disable the ability to extend the lifetime of the SCTP cookie
1186 that is used during the establishment phase of SCTP association
1187
1188 1: Enable cookie lifetime extension.
1189 0: Disable
1190
1191 Default: 1
1192
1193rcvbuf_policy - INTEGER
1194 Determines if the receive buffer is attributed to the socket or to
1195 association. SCTP supports the capability to create multiple
1196 associations on a single socket. When using this capability, it is
1197 possible that a single stalled association that's buffering a lot
1198 of data may block other associations from delivering their data by
1199 consuming all of the receive buffer space. To work around this,
1200 the rcvbuf_policy could be set to attribute the receiver buffer space
1201 to each association instead of the socket. This prevents the described
1202 blocking.
1203
1204 1: rcvbuf space is per association
1205 0: recbuf space is per socket
1206
1207 Default: 0
1208
1209sndbuf_policy - INTEGER
1210 Similar to rcvbuf_policy above, this applies to send buffer space.
1211
1212 1: Send buffer is tracked per association
1213 0: Send buffer is tracked per socket.
1214
1215 Default: 0
1216
1217sctp_mem - vector of 3 INTEGERs: min, pressure, max
1218 Number of pages allowed for queueing by all SCTP sockets.
1219
1220 min: Below this number of pages SCTP is not bothered about its
1221 memory appetite. When amount of memory allocated by SCTP exceeds
1222 this number, SCTP starts to moderate memory usage.
1223
1224 pressure: This value was introduced to follow format of tcp_mem.
1225
1226 max: Number of pages allowed for queueing by all SCTP sockets.
1227
1228 Default is calculated at boot time from amount of available memory.
1229
1230sctp_rmem - vector of 3 INTEGERs: min, default, max
1231 See tcp_rmem for a description.
1232
1233sctp_wmem - vector of 3 INTEGERs: min, default, max
1234 See tcp_wmem for a description.
1235
1236UNDOCUMENTED:
1083 1237
1238/proc/sys/net/core/*
1239 dev_weight FIXME
1240
1241/proc/sys/net/unix/*
1242 max_dgram_qlen FIXME
1243
1244/proc/sys/net/irda/*
1245 fast_poll_increase FIXME
1246 warn_noreply_time FIXME
1247 discovery_slots FIXME
1248 slot_timeout FIXME
1249 max_baud_rate FIXME
1250 discovery_timeout FIXME
1251 lap_keepalive_time FIXME
1252 max_noreply_time FIXME
1253 max_tx_data_size FIXME
1254 max_tx_window FIXME
1255 min_tx_turn_time FIXME
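
The SCTP section added above documents tunables that live under /proc/sys/net/sctp/. As a hedged illustration (a sketch, not part of the patch), a small C helper that enables the Authenticated Chunks and ADD-IP extensions by writing to those files; the paths come straight from the text above and error handling is kept minimal:

    /* Sketch: enable SCTP AUTH and ADD-IP via /proc/sys/net/sctp/. */
    #include <stdio.h>

    static int write_sysctl(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror(path);
                    return -1;
            }
            fputs(val, f);
            return fclose(f);
    }

    int main(void)
    {
            /* ADD-IP is only safe with authentication, so enable both. */
            write_sysctl("/proc/sys/net/sctp/auth_enable", "1");
            write_sysctl("/proc/sys/net/sctp/addip_enable", "1");
            return 0;
    }
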
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
index a9e990ab980f..373ceacc367e 100644
--- a/Documentation/scheduler/sched-domains.txt
+++ b/Documentation/scheduler/sched-domains.txt
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
61arch_init_sched_domains function. This function will attach domains to all 61arch_init_sched_domains function. This function will attach domains to all
62CPUs using cpu_attach_domain. 62CPUs using cpu_attach_domain.
63 63
64Implementors should change the line 64The sched-domains debugging infrastructure can be enabled by enabling
65#undef SCHED_DOMAIN_DEBUG 65CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
66to
67#define SCHED_DOMAIN_DEBUG
68in kernel/sched.c as this enables an error checking parse of the sched domains
69which should catch most possible errors (described above). It also prints out 66which should catch most possible errors (described above). It also prints out
70the domain structure in a visual format. 67the domain structure in a visual format.
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 14f901f639ee..3ef339f491e0 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
510.00015s. So this group can be scheduled with a period of 0.005s and a run time 510.00015s. So this group can be scheduled with a period of 0.005s and a run time
52of 0.00015s. 52of 0.00015s.
53 53
54The remaining CPU time will be used for user input and other tass. Because 54The remaining CPU time will be used for user input and other tasks. Because
55realtime tasks have explicitly allocated the CPU time they need to perform 55realtime tasks have explicitly allocated the CPU time they need to perform
56their tasks, buffer underruns in the graphocs or audio can be eliminated. 56their tasks, buffer underruns in the graphics or audio can be eliminated.
57 57
58NOTE: the above example is not fully implemented as of yet (2.6.25). We still 58NOTE: the above example is not fully implemented as of yet (2.6.25). We still
59lack an EDF scheduler to make non-uniform periods usable. 59lack an EDF scheduler to make non-uniform periods usable.
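
The example above (a group that needs 3% of the CPU over a 0.005 s period, i.e. 150 us of runtime every 5000 us) maps directly onto the RT group scheduling knobs. A hedged sketch, assuming a task group mounted at the hypothetical path /cgroup/audio with the usual cpu.rt_period_us and cpu.rt_runtime_us files:

    /* Sketch: give an "audio" group 150us of RT time every 5000us. */
    #include <stdio.h>

    static void set_knob(const char *path, long us)
    {
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror(path);
                    return;
            }
            fprintf(f, "%ld\n", us);
            fclose(f);
    }

    int main(void)
    {
            /* 0.03 * 5000us = 150us, matching the document's arithmetic. */
            set_knob("/cgroup/audio/cpu.rt_period_us", 5000);
            set_knob("/cgroup/audio/cpu.rt_runtime_us", 150);
            return 0;
    }
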
diff --git a/MAINTAINERS b/MAINTAINERS
index 6476125363e0..56a2f678019e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3082,8 +3082,8 @@ L: linux-scsi@vger.kernel.org
3082S: Maintained 3082S: Maintained
3083 3083
3084OPROFILE 3084OPROFILE
3085P: Philippe Elie 3085P: Robert Richter
3086M: phil.el@wanadoo.fr 3086M: robert.richter@amd.com
3087L: oprofile-list@lists.sf.net 3087L: oprofile-list@lists.sf.net
3088S: Maintained 3088S: Maintained
3089 3089
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2b2bb3f9b683..d1b867101e5f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -300,6 +300,29 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
300} 300}
301EXPORT_SYMBOL(ioremap_cache); 301EXPORT_SYMBOL(ioremap_cache);
302 302
303static void __iomem *ioremap_default(resource_size_t phys_addr,
304 unsigned long size)
305{
306 unsigned long flags;
307 void *ret;
308 int err;
309
310 /*
311 * - WB for WB-able memory and no other conflicting mappings
312 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
313 * - Inherit from confliting mappings otherwise
314 */
315 err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
316 if (err < 0)
317 return NULL;
318
319 ret = (void *) __ioremap_caller(phys_addr, size, flags,
320 __builtin_return_address(0));
321
322 free_memtype(phys_addr, phys_addr + size);
323 return (void __iomem *)ret;
324}
325
303/** 326/**
304 * iounmap - Free a IO remapping 327 * iounmap - Free a IO remapping
305 * @addr: virtual address from ioremap_* 328 * @addr: virtual address from ioremap_*
@@ -365,7 +388,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
365 if (page_is_ram(start >> PAGE_SHIFT)) 388 if (page_is_ram(start >> PAGE_SHIFT))
366 return __va(phys); 389 return __va(phys);
367 390
368 addr = (void *)ioremap(start, PAGE_SIZE); 391 addr = (void *)ioremap_default(start, PAGE_SIZE);
369 if (addr) 392 if (addr)
370 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); 393 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
371 394
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd343216..54d617f7df3e 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -709,11 +709,12 @@ static void bsg_kref_release_function(struct kref *kref)
709{ 709{
710 struct bsg_class_device *bcd = 710 struct bsg_class_device *bcd =
711 container_of(kref, struct bsg_class_device, ref); 711 container_of(kref, struct bsg_class_device, ref);
712 struct device *parent = bcd->parent;
712 713
713 if (bcd->release) 714 if (bcd->release)
714 bcd->release(bcd->parent); 715 bcd->release(bcd->parent);
715 716
716 put_device(bcd->parent); 717 put_device(parent);
717} 718}
718 719
719static int bsg_put_device(struct bsg_device *bd) 720static int bsg_put_device(struct bsg_device *bd)
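
The bsg.c change above saves bcd->parent into a local variable before invoking the release callback, because the callback may free the structure that embeds bcd, making the later bcd->parent read a use-after-free. A self-contained sketch of the same pattern with hypothetical types (not the kernel code):

    struct parent;                              /* opaque, refcounted */

    struct child {
            struct parent *parent;
            void (*release)(struct parent *);   /* may free the child */
    };

    static void put_parent(struct parent *p)
    {
            (void)p;                            /* drop a reference in real code */
    }

    static void child_teardown(struct child *c)
    {
            /* Save what we need first: release() may free 'c' itself. */
            struct parent *parent = c->parent;

            if (c->release)
                    c->release(parent);

            put_parent(parent);                 /* 'c' must not be touched here */
    }
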
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 3ff8b14420d9..9330b7922f62 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -29,14 +29,16 @@
29enum { 29enum {
30 ATA_ACPI_FILTER_SETXFER = 1 << 0, 30 ATA_ACPI_FILTER_SETXFER = 1 << 0,
31 ATA_ACPI_FILTER_LOCK = 1 << 1, 31 ATA_ACPI_FILTER_LOCK = 1 << 1,
32 ATA_ACPI_FILTER_DIPM = 1 << 2,
32 33
33 ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | 34 ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER |
34 ATA_ACPI_FILTER_LOCK, 35 ATA_ACPI_FILTER_LOCK |
36 ATA_ACPI_FILTER_DIPM,
35}; 37};
36 38
37static unsigned int ata_acpi_gtf_filter = ATA_ACPI_FILTER_DEFAULT; 39static unsigned int ata_acpi_gtf_filter = ATA_ACPI_FILTER_DEFAULT;
38module_param_named(acpi_gtf_filter, ata_acpi_gtf_filter, int, 0644); 40module_param_named(acpi_gtf_filter, ata_acpi_gtf_filter, int, 0644);
39MODULE_PARM_DESC(acpi_gtf_filter, "filter mask for ACPI _GTF commands, set to filter out (0x1=set xfermode, 0x2=lock/freeze lock)"); 41MODULE_PARM_DESC(acpi_gtf_filter, "filter mask for ACPI _GTF commands, set to filter out (0x1=set xfermode, 0x2=lock/freeze lock, 0x4=DIPM)");
40 42
41#define NO_PORT_MULT 0xffff 43#define NO_PORT_MULT 0xffff
42#define SATA_ADR(root, pmp) (((root) << 16) | (pmp)) 44#define SATA_ADR(root, pmp) (((root) << 16) | (pmp))
@@ -195,6 +197,10 @@ static void ata_acpi_handle_hotplug(struct ata_port *ap, struct ata_device *dev,
195 /* This device does not support hotplug */ 197 /* This device does not support hotplug */
196 return; 198 return;
197 199
200 if (event == ACPI_NOTIFY_BUS_CHECK ||
201 event == ACPI_NOTIFY_DEVICE_CHECK)
202 status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
203
198 spin_lock_irqsave(ap->lock, flags); 204 spin_lock_irqsave(ap->lock, flags);
199 205
200 switch (event) { 206 switch (event) {
@@ -202,7 +208,6 @@ static void ata_acpi_handle_hotplug(struct ata_port *ap, struct ata_device *dev,
202 case ACPI_NOTIFY_DEVICE_CHECK: 208 case ACPI_NOTIFY_DEVICE_CHECK:
203 ata_ehi_push_desc(ehi, "ACPI event"); 209 ata_ehi_push_desc(ehi, "ACPI event");
204 210
205 status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
206 if (ACPI_FAILURE(status)) { 211 if (ACPI_FAILURE(status)) {
207 ata_port_printk(ap, KERN_ERR, 212 ata_port_printk(ap, KERN_ERR,
208 "acpi: failed to determine bay status (0x%x)\n", 213 "acpi: failed to determine bay status (0x%x)\n",
@@ -690,6 +695,14 @@ static int ata_acpi_filter_tf(const struct ata_taskfile *tf,
690 return 1; 695 return 1;
691 } 696 }
692 697
698 if (ata_acpi_gtf_filter & ATA_ACPI_FILTER_DIPM) {
699 /* inhibit enabling DIPM */
700 if (tf->command == ATA_CMD_SET_FEATURES &&
701 tf->feature == SETFEATURES_SATA_ENABLE &&
702 tf->nsect == SATA_DIPM)
703 return 1;
704 }
705
693 return 0; 706 return 0;
694} 707}
695 708
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index e82c66e8d31b..26345d7b531c 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -56,6 +56,7 @@ static const struct sis_laptop sis_laptop[] = {
56 { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */ 56 { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */
57 { 0x5513, 0x1734, 0x105F }, /* FSC Amilo A1630 */ 57 { 0x5513, 0x1734, 0x105F }, /* FSC Amilo A1630 */
58 { 0x5513, 0x1071, 0x8640 }, /* EasyNote K5305 */ 58 { 0x5513, 0x1071, 0x8640 }, /* EasyNote K5305 */
59 { 0x5513, 0x1039, 0x5513 }, /* Targa Visionary 1000 */
59 /* end marker */ 60 /* end marker */
60 { 0, } 61 { 0, }
61}; 62};
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 1b9a87047817..0e6df289cb46 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -755,9 +755,8 @@ static ssize_t ipmi_write(struct file *file,
755 rv = ipmi_heartbeat(); 755 rv = ipmi_heartbeat();
756 if (rv) 756 if (rv)
757 return rv; 757 return rv;
758 return 1;
759 } 758 }
760 return 0; 759 return len;
761} 760}
762 761
763static ssize_t ipmi_read(struct file *file, 762static ssize_t ipmi_read(struct file *file,
diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c
index ba6340ae98af..929101ecbae2 100644
--- a/drivers/char/pcmcia/ipwireless/hardware.c
+++ b/drivers/char/pcmcia/ipwireless/hardware.c
@@ -590,8 +590,10 @@ static struct ipw_rx_packet *pool_allocate(struct ipw_hardware *hw,
590 packet = kmalloc(sizeof(struct ipw_rx_packet) + 590 packet = kmalloc(sizeof(struct ipw_rx_packet) +
591 old_packet->length + minimum_free_space, 591 old_packet->length + minimum_free_space,
592 GFP_ATOMIC); 592 GFP_ATOMIC);
593 if (!packet) 593 if (!packet) {
594 kfree(old_packet);
594 return NULL; 595 return NULL;
596 }
595 memcpy(packet, old_packet, 597 memcpy(packet, old_packet,
596 sizeof(struct ipw_rx_packet) 598 sizeof(struct ipw_rx_packet)
597 + old_packet->length); 599 + old_packet->length);
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 5f80a9dff573..909cac93fa2a 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -678,12 +678,13 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
678 if (arg != (1<<tmp)) 678 if (arg != (1<<tmp))
679 return -EINVAL; 679 return -EINVAL;
680 680
681 rtc_freq = arg;
682
681 spin_lock_irqsave(&rtc_lock, flags); 683 spin_lock_irqsave(&rtc_lock, flags);
682 if (hpet_set_periodic_freq(arg)) { 684 if (hpet_set_periodic_freq(arg)) {
683 spin_unlock_irqrestore(&rtc_lock, flags); 685 spin_unlock_irqrestore(&rtc_lock, flags);
684 return 0; 686 return 0;
685 } 687 }
686 rtc_freq = arg;
687 688
688 val = CMOS_READ(RTC_FREQ_SELECT) & 0xf0; 689 val = CMOS_READ(RTC_FREQ_SELECT) & 0xf0;
689 val |= (16 - tmp); 690 val |= (16 - tmp);
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 13a4bdd4e4d6..c7a977bc03e8 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -623,6 +623,7 @@ static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = {
623 {"IFX0102", 0}, /* Infineon */ 623 {"IFX0102", 0}, /* Infineon */
624 {"BCM0101", 0}, /* Broadcom */ 624 {"BCM0101", 0}, /* Broadcom */
625 {"NSC1200", 0}, /* National */ 625 {"NSC1200", 0}, /* National */
626 {"ICO0102", 0}, /* Intel */
626 /* Add new here */ 627 /* Add new here */
627 {"", 0}, /* User Specified */ 628 {"", 0}, /* User Specified */
628 {"", 0} /* Terminator */ 629 {"", 0} /* Terminator */
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 0f3c66de69bc..8d8c6b736167 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1977,8 +1977,10 @@ isdn_writebuf_stub(int drvidx, int chan, const u_char __user * buf, int len)
1977 if (!skb) 1977 if (!skb)
1978 return -ENOMEM; 1978 return -ENOMEM;
1979 skb_reserve(skb, hl); 1979 skb_reserve(skb, hl);
1980 if (copy_from_user(skb_put(skb, len), buf, len)) 1980 if (copy_from_user(skb_put(skb, len), buf, len)) {
1981 dev_kfree_skb(skb);
1981 return -EFAULT; 1982 return -EFAULT;
1983 }
1982 ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, 1, skb); 1984 ret = dev->drv[drvidx]->interface->writebuf_skb(drvidx, chan, 1, skb);
1983 if (ret <= 0) 1985 if (ret <= 0)
1984 dev_kfree_skb(skb); 1986 dev_kfree_skb(skb);
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index 2bc6bdc9c1f2..d7bfd30f74a9 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -406,8 +406,10 @@ static int ov7670_read(struct i2c_client *c, unsigned char reg,
406 int ret; 406 int ret;
407 407
408 ret = i2c_smbus_read_byte_data(c, reg); 408 ret = i2c_smbus_read_byte_data(c, reg);
409 if (ret >= 0) 409 if (ret >= 0) {
410 *value = (unsigned char) ret; 410 *value = (unsigned char) ret;
411 ret = 0;
412 }
411 return ret; 413 return ret;
412} 414}
413 415
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index db3c892f87fb..d40d6d15ae20 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1686,9 +1686,14 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
1686 ioc->bus_type = SAS; 1686 ioc->bus_type = SAS;
1687 } 1687 }
1688 1688
1689 if (ioc->bus_type == SAS && mpt_msi_enable == -1) 1689 if (mpt_msi_enable == -1) {
1690 ioc->msi_enable = 1; 1690 /* Enable on SAS, disable on FC and SPI */
1691 else 1691 if (ioc->bus_type == SAS)
1692 ioc->msi_enable = 1;
1693 else
1694 ioc->msi_enable = 0;
1695 } else
1696 /* follow flag: 0 - disable; 1 - enable */
1692 ioc->msi_enable = mpt_msi_enable; 1697 ioc->msi_enable = mpt_msi_enable;
1693 1698
1694 if (ioc->errata_flag_1064) 1699 if (ioc->errata_flag_1064)
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 25bcfcf36f2e..1effca4e40e1 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1266,13 +1266,18 @@ mptspi_dv_renegotiate(struct _MPT_SCSI_HOST *hd)
1266static int 1266static int
1267mptspi_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) 1267mptspi_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
1268{ 1268{
1269 struct _MPT_SCSI_HOST *hd = shost_priv(ioc->sh);
1270 int rc; 1269 int rc;
1271 1270
1272 rc = mptscsih_ioc_reset(ioc, reset_phase); 1271 rc = mptscsih_ioc_reset(ioc, reset_phase);
1273 1272
1274 if (reset_phase == MPT_IOC_POST_RESET) 1273 /* only try to do a renegotiation if we're properly set up
1274 * if we get an ioc fault on bringup, ioc->sh will be NULL */
1275 if (reset_phase == MPT_IOC_POST_RESET &&
1276 ioc->sh) {
1277 struct _MPT_SCSI_HOST *hd = shost_priv(ioc->sh);
1278
1275 mptspi_dv_renegotiate(hd); 1279 mptspi_dv_renegotiate(hd);
1280 }
1276 1281
1277 return rc; 1282 return rc;
1278} 1283}
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index a7714da7c283..effc1ce8179a 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -152,6 +152,7 @@ static chipio_t pnp_info;
152static const struct pnp_device_id nsc_ircc_pnp_table[] = { 152static const struct pnp_device_id nsc_ircc_pnp_table[] = {
153 { .id = "NSC6001", .driver_data = 0 }, 153 { .id = "NSC6001", .driver_data = 0 },
154 { .id = "IBM0071", .driver_data = 0 }, 154 { .id = "IBM0071", .driver_data = 0 },
155 { .id = "HWPC224", .driver_data = 0 },
155 { } 156 { }
156}; 157};
157 158
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 58e128784585..04ad3573b159 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -1546,6 +1546,7 @@ static int via_ircc_net_open(struct net_device *dev)
1546 IRDA_WARNING("%s, unable to allocate dma2=%d\n", 1546 IRDA_WARNING("%s, unable to allocate dma2=%d\n",
1547 driver_name, self->io.dma2); 1547 driver_name, self->io.dma2);
1548 free_irq(self->io.irq, self); 1548 free_irq(self->io.irq, self);
1549 free_dma(self->io.dma);
1549 return -EAGAIN; 1550 return -EAGAIN;
1550 } 1551 }
1551 } 1552 }
@@ -1606,6 +1607,8 @@ static int via_ircc_net_close(struct net_device *dev)
1606 EnAllInt(iobase, OFF); 1607 EnAllInt(iobase, OFF);
1607 free_irq(self->io.irq, dev); 1608 free_irq(self->io.irq, dev);
1608 free_dma(self->io.dma); 1609 free_dma(self->io.dma);
1610 if (self->io.dma2 != self->io.dma)
1611 free_dma(self->io.dma2);
1609 1612
1610 return 0; 1613 return 0;
1611} 1614}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7ab94c825b57..b9018bfa0a97 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -602,6 +602,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
602 tun->attached = 1; 602 tun->attached = 1;
603 get_net(dev_net(tun->dev)); 603 get_net(dev_net(tun->dev));
604 604
605 /* Make sure persistent devices do not get stuck in
606 * xoff state.
607 */
608 if (netif_running(tun->dev))
609 netif_wake_queue(tun->dev);
610
605 strcpy(ifr->ifr_name, tun->dev->name); 611 strcpy(ifr->ifr_name, tun->dev->name);
606 return 0; 612 return 0;
607 613
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index 80039a0ae027..3b4e55cf33cd 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -777,8 +777,10 @@ static int hostap_cs_suspend(struct pcmcia_device *link)
777 int dev_open = 0; 777 int dev_open = 0;
778 struct hostap_interface *iface = NULL; 778 struct hostap_interface *iface = NULL;
779 779
780 if (dev) 780 if (!dev)
781 iface = netdev_priv(dev); 781 return -ENODEV;
782
783 iface = netdev_priv(dev);
782 784
783 PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_SUSPEND\n", dev_info); 785 PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_SUSPEND\n", dev_info);
784 if (iface && iface->local) 786 if (iface && iface->local)
@@ -798,8 +800,10 @@ static int hostap_cs_resume(struct pcmcia_device *link)
798 int dev_open = 0; 800 int dev_open = 0;
799 struct hostap_interface *iface = NULL; 801 struct hostap_interface *iface = NULL;
800 802
801 if (dev) 803 if (!dev)
802 iface = netdev_priv(dev); 804 return -ENODEV;
805
806 iface = netdev_priv(dev);
803 807
804 PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_RESUME\n", dev_info); 808 PDEBUG(DEBUG_EXTRA, "%s: CS_EVENT_PM_RESUME\n", dev_info);
805 809
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index f5387a7a76c0..55ac850744b3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -449,7 +449,7 @@ static void iwl3945_dbg_report_frame(struct iwl3945_priv *priv,
449 449
450 if (print_summary) { 450 if (print_summary) {
451 char *title; 451 char *title;
452 u32 rate; 452 int rate;
453 453
454 if (hundred) 454 if (hundred)
455 title = "100Frames"; 455 title = "100Frames";
@@ -487,7 +487,7 @@ static void iwl3945_dbg_report_frame(struct iwl3945_priv *priv,
487 * but you can hack it to show more, if you'd like to. */ 487 * but you can hack it to show more, if you'd like to. */
488 if (dataframe) 488 if (dataframe)
489 IWL_DEBUG_RX("%s: mhd=0x%04x, dst=0x%02x, " 489 IWL_DEBUG_RX("%s: mhd=0x%04x, dst=0x%02x, "
490 "len=%u, rssi=%d, chnl=%d, rate=%u, \n", 490 "len=%u, rssi=%d, chnl=%d, rate=%d, \n",
491 title, fc, header->addr1[5], 491 title, fc, header->addr1[5],
492 length, rssi, channel, rate); 492 length, rssi, channel, rate);
493 else { 493 else {
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c
index d448c9702a0f..387d4878af2f 100644
--- a/drivers/net/wireless/libertas/scan.c
+++ b/drivers/net/wireless/libertas/scan.c
@@ -567,11 +567,11 @@ static int lbs_process_bss(struct bss_descriptor *bss,
567 pos += 8; 567 pos += 8;
568 568
569 /* beacon interval is 2 bytes long */ 569 /* beacon interval is 2 bytes long */
570 bss->beaconperiod = le16_to_cpup((void *) pos); 570 bss->beaconperiod = get_unaligned_le16(pos);
571 pos += 2; 571 pos += 2;
572 572
573 /* capability information is 2 bytes long */ 573 /* capability information is 2 bytes long */
574 bss->capability = le16_to_cpup((void *) pos); 574 bss->capability = get_unaligned_le16(pos);
575 lbs_deb_scan("process_bss: capabilities 0x%04x\n", bss->capability); 575 lbs_deb_scan("process_bss: capabilities 0x%04x\n", bss->capability);
576 pos += 2; 576 pos += 2;
577 577
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 560b9c73c0b9..b36ed1c6c746 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -731,6 +731,17 @@ static int rt2400pci_init_registers(struct rt2x00_dev *rt2x00dev)
731 (rt2x00dev->rx->data_size / 128)); 731 (rt2x00dev->rx->data_size / 128));
732 rt2x00pci_register_write(rt2x00dev, CSR9, reg); 732 rt2x00pci_register_write(rt2x00dev, CSR9, reg);
733 733
734 rt2x00pci_register_read(rt2x00dev, CSR14, &reg);
735 rt2x00_set_field32(&reg, CSR14_TSF_COUNT, 0);
736 rt2x00_set_field32(&reg, CSR14_TSF_SYNC, 0);
737 rt2x00_set_field32(&reg, CSR14_TBCN, 0);
738 rt2x00_set_field32(&reg, CSR14_TCFP, 0);
739 rt2x00_set_field32(&reg, CSR14_TATIMW, 0);
740 rt2x00_set_field32(&reg, CSR14_BEACON_GEN, 0);
741 rt2x00_set_field32(&reg, CSR14_CFP_COUNT_PRELOAD, 0);
742 rt2x00_set_field32(&reg, CSR14_TBCM_PRELOAD, 0);
743 rt2x00pci_register_write(rt2x00dev, CSR14, reg);
744
734 rt2x00pci_register_write(rt2x00dev, CNT3, 0x3f080000); 745 rt2x00pci_register_write(rt2x00dev, CNT3, 0x3f080000);
735 746
736 rt2x00pci_register_read(rt2x00dev, ARCSR0, &reg); 747 rt2x00pci_register_read(rt2x00dev, ARCSR0, &reg);
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index a5ed54b69262..f7731fb82555 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -824,6 +824,17 @@ static int rt2500pci_init_registers(struct rt2x00_dev *rt2x00dev)
824 rt2x00_set_field32(&reg, CSR11_CW_SELECT, 0); 824 rt2x00_set_field32(&reg, CSR11_CW_SELECT, 0);
825 rt2x00pci_register_write(rt2x00dev, CSR11, reg); 825 rt2x00pci_register_write(rt2x00dev, CSR11, reg);
826 826
827 rt2x00pci_register_read(rt2x00dev, CSR14, &reg);
828 rt2x00_set_field32(&reg, CSR14_TSF_COUNT, 0);
829 rt2x00_set_field32(&reg, CSR14_TSF_SYNC, 0);
830 rt2x00_set_field32(&reg, CSR14_TBCN, 0);
831 rt2x00_set_field32(&reg, CSR14_TCFP, 0);
832 rt2x00_set_field32(&reg, CSR14_TATIMW, 0);
833 rt2x00_set_field32(&reg, CSR14_BEACON_GEN, 0);
834 rt2x00_set_field32(&reg, CSR14_CFP_COUNT_PRELOAD, 0);
835 rt2x00_set_field32(&reg, CSR14_TBCM_PRELOAD, 0);
836 rt2x00pci_register_write(rt2x00dev, CSR14, reg);
837
827 rt2x00pci_register_write(rt2x00dev, CNT3, 0); 838 rt2x00pci_register_write(rt2x00dev, CNT3, 0);
828 839
829 rt2x00pci_register_read(rt2x00dev, TXCSR8, &reg); 840 rt2x00pci_register_read(rt2x00dev, TXCSR8, &reg);
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 61e59c17a60a..d90512f97b39 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -801,6 +801,13 @@ static int rt2500usb_init_registers(struct rt2x00_dev *rt2x00dev)
801 rt2x00_set_field16(&reg, TXRX_CSR8_BBP_ID1_VALID, 0); 801 rt2x00_set_field16(&reg, TXRX_CSR8_BBP_ID1_VALID, 0);
802 rt2500usb_register_write(rt2x00dev, TXRX_CSR8, reg); 802 rt2500usb_register_write(rt2x00dev, TXRX_CSR8, reg);
803 803
804 rt2500usb_register_read(rt2x00dev, TXRX_CSR19, &reg);
805 rt2x00_set_field16(&reg, TXRX_CSR19_TSF_COUNT, 0);
806 rt2x00_set_field16(&reg, TXRX_CSR19_TSF_SYNC, 0);
807 rt2x00_set_field16(&reg, TXRX_CSR19_TBCN, 0);
808 rt2x00_set_field16(&reg, TXRX_CSR19_BEACON_GEN, 0);
809 rt2500usb_register_write(rt2x00dev, TXRX_CSR19, reg);
810
804 rt2500usb_register_write(rt2x00dev, TXRX_CSR21, 0xe78f); 811 rt2500usb_register_write(rt2x00dev, TXRX_CSR21, 0xe78f);
805 rt2500usb_register_write(rt2x00dev, MAC_CSR9, 0xff1d); 812 rt2500usb_register_write(rt2x00dev, MAC_CSR9, 0xff1d);
806 813
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 14bc7b281659..c3afb5cbe807 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -1201,6 +1201,15 @@ static int rt61pci_init_registers(struct rt2x00_dev *rt2x00dev)
1201 rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); 1201 rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42);
1202 rt2x00pci_register_write(rt2x00dev, TXRX_CSR8, reg); 1202 rt2x00pci_register_write(rt2x00dev, TXRX_CSR8, reg);
1203 1203
1204 rt2x00pci_register_read(rt2x00dev, TXRX_CSR9, &reg);
1205 rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_INTERVAL, 0);
1206 rt2x00_set_field32(&reg, TXRX_CSR9_TSF_TICKING, 0);
1207 rt2x00_set_field32(&reg, TXRX_CSR9_TSF_SYNC, 0);
1208 rt2x00_set_field32(&reg, TXRX_CSR9_TBTT_ENABLE, 0);
1209 rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0);
1210 rt2x00_set_field32(&reg, TXRX_CSR9_TIMESTAMP_COMPENSATE, 0);
1211 rt2x00pci_register_write(rt2x00dev, TXRX_CSR9, reg);
1212
1204 rt2x00pci_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); 1213 rt2x00pci_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f);
1205 1214
1206 rt2x00pci_register_write(rt2x00dev, MAC_CSR6, 0x00000fff); 1215 rt2x00pci_register_write(rt2x00dev, MAC_CSR6, 0x00000fff);
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index 83cc0147f698..46e9e081fbf1 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -1006,6 +1006,15 @@ static int rt73usb_init_registers(struct rt2x00_dev *rt2x00dev)
1006 rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42); 1006 rt2x00_set_field32(&reg, TXRX_CSR8_ACK_CTS_54MBS, 42);
1007 rt73usb_register_write(rt2x00dev, TXRX_CSR8, reg); 1007 rt73usb_register_write(rt2x00dev, TXRX_CSR8, reg);
1008 1008
1009 rt73usb_register_read(rt2x00dev, TXRX_CSR9, &reg);
1010 rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_INTERVAL, 0);
1011 rt2x00_set_field32(&reg, TXRX_CSR9_TSF_TICKING, 0);
1012 rt2x00_set_field32(&reg, TXRX_CSR9_TSF_SYNC, 0);
1013 rt2x00_set_field32(&reg, TXRX_CSR9_TBTT_ENABLE, 0);
1014 rt2x00_set_field32(&reg, TXRX_CSR9_BEACON_GEN, 0);
1015 rt2x00_set_field32(&reg, TXRX_CSR9_TIMESTAMP_COMPENSATE, 0);
1016 rt73usb_register_write(rt2x00dev, TXRX_CSR9, reg);
1017
1009 rt73usb_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f); 1018 rt73usb_register_write(rt2x00dev, TXRX_CSR15, 0x0000000f);
1010 1019
1011 rt73usb_register_read(rt2x00dev, MAC_CSR6, &reg); 1020 rt73usb_register_read(rt2x00dev, MAC_CSR6, &reg);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 418606ac1c3b..694e95d35fd4 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -765,6 +765,7 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw,
765{ 765{
766 struct zd_mac *mac = zd_hw_mac(hw); 766 struct zd_mac *mac = zd_hw_mac(hw);
767 mac->type = IEEE80211_IF_TYPE_INVALID; 767 mac->type = IEEE80211_IF_TYPE_INVALID;
768 zd_set_beacon_interval(&mac->chip, 0);
768 zd_write_mac_addr(&mac->chip, NULL); 769 zd_write_mac_addr(&mac->chip, NULL);
769} 770}
770 771
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 8941f5eb96c2..6cdad9764604 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -64,6 +64,7 @@ static struct usb_device_id usb_ids[] = {
64 { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B }, 64 { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B },
65 { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B }, 65 { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B },
66 { USB_DEVICE(0x050d, 0x705c), .driver_info = DEVICE_ZD1211B }, 66 { USB_DEVICE(0x050d, 0x705c), .driver_info = DEVICE_ZD1211B },
67 { USB_DEVICE(0x083a, 0xe506), .driver_info = DEVICE_ZD1211B },
67 { USB_DEVICE(0x083a, 0x4505), .driver_info = DEVICE_ZD1211B }, 68 { USB_DEVICE(0x083a, 0x4505), .driver_info = DEVICE_ZD1211B },
68 { USB_DEVICE(0x0471, 0x1236), .driver_info = DEVICE_ZD1211B }, 69 { USB_DEVICE(0x0471, 0x1236), .driver_info = DEVICE_ZD1211B },
69 { USB_DEVICE(0x13b1, 0x0024), .driver_info = DEVICE_ZD1211B }, 70 { USB_DEVICE(0x13b1, 0x0024), .driver_info = DEVICE_ZD1211B },
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 3ce9f3defc12..956d3e79f6aa 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -101,8 +101,8 @@ static int rio_device_probe(struct device *dev)
101 if (error >= 0) { 101 if (error >= 0) {
102 rdev->driver = rdrv; 102 rdev->driver = rdrv;
103 error = 0; 103 error = 0;
104 } else
104 rio_dev_put(rdev); 105 rio_dev_put(rdev);
105 }
106 } 106 }
107 return error; 107 return error;
108} 108}
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index 11644c8fca82..abfdfcbaa059 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -55,7 +55,7 @@ struct fm3130 {
55 int alarm; 55 int alarm;
56}; 56};
57static const struct i2c_device_id fm3130_id[] = { 57static const struct i2c_device_id fm3130_id[] = {
58 { "fm3130-rtc", 0 }, 58 { "fm3130", 0 },
59 { } 59 { }
60}; 60};
61MODULE_DEVICE_TABLE(i2c, fm3130_id); 61MODULE_DEVICE_TABLE(i2c, fm3130_id);
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 0fc4c3630780..748a502a6355 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -302,6 +302,7 @@ static int pcf8563_remove(struct i2c_client *client)
302 302
303static const struct i2c_device_id pcf8563_id[] = { 303static const struct i2c_device_id pcf8563_id[] = {
304 { "pcf8563", 0 }, 304 { "pcf8563", 0 },
305 { "rtc8564", 0 },
305 { } 306 { }
306}; 307};
307MODULE_DEVICE_TABLE(i2c, pcf8563_id); 308MODULE_DEVICE_TABLE(i2c, pcf8563_id);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 999e91ea7451..e7a3a6554425 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -71,6 +71,7 @@
71#include <linux/module.h> 71#include <linux/module.h>
72#include <linux/moduleparam.h> 72#include <linux/moduleparam.h>
73#include <linux/libata.h> 73#include <linux/libata.h>
74#include <linux/hdreg.h>
74#include <asm/io.h> 75#include <asm/io.h>
75#include <asm/irq.h> 76#include <asm/irq.h>
76#include <asm/processor.h> 77#include <asm/processor.h>
@@ -4913,8 +4914,11 @@ static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
4913 struct ipr_resource_entry *res; 4914 struct ipr_resource_entry *res;
4914 4915
4915 res = (struct ipr_resource_entry *)sdev->hostdata; 4916 res = (struct ipr_resource_entry *)sdev->hostdata;
4916 if (res && ipr_is_gata(res)) 4917 if (res && ipr_is_gata(res)) {
4918 if (cmd == HDIO_GET_IDENTITY)
4919 return -ENOTTY;
4917 return ata_scsi_ioctl(sdev, cmd, arg); 4920 return ata_scsi_ioctl(sdev, cmd, arg);
4921 }
4918 4922
4919 return -EINVAL; 4923 return -EINVAL;
4920} 4924}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a82d2fe80fb5..cbf55d59a54c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -207,6 +207,15 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
207 */ 207 */
208 blk_execute_rq(req->q, NULL, req, 1); 208 blk_execute_rq(req->q, NULL, req, 1);
209 209
210 /*
211 * Some devices (USB mass-storage in particular) may transfer
212 * garbage data together with a residue indicating that the data
213 * is invalid. Prevent the garbage from being misinterpreted
214 * and prevent security leaks by zeroing out the excess data.
215 */
216 if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
217 memset(buffer + (bufflen - req->data_len), 0, req->data_len);
218
210 ret = req->errors; 219 ret = req->errors;
211 out: 220 out:
212 blk_put_request(req); 221 blk_put_request(req);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 1bc00b721e9d..be95e55b228b 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2623,6 +2623,9 @@ static struct console serial8250_console = {
2623 2623
2624static int __init serial8250_console_init(void) 2624static int __init serial8250_console_init(void)
2625{ 2625{
2626 if (nr_uarts > UART_NR)
2627 nr_uarts = UART_NR;
2628
2626 serial8250_isa_init_ports(); 2629 serial8250_isa_init_ports();
2627 register_console(&serial8250_console); 2630 register_console(&serial8250_console);
2628 return 0; 2631 return 0;
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index d28c53868093..538c570df337 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -537,6 +537,13 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc,
537 int err = 0; 537 int err = 0;
538 u32 tmp; 538 u32 tmp;
539 539
540 if (dev->bus->bustype != SSB_BUSTYPE_PCI) {
541 /* This SSB device is not on a PCI host-bus. So the IRQs are
542 * not routed through the PCI core.
543 * So we must not enable routing through the PCI core. */
544 goto out;
545 }
546
540 if (!pdev) 547 if (!pdev)
541 goto out; 548 goto out;
542 bus = pdev->bus; 549 bus = pdev->bus;
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index f90fe0c7373f..68c17f5ea8ea 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -8,7 +8,7 @@
8 * Bus Glue for AMD Alchemy Au1xxx 8 * Bus Glue for AMD Alchemy Au1xxx
9 * 9 *
10 * Written by Christopher Hoover <ch@hpl.hp.com> 10 * Written by Christopher Hoover <ch@hpl.hp.com>
11 * Based on fragments of previous driver by Rusell King et al. 11 * Based on fragments of previous driver by Russell King et al.
12 * 12 *
13 * Modified for LH7A404 from ohci-sa1111.c 13 * Modified for LH7A404 from ohci-sa1111.c
14 * by Durgesh Pattamatta <pattamattad@sharpsec.com> 14 * by Durgesh Pattamatta <pattamattad@sharpsec.com>
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c
index 13c12ed22252..1ef5d482c145 100644
--- a/drivers/usb/host/ohci-lh7a404.c
+++ b/drivers/usb/host/ohci-lh7a404.c
@@ -8,7 +8,7 @@
8 * Bus Glue for Sharp LH7A404 8 * Bus Glue for Sharp LH7A404
9 * 9 *
10 * Written by Christopher Hoover <ch@hpl.hp.com> 10 * Written by Christopher Hoover <ch@hpl.hp.com>
11 * Based on fragments of previous driver by Rusell King et al. 11 * Based on fragments of previous driver by Russell King et al.
12 * 12 *
13 * Modified for LH7A404 from ohci-sa1111.c 13 * Modified for LH7A404 from ohci-sa1111.c
14 * by Durgesh Pattamatta <pattamattad@sharpsec.com> 14 * by Durgesh Pattamatta <pattamattad@sharpsec.com>
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c
index ead4772f0f27..3c7a740cfe0c 100644
--- a/drivers/usb/host/ohci-s3c2410.c
+++ b/drivers/usb/host/ohci-s3c2410.c
@@ -8,7 +8,7 @@
8 * USB Bus Glue for Samsung S3C2410 8 * USB Bus Glue for Samsung S3C2410
9 * 9 *
10 * Written by Christopher Hoover <ch@hpl.hp.com> 10 * Written by Christopher Hoover <ch@hpl.hp.com>
11 * Based on fragments of previous driver by Rusell King et al. 11 * Based on fragments of previous driver by Russell King et al.
12 * 12 *
13 * Modified for S3C2410 from ohci-sa1111.c, ohci-omap.c and ohci-lh7a40.c 13 * Modified for S3C2410 from ohci-sa1111.c, ohci-omap.c and ohci-lh7a40.c
14 * by Ben Dooks, <ben@simtec.co.uk> 14 * by Ben Dooks, <ben@simtec.co.uk>
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c
index 0f48f2d99226..2e9dceb9bb99 100644
--- a/drivers/usb/host/ohci-sa1111.c
+++ b/drivers/usb/host/ohci-sa1111.c
@@ -8,7 +8,7 @@
8 * SA1111 Bus Glue 8 * SA1111 Bus Glue
9 * 9 *
10 * Written by Christopher Hoover <ch@hpl.hp.com> 10 * Written by Christopher Hoover <ch@hpl.hp.com>
11 * Based on fragments of previous driver by Rusell King et al. 11 * Based on fragments of previous driver by Russell King et al.
12 * 12 *
13 * This file is licenced under the GPL. 13 * This file is licenced under the GPL.
14 */ 14 */
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 24843fdd5395..59df132cc375 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -74,6 +74,7 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
74{ 74{
75 struct fb_info *info = vma->vm_private_data; 75 struct fb_info *info = vma->vm_private_data;
76 struct fb_deferred_io *fbdefio = info->fbdefio; 76 struct fb_deferred_io *fbdefio = info->fbdefio;
77 struct page *cur;
77 78
78 /* this is a callback we get when userspace first tries to 79 /* this is a callback we get when userspace first tries to
79 write to the page. we schedule a workqueue. that workqueue 80 write to the page. we schedule a workqueue. that workqueue
@@ -83,7 +84,24 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
83 84
84 /* protect against the workqueue changing the page list */ 85 /* protect against the workqueue changing the page list */
85 mutex_lock(&fbdefio->lock); 86 mutex_lock(&fbdefio->lock);
86 list_add(&page->lru, &fbdefio->pagelist); 87
88 /* we loop through the pagelist before adding in order
89 to keep the pagelist sorted */
90 list_for_each_entry(cur, &fbdefio->pagelist, lru) {
91 /* this check is to catch the case where a new
92 process could start writing to the same page
93 through a new pte. this new access can cause the
94 mkwrite even when the original ps's pte is marked
95 writable */
96 if (unlikely(cur == page))
97 goto page_already_added;
98 else if (cur->index > page->index)
99 break;
100 }
101
102 list_add_tail(&page->lru, &cur->lru);
103
104page_already_added:
87 mutex_unlock(&fbdefio->lock); 105 mutex_unlock(&fbdefio->lock);
88 106
89 /* come back after delay to process the deferred IO */ 107 /* come back after delay to process the deferred IO */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 34902cff5400..0e9fc2ba90ee 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -34,11 +34,11 @@
34static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { 34static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
35 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, 35 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
36 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, 36 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, 37 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
38 {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, 38 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"}, 39 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"}, 40 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
41 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} } 41 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
42; 42;
43 43
44 44
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 722be543ceec..2e904bd111c8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode,
219 rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, 219 rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data,
220 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 220 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
221 CIFS_MOUNT_MAP_SPECIAL_CHR); 221 CIFS_MOUNT_MAP_SPECIAL_CHR);
222 if (rc) { 222 if (rc == -EREMOTE && !is_dfs_referral) {
223 if (rc == -EREMOTE && !is_dfs_referral) { 223 is_dfs_referral = true;
224 is_dfs_referral = true; 224 cFYI(DBG2, ("DFS ref"));
225 cFYI(DBG2, ("DFS ref")); 225 /* for DFS, server does not give us real inode data */
226 /* for DFS, server does not give us real inode data */ 226 fill_fake_finddataunix(&find_data, sb);
227 fill_fake_finddataunix(&find_data, sb); 227 rc = 0;
228 rc = 0; 228 } else if (rc)
229 } 229 goto cgiiu_exit;
230 } 230
231 num_of_bytes = le64_to_cpu(find_data.NumOfBytes); 231 num_of_bytes = le64_to_cpu(find_data.NumOfBytes);
232 end_of_file = le64_to_cpu(find_data.EndOfFile); 232 end_of_file = le64_to_cpu(find_data.EndOfFile);
233 233
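
The cifs hunk above flattens the nested error check: -EREMOTE on a DFS referral point is converted into success with synthesized attributes, while any other error exits through cgiiu_exit. A hedged, simplified sketch of that control-flow shape (all names below are hypothetical, and the surrounding DFS retry loop is omitted):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    #ifndef EREMOTE
    #define EREMOTE 66                      /* Linux value, in case libc lacks it */
    #endif

    struct find_data { long num_bytes, end_of_file; };

    /* pretend server query; returns -EREMOTE for a DFS referral point */
    static int query_path_info(const char *path, struct find_data *fd)
    {
            if (strcmp(path, "/dfs/link") == 0)
                    return -EREMOTE;
            fd->num_bytes = 4096;
            fd->end_of_file = 4096;
            return 0;
    }

    static int get_inode_info(const char *path)
    {
            struct find_data fd = { 0 };
            int is_dfs_referral = 0;
            int rc = query_path_info(path, &fd);

            if (rc == -EREMOTE && !is_dfs_referral) {
                    is_dfs_referral = 1;
                    /* server gives no real data for a referral: fake it */
                    memset(&fd, 0, sizeof(fd));
                    rc = 0;
            } else if (rc) {
                    goto out;                       /* genuine failure */
            }

            printf("%s: %ld bytes (dfs=%d)\n", path, fd.num_bytes, is_dfs_referral);
    out:
            return rc;
    }

    int main(void)
    {
            get_inode_info("/plain/file");
            get_inode_info("/dfs/link");
            return 0;
    }
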
@@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
236 *pinode = new_inode(sb); 236 *pinode = new_inode(sb);
237 if (*pinode == NULL) { 237 if (*pinode == NULL) {
238 rc = -ENOMEM; 238 rc = -ENOMEM;
239 goto cgiiu_exit; 239 goto cgiiu_exit;
240 } 240 }
241 /* Is an i_ino of zero legal? */ 241 /* Is an i_ino of zero legal? */
242 /* note ino incremented to unique num in new_inode */ 242 /* note ino incremented to unique num in new_inode */
diff --git a/fs/exec.c b/fs/exec.c
index da94a6f05df3..fd9234379e8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -610,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
610 bprm->exec -= stack_shift; 610 bprm->exec -= stack_shift;
611 611
612 down_write(&mm->mmap_sem); 612 down_write(&mm->mmap_sem);
613 vm_flags = vma->vm_flags; 613 vm_flags = VM_STACK_FLAGS;
614 614
615 /* 615 /*
616 * Adjust stack execute permissions; explicitly enable for 616 * Adjust stack execute permissions; explicitly enable for
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 394d25a131a5..80e20d9f2780 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1554,8 +1554,8 @@ out:
1554 */ 1554 */
1555int ocfs2_file_lock(struct file *file, int ex, int trylock) 1555int ocfs2_file_lock(struct file *file, int ex, int trylock)
1556{ 1556{
1557 int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; 1557 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1558 unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; 1558 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1559 unsigned long flags; 1559 unsigned long flags;
1560 struct ocfs2_file_private *fp = file->private_data; 1560 struct ocfs2_file_private *fp = file->private_data;
1561 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1561 struct ocfs2_lock_res *lockres = &fp->fp_flock;
@@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1582 * Get the lock at NLMODE to start - that way we 1582 * Get the lock at NLMODE to start - that way we
1583 * can cancel the upconvert request if need be. 1583 * can cancel the upconvert request if need be.
1584 */ 1584 */
1585 ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); 1585 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1586 if (ret < 0) { 1586 if (ret < 0) {
1587 mlog_errno(ret); 1587 mlog_errno(ret);
1588 goto out; 1588 goto out;
@@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1597 } 1597 }
1598 1598
1599 lockres->l_action = OCFS2_AST_CONVERT; 1599 lockres->l_action = OCFS2_AST_CONVERT;
1600 lkm_flags |= LKM_CONVERT; 1600 lkm_flags |= DLM_LKF_CONVERT;
1601 lockres->l_requested = level; 1601 lockres->l_requested = level;
1602 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1602 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1603 1603
@@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file)
1664 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1664 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1665 return; 1665 return;
1666 1666
1667 if (lockres->l_level == LKM_NLMODE) 1667 if (lockres->l_level == DLM_LOCK_NL)
1668 return; 1668 return;
1669 1669
1670 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1670 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
@@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file)
1678 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1678 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1679 lockres->l_blocking = DLM_LOCK_EX; 1679 lockres->l_blocking = DLM_LOCK_EX;
1680 1680
1681 gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); 1681 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1682 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1682 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1683 spin_unlock_irqrestore(&lockres->l_lock, flags); 1683 spin_unlock_irqrestore(&lockres->l_lock, flags);
1684 1684
1685 ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); 1685 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1686 if (ret) { 1686 if (ret) {
1687 mlog_errno(ret); 1687 mlog_errno(ret);
1688 return; 1688 return;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index afaee301b0ee..ad3d26ddfe31 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2427,13 +2427,20 @@ restart:
2427 if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { 2427 if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
2428 xlog_state_switch_iclogs(log, iclog, iclog->ic_size); 2428 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
2429 2429
2430 /* If I'm the only one writing to this iclog, sync it to disk */ 2430 /*
2431 if (atomic_read(&iclog->ic_refcnt) == 1) { 2431 * If I'm the only one writing to this iclog, sync it to disk.
2432 * We need to do an atomic compare and decrement here to avoid
2433 * racing with concurrent atomic_dec_and_lock() calls in
2434 * xlog_state_release_iclog() when there is more than one
2435 * reference to the iclog.
2436 */
2437 if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
2438 /* we are the only one */
2432 spin_unlock(&log->l_icloglock); 2439 spin_unlock(&log->l_icloglock);
2433 if ((error = xlog_state_release_iclog(log, iclog))) 2440 error = xlog_state_release_iclog(log, iclog);
2441 if (error)
2434 return error; 2442 return error;
2435 } else { 2443 } else {
2436 atomic_dec(&iclog->ic_refcnt);
2437 spin_unlock(&log->l_icloglock); 2444 spin_unlock(&log->l_icloglock);
2438 } 2445 }
2439 goto restart; 2446 goto restart;
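
The xlog change above replaces a racy atomic_read()/atomic_dec() pair with atomic_add_unless(&refcnt, -1, 1): the count is decremented only when it is not already 1, and a zero return tells the caller it holds the last reference and must take the sync path itself. Below is a standalone model of that primitive built on C11 atomics; it is an illustration, not the kernel's implementation:

    #include <stdatomic.h>
    #include <stdio.h>

    /*
     * Add 'a' to '*v' unless '*v' == 'u'.  Returns 1 if the add was done,
     * 0 if the value was already 'u' (mirrors atomic_add_unless()).
     */
    static int atomic_add_unless(atomic_int *v, int a, int u)
    {
            int c = atomic_load(v);

            while (c != u) {
                    if (atomic_compare_exchange_weak(v, &c, c + a))
                            return 1;
                    /* on failure, c was reloaded with the current value; retry */
            }
            return 0;
    }

    int main(void)
    {
            atomic_int refcnt = 2;

            /* not the last reference: just drop ours */
            if (!atomic_add_unless(&refcnt, -1, 1))
                    printf("sole owner, sync to disk\n");
            else
                    printf("dropped ref, now %d\n", atomic_load(&refcnt));

            /* now refcnt == 1: we are the only holder */
            if (!atomic_add_unless(&refcnt, -1, 1))
                    printf("sole owner, sync to disk\n");

            return 0;
    }
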
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
index ea3070ff13a5..ff5b7cf6be4d 100644
--- a/include/asm-avr32/setup.h
+++ b/include/asm-avr32/setup.h
@@ -2,7 +2,7 @@
2 * Copyright (C) 2004-2006 Atmel Corporation 2 * Copyright (C) 2004-2006 Atmel Corporation
3 * 3 *
4 * Based on linux/include/asm-arm/setup.h 4 * Based on linux/include/asm-arm/setup.h
5 * Copyright (C) 1997-1999 Russel King 5 * Copyright (C) 1997-1999 Russell King
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h
index d3a12a9079f7..7742ec000cc4 100644
--- a/include/asm-frv/system.h
+++ b/include/asm-frv/system.h
@@ -87,7 +87,7 @@ do { \
87} while(0) 87} while(0)
88 88
89#define irqs_disabled() \ 89#define irqs_disabled() \
90 ({unsigned long flags; local_save_flags(flags); flags; }) 90 ({unsigned long flags; local_save_flags(flags); !!flags; })
91 91
92#define local_irq_save(flags) \ 92#define local_irq_save(flags) \
93do { \ 93do { \
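
The frv change above adds a double negation so irqs_disabled() yields a strict 0/1 instead of the raw flag bits; a caller that compares the result against 1, or stores it in a one-bit field, would otherwise misbehave. A tiny illustration of the idiom:

    #include <stdio.h>

    int main(void)
    {
            unsigned long flags = 0x10;     /* e.g. an interrupt-mask bit */

            /* raw value: truthy, but not equal to 1 */
            printf("%d\n", (flags) == 1);   /* prints 0 -- surprising */

            /* normalized: any non-zero value becomes exactly 1 */
            printf("%d\n", (!!flags) == 1); /* prints 1 */
            return 0;
    }
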
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 268a012bcd79..28bddbcb38be 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -192,8 +192,8 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
192 unsigned cpu = smp_processor_id(); 192 unsigned cpu = smp_processor_id();
193 ldt_desc ldt; 193 ldt_desc ldt;
194 194
195 set_tssldt_descriptor(&ldt, (unsigned long)addr, 195 set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
196 DESC_LDT, entries * sizeof(ldt) - 1); 196 entries * LDT_ENTRY_SIZE - 1);
197 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, 197 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
198 &ldt, DESC_LDT); 198 &ldt, DESC_LDT);
199 asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); 199 asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
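
The descriptor fix above computes the LDT limit from LDT_ENTRY_SIZE (one LDT entry) instead of sizeof(ldt), where ldt is the descriptor that describes the LDT itself and is twice as large on 64-bit, so the old limit overshot the table. A quick arithmetic check, assuming the usual x86 sizes of 8 and 16 bytes:

    #include <stdio.h>

    #define LDT_ENTRY_SIZE   8      /* one LDT entry (assumed, as on x86) */
    #define LDT_DESC_SIZE   16      /* descriptor describing the LDT, 64-bit */

    int main(void)
    {
            unsigned int entries = 3;

            /* correct limit: offset of the last addressable byte of the table */
            printf("limit = %u\n", entries * LDT_ENTRY_SIZE - 1);   /* 23 */

            /* old computation over-stated the limit on 64-bit */
            printf("bogus = %u\n", entries * LDT_DESC_SIZE - 1);    /* 47 */
            return 0;
    }
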
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33a8f42041fa..f6cd60f2de63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void);
134extern unsigned long nr_uninterruptible(void); 134extern unsigned long nr_uninterruptible(void);
135extern unsigned long nr_active(void); 135extern unsigned long nr_active(void);
136extern unsigned long nr_iowait(void); 136extern unsigned long nr_iowait(void);
137extern unsigned long weighted_cpuload(const int cpu);
138 137
139struct seq_file; 138struct seq_file;
140struct cfs_rq; 139struct cfs_rq;
@@ -784,6 +783,8 @@ struct sched_domain {
784 unsigned int balance_interval; /* initialise to 1. units in ms. */ 783 unsigned int balance_interval; /* initialise to 1. units in ms. */
785 unsigned int nr_balance_failed; /* initialise to 0 */ 784 unsigned int nr_balance_failed; /* initialise to 0 */
786 785
786 u64 last_update;
787
787#ifdef CONFIG_SCHEDSTATS 788#ifdef CONFIG_SCHEDSTATS
788 /* load_balance() stats */ 789 /* load_balance() stats */
789 unsigned int lb_count[CPU_MAX_IDLE_TYPES]; 790 unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -823,23 +824,6 @@ extern int arch_reinit_sched_domains(void);
823 824
824#endif /* CONFIG_SMP */ 825#endif /* CONFIG_SMP */
825 826
826/*
827 * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
828 * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
829 * task of nice 0 or enough lower priority tasks to bring up the
830 * weighted_cpuload
831 */
832static inline int above_background_load(void)
833{
834 unsigned long cpu;
835
836 for_each_online_cpu(cpu) {
837 if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
838 return 1;
839 }
840 return 0;
841}
842
843struct io_context; /* See blkdev.h */ 827struct io_context; /* See blkdev.h */
844#define NGROUPS_SMALL 32 828#define NGROUPS_SMALL 32
845#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) 829#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
@@ -921,8 +905,8 @@ struct sched_class {
921 void (*set_cpus_allowed)(struct task_struct *p, 905 void (*set_cpus_allowed)(struct task_struct *p,
922 const cpumask_t *newmask); 906 const cpumask_t *newmask);
923 907
924 void (*join_domain)(struct rq *rq); 908 void (*rq_online)(struct rq *rq);
925 void (*leave_domain)(struct rq *rq); 909 void (*rq_offline)(struct rq *rq);
926 910
927 void (*switched_from) (struct rq *this_rq, struct task_struct *task, 911 void (*switched_from) (struct rq *this_rq, struct task_struct *task,
928 int running); 912 int running);
@@ -1039,6 +1023,7 @@ struct task_struct {
1039#endif 1023#endif
1040 1024
1041 int prio, static_prio, normal_prio; 1025 int prio, static_prio, normal_prio;
1026 unsigned int rt_priority;
1042 const struct sched_class *sched_class; 1027 const struct sched_class *sched_class;
1043 struct sched_entity se; 1028 struct sched_entity se;
1044 struct sched_rt_entity rt; 1029 struct sched_rt_entity rt;
@@ -1122,7 +1107,6 @@ struct task_struct {
1122 int __user *set_child_tid; /* CLONE_CHILD_SETTID */ 1107 int __user *set_child_tid; /* CLONE_CHILD_SETTID */
1123 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ 1108 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
1124 1109
1125 unsigned int rt_priority;
1126 cputime_t utime, stime, utimescaled, stimescaled; 1110 cputime_t utime, stime, utimescaled, stimescaled;
1127 cputime_t gtime; 1111 cputime_t gtime;
1128 cputime_t prev_utime, prev_stime; 1112 cputime_t prev_utime, prev_stime;
@@ -1141,12 +1125,12 @@ struct task_struct {
1141 gid_t gid,egid,sgid,fsgid; 1125 gid_t gid,egid,sgid,fsgid;
1142 struct group_info *group_info; 1126 struct group_info *group_info;
1143 kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; 1127 kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
1144 unsigned securebits;
1145 struct user_struct *user; 1128 struct user_struct *user;
1129 unsigned securebits;
1146#ifdef CONFIG_KEYS 1130#ifdef CONFIG_KEYS
1131 unsigned char jit_keyring; /* default keyring to attach requested keys to */
1147 struct key *request_key_auth; /* assumed request_key authority */ 1132 struct key *request_key_auth; /* assumed request_key authority */
1148 struct key *thread_keyring; /* keyring private to this thread */ 1133 struct key *thread_keyring; /* keyring private to this thread */
1149 unsigned char jit_keyring; /* default keyring to attach requested keys to */
1150#endif 1134#endif
1151 char comm[TASK_COMM_LEN]; /* executable name excluding path 1135 char comm[TASK_COMM_LEN]; /* executable name excluding path
1152 - access with [gs]et_task_comm (which lock 1136 - access with [gs]et_task_comm (which lock
@@ -1233,8 +1217,8 @@ struct task_struct {
1233# define MAX_LOCK_DEPTH 48UL 1217# define MAX_LOCK_DEPTH 48UL
1234 u64 curr_chain_key; 1218 u64 curr_chain_key;
1235 int lockdep_depth; 1219 int lockdep_depth;
1236 struct held_lock held_locks[MAX_LOCK_DEPTH];
1237 unsigned int lockdep_recursion; 1220 unsigned int lockdep_recursion;
1221 struct held_lock held_locks[MAX_LOCK_DEPTH];
1238#endif 1222#endif
1239 1223
1240/* journalling filesystem info */ 1224/* journalling filesystem info */
@@ -1262,10 +1246,6 @@ struct task_struct {
1262 u64 acct_vm_mem1; /* accumulated virtual memory usage */ 1246 u64 acct_vm_mem1; /* accumulated virtual memory usage */
1263 cputime_t acct_stimexpd;/* stime since last update */ 1247 cputime_t acct_stimexpd;/* stime since last update */
1264#endif 1248#endif
1265#ifdef CONFIG_NUMA
1266 struct mempolicy *mempolicy;
1267 short il_next;
1268#endif
1269#ifdef CONFIG_CPUSETS 1249#ifdef CONFIG_CPUSETS
1270 nodemask_t mems_allowed; 1250 nodemask_t mems_allowed;
1271 int cpuset_mems_generation; 1251 int cpuset_mems_generation;
@@ -1285,6 +1265,10 @@ struct task_struct {
1285 struct list_head pi_state_list; 1265 struct list_head pi_state_list;
1286 struct futex_pi_state *pi_state_cache; 1266 struct futex_pi_state *pi_state_cache;
1287#endif 1267#endif
1268#ifdef CONFIG_NUMA
1269 struct mempolicy *mempolicy;
1270 short il_next;
1271#endif
1288 atomic_t fs_excl; /* holding fs exclusive resources */ 1272 atomic_t fs_excl; /* holding fs exclusive resources */
1289 struct rcu_head rcu; 1273 struct rcu_head rcu;
1290 1274
@@ -1504,6 +1488,7 @@ static inline void put_task_struct(struct task_struct *t)
1504#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ 1488#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1505#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1489#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1506#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1490#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1491#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
1507#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 1492#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1508#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1493#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1509#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ 1494#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
@@ -1637,6 +1622,7 @@ extern unsigned int sysctl_sched_child_runs_first;
1637extern unsigned int sysctl_sched_features; 1622extern unsigned int sysctl_sched_features;
1638extern unsigned int sysctl_sched_migration_cost; 1623extern unsigned int sysctl_sched_migration_cost;
1639extern unsigned int sysctl_sched_nr_migrate; 1624extern unsigned int sysctl_sched_nr_migrate;
1625extern unsigned int sysctl_sched_shares_ratelimit;
1640 1626
1641int sched_nr_latency_handler(struct ctl_table *table, int write, 1627int sched_nr_latency_handler(struct ctl_table *table, int write,
1642 struct file *file, void __user *buffer, size_t *length, 1628 struct file *file, void __user *buffer, size_t *length,
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 2ca6bae88721..fb0c215a3051 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -339,6 +339,7 @@ struct xfrm_usersa_info {
339#define XFRM_STATE_NOPMTUDISC 4 339#define XFRM_STATE_NOPMTUDISC 4
340#define XFRM_STATE_WILDRECV 8 340#define XFRM_STATE_WILDRECV 8
341#define XFRM_STATE_ICMP 16 341#define XFRM_STATE_ICMP 16
342#define XFRM_STATE_AF_UNSPEC 32
342}; 343};
343 344
344struct xfrm_usersa_id { 345struct xfrm_usersa_id {
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..6c55301112e0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ 5obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
6 exit.o itimer.o time.o softirq.o resource.o \ 6 cpu.o exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o capability.o ptrace.o timer.o user.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
27obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 27obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
28obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 28obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
29obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 29obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
30obj-$(CONFIG_SMP) += cpu.o spinlock.o 30obj-$(CONFIG_SMP) += spinlock.o
31obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 31obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
32obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 32obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
33obj-$(CONFIG_UID16) += uid16.o 33obj-$(CONFIG_UID16) += uid16.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
70obj-$(CONFIG_MARKERS) += marker.o 70obj-$(CONFIG_MARKERS) += marker.o
71obj-$(CONFIG_LATENCYTOP) += latencytop.o 71obj-$(CONFIG_LATENCYTOP) += latencytop.o
72obj-$(CONFIG_SMP) += sched_cpupri.o
72 73
73ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 74ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
74# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 75# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c77bc3a1c722..b11f06dc149a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,6 +15,28 @@
15#include <linux/stop_machine.h> 15#include <linux/stop_machine.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17 17
18/*
19 * Represents all cpu's present in the system
20 * In systems capable of hotplug, this map could dynamically grow
21 * as new cpu's are detected in the system via any platform specific
22 * method, such as ACPI for e.g.
23 */
24cpumask_t cpu_present_map __read_mostly;
25EXPORT_SYMBOL(cpu_present_map);
26
27#ifndef CONFIG_SMP
28
29/*
30 * Represents all cpu's that are currently online.
31 */
32cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
33EXPORT_SYMBOL(cpu_online_map);
34
35cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
36EXPORT_SYMBOL(cpu_possible_map);
37
38#else /* CONFIG_SMP */
39
18/* Serializes the updates to cpu_online_map, cpu_present_map */ 40/* Serializes the updates to cpu_online_map, cpu_present_map */
19static DEFINE_MUTEX(cpu_add_remove_lock); 41static DEFINE_MUTEX(cpu_add_remove_lock);
20 42
@@ -403,3 +425,5 @@ out:
403 cpu_maps_update_done(); 425 cpu_maps_update_done();
404} 426}
405#endif /* CONFIG_PM_SLEEP_SMP */ 427#endif /* CONFIG_PM_SLEEP_SMP */
428
429#endif /* CONFIG_SMP */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9fceb97e989c..64a05da9bc4c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
1194 1194
1195 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1195 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1196 return -ENOSPC; 1196 return -ENOSPC;
1197 if (tsk->flags & PF_THREAD_BOUND) {
1198 cpumask_t mask;
1199
1200 mutex_lock(&callback_mutex);
1201 mask = cs->cpus_allowed;
1202 mutex_unlock(&callback_mutex);
1203 if (!cpus_equal(tsk->cpus_allowed, mask))
1204 return -EINVAL;
1205 }
1197 1206
1198 return security_task_setscheduler(tsk, 0, NULL); 1207 return security_task_setscheduler(tsk, 0, NULL);
1199} 1208}
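
The cpuset hook above now refuses to move a PF_THREAD_BOUND kthread (one pinned via kthread_bind(), see the kernel/kthread.c hunk below) into a cpuset unless the cpuset's cpus_allowed is exactly the mask the thread is already bound to. A standalone sketch of that admission check using plain bitmasks; the types are hypothetical and the callback_mutex locking is omitted:

    #include <errno.h>
    #include <stdio.h>

    #define PF_THREAD_BOUND 0x04000000      /* thread pinned to one cpu */

    struct task   { unsigned int flags; unsigned long cpus_allowed; };
    struct cpuset { unsigned long cpus_allowed; };

    /* return 0 if 'tsk' may be attached to 'cs', or a negative errno */
    static int can_attach(const struct cpuset *cs, const struct task *tsk)
    {
            if (!cs->cpus_allowed)
                    return -ENOSPC;         /* empty cpuset */

            /* a bound kthread may only join a cpuset with its exact mask */
            if ((tsk->flags & PF_THREAD_BOUND) &&
                tsk->cpus_allowed != cs->cpus_allowed)
                    return -EINVAL;

            return 0;
    }

    int main(void)
    {
            struct task   kthread = { PF_THREAD_BOUND, 1UL << 2 }; /* bound to cpu 2 */
            struct cpuset wide    = { 0xfUL };                     /* cpus 0-3 */
            struct cpuset exact   = { 1UL << 2 };                  /* cpu 2 only */

            printf("%d %d\n", can_attach(&wide, &kthread),         /* -22 (EINVAL) */
                              can_attach(&exact, &kthread));       /* 0 */
            return 0;
    }
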
@@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1207 struct mm_struct *mm; 1216 struct mm_struct *mm;
1208 struct cpuset *cs = cgroup_cs(cont); 1217 struct cpuset *cs = cgroup_cs(cont);
1209 struct cpuset *oldcs = cgroup_cs(oldcont); 1218 struct cpuset *oldcs = cgroup_cs(oldcont);
1219 int err;
1210 1220
1211 mutex_lock(&callback_mutex); 1221 mutex_lock(&callback_mutex);
1212 guarantee_online_cpus(cs, &cpus); 1222 guarantee_online_cpus(cs, &cpus);
1213 set_cpus_allowed_ptr(tsk, &cpus); 1223 err = set_cpus_allowed_ptr(tsk, &cpus);
1214 mutex_unlock(&callback_mutex); 1224 mutex_unlock(&callback_mutex);
1225 if (err)
1226 return;
1215 1227
1216 from = oldcs->mems_allowed; 1228 from = oldcs->mems_allowed;
1217 to = cs->mems_allowed; 1229 to = cs->mems_allowed;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bd1b9ea024e1..97747cdd37c9 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
180 set_task_cpu(k, cpu); 180 set_task_cpu(k, cpu);
181 k->cpus_allowed = cpumask_of_cpu(cpu); 181 k->cpus_allowed = cpumask_of_cpu(cpu);
182 k->rt.nr_cpus_allowed = 1; 182 k->rt.nr_cpus_allowed = 1;
183 k->flags |= PF_THREAD_BOUND;
183} 184}
184EXPORT_SYMBOL(kthread_bind); 185EXPORT_SYMBOL(kthread_bind);
185 186
diff --git a/kernel/sched.c b/kernel/sched.c
index 94ead43eda62..d16c8d9fbd8b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -74,6 +74,8 @@
74#include <asm/tlb.h> 74#include <asm/tlb.h>
75#include <asm/irq_regs.h> 75#include <asm/irq_regs.h>
76 76
77#include "sched_cpupri.h"
78
77/* 79/*
78 * Convert user-nice values [ -20 ... 0 ... 19 ] 80 * Convert user-nice values [ -20 ... 0 ... 19 ]
79 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 81 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -289,15 +291,15 @@ struct task_group root_task_group;
289static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); 291static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
290/* Default task group's cfs_rq on each cpu */ 292/* Default task group's cfs_rq on each cpu */
291static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; 293static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
292#endif 294#endif /* CONFIG_FAIR_GROUP_SCHED */
293 295
294#ifdef CONFIG_RT_GROUP_SCHED 296#ifdef CONFIG_RT_GROUP_SCHED
295static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); 297static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
296static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; 298static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
297#endif 299#endif /* CONFIG_RT_GROUP_SCHED */
298#else 300#else /* !CONFIG_FAIR_GROUP_SCHED */
299#define root_task_group init_task_group 301#define root_task_group init_task_group
300#endif 302#endif /* CONFIG_FAIR_GROUP_SCHED */
301 303
302/* task_group_lock serializes add/remove of task groups and also changes to 304/* task_group_lock serializes add/remove of task groups and also changes to
303 * a task group's cpu shares. 305 * a task group's cpu shares.
@@ -307,9 +309,9 @@ static DEFINE_SPINLOCK(task_group_lock);
307#ifdef CONFIG_FAIR_GROUP_SCHED 309#ifdef CONFIG_FAIR_GROUP_SCHED
308#ifdef CONFIG_USER_SCHED 310#ifdef CONFIG_USER_SCHED
309# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) 311# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
310#else 312#else /* !CONFIG_USER_SCHED */
311# define INIT_TASK_GROUP_LOAD NICE_0_LOAD 313# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
312#endif 314#endif /* CONFIG_USER_SCHED */
313 315
314/* 316/*
315 * A weight of 0 or 1 can cause arithmetics problems. 317 * A weight of 0 or 1 can cause arithmetics problems.
@@ -363,6 +365,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
363#else 365#else
364 366
365static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } 367static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
368static inline struct task_group *task_group(struct task_struct *p)
369{
370 return NULL;
371}
366 372
367#endif /* CONFIG_GROUP_SCHED */ 373#endif /* CONFIG_GROUP_SCHED */
368 374
@@ -373,6 +379,7 @@ struct cfs_rq {
373 379
374 u64 exec_clock; 380 u64 exec_clock;
375 u64 min_vruntime; 381 u64 min_vruntime;
382 u64 pair_start;
376 383
377 struct rb_root tasks_timeline; 384 struct rb_root tasks_timeline;
378 struct rb_node *rb_leftmost; 385 struct rb_node *rb_leftmost;
@@ -401,6 +408,31 @@ struct cfs_rq {
401 */ 408 */
402 struct list_head leaf_cfs_rq_list; 409 struct list_head leaf_cfs_rq_list;
403 struct task_group *tg; /* group that "owns" this runqueue */ 410 struct task_group *tg; /* group that "owns" this runqueue */
411
412#ifdef CONFIG_SMP
413 /*
414 * the part of load.weight contributed by tasks
415 */
416 unsigned long task_weight;
417
418 /*
419 * h_load = weight * f(tg)
420 *
421 * Where f(tg) is the recursive weight fraction assigned to
422 * this group.
423 */
424 unsigned long h_load;
425
426 /*
427 * this cpu's part of tg->shares
428 */
429 unsigned long shares;
430
431 /*
432 * load.weight at the time we set shares
433 */
434 unsigned long rq_weight;
435#endif
404#endif 436#endif
405}; 437};
406 438
@@ -452,6 +484,9 @@ struct root_domain {
452 */ 484 */
453 cpumask_t rto_mask; 485 cpumask_t rto_mask;
454 atomic_t rto_count; 486 atomic_t rto_count;
487#ifdef CONFIG_SMP
488 struct cpupri cpupri;
489#endif
455}; 490};
456 491
457/* 492/*
@@ -526,6 +561,9 @@ struct rq {
526 int push_cpu; 561 int push_cpu;
527 /* cpu of this runqueue: */ 562 /* cpu of this runqueue: */
528 int cpu; 563 int cpu;
564 int online;
565
566 unsigned long avg_load_per_task;
529 567
530 struct task_struct *migration_thread; 568 struct task_struct *migration_thread;
531 struct list_head migration_queue; 569 struct list_head migration_queue;
@@ -749,6 +787,12 @@ late_initcall(sched_init_debug);
749const_debug unsigned int sysctl_sched_nr_migrate = 32; 787const_debug unsigned int sysctl_sched_nr_migrate = 32;
750 788
751/* 789/*
790 * ratelimit for updating the group shares.
791 * default: 0.5ms
792 */
793const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
794
795/*
752 * period over which we measure -rt task cpu usage in us. 796 * period over which we measure -rt task cpu usage in us.
753 * default: 1s 797 * default: 1s
754 */ 798 */
@@ -775,82 +819,6 @@ static inline u64 global_rt_runtime(void)
775 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 819 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
776} 820}
777 821
778unsigned long long time_sync_thresh = 100000;
779
780static DEFINE_PER_CPU(unsigned long long, time_offset);
781static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
782
783/*
784 * Global lock which we take every now and then to synchronize
785 * the CPUs time. This method is not warp-safe, but it's good
786 * enough to synchronize slowly diverging time sources and thus
787 * it's good enough for tracing:
788 */
789static DEFINE_SPINLOCK(time_sync_lock);
790static unsigned long long prev_global_time;
791
792static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
793{
794 /*
795 * We want this inlined, to not get tracer function calls
796 * in this critical section:
797 */
798 spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
799 __raw_spin_lock(&time_sync_lock.raw_lock);
800
801 if (time < prev_global_time) {
802 per_cpu(time_offset, cpu) += prev_global_time - time;
803 time = prev_global_time;
804 } else {
805 prev_global_time = time;
806 }
807
808 __raw_spin_unlock(&time_sync_lock.raw_lock);
809 spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
810
811 return time;
812}
813
814static unsigned long long __cpu_clock(int cpu)
815{
816 unsigned long long now;
817
818 /*
819 * Only call sched_clock() if the scheduler has already been
820 * initialized (some code might call cpu_clock() very early):
821 */
822 if (unlikely(!scheduler_running))
823 return 0;
824
825 now = sched_clock_cpu(cpu);
826
827 return now;
828}
829
830/*
831 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
832 * clock constructed from sched_clock():
833 */
834unsigned long long cpu_clock(int cpu)
835{
836 unsigned long long prev_cpu_time, time, delta_time;
837 unsigned long flags;
838
839 local_irq_save(flags);
840 prev_cpu_time = per_cpu(prev_cpu_time, cpu);
841 time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
842 delta_time = time-prev_cpu_time;
843
844 if (unlikely(delta_time > time_sync_thresh)) {
845 time = __sync_cpu_clock(time, cpu);
846 per_cpu(prev_cpu_time, cpu) = time;
847 }
848 local_irq_restore(flags);
849
850 return time;
851}
852EXPORT_SYMBOL_GPL(cpu_clock);
853
854#ifndef prepare_arch_switch 822#ifndef prepare_arch_switch
855# define prepare_arch_switch(next) do { } while (0) 823# define prepare_arch_switch(next) do { } while (0)
856#endif 824#endif
@@ -1313,15 +1281,15 @@ void wake_up_idle_cpu(int cpu)
1313 if (!tsk_is_polling(rq->idle)) 1281 if (!tsk_is_polling(rq->idle))
1314 smp_send_reschedule(cpu); 1282 smp_send_reschedule(cpu);
1315} 1283}
1316#endif 1284#endif /* CONFIG_NO_HZ */
1317 1285
1318#else 1286#else /* !CONFIG_SMP */
1319static void __resched_task(struct task_struct *p, int tif_bit) 1287static void __resched_task(struct task_struct *p, int tif_bit)
1320{ 1288{
1321 assert_spin_locked(&task_rq(p)->lock); 1289 assert_spin_locked(&task_rq(p)->lock);
1322 set_tsk_thread_flag(p, tif_bit); 1290 set_tsk_thread_flag(p, tif_bit);
1323} 1291}
1324#endif 1292#endif /* CONFIG_SMP */
1325 1293
1326#if BITS_PER_LONG == 32 1294#if BITS_PER_LONG == 32
1327# define WMULT_CONST (~0UL) 1295# define WMULT_CONST (~0UL)
@@ -1336,6 +1304,9 @@ static void __resched_task(struct task_struct *p, int tif_bit)
1336 */ 1304 */
1337#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) 1305#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
1338 1306
1307/*
1308 * delta *= weight / lw
1309 */
1339static unsigned long 1310static unsigned long
1340calc_delta_mine(unsigned long delta_exec, unsigned long weight, 1311calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1341 struct load_weight *lw) 1312 struct load_weight *lw)
@@ -1363,12 +1334,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1363 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); 1334 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
1364} 1335}
1365 1336
1366static inline unsigned long
1367calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
1368{
1369 return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
1370}
1371
1372static inline void update_load_add(struct load_weight *lw, unsigned long inc) 1337static inline void update_load_add(struct load_weight *lw, unsigned long inc)
1373{ 1338{
1374 lw->weight += inc; 1339 lw->weight += inc;
@@ -1479,17 +1444,211 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
1479#ifdef CONFIG_SMP 1444#ifdef CONFIG_SMP
1480static unsigned long source_load(int cpu, int type); 1445static unsigned long source_load(int cpu, int type);
1481static unsigned long target_load(int cpu, int type); 1446static unsigned long target_load(int cpu, int type);
1482static unsigned long cpu_avg_load_per_task(int cpu);
1483static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); 1447static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
1484#else /* CONFIG_SMP */ 1448
1449static unsigned long cpu_avg_load_per_task(int cpu)
1450{
1451 struct rq *rq = cpu_rq(cpu);
1452
1453 if (rq->nr_running)
1454 rq->avg_load_per_task = rq->load.weight / rq->nr_running;
1455
1456 return rq->avg_load_per_task;
1457}
1485 1458
1486#ifdef CONFIG_FAIR_GROUP_SCHED 1459#ifdef CONFIG_FAIR_GROUP_SCHED
1487static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) 1460
1461typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
1462
1463/*
1464 * Iterate the full tree, calling @down when first entering a node and @up when
1465 * leaving it for the final time.
1466 */
1467static void
1468walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
1488{ 1469{
1470 struct task_group *parent, *child;
1471
1472 rcu_read_lock();
1473 parent = &root_task_group;
1474down:
1475 (*down)(parent, cpu, sd);
1476 list_for_each_entry_rcu(child, &parent->children, siblings) {
1477 parent = child;
1478 goto down;
1479
1480up:
1481 continue;
1482 }
1483 (*up)(parent, cpu, sd);
1484
1485 child = parent;
1486 parent = parent->parent;
1487 if (parent)
1488 goto up;
1489 rcu_read_unlock();
1490}
1491
1492static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1493
1494/*
1495 * Calculate and set the cpu's group shares.
1496 */
1497static void
1498__update_group_shares_cpu(struct task_group *tg, int cpu,
1499 unsigned long sd_shares, unsigned long sd_rq_weight)
1500{
1501 int boost = 0;
1502 unsigned long shares;
1503 unsigned long rq_weight;
1504
1505 if (!tg->se[cpu])
1506 return;
1507
1508 rq_weight = tg->cfs_rq[cpu]->load.weight;
1509
1510 /*
1511 * If there are currently no tasks on the cpu pretend there is one of
1512 * average load so that when a new task gets to run here it will not
1513 * get delayed by group starvation.
1514 */
1515 if (!rq_weight) {
1516 boost = 1;
1517 rq_weight = NICE_0_LOAD;
1518 }
1519
1520 if (unlikely(rq_weight > sd_rq_weight))
1521 rq_weight = sd_rq_weight;
1522
1523 /*
1524 * \Sum shares * rq_weight
1525 * shares = -----------------------
1526 * \Sum rq_weight
1527 *
1528 */
1529 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
1530
1531 /*
1532 * record the actual number of shares, not the boosted amount.
1533 */
1534 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1535 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1536
1537 if (shares < MIN_SHARES)
1538 shares = MIN_SHARES;
1539 else if (shares > MAX_SHARES)
1540 shares = MAX_SHARES;
1541
1542 __set_se_shares(tg->se[cpu], shares);
1543}
1544
1545/*
1546 * Re-compute the task group their per cpu shares over the given domain.
1547 * This needs to be done in a bottom-up fashion because the rq weight of a
1548 * parent group depends on the shares of its child groups.
1549 */
1550static void
1551tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
1552{
1553 unsigned long rq_weight = 0;
1554 unsigned long shares = 0;
1555 int i;
1556
1557 for_each_cpu_mask(i, sd->span) {
1558 rq_weight += tg->cfs_rq[i]->load.weight;
1559 shares += tg->cfs_rq[i]->shares;
1560 }
1561
1562 if ((!shares && rq_weight) || shares > tg->shares)
1563 shares = tg->shares;
1564
1565 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1566 shares = tg->shares;
1567
1568 if (!rq_weight)
1569 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
1570
1571 for_each_cpu_mask(i, sd->span) {
1572 struct rq *rq = cpu_rq(i);
1573 unsigned long flags;
1574
1575 spin_lock_irqsave(&rq->lock, flags);
1576 __update_group_shares_cpu(tg, i, shares, rq_weight);
1577 spin_unlock_irqrestore(&rq->lock, flags);
1578 }
1579}
1580
1581/*
1582 * Compute the cpu's hierarchical load factor for each task group.
1583 * This needs to be done in a top-down fashion because the load of a child
1584 * group is a fraction of its parents load.
1585 */
1586static void
1587tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
1588{
1589 unsigned long load;
1590
1591 if (!tg->parent) {
1592 load = cpu_rq(cpu)->load.weight;
1593 } else {
1594 load = tg->parent->cfs_rq[cpu]->h_load;
1595 load *= tg->cfs_rq[cpu]->shares;
1596 load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
1597 }
1598
1599 tg->cfs_rq[cpu]->h_load = load;
1600}
1601
1602static void
1603tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
1604{
1605}
1606
1607static void update_shares(struct sched_domain *sd)
1608{
1609 u64 now = cpu_clock(raw_smp_processor_id());
1610 s64 elapsed = now - sd->last_update;
1611
1612 if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
1613 sd->last_update = now;
1614 walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
1615 }
1489} 1616}
1617
1618static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1619{
1620 spin_unlock(&rq->lock);
1621 update_shares(sd);
1622 spin_lock(&rq->lock);
1623}
1624
1625static void update_h_load(int cpu)
1626{
1627 walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
1628}
1629
1630#else
1631
1632static inline void update_shares(struct sched_domain *sd)
1633{
1634}
1635
1636static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1637{
1638}
1639
1490#endif 1640#endif
1491 1641
1492#endif /* CONFIG_SMP */ 1642#endif
1643
1644#ifdef CONFIG_FAIR_GROUP_SCHED
1645static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1646{
1647#ifdef CONFIG_SMP
1648 cfs_rq->shares = shares;
1649#endif
1650}
1651#endif
1493 1652
1494#include "sched_stats.h" 1653#include "sched_stats.h"
1495#include "sched_idletask.c" 1654#include "sched_idletask.c"
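
The heart of the group-fairness rework above is __update_group_shares_cpu(): each CPU receives the slice shares_i = tg->shares * rq_weight_i / Σ rq_weight, clamped to [MIN_SHARES, MAX_SHARES], and tg_shares_up() recomputes this bottom-up over a domain no more often than sysctl_sched_shares_ratelimit allows. A userspace sketch of just that arithmetic; the MIN/MAX values are placeholders rather than the kernel's, and the idle-CPU boost to NICE_0_LOAD is left out:

    #include <stdio.h>

    #define MIN_SHARES         2UL
    #define MAX_SHARES   (1UL << 18)

    /*
     * Distribute 'tg_shares' over the CPUs of a domain in proportion to
     * each CPU's runqueue weight, as __update_group_shares_cpu() does.
     */
    static void distribute_shares(unsigned long tg_shares,
                                  const unsigned long *rq_weight, int cpus)
    {
            unsigned long sum = 0;
            int i;

            for (i = 0; i < cpus; i++)
                    sum += rq_weight[i];

            for (i = 0; i < cpus; i++) {
                    /* shares = \Sum shares * rq_weight / \Sum rq_weight */
                    unsigned long shares = tg_shares * rq_weight[i] / (sum + 1);

                    if (shares < MIN_SHARES)
                            shares = MIN_SHARES;
                    else if (shares > MAX_SHARES)
                            shares = MAX_SHARES;

                    printf("cpu%d: weight %lu -> shares %lu\n",
                           i, rq_weight[i], shares);
            }
    }

    int main(void)
    {
            /* three CPUs: one busy, one moderately loaded, one nearly idle */
            unsigned long rq_weight[] = { 3072, 1024, 0 };

            distribute_shares(1024, rq_weight, 3);
            return 0;
    }
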
@@ -1500,27 +1659,17 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1500#endif 1659#endif
1501 1660
1502#define sched_class_highest (&rt_sched_class) 1661#define sched_class_highest (&rt_sched_class)
1662#define for_each_class(class) \
1663 for (class = sched_class_highest; class; class = class->next)
1503 1664
1504static inline void inc_load(struct rq *rq, const struct task_struct *p) 1665static void inc_nr_running(struct rq *rq)
1505{
1506 update_load_add(&rq->load, p->se.load.weight);
1507}
1508
1509static inline void dec_load(struct rq *rq, const struct task_struct *p)
1510{
1511 update_load_sub(&rq->load, p->se.load.weight);
1512}
1513
1514static void inc_nr_running(struct task_struct *p, struct rq *rq)
1515{ 1666{
1516 rq->nr_running++; 1667 rq->nr_running++;
1517 inc_load(rq, p);
1518} 1668}
1519 1669
1520static void dec_nr_running(struct task_struct *p, struct rq *rq) 1670static void dec_nr_running(struct rq *rq)
1521{ 1671{
1522 rq->nr_running--; 1672 rq->nr_running--;
1523 dec_load(rq, p);
1524} 1673}
1525 1674
1526static void set_load_weight(struct task_struct *p) 1675static void set_load_weight(struct task_struct *p)
@@ -1544,6 +1693,12 @@ static void set_load_weight(struct task_struct *p)
1544 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; 1693 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
1545} 1694}
1546 1695
1696static void update_avg(u64 *avg, u64 sample)
1697{
1698 s64 diff = sample - *avg;
1699 *avg += diff >> 3;
1700}
1701
1547static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) 1702static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
1548{ 1703{
1549 sched_info_queued(p); 1704 sched_info_queued(p);
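
update_avg() in the hunk above is a cheap exponential moving average: each new sample moves the average one eighth of the way toward it (diff >> 3), so no history needs to be stored. The same arithmetic, runnable on its own:

    #include <stdio.h>
    #include <stdint.h>

    static void update_avg(uint64_t *avg, uint64_t sample)
    {
            int64_t diff = sample - *avg;

            *avg += diff >> 3;      /* move 1/8 of the way toward the sample */
    }

    int main(void)
    {
            uint64_t avg = 0;
            uint64_t samples[] = { 800, 800, 800, 100, 100 };

            for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                    update_avg(&avg, samples[i]);
                    printf("sample %llu -> avg %llu\n",
                           (unsigned long long)samples[i],
                           (unsigned long long)avg);
            }
            return 0;
    }
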
@@ -1553,6 +1708,13 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
1553 1708
1554static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) 1709static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
1555{ 1710{
1711 if (sleep && p->se.last_wakeup) {
1712 update_avg(&p->se.avg_overlap,
1713 p->se.sum_exec_runtime - p->se.last_wakeup);
1714 p->se.last_wakeup = 0;
1715 }
1716
1717 sched_info_dequeued(p);
1556 p->sched_class->dequeue_task(rq, p, sleep); 1718 p->sched_class->dequeue_task(rq, p, sleep);
1557 p->se.on_rq = 0; 1719 p->se.on_rq = 0;
1558} 1720}
@@ -1612,7 +1774,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
1612 rq->nr_uninterruptible--; 1774 rq->nr_uninterruptible--;
1613 1775
1614 enqueue_task(rq, p, wakeup); 1776 enqueue_task(rq, p, wakeup);
1615 inc_nr_running(p, rq); 1777 inc_nr_running(rq);
1616} 1778}
1617 1779
1618/* 1780/*
@@ -1624,7 +1786,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
1624 rq->nr_uninterruptible++; 1786 rq->nr_uninterruptible++;
1625 1787
1626 dequeue_task(rq, p, sleep); 1788 dequeue_task(rq, p, sleep);
1627 dec_nr_running(p, rq); 1789 dec_nr_running(rq);
1628} 1790}
1629 1791
1630/** 1792/**
@@ -1636,12 +1798,6 @@ inline int task_curr(const struct task_struct *p)
1636 return cpu_curr(task_cpu(p)) == p; 1798 return cpu_curr(task_cpu(p)) == p;
1637} 1799}
1638 1800
1639/* Used instead of source_load when we know the type == 0 */
1640unsigned long weighted_cpuload(const int cpu)
1641{
1642 return cpu_rq(cpu)->load.weight;
1643}
1644
1645static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) 1801static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1646{ 1802{
1647 set_task_rq(p, cpu); 1803 set_task_rq(p, cpu);
@@ -1670,6 +1826,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1670 1826
1671#ifdef CONFIG_SMP 1827#ifdef CONFIG_SMP
1672 1828
1829/* Used instead of source_load when we know the type == 0 */
1830static unsigned long weighted_cpuload(const int cpu)
1831{
1832 return cpu_rq(cpu)->load.weight;
1833}
1834
1673/* 1835/*
1674 * Is this task likely cache-hot: 1836 * Is this task likely cache-hot:
1675 */ 1837 */
@@ -1880,7 +2042,7 @@ static unsigned long source_load(int cpu, int type)
1880 struct rq *rq = cpu_rq(cpu); 2042 struct rq *rq = cpu_rq(cpu);
1881 unsigned long total = weighted_cpuload(cpu); 2043 unsigned long total = weighted_cpuload(cpu);
1882 2044
1883 if (type == 0) 2045 if (type == 0 || !sched_feat(LB_BIAS))
1884 return total; 2046 return total;
1885 2047
1886 return min(rq->cpu_load[type-1], total); 2048 return min(rq->cpu_load[type-1], total);
@@ -1895,25 +2057,13 @@ static unsigned long target_load(int cpu, int type)
1895 struct rq *rq = cpu_rq(cpu); 2057 struct rq *rq = cpu_rq(cpu);
1896 unsigned long total = weighted_cpuload(cpu); 2058 unsigned long total = weighted_cpuload(cpu);
1897 2059
1898 if (type == 0) 2060 if (type == 0 || !sched_feat(LB_BIAS))
1899 return total; 2061 return total;
1900 2062
1901 return max(rq->cpu_load[type-1], total); 2063 return max(rq->cpu_load[type-1], total);
1902} 2064}
1903 2065
1904/* 2066/*
1905 * Return the average load per task on the cpu's run queue
1906 */
1907static unsigned long cpu_avg_load_per_task(int cpu)
1908{
1909 struct rq *rq = cpu_rq(cpu);
1910 unsigned long total = weighted_cpuload(cpu);
1911 unsigned long n = rq->nr_running;
1912
1913 return n ? total / n : SCHED_LOAD_SCALE;
1914}
1915
1916/*
1917 * find_idlest_group finds and returns the least busy CPU group within the 2067 * find_idlest_group finds and returns the least busy CPU group within the
1918 * domain. 2068 * domain.
1919 */ 2069 */
@@ -2019,6 +2169,9 @@ static int sched_balance_self(int cpu, int flag)
2019 sd = tmp; 2169 sd = tmp;
2020 } 2170 }
2021 2171
2172 if (sd)
2173 update_shares(sd);
2174
2022 while (sd) { 2175 while (sd) {
2023 cpumask_t span, tmpmask; 2176 cpumask_t span, tmpmask;
2024 struct sched_group *group; 2177 struct sched_group *group;
@@ -2085,6 +2238,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2085 if (!sched_feat(SYNC_WAKEUPS)) 2238 if (!sched_feat(SYNC_WAKEUPS))
2086 sync = 0; 2239 sync = 0;
2087 2240
2241#ifdef CONFIG_SMP
2242 if (sched_feat(LB_WAKEUP_UPDATE)) {
2243 struct sched_domain *sd;
2244
2245 this_cpu = raw_smp_processor_id();
2246 cpu = task_cpu(p);
2247
2248 for_each_domain(this_cpu, sd) {
2249 if (cpu_isset(cpu, sd->span)) {
2250 update_shares(sd);
2251 break;
2252 }
2253 }
2254 }
2255#endif
2256
2088 smp_wmb(); 2257 smp_wmb();
2089 rq = task_rq_lock(p, &flags); 2258 rq = task_rq_lock(p, &flags);
2090 old_state = p->state; 2259 old_state = p->state;
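
The LB_WAKEUP_UPDATE block above walks this CPU's scheduling domains from the smallest upward and refreshes the group shares of the first domain whose span contains the waking task's CPU, so the placement decision that follows sees fresh share values. A sketch of the "smallest domain spanning both CPUs" search with bitmask spans; the domain layout below is invented for illustration:

    #include <stdio.h>

    struct domain {
            const char *name;
            unsigned long span;             /* bit i set => cpu i is in the domain */
            struct domain *parent;          /* next larger domain */
    };

    /* walk from the base (smallest) domain upward, like for_each_domain() */
    static struct domain *find_domain(struct domain *base, int target_cpu)
    {
            struct domain *sd;

            for (sd = base; sd; sd = sd->parent)
                    if (sd->span & (1UL << target_cpu))
                            return sd;      /* first (smallest) match wins */
            return NULL;
    }

    int main(void)
    {
            struct domain node = { "node", 0xffUL, NULL };          /* cpus 0-7 */
            struct domain core = { "core", 0x03UL, &node };         /* cpus 0-1 */

            /* domains of cpu 0, smallest first */
            printf("%s\n", find_domain(&core, 1)->name);            /* core */
            printf("%s\n", find_domain(&core, 5)->name);            /* node */
            return 0;
    }
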
@@ -2131,7 +2300,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2131 } 2300 }
2132 } 2301 }
2133 } 2302 }
2134#endif 2303#endif /* CONFIG_SCHEDSTATS */
2135 2304
2136out_activate: 2305out_activate:
2137#endif /* CONFIG_SMP */ 2306#endif /* CONFIG_SMP */
@@ -2157,6 +2326,8 @@ out_running:
2157 p->sched_class->task_wake_up(rq, p); 2326 p->sched_class->task_wake_up(rq, p);
2158#endif 2327#endif
2159out: 2328out:
2329 current->se.last_wakeup = current->se.sum_exec_runtime;
2330
2160 task_rq_unlock(rq, &flags); 2331 task_rq_unlock(rq, &flags);
2161 2332
2162 return success; 2333 return success;
@@ -2277,7 +2448,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2277 * management (if any): 2448 * management (if any):
2278 */ 2449 */
2279 p->sched_class->task_new(rq, p); 2450 p->sched_class->task_new(rq, p);
2280 inc_nr_running(p, rq); 2451 inc_nr_running(rq);
2281 } 2452 }
2282 check_preempt_curr(rq, p); 2453 check_preempt_curr(rq, p);
2283#ifdef CONFIG_SMP 2454#ifdef CONFIG_SMP
@@ -2331,7 +2502,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
2331 notifier->ops->sched_out(notifier, next); 2502 notifier->ops->sched_out(notifier, next);
2332} 2503}
2333 2504
2334#else 2505#else /* !CONFIG_PREEMPT_NOTIFIERS */
2335 2506
2336static void fire_sched_in_preempt_notifiers(struct task_struct *curr) 2507static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
2337{ 2508{
@@ -2343,7 +2514,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
2343{ 2514{
2344} 2515}
2345 2516
2346#endif 2517#endif /* CONFIG_PREEMPT_NOTIFIERS */
2347 2518
2348/** 2519/**
2349 * prepare_task_switch - prepare to switch tasks 2520 * prepare_task_switch - prepare to switch tasks
@@ -2785,7 +2956,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2785 enum cpu_idle_type idle, int *all_pinned, 2956 enum cpu_idle_type idle, int *all_pinned,
2786 int *this_best_prio, struct rq_iterator *iterator) 2957 int *this_best_prio, struct rq_iterator *iterator)
2787{ 2958{
2788 int loops = 0, pulled = 0, pinned = 0, skip_for_load; 2959 int loops = 0, pulled = 0, pinned = 0;
2789 struct task_struct *p; 2960 struct task_struct *p;
2790 long rem_load_move = max_load_move; 2961 long rem_load_move = max_load_move;
2791 2962
@@ -2801,14 +2972,8 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2801next: 2972next:
2802 if (!p || loops++ > sysctl_sched_nr_migrate) 2973 if (!p || loops++ > sysctl_sched_nr_migrate)
2803 goto out; 2974 goto out;
2804 /* 2975
2805 * To help distribute high priority tasks across CPUs we don't 2976 if ((p->se.load.weight >> 1) > rem_load_move ||
2806 * skip a task if it will be the highest priority task (i.e. smallest
2807 * prio value) on its new queue regardless of its load weight
2808 */
2809 skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
2810 SCHED_LOAD_SCALE_FUZZ;
2811 if ((skip_for_load && p->prio >= *this_best_prio) ||
2812 !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { 2977 !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
2813 p = iterator->next(iterator->arg); 2978 p = iterator->next(iterator->arg);
2814 goto next; 2979 goto next;
@@ -2863,6 +3028,10 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2863 max_load_move - total_load_moved, 3028 max_load_move - total_load_moved,
2864 sd, idle, all_pinned, &this_best_prio); 3029 sd, idle, all_pinned, &this_best_prio);
2865 class = class->next; 3030 class = class->next;
3031
3032 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
3033 break;
3034
2866 } while (class && max_load_move > total_load_moved); 3035 } while (class && max_load_move > total_load_moved);
2867 3036
2868 return total_load_moved > 0; 3037 return total_load_moved > 0;
@@ -2939,6 +3108,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2939 max_load = this_load = total_load = total_pwr = 0; 3108 max_load = this_load = total_load = total_pwr = 0;
2940 busiest_load_per_task = busiest_nr_running = 0; 3109 busiest_load_per_task = busiest_nr_running = 0;
2941 this_load_per_task = this_nr_running = 0; 3110 this_load_per_task = this_nr_running = 0;
3111
2942 if (idle == CPU_NOT_IDLE) 3112 if (idle == CPU_NOT_IDLE)
2943 load_idx = sd->busy_idx; 3113 load_idx = sd->busy_idx;
2944 else if (idle == CPU_NEWLY_IDLE) 3114 else if (idle == CPU_NEWLY_IDLE)
@@ -2953,6 +3123,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2953 int __group_imb = 0; 3123 int __group_imb = 0;
2954 unsigned int balance_cpu = -1, first_idle_cpu = 0; 3124 unsigned int balance_cpu = -1, first_idle_cpu = 0;
2955 unsigned long sum_nr_running, sum_weighted_load; 3125 unsigned long sum_nr_running, sum_weighted_load;
3126 unsigned long sum_avg_load_per_task;
3127 unsigned long avg_load_per_task;
2956 3128
2957 local_group = cpu_isset(this_cpu, group->cpumask); 3129 local_group = cpu_isset(this_cpu, group->cpumask);
2958 3130
@@ -2961,6 +3133,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2961 3133
2962 /* Tally up the load of all CPUs in the group */ 3134 /* Tally up the load of all CPUs in the group */
2963 sum_weighted_load = sum_nr_running = avg_load = 0; 3135 sum_weighted_load = sum_nr_running = avg_load = 0;
3136 sum_avg_load_per_task = avg_load_per_task = 0;
3137
2964 max_cpu_load = 0; 3138 max_cpu_load = 0;
2965 min_cpu_load = ~0UL; 3139 min_cpu_load = ~0UL;
2966 3140
@@ -2994,6 +3168,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2994 avg_load += load; 3168 avg_load += load;
2995 sum_nr_running += rq->nr_running; 3169 sum_nr_running += rq->nr_running;
2996 sum_weighted_load += weighted_cpuload(i); 3170 sum_weighted_load += weighted_cpuload(i);
3171
3172 sum_avg_load_per_task += cpu_avg_load_per_task(i);
2997 } 3173 }
2998 3174
2999 /* 3175 /*
@@ -3015,7 +3191,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3015 avg_load = sg_div_cpu_power(group, 3191 avg_load = sg_div_cpu_power(group,
3016 avg_load * SCHED_LOAD_SCALE); 3192 avg_load * SCHED_LOAD_SCALE);
3017 3193
3018 if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE) 3194
3195 /*
3196 * Consider the group unbalanced when the imbalance is larger
3197 * than the average weight of two tasks.
3198 *
3199 * APZ: with cgroup the avg task weight can vary wildly and
3200 * might not be a suitable number - should we keep a
3201 * normalized nr_running number somewhere that negates
3202 * the hierarchy?
3203 */
3204 avg_load_per_task = sg_div_cpu_power(group,
3205 sum_avg_load_per_task * SCHED_LOAD_SCALE);
3206
3207 if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
3019 __group_imb = 1; 3208 __group_imb = 1;
3020 3209
3021 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; 3210 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
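
The hunk above derives the group-imbalance threshold from the measured average task weight instead of the fixed SCHED_LOAD_SCALE: the group is flagged imbalanced when the spread between its most and least loaded CPU exceeds twice that average, i.e. when roughly two average tasks could usefully be moved. The comparison with illustrative numbers; the sg_div_cpu_power() normalization is folded into the made-up avg_load_per_task value:

    #include <stdio.h>

    int main(void)
    {
            /* per-cpu weighted loads inside one sched group (made-up numbers) */
            unsigned long max_cpu_load = 3072, min_cpu_load = 1024;
            unsigned long avg_load_per_task = 800;

            int group_imb = (max_cpu_load - min_cpu_load) > 2 * avg_load_per_task;

            printf("spread=%lu threshold=%lu imbalanced=%d\n",
                   max_cpu_load - min_cpu_load, 2 * avg_load_per_task, group_imb);
            return 0;
    }
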
@@ -3156,9 +3345,9 @@ small_imbalance:
3156 if (busiest_load_per_task > this_load_per_task) 3345 if (busiest_load_per_task > this_load_per_task)
3157 imbn = 1; 3346 imbn = 1;
3158 } else 3347 } else
3159 this_load_per_task = SCHED_LOAD_SCALE; 3348 this_load_per_task = cpu_avg_load_per_task(this_cpu);
3160 3349
3161 if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >= 3350 if (max_load - this_load + 2*busiest_load_per_task >=
3162 busiest_load_per_task * imbn) { 3351 busiest_load_per_task * imbn) {
3163 *imbalance = busiest_load_per_task; 3352 *imbalance = busiest_load_per_task;
3164 return busiest; 3353 return busiest;
@@ -3284,6 +3473,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3284 schedstat_inc(sd, lb_count[idle]); 3473 schedstat_inc(sd, lb_count[idle]);
3285 3474
3286redo: 3475redo:
3476 update_shares(sd);
3287 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 3477 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
3288 cpus, balance); 3478 cpus, balance);
3289 3479
@@ -3386,8 +3576,9 @@ redo:
3386 3576
3387 if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3577 if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3388 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 3578 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3389 return -1; 3579 ld_moved = -1;
3390 return ld_moved; 3580
3581 goto out;
3391 3582
3392out_balanced: 3583out_balanced:
3393 schedstat_inc(sd, lb_balanced[idle]); 3584 schedstat_inc(sd, lb_balanced[idle]);
@@ -3402,8 +3593,13 @@ out_one_pinned:
3402 3593
3403 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3594 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3404 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 3595 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3405 return -1; 3596 ld_moved = -1;
3406 return 0; 3597 else
3598 ld_moved = 0;
3599out:
3600 if (ld_moved)
3601 update_shares(sd);
3602 return ld_moved;
3407} 3603}
3408 3604
3409/* 3605/*
@@ -3438,6 +3634,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3438 3634
3439 schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); 3635 schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]);
3440redo: 3636redo:
3637 update_shares_locked(this_rq, sd);
3441 group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, 3638 group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE,
3442 &sd_idle, cpus, NULL); 3639 &sd_idle, cpus, NULL);
3443 if (!group) { 3640 if (!group) {
@@ -3481,6 +3678,7 @@ redo:
3481 } else 3678 } else
3482 sd->nr_balance_failed = 0; 3679 sd->nr_balance_failed = 0;
3483 3680
3681 update_shares_locked(this_rq, sd);
3484 return ld_moved; 3682 return ld_moved;
3485 3683
3486out_balanced: 3684out_balanced:
@@ -3672,6 +3870,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3672 /* Earliest time when we have to do rebalance again */ 3870 /* Earliest time when we have to do rebalance again */
3673 unsigned long next_balance = jiffies + 60*HZ; 3871 unsigned long next_balance = jiffies + 60*HZ;
3674 int update_next_balance = 0; 3872 int update_next_balance = 0;
3873 int need_serialize;
3675 cpumask_t tmp; 3874 cpumask_t tmp;
3676 3875
3677 for_each_domain(cpu, sd) { 3876 for_each_domain(cpu, sd) {
@@ -3689,8 +3888,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3689 if (interval > HZ*NR_CPUS/10) 3888 if (interval > HZ*NR_CPUS/10)
3690 interval = HZ*NR_CPUS/10; 3889 interval = HZ*NR_CPUS/10;
3691 3890
3891 need_serialize = sd->flags & SD_SERIALIZE;
3692 3892
3693 if (sd->flags & SD_SERIALIZE) { 3893 if (need_serialize) {
3694 if (!spin_trylock(&balancing)) 3894 if (!spin_trylock(&balancing))
3695 goto out; 3895 goto out;
3696 } 3896 }
@@ -3706,7 +3906,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3706 } 3906 }
3707 sd->last_balance = jiffies; 3907 sd->last_balance = jiffies;
3708 } 3908 }
3709 if (sd->flags & SD_SERIALIZE) 3909 if (need_serialize)
3710 spin_unlock(&balancing); 3910 spin_unlock(&balancing);
3711out: 3911out:
3712 if (time_after(next_balance, sd->last_balance + interval)) { 3912 if (time_after(next_balance, sd->last_balance + interval)) {
@@ -4070,6 +4270,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
4070 prev->comm, prev->pid, preempt_count()); 4270 prev->comm, prev->pid, preempt_count());
4071 4271
4072 debug_show_held_locks(prev); 4272 debug_show_held_locks(prev);
4273 print_modules();
4073 if (irqs_disabled()) 4274 if (irqs_disabled())
4074 print_irqtrace_events(prev); 4275 print_irqtrace_events(prev);
4075 4276
@@ -4143,7 +4344,7 @@ asmlinkage void __sched schedule(void)
4143 struct task_struct *prev, *next; 4344 struct task_struct *prev, *next;
4144 unsigned long *switch_count; 4345 unsigned long *switch_count;
4145 struct rq *rq; 4346 struct rq *rq;
4146 int cpu; 4347 int cpu, hrtick = sched_feat(HRTICK);
4147 4348
4148need_resched: 4349need_resched:
4149 preempt_disable(); 4350 preempt_disable();
@@ -4158,7 +4359,8 @@ need_resched_nonpreemptible:
4158 4359
4159 schedule_debug(prev); 4360 schedule_debug(prev);
4160 4361
4161 hrtick_clear(rq); 4362 if (hrtick)
4363 hrtick_clear(rq);
4162 4364
4163 /* 4365 /*
4164 * Do the rq-clock update outside the rq lock: 4366 * Do the rq-clock update outside the rq lock:
@@ -4204,7 +4406,8 @@ need_resched_nonpreemptible:
4204 } else 4406 } else
4205 spin_unlock_irq(&rq->lock); 4407 spin_unlock_irq(&rq->lock);
4206 4408
4207 hrtick_set(rq); 4409 if (hrtick)
4410 hrtick_set(rq);
4208 4411
4209 if (unlikely(reacquire_kernel_lock(current) < 0)) 4412 if (unlikely(reacquire_kernel_lock(current) < 0))
4210 goto need_resched_nonpreemptible; 4413 goto need_resched_nonpreemptible;
@@ -4586,10 +4789,8 @@ void set_user_nice(struct task_struct *p, long nice)
4586 goto out_unlock; 4789 goto out_unlock;
4587 } 4790 }
4588 on_rq = p->se.on_rq; 4791 on_rq = p->se.on_rq;
4589 if (on_rq) { 4792 if (on_rq)
4590 dequeue_task(rq, p, 0); 4793 dequeue_task(rq, p, 0);
4591 dec_load(rq, p);
4592 }
4593 4794
4594 p->static_prio = NICE_TO_PRIO(nice); 4795 p->static_prio = NICE_TO_PRIO(nice);
4595 set_load_weight(p); 4796 set_load_weight(p);
@@ -4599,7 +4800,6 @@ void set_user_nice(struct task_struct *p, long nice)
4599 4800
4600 if (on_rq) { 4801 if (on_rq) {
4601 enqueue_task(rq, p, 0); 4802 enqueue_task(rq, p, 0);
4602 inc_load(rq, p);
4603 /* 4803 /*
4604 * If the task increased its priority or is running and 4804 * If the task increased its priority or is running and
4605 * lowered its priority, then reschedule its CPU: 4805 * lowered its priority, then reschedule its CPU:
@@ -5070,24 +5270,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
5070 return sched_setaffinity(pid, &new_mask); 5270 return sched_setaffinity(pid, &new_mask);
5071} 5271}
5072 5272
5073/*
5074 * Represents all cpu's present in the system
5075 * In systems capable of hotplug, this map could dynamically grow
5076 * as new cpu's are detected in the system via any platform specific
5077 * method, such as ACPI for e.g.
5078 */
5079
5080cpumask_t cpu_present_map __read_mostly;
5081EXPORT_SYMBOL(cpu_present_map);
5082
5083#ifndef CONFIG_SMP
5084cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
5085EXPORT_SYMBOL(cpu_online_map);
5086
5087cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
5088EXPORT_SYMBOL(cpu_possible_map);
5089#endif
5090
5091long sched_getaffinity(pid_t pid, cpumask_t *mask) 5273long sched_getaffinity(pid_t pid, cpumask_t *mask)
5092{ 5274{
5093 struct task_struct *p; 5275 struct task_struct *p;
@@ -5571,6 +5753,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
5571 goto out; 5753 goto out;
5572 } 5754 }
5573 5755
5756 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
5757 !cpus_equal(p->cpus_allowed, *new_mask))) {
5758 ret = -EINVAL;
5759 goto out;
5760 }
5761
5574 if (p->sched_class->set_cpus_allowed) 5762 if (p->sched_class->set_cpus_allowed)
5575 p->sched_class->set_cpus_allowed(p, new_mask); 5763 p->sched_class->set_cpus_allowed(p, new_mask);
5576 else { 5764 else {
@@ -5622,10 +5810,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5622 double_rq_lock(rq_src, rq_dest); 5810 double_rq_lock(rq_src, rq_dest);
5623 /* Already moved. */ 5811 /* Already moved. */
5624 if (task_cpu(p) != src_cpu) 5812 if (task_cpu(p) != src_cpu)
5625 goto out; 5813 goto done;
5626 /* Affinity changed (again). */ 5814 /* Affinity changed (again). */
5627 if (!cpu_isset(dest_cpu, p->cpus_allowed)) 5815 if (!cpu_isset(dest_cpu, p->cpus_allowed))
5628 goto out; 5816 goto fail;
5629 5817
5630 on_rq = p->se.on_rq; 5818 on_rq = p->se.on_rq;
5631 if (on_rq) 5819 if (on_rq)
@@ -5636,8 +5824,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5636 activate_task(rq_dest, p, 0); 5824 activate_task(rq_dest, p, 0);
5637 check_preempt_curr(rq_dest, p); 5825 check_preempt_curr(rq_dest, p);
5638 } 5826 }
5827done:
5639 ret = 1; 5828 ret = 1;
5640out: 5829fail:
5641 double_rq_unlock(rq_src, rq_dest); 5830 double_rq_unlock(rq_src, rq_dest);
5642 return ret; 5831 return ret;
5643} 5832}
@@ -6059,6 +6248,36 @@ static void unregister_sched_domain_sysctl(void)
6059} 6248}
6060#endif 6249#endif
6061 6250
6251static void set_rq_online(struct rq *rq)
6252{
6253 if (!rq->online) {
6254 const struct sched_class *class;
6255
6256 cpu_set(rq->cpu, rq->rd->online);
6257 rq->online = 1;
6258
6259 for_each_class(class) {
6260 if (class->rq_online)
6261 class->rq_online(rq);
6262 }
6263 }
6264}
6265
6266static void set_rq_offline(struct rq *rq)
6267{
6268 if (rq->online) {
6269 const struct sched_class *class;
6270
6271 for_each_class(class) {
6272 if (class->rq_offline)
6273 class->rq_offline(rq);
6274 }
6275
6276 cpu_clear(rq->cpu, rq->rd->online);
6277 rq->online = 0;
6278 }
6279}
6280
6062/* 6281/*
6063 * migration_call - callback that gets triggered when a CPU is added. 6282 * migration_call - callback that gets triggered when a CPU is added.
6064 * Here we can start up the necessary migration thread for the new CPU. 6283 * Here we can start up the necessary migration thread for the new CPU.
@@ -6096,7 +6315,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6096 spin_lock_irqsave(&rq->lock, flags); 6315 spin_lock_irqsave(&rq->lock, flags);
6097 if (rq->rd) { 6316 if (rq->rd) {
6098 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6317 BUG_ON(!cpu_isset(cpu, rq->rd->span));
6099 cpu_set(cpu, rq->rd->online); 6318
6319 set_rq_online(rq);
6100 } 6320 }
6101 spin_unlock_irqrestore(&rq->lock, flags); 6321 spin_unlock_irqrestore(&rq->lock, flags);
6102 break; 6322 break;
@@ -6157,7 +6377,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6157 spin_lock_irqsave(&rq->lock, flags); 6377 spin_lock_irqsave(&rq->lock, flags);
6158 if (rq->rd) { 6378 if (rq->rd) {
6159 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6379 BUG_ON(!cpu_isset(cpu, rq->rd->span));
6160 cpu_clear(cpu, rq->rd->online); 6380 set_rq_offline(rq);
6161 } 6381 }
6162 spin_unlock_irqrestore(&rq->lock, flags); 6382 spin_unlock_irqrestore(&rq->lock, flags);
6163 break; 6383 break;
@@ -6191,6 +6411,28 @@ void __init migration_init(void)
6191 6411
6192#ifdef CONFIG_SCHED_DEBUG 6412#ifdef CONFIG_SCHED_DEBUG
6193 6413
6414static inline const char *sd_level_to_string(enum sched_domain_level lvl)
6415{
6416 switch (lvl) {
6417 case SD_LV_NONE:
6418 return "NONE";
6419 case SD_LV_SIBLING:
6420 return "SIBLING";
6421 case SD_LV_MC:
6422 return "MC";
6423 case SD_LV_CPU:
6424 return "CPU";
6425 case SD_LV_NODE:
6426 return "NODE";
6427 case SD_LV_ALLNODES:
6428 return "ALLNODES";
6429 case SD_LV_MAX:
6430 return "MAX";
6431
6432 }
6433 return "MAX";
6434}
6435
6194static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 6436static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6195 cpumask_t *groupmask) 6437 cpumask_t *groupmask)
6196{ 6438{
@@ -6210,7 +6452,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6210 return -1; 6452 return -1;
6211 } 6453 }
6212 6454
6213 printk(KERN_CONT "span %s\n", str); 6455 printk(KERN_CONT "span %s level %s\n",
6456 str, sd_level_to_string(sd->level));
6214 6457
6215 if (!cpu_isset(cpu, sd->span)) { 6458 if (!cpu_isset(cpu, sd->span)) {
6216 printk(KERN_ERR "ERROR: domain->span does not contain " 6459 printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -6294,9 +6537,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6294 } 6537 }
6295 kfree(groupmask); 6538 kfree(groupmask);
6296} 6539}
6297#else 6540#else /* !CONFIG_SCHED_DEBUG */
6298# define sched_domain_debug(sd, cpu) do { } while (0) 6541# define sched_domain_debug(sd, cpu) do { } while (0)
6299#endif 6542#endif /* CONFIG_SCHED_DEBUG */
6300 6543
6301static int sd_degenerate(struct sched_domain *sd) 6544static int sd_degenerate(struct sched_domain *sd)
6302{ 6545{
@@ -6356,20 +6599,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6356static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6599static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6357{ 6600{
6358 unsigned long flags; 6601 unsigned long flags;
6359 const struct sched_class *class;
6360 6602
6361 spin_lock_irqsave(&rq->lock, flags); 6603 spin_lock_irqsave(&rq->lock, flags);
6362 6604
6363 if (rq->rd) { 6605 if (rq->rd) {
6364 struct root_domain *old_rd = rq->rd; 6606 struct root_domain *old_rd = rq->rd;
6365 6607
6366 for (class = sched_class_highest; class; class = class->next) { 6608 if (cpu_isset(rq->cpu, old_rd->online))
6367 if (class->leave_domain) 6609 set_rq_offline(rq);
6368 class->leave_domain(rq);
6369 }
6370 6610
6371 cpu_clear(rq->cpu, old_rd->span); 6611 cpu_clear(rq->cpu, old_rd->span);
6372 cpu_clear(rq->cpu, old_rd->online);
6373 6612
6374 if (atomic_dec_and_test(&old_rd->refcount)) 6613 if (atomic_dec_and_test(&old_rd->refcount))
6375 kfree(old_rd); 6614 kfree(old_rd);
@@ -6380,12 +6619,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6380 6619
6381 cpu_set(rq->cpu, rd->span); 6620 cpu_set(rq->cpu, rd->span);
6382 if (cpu_isset(rq->cpu, cpu_online_map)) 6621 if (cpu_isset(rq->cpu, cpu_online_map))
6383 cpu_set(rq->cpu, rd->online); 6622 set_rq_online(rq);
6384
6385 for (class = sched_class_highest; class; class = class->next) {
6386 if (class->join_domain)
6387 class->join_domain(rq);
6388 }
6389 6623
6390 spin_unlock_irqrestore(&rq->lock, flags); 6624 spin_unlock_irqrestore(&rq->lock, flags);
6391} 6625}
@@ -6396,6 +6630,8 @@ static void init_rootdomain(struct root_domain *rd)
6396 6630
6397 cpus_clear(rd->span); 6631 cpus_clear(rd->span);
6398 cpus_clear(rd->online); 6632 cpus_clear(rd->online);
6633
6634 cpupri_init(&rd->cpupri);
6399} 6635}
6400 6636
6401static void init_defrootdomain(void) 6637static void init_defrootdomain(void)
@@ -6590,7 +6826,7 @@ static void sched_domain_node_span(int node, cpumask_t *span)
6590 cpus_or(*span, *span, *nodemask); 6826 cpus_or(*span, *span, *nodemask);
6591 } 6827 }
6592} 6828}
6593#endif 6829#endif /* CONFIG_NUMA */
6594 6830
6595int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 6831int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6596 6832
@@ -6609,7 +6845,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6609 *sg = &per_cpu(sched_group_cpus, cpu); 6845 *sg = &per_cpu(sched_group_cpus, cpu);
6610 return cpu; 6846 return cpu;
6611} 6847}
6612#endif 6848#endif /* CONFIG_SCHED_SMT */
6613 6849
6614/* 6850/*
6615 * multi-core sched-domains: 6851 * multi-core sched-domains:
@@ -6617,7 +6853,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6617#ifdef CONFIG_SCHED_MC 6853#ifdef CONFIG_SCHED_MC
6618static DEFINE_PER_CPU(struct sched_domain, core_domains); 6854static DEFINE_PER_CPU(struct sched_domain, core_domains);
6619static DEFINE_PER_CPU(struct sched_group, sched_group_core); 6855static DEFINE_PER_CPU(struct sched_group, sched_group_core);
6620#endif 6856#endif /* CONFIG_SCHED_MC */
6621 6857
6622#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 6858#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6623static int 6859static int
@@ -6719,7 +6955,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
6719 sg = sg->next; 6955 sg = sg->next;
6720 } while (sg != group_head); 6956 } while (sg != group_head);
6721} 6957}
6722#endif 6958#endif /* CONFIG_NUMA */
6723 6959
6724#ifdef CONFIG_NUMA 6960#ifdef CONFIG_NUMA
6725/* Free memory allocated for various sched_group structures */ 6961/* Free memory allocated for various sched_group structures */
@@ -6756,11 +6992,11 @@ next_sg:
6756 sched_group_nodes_bycpu[cpu] = NULL; 6992 sched_group_nodes_bycpu[cpu] = NULL;
6757 } 6993 }
6758} 6994}
6759#else 6995#else /* !CONFIG_NUMA */
6760static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 6996static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
6761{ 6997{
6762} 6998}
6763#endif 6999#endif /* CONFIG_NUMA */
6764 7000
6765/* 7001/*
6766 * Initialize sched groups cpu_power. 7002 * Initialize sched groups cpu_power.
@@ -7469,7 +7705,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7469#endif 7705#endif
7470 return err; 7706 return err;
7471} 7707}
7472#endif 7708#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7473 7709
7474/* 7710/*
7475 * Force a reinitialization of the sched domains hierarchy. The domains 7711 * Force a reinitialization of the sched domains hierarchy. The domains
@@ -7480,21 +7716,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7480static int update_sched_domains(struct notifier_block *nfb, 7716static int update_sched_domains(struct notifier_block *nfb,
7481 unsigned long action, void *hcpu) 7717 unsigned long action, void *hcpu)
7482{ 7718{
7719 int cpu = (int)(long)hcpu;
7720
7483 switch (action) { 7721 switch (action) {
7484 case CPU_UP_PREPARE:
7485 case CPU_UP_PREPARE_FROZEN:
7486 case CPU_DOWN_PREPARE: 7722 case CPU_DOWN_PREPARE:
7487 case CPU_DOWN_PREPARE_FROZEN: 7723 case CPU_DOWN_PREPARE_FROZEN:
7724 disable_runtime(cpu_rq(cpu));
7725 /* fall-through */
7726 case CPU_UP_PREPARE:
7727 case CPU_UP_PREPARE_FROZEN:
7488 detach_destroy_domains(&cpu_online_map); 7728 detach_destroy_domains(&cpu_online_map);
7489 free_sched_domains(); 7729 free_sched_domains();
7490 return NOTIFY_OK; 7730 return NOTIFY_OK;
7491 7731
7492 case CPU_UP_CANCELED: 7732
7493 case CPU_UP_CANCELED_FROZEN:
7494 case CPU_DOWN_FAILED: 7733 case CPU_DOWN_FAILED:
7495 case CPU_DOWN_FAILED_FROZEN: 7734 case CPU_DOWN_FAILED_FROZEN:
7496 case CPU_ONLINE: 7735 case CPU_ONLINE:
7497 case CPU_ONLINE_FROZEN: 7736 case CPU_ONLINE_FROZEN:
7737 enable_runtime(cpu_rq(cpu));
7738 /* fall-through */
7739 case CPU_UP_CANCELED:
7740 case CPU_UP_CANCELED_FROZEN:
7498 case CPU_DEAD: 7741 case CPU_DEAD:
7499 case CPU_DEAD_FROZEN: 7742 case CPU_DEAD_FROZEN:
7500 /* 7743 /*
@@ -7694,8 +7937,8 @@ void __init sched_init(void)
7694 7937
7695 root_task_group.cfs_rq = (struct cfs_rq **)ptr; 7938 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
7696 ptr += nr_cpu_ids * sizeof(void **); 7939 ptr += nr_cpu_ids * sizeof(void **);
7697#endif 7940#endif /* CONFIG_USER_SCHED */
7698#endif 7941#endif /* CONFIG_FAIR_GROUP_SCHED */
7699#ifdef CONFIG_RT_GROUP_SCHED 7942#ifdef CONFIG_RT_GROUP_SCHED
7700 init_task_group.rt_se = (struct sched_rt_entity **)ptr; 7943 init_task_group.rt_se = (struct sched_rt_entity **)ptr;
7701 ptr += nr_cpu_ids * sizeof(void **); 7944 ptr += nr_cpu_ids * sizeof(void **);
@@ -7709,8 +7952,8 @@ void __init sched_init(void)
7709 7952
7710 root_task_group.rt_rq = (struct rt_rq **)ptr; 7953 root_task_group.rt_rq = (struct rt_rq **)ptr;
7711 ptr += nr_cpu_ids * sizeof(void **); 7954 ptr += nr_cpu_ids * sizeof(void **);
7712#endif 7955#endif /* CONFIG_USER_SCHED */
7713#endif 7956#endif /* CONFIG_RT_GROUP_SCHED */
7714 } 7957 }
7715 7958
7716#ifdef CONFIG_SMP 7959#ifdef CONFIG_SMP
@@ -7726,8 +7969,8 @@ void __init sched_init(void)
7726#ifdef CONFIG_USER_SCHED 7969#ifdef CONFIG_USER_SCHED
7727 init_rt_bandwidth(&root_task_group.rt_bandwidth, 7970 init_rt_bandwidth(&root_task_group.rt_bandwidth,
7728 global_rt_period(), RUNTIME_INF); 7971 global_rt_period(), RUNTIME_INF);
7729#endif 7972#endif /* CONFIG_USER_SCHED */
7730#endif 7973#endif /* CONFIG_RT_GROUP_SCHED */
7731 7974
7732#ifdef CONFIG_GROUP_SCHED 7975#ifdef CONFIG_GROUP_SCHED
7733 list_add(&init_task_group.list, &task_groups); 7976 list_add(&init_task_group.list, &task_groups);
@@ -7737,8 +7980,8 @@ void __init sched_init(void)
7737 INIT_LIST_HEAD(&root_task_group.children); 7980 INIT_LIST_HEAD(&root_task_group.children);
7738 init_task_group.parent = &root_task_group; 7981 init_task_group.parent = &root_task_group;
7739 list_add(&init_task_group.siblings, &root_task_group.children); 7982 list_add(&init_task_group.siblings, &root_task_group.children);
7740#endif 7983#endif /* CONFIG_USER_SCHED */
7741#endif 7984#endif /* CONFIG_GROUP_SCHED */
7742 7985
7743 for_each_possible_cpu(i) { 7986 for_each_possible_cpu(i) {
7744 struct rq *rq; 7987 struct rq *rq;
@@ -7818,6 +8061,7 @@ void __init sched_init(void)
7818 rq->next_balance = jiffies; 8061 rq->next_balance = jiffies;
7819 rq->push_cpu = 0; 8062 rq->push_cpu = 0;
7820 rq->cpu = i; 8063 rq->cpu = i;
8064 rq->online = 0;
7821 rq->migration_thread = NULL; 8065 rq->migration_thread = NULL;
7822 INIT_LIST_HEAD(&rq->migration_queue); 8066 INIT_LIST_HEAD(&rq->migration_queue);
7823 rq_attach_root(rq, &def_root_domain); 8067 rq_attach_root(rq, &def_root_domain);
@@ -8057,7 +8301,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8057{ 8301{
8058 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); 8302 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
8059} 8303}
8060#else 8304#else /* !CONFIG_FAIR_GROUP_SCHED */
8061static inline void free_fair_sched_group(struct task_group *tg) 8305static inline void free_fair_sched_group(struct task_group *tg)
8062{ 8306{
8063} 8307}
@@ -8075,7 +8319,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu)
8075static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) 8319static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8076{ 8320{
8077} 8321}
8078#endif 8322#endif /* CONFIG_FAIR_GROUP_SCHED */
8079 8323
8080#ifdef CONFIG_RT_GROUP_SCHED 8324#ifdef CONFIG_RT_GROUP_SCHED
8081static void free_rt_sched_group(struct task_group *tg) 8325static void free_rt_sched_group(struct task_group *tg)
@@ -8146,7 +8390,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8146{ 8390{
8147 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); 8391 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
8148} 8392}
8149#else 8393#else /* !CONFIG_RT_GROUP_SCHED */
8150static inline void free_rt_sched_group(struct task_group *tg) 8394static inline void free_rt_sched_group(struct task_group *tg)
8151{ 8395{
8152} 8396}
@@ -8164,7 +8408,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu)
8164static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) 8408static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8165{ 8409{
8166} 8410}
8167#endif 8411#endif /* CONFIG_RT_GROUP_SCHED */
8168 8412
8169#ifdef CONFIG_GROUP_SCHED 8413#ifdef CONFIG_GROUP_SCHED
8170static void free_sched_group(struct task_group *tg) 8414static void free_sched_group(struct task_group *tg)
@@ -8275,17 +8519,14 @@ void sched_move_task(struct task_struct *tsk)
8275 8519
8276 task_rq_unlock(rq, &flags); 8520 task_rq_unlock(rq, &flags);
8277} 8521}
8278#endif 8522#endif /* CONFIG_GROUP_SCHED */
8279 8523
8280#ifdef CONFIG_FAIR_GROUP_SCHED 8524#ifdef CONFIG_FAIR_GROUP_SCHED
8281static void set_se_shares(struct sched_entity *se, unsigned long shares) 8525static void __set_se_shares(struct sched_entity *se, unsigned long shares)
8282{ 8526{
8283 struct cfs_rq *cfs_rq = se->cfs_rq; 8527 struct cfs_rq *cfs_rq = se->cfs_rq;
8284 struct rq *rq = cfs_rq->rq;
8285 int on_rq; 8528 int on_rq;
8286 8529
8287 spin_lock_irq(&rq->lock);
8288
8289 on_rq = se->on_rq; 8530 on_rq = se->on_rq;
8290 if (on_rq) 8531 if (on_rq)
8291 dequeue_entity(cfs_rq, se, 0); 8532 dequeue_entity(cfs_rq, se, 0);
@@ -8295,8 +8536,17 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
8295 8536
8296 if (on_rq) 8537 if (on_rq)
8297 enqueue_entity(cfs_rq, se, 0); 8538 enqueue_entity(cfs_rq, se, 0);
8539}
8298 8540
8299 spin_unlock_irq(&rq->lock); 8541static void set_se_shares(struct sched_entity *se, unsigned long shares)
8542{
8543 struct cfs_rq *cfs_rq = se->cfs_rq;
8544 struct rq *rq = cfs_rq->rq;
8545 unsigned long flags;
8546
8547 spin_lock_irqsave(&rq->lock, flags);
8548 __set_se_shares(se, shares);
8549 spin_unlock_irqrestore(&rq->lock, flags);
8300} 8550}
8301 8551
8302static DEFINE_MUTEX(shares_mutex); 8552static DEFINE_MUTEX(shares_mutex);
@@ -8335,8 +8585,13 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8335 * w/o tripping rebalance_share or load_balance_fair. 8585 * w/o tripping rebalance_share or load_balance_fair.
8336 */ 8586 */
8337 tg->shares = shares; 8587 tg->shares = shares;
8338 for_each_possible_cpu(i) 8588 for_each_possible_cpu(i) {
8589 /*
8590 * force a rebalance
8591 */
8592 cfs_rq_set_shares(tg->cfs_rq[i], 0);
8339 set_se_shares(tg->se[i], shares); 8593 set_se_shares(tg->se[i], shares);
8594 }
8340 8595
8341 /* 8596 /*
8342 * Enable load balance activity on this group, by inserting it back on 8597 * Enable load balance activity on this group, by inserting it back on
@@ -8375,7 +8630,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
8375#ifdef CONFIG_CGROUP_SCHED 8630#ifdef CONFIG_CGROUP_SCHED
8376static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) 8631static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
8377{ 8632{
8378 struct task_group *tgi, *parent = tg ? tg->parent : NULL; 8633 struct task_group *tgi, *parent = tg->parent;
8379 unsigned long total = 0; 8634 unsigned long total = 0;
8380 8635
8381 if (!parent) { 8636 if (!parent) {
@@ -8399,7 +8654,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
8399 } 8654 }
8400 rcu_read_unlock(); 8655 rcu_read_unlock();
8401 8656
8402 return total + to_ratio(period, runtime) < 8657 return total + to_ratio(period, runtime) <=
8403 to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), 8658 to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
8404 parent->rt_bandwidth.rt_runtime); 8659 parent->rt_bandwidth.rt_runtime);
8405} 8660}
@@ -8519,16 +8774,21 @@ long sched_group_rt_period(struct task_group *tg)
8519 8774
8520static int sched_rt_global_constraints(void) 8775static int sched_rt_global_constraints(void)
8521{ 8776{
8777 struct task_group *tg = &root_task_group;
8778 u64 rt_runtime, rt_period;
8522 int ret = 0; 8779 int ret = 0;
8523 8780
8781 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
8782 rt_runtime = tg->rt_bandwidth.rt_runtime;
8783
8524 mutex_lock(&rt_constraints_mutex); 8784 mutex_lock(&rt_constraints_mutex);
8525 if (!__rt_schedulable(NULL, 1, 0)) 8785 if (!__rt_schedulable(tg, rt_period, rt_runtime))
8526 ret = -EINVAL; 8786 ret = -EINVAL;
8527 mutex_unlock(&rt_constraints_mutex); 8787 mutex_unlock(&rt_constraints_mutex);
8528 8788
8529 return ret; 8789 return ret;
8530} 8790}
8531#else 8791#else /* !CONFIG_RT_GROUP_SCHED */
8532static int sched_rt_global_constraints(void) 8792static int sched_rt_global_constraints(void)
8533{ 8793{
8534 unsigned long flags; 8794 unsigned long flags;
@@ -8546,7 +8806,7 @@ static int sched_rt_global_constraints(void)
8546 8806
8547 return 0; 8807 return 0;
8548} 8808}
8549#endif 8809#endif /* CONFIG_RT_GROUP_SCHED */
8550 8810
8551int sched_rt_handler(struct ctl_table *table, int write, 8811int sched_rt_handler(struct ctl_table *table, int write,
8552 struct file *filp, void __user *buffer, size_t *lenp, 8812 struct file *filp, void __user *buffer, size_t *lenp,
@@ -8654,7 +8914,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
8654 8914
8655 return (u64) tg->shares; 8915 return (u64) tg->shares;
8656} 8916}
8657#endif 8917#endif /* CONFIG_FAIR_GROUP_SCHED */
8658 8918
8659#ifdef CONFIG_RT_GROUP_SCHED 8919#ifdef CONFIG_RT_GROUP_SCHED
8660static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, 8920static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
@@ -8678,7 +8938,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
8678{ 8938{
8679 return sched_group_rt_period(cgroup_tg(cgrp)); 8939 return sched_group_rt_period(cgroup_tg(cgrp));
8680} 8940}
8681#endif 8941#endif /* CONFIG_RT_GROUP_SCHED */
8682 8942
8683static struct cftype cpu_files[] = { 8943static struct cftype cpu_files[] = {
8684#ifdef CONFIG_FAIR_GROUP_SCHED 8944#ifdef CONFIG_FAIR_GROUP_SCHED
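A few hunks up, the RT group admission test changes from '<' to '<=' and sched_rt_global_constraints() now feeds the root task group's own period and runtime into __rt_schedulable(); both hinge on comparing bandwidth ratios (runtime/period) of children against their parent. Below is a stand-alone sketch of that admission rule; the fixed-point to_ratio() helper here is an assumption standing in for the kernel's, so only the comparison is meant to match.

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-in for the kernel's to_ratio(): turn runtime/period into a
 * fixed-point fraction so per-group ratios can be summed and compared. */
static uint64_t to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
        return (runtime_ns << 16) / period_ns;
}

int main(void)
{
        /* Parent group: 950 ms of RT runtime per 1 s period. */
        uint64_t parent = to_ratio(1000000000ULL, 950000000ULL);

        /* Two children asking for 500 ms/s and 450 ms/s. */
        uint64_t total = to_ratio(1000000000ULL, 500000000ULL) +
                         to_ratio(1000000000ULL, 450000000ULL);

        /* The '<' test rejected this exact fit; '<=' admits children that
         * use the parent's bandwidth completely but never more. */
        printf("admit: %s\n", total <= parent ? "yes" : "no");
        return 0;
}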
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 8affbfd0cdb0..22ed55d1167f 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -330,3 +330,16 @@ unsigned long long __attribute__((weak)) sched_clock(void)
330{ 330{
331 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); 331 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
332} 332}
333
334unsigned long long cpu_clock(int cpu)
335{
336 unsigned long long clock;
337 unsigned long flags;
338
339 local_irq_save(flags);
340 clock = sched_clock_cpu(cpu);
341 local_irq_restore(flags);
342
343 return clock;
344}
345EXPORT_SYMBOL_GPL(cpu_clock);
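The new cpu_clock() above is a thin wrapper that reads sched_clock_cpu() with local interrupts disabled, so callers get a per-CPU nanosecond timestamp. A user-space sketch of the calling pattern follows; the clock_gettime() stand-in is an assumption used only so the example runs outside the kernel.

#include <stdio.h>
#include <time.h>

/* User-space stand-in for cpu_clock(cpu): a monotonic nanosecond timestamp.
 * The kernel version instead disables local interrupts and reads
 * sched_clock_cpu(cpu), as in the hunk above. */
static unsigned long long cpu_clock(int cpu)
{
        struct timespec ts;

        (void)cpu;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
        unsigned long long t0 = cpu_clock(0);
        /* ... section being timed ... */
        unsigned long long t1 = cpu_clock(0);

        printf("section took %llu ns\n", t1 - t0);
        return 0;
}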
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
new file mode 100644
index 000000000000..52154fefab7e
--- /dev/null
+++ b/kernel/sched_cpupri.c
@@ -0,0 +1,174 @@
1/*
2 * kernel/sched_cpupri.c
3 *
4 * CPU priority management
5 *
6 * Copyright (C) 2007-2008 Novell
7 *
8 * Author: Gregory Haskins <ghaskins@novell.com>
9 *
10 * This code tracks the priority of each CPU so that global migration
11 * decisions are easy to calculate. Each CPU can be in a state as follows:
12 *
13 * (INVALID), IDLE, NORMAL, RT1, ... RT99
14 *
15 * going from the lowest priority to the highest. CPUs in the INVALID state
16 * are not eligible for routing. The system maintains this state with
17 * a 2 dimensional bitmap (the first for priority class, the second for cpus
18 * in that class). Therefore a typical application without affinity
19 * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
20 * searches). For tasks with affinity restrictions, the algorithm has a
21 * worst case complexity of O(min(102, nr_domcpus)), though the scenario that
22 * yields the worst case search is fairly contrived.
23 *
24 * This program is free software; you can redistribute it and/or
25 * modify it under the terms of the GNU General Public License
26 * as published by the Free Software Foundation; version 2
27 * of the License.
28 */
29
30#include "sched_cpupri.h"
31
32/* Convert between a 140 based task->prio, and our 102 based cpupri */
33static int convert_prio(int prio)
34{
35 int cpupri;
36
37 if (prio == CPUPRI_INVALID)
38 cpupri = CPUPRI_INVALID;
39 else if (prio == MAX_PRIO)
40 cpupri = CPUPRI_IDLE;
41 else if (prio >= MAX_RT_PRIO)
42 cpupri = CPUPRI_NORMAL;
43 else
44 cpupri = MAX_RT_PRIO - prio + 1;
45
46 return cpupri;
47}
48
49#define for_each_cpupri_active(array, idx) \
50 for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
51 idx < CPUPRI_NR_PRIORITIES; \
52 idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
53
54/**
55 * cpupri_find - find the best (lowest-pri) CPU in the system
56 * @cp: The cpupri context
57 * @p: The task
58 * @lowest_mask: A mask to fill in with selected CPUs
59 *
60 * Note: This function returns the recommended CPUs as calculated during the
 61 * current invocation. By the time the call returns, the CPUs may have in
62 * fact changed priorities any number of times. While not ideal, it is not
63 * an issue of correctness since the normal rebalancer logic will correct
64 * any discrepancies created by racing against the uncertainty of the current
65 * priority configuration.
66 *
67 * Returns: (int)bool - CPUs were found
68 */
69int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 cpumask_t *lowest_mask)
71{
72 int idx = 0;
73 int task_pri = convert_prio(p->prio);
74
75 for_each_cpupri_active(cp->pri_active, idx) {
76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
77 cpumask_t mask;
78
79 if (idx >= task_pri)
80 break;
81
82 cpus_and(mask, p->cpus_allowed, vec->mask);
83
84 if (cpus_empty(mask))
85 continue;
86
87 *lowest_mask = mask;
88 return 1;
89 }
90
91 return 0;
92}
93
94/**
95 * cpupri_set - update the cpu priority setting
96 * @cp: The cpupri context
97 * @cpu: The target cpu
98 * @pri: The priority (INVALID-RT99) to assign to this CPU
99 *
100 * Note: Assumes cpu_rq(cpu)->lock is locked
101 *
102 * Returns: (void)
103 */
104void cpupri_set(struct cpupri *cp, int cpu, int newpri)
105{
106 int *currpri = &cp->cpu_to_pri[cpu];
107 int oldpri = *currpri;
108 unsigned long flags;
109
110 newpri = convert_prio(newpri);
111
112 BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
113
114 if (newpri == oldpri)
115 return;
116
117 /*
118 * If the cpu was currently mapped to a different value, we
119 * first need to unmap the old value
120 */
121 if (likely(oldpri != CPUPRI_INVALID)) {
122 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
123
124 spin_lock_irqsave(&vec->lock, flags);
125
126 vec->count--;
127 if (!vec->count)
128 clear_bit(oldpri, cp->pri_active);
129 cpu_clear(cpu, vec->mask);
130
131 spin_unlock_irqrestore(&vec->lock, flags);
132 }
133
134 if (likely(newpri != CPUPRI_INVALID)) {
135 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
136
137 spin_lock_irqsave(&vec->lock, flags);
138
139 cpu_set(cpu, vec->mask);
140 vec->count++;
141 if (vec->count == 1)
142 set_bit(newpri, cp->pri_active);
143
144 spin_unlock_irqrestore(&vec->lock, flags);
145 }
146
147 *currpri = newpri;
148}
149
150/**
151 * cpupri_init - initialize the cpupri structure
152 * @cp: The cpupri context
153 *
154 * Returns: (void)
155 */
156void cpupri_init(struct cpupri *cp)
157{
158 int i;
159
160 memset(cp, 0, sizeof(*cp));
161
162 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
163 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
164
165 spin_lock_init(&vec->lock);
166 vec->count = 0;
167 cpus_clear(vec->mask);
168 }
169
170 for_each_possible_cpu(i)
171 cp->cpu_to_pri[i] = CPUPRI_INVALID;
172}
173
174
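Per the header comment, cpupri exists so RT migration decisions can answer "which CPUs currently run something lower-priority than this task?" with a couple of bitmap searches. The toy user-space model below mirrors that two-level structure (an active-class bitmap plus a per-class CPU mask); every name in it is invented for illustration and none of it is the kernel API.

#include <stdio.h>

/* Toy model for 8 CPUs: one bit per priority class saying "some CPU sits at
 * this level", plus a per-class CPU mask.  The kernel walks its class bitmap
 * with find_first_bit()/find_next_bit() and locks each vector; this sketch
 * scans the classes in order and has no locking. */
#define NR_CLASSES 6

struct toy_cpupri {
        unsigned int class_active;        /* bit n: class n is non-empty   */
        unsigned char cpus[NR_CLASSES];   /* bit c: CPU c is in this class */
        int cpu_class[8];
};

static void toy_set(struct toy_cpupri *cp, int cpu, int newclass)
{
        int old = cp->cpu_class[cpu];

        cp->cpus[old] &= (unsigned char)~(1u << cpu);
        if (!cp->cpus[old])
                cp->class_active &= ~(1u << old);

        cp->cpus[newclass] |= (unsigned char)(1u << cpu);
        cp->class_active |= 1u << newclass;
        cp->cpu_class[cpu] = newclass;
}

/* Lowest-priority CPUs a task of class task_class could be pushed to,
 * restricted to its affinity mask; 0 means no suitable CPU. */
static unsigned char toy_find(const struct toy_cpupri *cp, int task_class,
                              unsigned char affinity)
{
        int idx;

        for (idx = 0; idx < task_class; idx++) {
                unsigned char mask;

                if (!(cp->class_active & (1u << idx)))
                        continue;
                mask = cp->cpus[idx] & affinity;
                if (mask)
                        return mask;
        }
        return 0;
}

int main(void)
{
        struct toy_cpupri cp = { .class_active = 1u,
                                 .cpus = { [0] = 0xff } };   /* all idle */

        toy_set(&cp, 2, 3);   /* CPU 2 picks up an RT task of class 3 */
        toy_set(&cp, 5, 1);   /* CPU 5 runs a normal task             */

        /* A class-4 task with full affinity should land on an idle CPU. */
        printf("candidates: 0x%x\n", toy_find(&cp, 4, 0xff));
        return 0;
}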
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
new file mode 100644
index 000000000000..f25811b0f931
--- /dev/null
+++ b/kernel/sched_cpupri.h
@@ -0,0 +1,36 @@
1#ifndef _LINUX_CPUPRI_H
2#define _LINUX_CPUPRI_H
3
4#include <linux/sched.h>
5
6#define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2)
7#define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
8
9#define CPUPRI_INVALID -1
10#define CPUPRI_IDLE 0
11#define CPUPRI_NORMAL 1
12/* values 2-101 are RT priorities 0-99 */
13
14struct cpupri_vec {
15 spinlock_t lock;
16 int count;
17 cpumask_t mask;
18};
19
20struct cpupri {
21 struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
22 long pri_active[CPUPRI_NR_PRI_WORDS];
23 int cpu_to_pri[NR_CPUS];
24};
25
26#ifdef CONFIG_SMP
27int cpupri_find(struct cpupri *cp,
28 struct task_struct *p, cpumask_t *lowest_mask);
29void cpupri_set(struct cpupri *cp, int cpu, int pri);
30void cpupri_init(struct cpupri *cp);
31#else
32#define cpupri_set(cp, cpu, pri) do { } while (0)
33#define cpupri_init() do { } while (0)
34#endif
35
36#endif /* _LINUX_CPUPRI_H */
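For reference, convert_prio() in the new file above maps the 140-level p->prio scale onto this 102-level one: MAX_PRIO becomes CPUPRI_IDLE, any non-RT priority becomes CPUPRI_NORMAL, and RT priorities land on 2..101 with a larger cpupri meaning a higher RT priority. A small stand-alone check of that mapping; MAX_RT_PRIO and MAX_PRIO are hard-coded to the usual 100/140 here, which is an assumption about the configuration.

#include <stdio.h>

#define MAX_RT_PRIO     100                /* assumed, as in the usual config */
#define MAX_PRIO        (MAX_RT_PRIO + 40)

/* Same mapping as convert_prio() above, minus the INVALID case. */
static int convert_prio(int prio)
{
        if (prio == MAX_PRIO)
                return 0;                  /* CPUPRI_IDLE   */
        if (prio >= MAX_RT_PRIO)
                return 1;                  /* CPUPRI_NORMAL */
        return MAX_RT_PRIO - prio + 1;     /* 2..101        */
}

int main(void)
{
        /* prio 0 is the highest RT priority, prio 99 the lowest. */
        printf("%d %d %d %d\n",
               convert_prio(0),            /* 101              */
               convert_prio(99),           /* 2                */
               convert_prio(120),          /* 1: a nice-0 task */
               convert_prio(MAX_PRIO));    /* 0: idle          */
        return 0;
}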
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 8bb713040ac9..bbe6b31c3c56 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -119,9 +119,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
119 struct sched_entity *last; 119 struct sched_entity *last;
120 unsigned long flags; 120 unsigned long flags;
121 121
122#if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED) 122#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
123 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
124#else
125 char path[128] = ""; 123 char path[128] = "";
126 struct cgroup *cgroup = NULL; 124 struct cgroup *cgroup = NULL;
127 struct task_group *tg = cfs_rq->tg; 125 struct task_group *tg = cfs_rq->tg;
@@ -133,6 +131,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
133 cgroup_path(cgroup, path, sizeof(path)); 131 cgroup_path(cgroup, path, sizeof(path));
134 132
135 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); 133 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
134#else
135 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
136#endif 136#endif
137 137
138 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 138 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
@@ -162,11 +162,64 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
162 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); 162 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
163 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); 163 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
164#ifdef CONFIG_SCHEDSTATS 164#ifdef CONFIG_SCHEDSTATS
165 SEQ_printf(m, " .%-30s: %d\n", "bkl_count", 165#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
166 rq->bkl_count); 166
167 P(yld_exp_empty);
168 P(yld_act_empty);
169 P(yld_both_empty);
170 P(yld_count);
171
172 P(sched_switch);
173 P(sched_count);
174 P(sched_goidle);
175
176 P(ttwu_count);
177 P(ttwu_local);
178
179 P(bkl_count);
180
181#undef P
167#endif 182#endif
168 SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", 183 SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
169 cfs_rq->nr_spread_over); 184 cfs_rq->nr_spread_over);
185#ifdef CONFIG_FAIR_GROUP_SCHED
186#ifdef CONFIG_SMP
187 SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
188#endif
189#endif
190}
191
192void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
193{
194#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
195 char path[128] = "";
196 struct cgroup *cgroup = NULL;
197 struct task_group *tg = rt_rq->tg;
198
199 if (tg)
200 cgroup = tg->css.cgroup;
201
202 if (cgroup)
203 cgroup_path(cgroup, path, sizeof(path));
204
205 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
206#else
207 SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
208#endif
209
210
211#define P(x) \
212 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
213#define PN(x) \
214 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
215
216 P(rt_nr_running);
217 P(rt_throttled);
218 PN(rt_time);
219 PN(rt_runtime);
220
221#undef PN
222#undef P
170} 223}
171 224
172static void print_cpu(struct seq_file *m, int cpu) 225static void print_cpu(struct seq_file *m, int cpu)
@@ -208,6 +261,7 @@ static void print_cpu(struct seq_file *m, int cpu)
208#undef PN 261#undef PN
209 262
210 print_cfs_stats(m, cpu); 263 print_cfs_stats(m, cpu);
264 print_rt_stats(m, cpu);
211 265
212 print_rq(m, rq, cpu); 266 print_rq(m, rq, cpu);
213} 267}
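The schedstat and rt_rq dumps above lean on a small preprocessor trick: the P()/PN() macros stringize the member name with #, so the printed label can never drift away from the field being printed. A minimal stand-alone illustration of the same pattern, with plain printf() in place of SEQ_printf() and an invented stats struct:

#include <stdio.h>

struct stats {
        int yld_count;
        int ttwu_count;
        long long rt_time;
};

/* #x turns the member name into its printed label, so label and value can
 * never drift apart -- the same trick as P()/PN() in sched_debug.c. */
#define P(x)    printf(" .%-30s: %lld\n", #x, (long long)(s.x))

int main(void)
{
        struct stats s = { .yld_count = 3, .ttwu_count = 17, .rt_time = 42 };

        P(yld_count);
        P(ttwu_count);
        P(rt_time);
#undef P
        return 0;
}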
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 08ae848b71d4..f2aa987027d6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -63,13 +63,13 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
63 63
64/* 64/*
65 * SCHED_OTHER wake-up granularity. 65 * SCHED_OTHER wake-up granularity.
66 * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds) 66 * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
67 * 67 *
68 * This option delays the preemption effects of decoupled workloads 68 * This option delays the preemption effects of decoupled workloads
69 * and reduces their over-scheduling. Synchronous workloads will still 69 * and reduces their over-scheduling. Synchronous workloads will still
70 * have immediate wakeup/sleep latencies. 70 * have immediate wakeup/sleep latencies.
71 */ 71 */
72unsigned int sysctl_sched_wakeup_granularity = 10000000UL; 72unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
73 73
74const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 74const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
75 75
@@ -334,6 +334,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
334#endif 334#endif
335 335
336/* 336/*
337 * delta *= w / rw
338 */
339static inline unsigned long
340calc_delta_weight(unsigned long delta, struct sched_entity *se)
341{
342 for_each_sched_entity(se) {
343 delta = calc_delta_mine(delta,
344 se->load.weight, &cfs_rq_of(se)->load);
345 }
346
347 return delta;
348}
349
350/*
351 * delta *= rw / w
352 */
353static inline unsigned long
354calc_delta_fair(unsigned long delta, struct sched_entity *se)
355{
356 for_each_sched_entity(se) {
357 delta = calc_delta_mine(delta,
358 cfs_rq_of(se)->load.weight, &se->load);
359 }
360
361 return delta;
362}
363
364/*
337 * The idea is to set a period in which each task runs once. 365 * The idea is to set a period in which each task runs once.
338 * 366 *
339 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch 367 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
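calc_delta_weight() and calc_delta_fair() above scale a time delta by w/rw (the entity's share of its runqueue weight) or by the inverse, walking up the group hierarchy. A worked stand-alone example of the first scaling; plain 64-bit integer math stands in for calc_delta_mine(), so the rounding, not the idea, is an assumption.

#include <stdio.h>

/* delta *= w / rw: the entity's share of the runqueue weight. */
static unsigned long scale_by_weight(unsigned long delta_ns,
                                     unsigned long weight,
                                     unsigned long rq_weight)
{
        return (unsigned long)((unsigned long long)delta_ns * weight
                               / rq_weight);
}

int main(void)
{
        /* Three nice-0 entities (weight 1024 each) sharing a 30 ms period
         * get a 10 ms slice apiece. */
        unsigned long period = 30000000UL;   /* 30 ms in ns */

        printf("slice = %lu ns\n", scale_by_weight(period, 1024, 3 * 1024));
        return 0;
}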
@@ -362,47 +390,80 @@ static u64 __sched_period(unsigned long nr_running)
362 */ 390 */
363static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) 391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
364{ 392{
365 u64 slice = __sched_period(cfs_rq->nr_running); 393 return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
366
367 for_each_sched_entity(se) {
368 cfs_rq = cfs_rq_of(se);
369
370 slice *= se->load.weight;
371 do_div(slice, cfs_rq->load.weight);
372 }
373
374
375 return slice;
376} 394}
377 395
378/* 396/*
379 * We calculate the vruntime slice of a to be inserted task 397 * We calculate the vruntime slice of a to be inserted task
380 * 398 *
381 * vs = s/w = p/rw 399 * vs = s*rw/w = p
382 */ 400 */
383static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) 401static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
384{ 402{
385 unsigned long nr_running = cfs_rq->nr_running; 403 unsigned long nr_running = cfs_rq->nr_running;
386 unsigned long weight;
387 u64 vslice;
388 404
389 if (!se->on_rq) 405 if (!se->on_rq)
390 nr_running++; 406 nr_running++;
391 407
392 vslice = __sched_period(nr_running); 408 return __sched_period(nr_running);
409}
410
411/*
412 * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
413 * that it favours >=0 over <0.
414 *
415 * -20 |
416 * |
417 * 0 --------+-------
418 * .'
419 * 19 .'
420 *
421 */
422static unsigned long
423calc_delta_asym(unsigned long delta, struct sched_entity *se)
424{
425 struct load_weight lw = {
426 .weight = NICE_0_LOAD,
427 .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
428 };
393 429
394 for_each_sched_entity(se) { 430 for_each_sched_entity(se) {
395 cfs_rq = cfs_rq_of(se); 431 struct load_weight *se_lw = &se->load;
432 unsigned long rw = cfs_rq_of(se)->load.weight;
433
 434#ifdef CONFIG_FAIR_GROUP_SCHED
435 struct cfs_rq *cfs_rq = se->my_q;
 436 struct task_group *tg = NULL;
437
438 if (cfs_rq)
439 tg = cfs_rq->tg;
440
441 if (tg && tg->shares < NICE_0_LOAD) {
442 /*
443 * scale shares to what it would have been had
444 * tg->weight been NICE_0_LOAD:
445 *
446 * weight = 1024 * shares / tg->weight
447 */
448 lw.weight *= se->load.weight;
449 lw.weight /= tg->shares;
450
451 lw.inv_weight = 0;
452
453 se_lw = &lw;
454 rw += lw.weight - se->load.weight;
455 } else
456#endif
396 457
397 weight = cfs_rq->load.weight; 458 if (se->load.weight < NICE_0_LOAD) {
398 if (!se->on_rq) 459 se_lw = &lw;
399 weight += se->load.weight; 460 rw += NICE_0_LOAD - se->load.weight;
461 }
400 462
401 vslice *= NICE_0_LOAD; 463 delta = calc_delta_mine(delta, rw, se_lw);
402 do_div(vslice, weight);
403 } 464 }
404 465
405 return vslice; 466 return delta;
406} 467}
407 468
408/* 469/*
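The effect of the clamp in calc_delta_asym() above is easiest to see with numbers: an entity at or above NICE_0_LOAD is scaled by rw/w as usual, while a lighter (positive-nice) entity has its weight treated as NICE_0_LOAD, with rw padded accordingly, which keeps the scaled delta from exploding. A stand-alone sketch of the non-group branch only, with integer math standing in for calc_delta_mine():

#include <stdio.h>

#define NICE_0_LOAD     1024UL

/* Arithmetic of the non-group branch of calc_delta_asym() above. */
static unsigned long asym_scale(unsigned long delta, unsigned long w,
                                unsigned long rw)
{
        if (w < NICE_0_LOAD) {
                rw += NICE_0_LOAD - w;   /* treat the entity as nice-0 ... */
                w = NICE_0_LOAD;         /* ... for the division below     */
        }
        return (unsigned long)((unsigned long long)delta * rw / w);
}

int main(void)
{
        unsigned long gran = 10000000UL;   /* 10 ms                */
        unsigned long rw = 1024 + 15;      /* nice 0 plus nice +19 */

        /* Unclamped rw/w would inflate the nice +19 case by ~69x;
         * the clamp keeps it around 2x. */
        printf("nice 0:   %lu ns\n", asym_scale(gran, 1024, rw));
        printf("nice +19: %lu ns\n", asym_scale(gran, 15, rw));
        return 0;
}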
@@ -419,11 +480,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
419 480
420 curr->sum_exec_runtime += delta_exec; 481 curr->sum_exec_runtime += delta_exec;
421 schedstat_add(cfs_rq, exec_clock, delta_exec); 482 schedstat_add(cfs_rq, exec_clock, delta_exec);
422 delta_exec_weighted = delta_exec; 483 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
423 if (unlikely(curr->load.weight != NICE_0_LOAD)) {
424 delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
425 &curr->load);
426 }
427 curr->vruntime += delta_exec_weighted; 484 curr->vruntime += delta_exec_weighted;
428} 485}
429 486
@@ -510,10 +567,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
510 * Scheduling class queueing methods: 567 * Scheduling class queueing methods:
511 */ 568 */
512 569
570#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
571static void
572add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
573{
574 cfs_rq->task_weight += weight;
575}
576#else
577static inline void
578add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
579{
580}
581#endif
582
513static void 583static void
514account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) 584account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
515{ 585{
516 update_load_add(&cfs_rq->load, se->load.weight); 586 update_load_add(&cfs_rq->load, se->load.weight);
587 if (!parent_entity(se))
588 inc_cpu_load(rq_of(cfs_rq), se->load.weight);
589 if (entity_is_task(se))
590 add_cfs_task_weight(cfs_rq, se->load.weight);
517 cfs_rq->nr_running++; 591 cfs_rq->nr_running++;
518 se->on_rq = 1; 592 se->on_rq = 1;
519 list_add(&se->group_node, &cfs_rq->tasks); 593 list_add(&se->group_node, &cfs_rq->tasks);
@@ -523,6 +597,10 @@ static void
523account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) 597account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
524{ 598{
525 update_load_sub(&cfs_rq->load, se->load.weight); 599 update_load_sub(&cfs_rq->load, se->load.weight);
600 if (!parent_entity(se))
601 dec_cpu_load(rq_of(cfs_rq), se->load.weight);
602 if (entity_is_task(se))
603 add_cfs_task_weight(cfs_rq, -se->load.weight);
526 cfs_rq->nr_running--; 604 cfs_rq->nr_running--;
527 se->on_rq = 0; 605 se->on_rq = 0;
528 list_del_init(&se->group_node); 606 list_del_init(&se->group_node);
@@ -609,8 +687,17 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
609 687
610 if (!initial) { 688 if (!initial) {
611 /* sleeps upto a single latency don't count. */ 689 /* sleeps upto a single latency don't count. */
612 if (sched_feat(NEW_FAIR_SLEEPERS)) 690 if (sched_feat(NEW_FAIR_SLEEPERS)) {
613 vruntime -= sysctl_sched_latency; 691 unsigned long thresh = sysctl_sched_latency;
692
693 /*
694 * convert the sleeper threshold into virtual time
695 */
696 if (sched_feat(NORMALIZED_SLEEPER))
697 thresh = calc_delta_fair(thresh, se);
698
699 vruntime -= thresh;
700 }
614 701
615 /* ensure we never gain time by being placed backwards. */ 702 /* ensure we never gain time by being placed backwards. */
616 vruntime = max_vruntime(se->vruntime, vruntime); 703 vruntime = max_vruntime(se->vruntime, vruntime);
@@ -639,21 +726,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
639 __enqueue_entity(cfs_rq, se); 726 __enqueue_entity(cfs_rq, se);
640} 727}
641 728
642static void update_avg(u64 *avg, u64 sample)
643{
644 s64 diff = sample - *avg;
645 *avg += diff >> 3;
646}
647
648static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
649{
650 if (!se->last_wakeup)
651 return;
652
653 update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
654 se->last_wakeup = 0;
655}
656
657static void 729static void
658dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 730dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
659{ 731{
@@ -664,7 +736,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
664 736
665 update_stats_dequeue(cfs_rq, se); 737 update_stats_dequeue(cfs_rq, se);
666 if (sleep) { 738 if (sleep) {
667 update_avg_stats(cfs_rq, se);
668#ifdef CONFIG_SCHEDSTATS 739#ifdef CONFIG_SCHEDSTATS
669 if (entity_is_task(se)) { 740 if (entity_is_task(se)) {
670 struct task_struct *tsk = task_of(se); 741 struct task_struct *tsk = task_of(se);
@@ -726,17 +797,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
726 se->prev_sum_exec_runtime = se->sum_exec_runtime; 797 se->prev_sum_exec_runtime = se->sum_exec_runtime;
727} 798}
728 799
729static int
730wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
731
732static struct sched_entity * 800static struct sched_entity *
733pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) 801pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
734{ 802{
735 if (!cfs_rq->next) 803 struct rq *rq = rq_of(cfs_rq);
736 return se; 804 u64 pair_slice = rq->clock - cfs_rq->pair_start;
737 805
738 if (wakeup_preempt_entity(cfs_rq->next, se) != 0) 806 if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
807 cfs_rq->pair_start = rq->clock;
739 return se; 808 return se;
809 }
740 810
741 return cfs_rq->next; 811 return cfs_rq->next;
742} 812}
@@ -835,7 +905,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
835 hrtick_start(rq, delta, requeue); 905 hrtick_start(rq, delta, requeue);
836 } 906 }
837} 907}
838#else 908#else /* !CONFIG_SCHED_HRTICK */
839static inline void 909static inline void
840hrtick_start_fair(struct rq *rq, struct task_struct *p) 910hrtick_start_fair(struct rq *rq, struct task_struct *p)
841{ 911{
@@ -976,7 +1046,7 @@ static int wake_idle(int cpu, struct task_struct *p)
976 } 1046 }
977 return cpu; 1047 return cpu;
978} 1048}
979#else 1049#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
980static inline int wake_idle(int cpu, struct task_struct *p) 1050static inline int wake_idle(int cpu, struct task_struct *p)
981{ 1051{
982 return cpu; 1052 return cpu;
@@ -987,6 +1057,89 @@ static inline int wake_idle(int cpu, struct task_struct *p)
987 1057
988static const struct sched_class fair_sched_class; 1058static const struct sched_class fair_sched_class;
989 1059
1060#ifdef CONFIG_FAIR_GROUP_SCHED
1061/*
1062 * effective_load() calculates the load change as seen from the root_task_group
1063 *
1064 * Adding load to a group doesn't make a group heavier, but can cause movement
1065 * of group shares between cpus. Assuming the shares were perfectly aligned one
1066 * can calculate the shift in shares.
1067 *
1068 * The problem is that perfectly aligning the shares is rather expensive, hence
1069 * we try to avoid doing that too often - see update_shares(), which ratelimits
1070 * this change.
1071 *
 1072 * We compensate for this by not only taking the current delta into account, but
1073 * also considering the delta between when the shares were last adjusted and
1074 * now.
1075 *
 1076 * We still saw a performance dip; some tracing showed us that between
 1077 * cgroup:/ and cgroup:/foo balancing, the number of affine wakeups increased
 1078 * significantly. Therefore try to bias the error in the direction of failing
 1079 * the affine wakeup.
1080 *
1081 */
1082static long effective_load(struct task_group *tg, int cpu,
1083 long wl, long wg)
1084{
1085 struct sched_entity *se = tg->se[cpu];
1086 long more_w;
1087
1088 if (!tg->parent)
1089 return wl;
1090
1091 /*
1092 * By not taking the decrease of shares on the other cpu into
1093 * account our error leans towards reducing the affine wakeups.
1094 */
1095 if (!wl && sched_feat(ASYM_EFF_LOAD))
1096 return wl;
1097
1098 /*
1099 * Instead of using this increment, also add the difference
1100 * between when the shares were last updated and now.
1101 */
1102 more_w = se->my_q->load.weight - se->my_q->rq_weight;
1103 wl += more_w;
1104 wg += more_w;
1105
1106 for_each_sched_entity(se) {
1107#define D(n) (likely(n) ? (n) : 1)
1108
1109 long S, rw, s, a, b;
1110
1111 S = se->my_q->tg->shares;
1112 s = se->my_q->shares;
1113 rw = se->my_q->rq_weight;
1114
1115 a = S*(rw + wl);
1116 b = S*rw + s*wg;
1117
1118 wl = s*(a-b)/D(b);
1119 /*
1120 * Assume the group is already running and will
1121 * thus already be accounted for in the weight.
1122 *
1123 * That is, moving shares between CPUs, does not
1124 * alter the group weight.
1125 */
1126 wg = 0;
1127#undef D
1128 }
1129
1130 return wl;
1131}
1132
1133#else
1134
1135static inline unsigned long effective_load(struct task_group *tg, int cpu,
1136 unsigned long wl, unsigned long wg)
1137{
1138 return wl;
1139}
1140
1141#endif
1142
990static int 1143static int
991wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, 1144wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
992 struct task_struct *p, int prev_cpu, int this_cpu, int sync, 1145 struct task_struct *p, int prev_cpu, int this_cpu, int sync,
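Each loop iteration in effective_load() above computes wl = s*(a-b)/b with a = S*(rw+wl) and b = S*rw + s*wg, an estimate of how the group's share s on this CPU shifts when wl of weight lands on the CPU and wg lands on the group. A stand-alone run of one such step; the S, s and rw values below are invented purely to show the magnitude of the result.

#include <stdio.h>

/* One iteration of the effective_load() loop above: S = group shares,
 * s = this CPU's share of them, rw = this CPU's runqueue weight,
 * wl/wg = weight added on this CPU / to the group as a whole. */
static long effective_load_step(long S, long s, long rw, long wl, long wg)
{
        long long a = (long long)S * (rw + wl);
        long long b = (long long)S * rw + (long long)s * wg;

        return (long)((long long)s * (a - b) / (b ? b : 1));   /* D(b) guard */
}

int main(void)
{
        /* A group with 1024 shares, 512 of them on this CPU whose cfs_rq
         * weighs 2048, when a nice-0 task (weight 1024) wakes up here. */
        printf("root-visible load change: %ld\n",
               effective_load_step(1024, 512, 2048, 1024, 1024));
        return 0;
}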
@@ -994,8 +1147,10 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
994 unsigned int imbalance) 1147 unsigned int imbalance)
995{ 1148{
996 struct task_struct *curr = this_rq->curr; 1149 struct task_struct *curr = this_rq->curr;
1150 struct task_group *tg;
997 unsigned long tl = this_load; 1151 unsigned long tl = this_load;
998 unsigned long tl_per_task; 1152 unsigned long tl_per_task;
1153 unsigned long weight;
999 int balanced; 1154 int balanced;
1000 1155
1001 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1156 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
@@ -1006,19 +1161,28 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
1006 * effect of the currently running task from the load 1161 * effect of the currently running task from the load
1007 * of the current CPU: 1162 * of the current CPU:
1008 */ 1163 */
1009 if (sync) 1164 if (sync) {
1010 tl -= current->se.load.weight; 1165 tg = task_group(current);
1166 weight = current->se.load.weight;
1167
1168 tl += effective_load(tg, this_cpu, -weight, -weight);
1169 load += effective_load(tg, prev_cpu, 0, -weight);
1170 }
1011 1171
1012 balanced = 100*(tl + p->se.load.weight) <= imbalance*load; 1172 tg = task_group(p);
1173 weight = p->se.load.weight;
1174
1175 balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
1176 imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
1013 1177
1014 /* 1178 /*
1015 * If the currently running task will sleep within 1179 * If the currently running task will sleep within
1016 * a reasonable amount of time then attract this newly 1180 * a reasonable amount of time then attract this newly
1017 * woken task: 1181 * woken task:
1018 */ 1182 */
1019 if (sync && balanced && curr->sched_class == &fair_sched_class) { 1183 if (sync && balanced) {
1020 if (curr->se.avg_overlap < sysctl_sched_migration_cost && 1184 if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
1021 p->se.avg_overlap < sysctl_sched_migration_cost) 1185 p->se.avg_overlap < sysctl_sched_migration_cost)
1022 return 1; 1186 return 1;
1023 } 1187 }
1024 1188
@@ -1111,11 +1275,13 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1111 unsigned long gran = sysctl_sched_wakeup_granularity; 1275 unsigned long gran = sysctl_sched_wakeup_granularity;
1112 1276
1113 /* 1277 /*
1114 * More easily preempt - nice tasks, while not making 1278 * More easily preempt - nice tasks, while not making it harder for
1115 * it harder for + nice tasks. 1279 * + nice tasks.
1116 */ 1280 */
1117 if (unlikely(se->load.weight > NICE_0_LOAD)) 1281 if (sched_feat(ASYM_GRAN))
1118 gran = calc_delta_fair(gran, &se->load); 1282 gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
1283 else
1284 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
1119 1285
1120 return gran; 1286 return gran;
1121} 1287}
@@ -1177,7 +1343,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
1177 return; 1343 return;
1178 } 1344 }
1179 1345
1180 se->last_wakeup = se->sum_exec_runtime;
1181 if (unlikely(se == pse)) 1346 if (unlikely(se == pse))
1182 return; 1347 return;
1183 1348
@@ -1275,23 +1440,18 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
1275 struct task_struct *p = NULL; 1440 struct task_struct *p = NULL;
1276 struct sched_entity *se; 1441 struct sched_entity *se;
1277 1442
1278 if (next == &cfs_rq->tasks) 1443 while (next != &cfs_rq->tasks) {
1279 return NULL;
1280
1281 /* Skip over entities that are not tasks */
1282 do {
1283 se = list_entry(next, struct sched_entity, group_node); 1444 se = list_entry(next, struct sched_entity, group_node);
1284 next = next->next; 1445 next = next->next;
1285 } while (next != &cfs_rq->tasks && !entity_is_task(se));
1286 1446
1287 if (next == &cfs_rq->tasks) 1447 /* Skip over entities that are not tasks */
1288 return NULL; 1448 if (entity_is_task(se)) {
1449 p = task_of(se);
1450 break;
1451 }
1452 }
1289 1453
1290 cfs_rq->balance_iterator = next; 1454 cfs_rq->balance_iterator = next;
1291
1292 if (entity_is_task(se))
1293 p = task_of(se);
1294
1295 return p; 1455 return p;
1296} 1456}
1297 1457
@@ -1309,75 +1469,82 @@ static struct task_struct *load_balance_next_fair(void *arg)
1309 return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); 1469 return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
1310} 1470}
1311 1471
1312#ifdef CONFIG_FAIR_GROUP_SCHED 1472static unsigned long
1313static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) 1473__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1474 unsigned long max_load_move, struct sched_domain *sd,
1475 enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
1476 struct cfs_rq *cfs_rq)
1314{ 1477{
1315 struct sched_entity *curr; 1478 struct rq_iterator cfs_rq_iterator;
1316 struct task_struct *p;
1317
1318 if (!cfs_rq->nr_running || !first_fair(cfs_rq))
1319 return MAX_PRIO;
1320
1321 curr = cfs_rq->curr;
1322 if (!curr)
1323 curr = __pick_next_entity(cfs_rq);
1324 1479
1325 p = task_of(curr); 1480 cfs_rq_iterator.start = load_balance_start_fair;
1481 cfs_rq_iterator.next = load_balance_next_fair;
1482 cfs_rq_iterator.arg = cfs_rq;
1326 1483
1327 return p->prio; 1484 return balance_tasks(this_rq, this_cpu, busiest,
1485 max_load_move, sd, idle, all_pinned,
1486 this_best_prio, &cfs_rq_iterator);
1328} 1487}
1329#endif
1330 1488
1489#ifdef CONFIG_FAIR_GROUP_SCHED
1331static unsigned long 1490static unsigned long
1332load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 1491load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1333 unsigned long max_load_move, 1492 unsigned long max_load_move,
1334 struct sched_domain *sd, enum cpu_idle_type idle, 1493 struct sched_domain *sd, enum cpu_idle_type idle,
1335 int *all_pinned, int *this_best_prio) 1494 int *all_pinned, int *this_best_prio)
1336{ 1495{
1337 struct cfs_rq *busy_cfs_rq;
1338 long rem_load_move = max_load_move; 1496 long rem_load_move = max_load_move;
1339 struct rq_iterator cfs_rq_iterator; 1497 int busiest_cpu = cpu_of(busiest);
1340 1498 struct task_group *tg;
1341 cfs_rq_iterator.start = load_balance_start_fair;
1342 cfs_rq_iterator.next = load_balance_next_fair;
1343 1499
1344 for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { 1500 rcu_read_lock();
1345#ifdef CONFIG_FAIR_GROUP_SCHED 1501 update_h_load(busiest_cpu);
1346 struct cfs_rq *this_cfs_rq;
1347 long imbalance;
1348 unsigned long maxload;
1349 1502
1350 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); 1503 list_for_each_entry(tg, &task_groups, list) {
1504 struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
1505 unsigned long busiest_h_load = busiest_cfs_rq->h_load;
1506 unsigned long busiest_weight = busiest_cfs_rq->load.weight;
1507 u64 rem_load, moved_load;
1351 1508
1352 imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; 1509 /*
1353 /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ 1510 * empty group
1354 if (imbalance <= 0) 1511 */
1512 if (!busiest_cfs_rq->task_weight)
1355 continue; 1513 continue;
1356 1514
1357 /* Don't pull more than imbalance/2 */ 1515 rem_load = (u64)rem_load_move * busiest_weight;
1358 imbalance /= 2; 1516 rem_load = div_u64(rem_load, busiest_h_load + 1);
1359 maxload = min(rem_load_move, imbalance);
1360 1517
1361 *this_best_prio = cfs_rq_best_prio(this_cfs_rq); 1518 moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
1362#else 1519 rem_load, sd, idle, all_pinned, this_best_prio,
1363# define maxload rem_load_move 1520 tg->cfs_rq[busiest_cpu]);
1364#endif 1521
1365 /* 1522 if (!moved_load)
1366 * pass busy_cfs_rq argument into 1523 continue;
1367 * load_balance_[start|next]_fair iterators 1524
1368 */ 1525 moved_load *= busiest_h_load;
1369 cfs_rq_iterator.arg = busy_cfs_rq; 1526 moved_load = div_u64(moved_load, busiest_weight + 1);
1370 rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
1371 maxload, sd, idle, all_pinned,
1372 this_best_prio,
1373 &cfs_rq_iterator);
1374 1527
1375 if (rem_load_move <= 0) 1528 rem_load_move -= moved_load;
1529 if (rem_load_move < 0)
1376 break; 1530 break;
1377 } 1531 }
1532 rcu_read_unlock();
1378 1533
1379 return max_load_move - rem_load_move; 1534 return max_load_move - rem_load_move;
1380} 1535}
1536#else
1537static unsigned long
1538load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1539 unsigned long max_load_move,
1540 struct sched_domain *sd, enum cpu_idle_type idle,
1541 int *all_pinned, int *this_best_prio)
1542{
1543 return __load_balance_fair(this_rq, this_cpu, busiest,
1544 max_load_move, sd, idle, all_pinned,
1545 this_best_prio, &busiest->cfs);
1546}
1547#endif
1381 1548
1382static int 1549static int
1383move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 1550move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
@@ -1402,7 +1569,7 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1402 1569
1403 return 0; 1570 return 0;
1404} 1571}
1405#endif 1572#endif /* CONFIG_SMP */
1406 1573
1407/* 1574/*
1408 * scheduler tick hitting a task of our scheduling class: 1575 * scheduler tick hitting a task of our scheduling class:
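
Note on the group-scheduling hunk above: load_balance_fair() now scales the balancer's remaining budget into each group's local weight via h_load before calling __load_balance_fair(), and scales the moved amount back afterwards. The userspace-only sketch below shows just that arithmetic; the names weight/h_load and the "+1" guards come from the hunk, while the toy_* helpers and main() scaffolding are made up for illustration (div_u64() is approximated with plain 64-bit division).

/* Userspace sketch of the h_load scaling used by load_balance_fair() above. */
#include <stdint.h>
#include <stdio.h>

/* Convert a global load budget into a group's local weight and back. */
static uint64_t to_local(uint64_t rem_load_move, uint64_t weight, uint64_t h_load)
{
	return (rem_load_move * weight) / (h_load + 1);	/* rem_load in the hunk */
}

static uint64_t to_global(uint64_t moved_local, uint64_t weight, uint64_t h_load)
{
	return (moved_local * h_load) / (weight + 1);	/* moved_load in the hunk */
}

int main(void)
{
	uint64_t budget = 2048;	/* max_load_move, in global load units */
	uint64_t weight = 1024;	/* busiest_cfs_rq->load.weight */
	uint64_t h_load = 512;	/* busiest_cfs_rq->h_load (hierarchical load) */

	uint64_t local = to_local(budget, weight, h_load);
	printf("local budget handed to __load_balance_fair: %llu\n",
	       (unsigned long long)local);
	printf("global load accounted after moving all of it: %llu\n",
	       (unsigned long long)to_global(local, weight, h_load));
	return 0;
}
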
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 1c7283cb9581..862b06bd560a 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -1,4 +1,5 @@
1SCHED_FEAT(NEW_FAIR_SLEEPERS, 1) 1SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
2SCHED_FEAT(NORMALIZED_SLEEPER, 1)
2SCHED_FEAT(WAKEUP_PREEMPT, 1) 3SCHED_FEAT(WAKEUP_PREEMPT, 1)
3SCHED_FEAT(START_DEBIT, 1) 4SCHED_FEAT(START_DEBIT, 1)
4SCHED_FEAT(AFFINE_WAKEUPS, 1) 5SCHED_FEAT(AFFINE_WAKEUPS, 1)
@@ -6,5 +7,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, 1)
6SCHED_FEAT(SYNC_WAKEUPS, 1) 7SCHED_FEAT(SYNC_WAKEUPS, 1)
7SCHED_FEAT(HRTICK, 1) 8SCHED_FEAT(HRTICK, 1)
8SCHED_FEAT(DOUBLE_TICK, 0) 9SCHED_FEAT(DOUBLE_TICK, 0)
9SCHED_FEAT(NORMALIZED_SLEEPER, 1) 10SCHED_FEAT(ASYM_GRAN, 1)
10SCHED_FEAT(DEADLINE, 1) 11SCHED_FEAT(LB_BIAS, 0)
12SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0f3c19197fa4..47ceac9e8552 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
12 12
13static inline void rt_set_overload(struct rq *rq) 13static inline void rt_set_overload(struct rq *rq)
14{ 14{
15 if (!rq->online)
16 return;
17
15 cpu_set(rq->cpu, rq->rd->rto_mask); 18 cpu_set(rq->cpu, rq->rd->rto_mask);
16 /* 19 /*
17 * Make sure the mask is visible before we set 20 * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
26 29
27static inline void rt_clear_overload(struct rq *rq) 30static inline void rt_clear_overload(struct rq *rq)
28{ 31{
32 if (!rq->online)
33 return;
34
29 /* the order here really doesn't matter */ 35 /* the order here really doesn't matter */
30 atomic_dec(&rq->rd->rto_count); 36 atomic_dec(&rq->rd->rto_count);
31 cpu_clear(rq->cpu, rq->rd->rto_mask); 37 cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
155 return &rt_rq->tg->rt_bandwidth; 161 return &rt_rq->tg->rt_bandwidth;
156} 162}
157 163
158#else 164#else /* !CONFIG_RT_GROUP_SCHED */
159 165
160static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 166static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
161{ 167{
@@ -220,49 +226,10 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
220 return &def_rt_bandwidth; 226 return &def_rt_bandwidth;
221} 227}
222 228
223#endif 229#endif /* CONFIG_RT_GROUP_SCHED */
224
225static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
226{
227 int i, idle = 1;
228 cpumask_t span;
229
230 if (rt_b->rt_runtime == RUNTIME_INF)
231 return 1;
232
233 span = sched_rt_period_mask();
234 for_each_cpu_mask(i, span) {
235 int enqueue = 0;
236 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
237 struct rq *rq = rq_of_rt_rq(rt_rq);
238
239 spin_lock(&rq->lock);
240 if (rt_rq->rt_time) {
241 u64 runtime;
242
243 spin_lock(&rt_rq->rt_runtime_lock);
244 runtime = rt_rq->rt_runtime;
245 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
246 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
247 rt_rq->rt_throttled = 0;
248 enqueue = 1;
249 }
250 if (rt_rq->rt_time || rt_rq->rt_nr_running)
251 idle = 0;
252 spin_unlock(&rt_rq->rt_runtime_lock);
253 } else if (rt_rq->rt_nr_running)
254 idle = 0;
255
256 if (enqueue)
257 sched_rt_rq_enqueue(rt_rq);
258 spin_unlock(&rq->lock);
259 }
260
261 return idle;
262}
263 230
264#ifdef CONFIG_SMP 231#ifdef CONFIG_SMP
265static int balance_runtime(struct rt_rq *rt_rq) 232static int do_balance_runtime(struct rt_rq *rt_rq)
266{ 233{
267 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 234 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
268 struct root_domain *rd = cpu_rq(smp_processor_id())->rd; 235 struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -281,6 +248,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
281 continue; 248 continue;
282 249
283 spin_lock(&iter->rt_runtime_lock); 250 spin_lock(&iter->rt_runtime_lock);
251 if (iter->rt_runtime == RUNTIME_INF)
252 goto next;
253
284 diff = iter->rt_runtime - iter->rt_time; 254 diff = iter->rt_runtime - iter->rt_time;
285 if (diff > 0) { 255 if (diff > 0) {
286 do_div(diff, weight); 256 do_div(diff, weight);
@@ -294,13 +264,163 @@ static int balance_runtime(struct rt_rq *rt_rq)
294 break; 264 break;
295 } 265 }
296 } 266 }
267next:
297 spin_unlock(&iter->rt_runtime_lock); 268 spin_unlock(&iter->rt_runtime_lock);
298 } 269 }
299 spin_unlock(&rt_b->rt_runtime_lock); 270 spin_unlock(&rt_b->rt_runtime_lock);
300 271
301 return more; 272 return more;
302} 273}
303#endif 274
275static void __disable_runtime(struct rq *rq)
276{
277 struct root_domain *rd = rq->rd;
278 struct rt_rq *rt_rq;
279
280 if (unlikely(!scheduler_running))
281 return;
282
283 for_each_leaf_rt_rq(rt_rq, rq) {
284 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
285 s64 want;
286 int i;
287
288 spin_lock(&rt_b->rt_runtime_lock);
289 spin_lock(&rt_rq->rt_runtime_lock);
290 if (rt_rq->rt_runtime == RUNTIME_INF ||
291 rt_rq->rt_runtime == rt_b->rt_runtime)
292 goto balanced;
293 spin_unlock(&rt_rq->rt_runtime_lock);
294
295 want = rt_b->rt_runtime - rt_rq->rt_runtime;
296
297 for_each_cpu_mask(i, rd->span) {
298 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
299 s64 diff;
300
301 if (iter == rt_rq)
302 continue;
303
304 spin_lock(&iter->rt_runtime_lock);
305 if (want > 0) {
306 diff = min_t(s64, iter->rt_runtime, want);
307 iter->rt_runtime -= diff;
308 want -= diff;
309 } else {
310 iter->rt_runtime -= want;
311 want -= want;
312 }
313 spin_unlock(&iter->rt_runtime_lock);
314
315 if (!want)
316 break;
317 }
318
319 spin_lock(&rt_rq->rt_runtime_lock);
320 BUG_ON(want);
321balanced:
322 rt_rq->rt_runtime = RUNTIME_INF;
323 spin_unlock(&rt_rq->rt_runtime_lock);
324 spin_unlock(&rt_b->rt_runtime_lock);
325 }
326}
327
328static void disable_runtime(struct rq *rq)
329{
330 unsigned long flags;
331
332 spin_lock_irqsave(&rq->lock, flags);
333 __disable_runtime(rq);
334 spin_unlock_irqrestore(&rq->lock, flags);
335}
336
337static void __enable_runtime(struct rq *rq)
338{
339 struct rt_rq *rt_rq;
340
341 if (unlikely(!scheduler_running))
342 return;
343
344 for_each_leaf_rt_rq(rt_rq, rq) {
345 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
346
347 spin_lock(&rt_b->rt_runtime_lock);
348 spin_lock(&rt_rq->rt_runtime_lock);
349 rt_rq->rt_runtime = rt_b->rt_runtime;
350 rt_rq->rt_time = 0;
351 spin_unlock(&rt_rq->rt_runtime_lock);
352 spin_unlock(&rt_b->rt_runtime_lock);
353 }
354}
355
356static void enable_runtime(struct rq *rq)
357{
358 unsigned long flags;
359
360 spin_lock_irqsave(&rq->lock, flags);
361 __enable_runtime(rq);
362 spin_unlock_irqrestore(&rq->lock, flags);
363}
364
365static int balance_runtime(struct rt_rq *rt_rq)
366{
367 int more = 0;
368
369 if (rt_rq->rt_time > rt_rq->rt_runtime) {
370 spin_unlock(&rt_rq->rt_runtime_lock);
371 more = do_balance_runtime(rt_rq);
372 spin_lock(&rt_rq->rt_runtime_lock);
373 }
374
375 return more;
376}
377#else /* !CONFIG_SMP */
378static inline int balance_runtime(struct rt_rq *rt_rq)
379{
380 return 0;
381}
382#endif /* CONFIG_SMP */
383
384static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
385{
386 int i, idle = 1;
387 cpumask_t span;
388
389 if (rt_b->rt_runtime == RUNTIME_INF)
390 return 1;
391
392 span = sched_rt_period_mask();
393 for_each_cpu_mask(i, span) {
394 int enqueue = 0;
395 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
396 struct rq *rq = rq_of_rt_rq(rt_rq);
397
398 spin_lock(&rq->lock);
399 if (rt_rq->rt_time) {
400 u64 runtime;
401
402 spin_lock(&rt_rq->rt_runtime_lock);
403 if (rt_rq->rt_throttled)
404 balance_runtime(rt_rq);
405 runtime = rt_rq->rt_runtime;
406 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
407 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
408 rt_rq->rt_throttled = 0;
409 enqueue = 1;
410 }
411 if (rt_rq->rt_time || rt_rq->rt_nr_running)
412 idle = 0;
413 spin_unlock(&rt_rq->rt_runtime_lock);
414 } else if (rt_rq->rt_nr_running)
415 idle = 0;
416
417 if (enqueue)
418 sched_rt_rq_enqueue(rt_rq);
419 spin_unlock(&rq->lock);
420 }
421
422 return idle;
423}
304 424
305static inline int rt_se_prio(struct sched_rt_entity *rt_se) 425static inline int rt_se_prio(struct sched_rt_entity *rt_se)
306{ 426{
@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
327 if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) 447 if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
328 return 0; 448 return 0;
329 449
330#ifdef CONFIG_SMP 450 balance_runtime(rt_rq);
331 if (rt_rq->rt_time > runtime) { 451 runtime = sched_rt_runtime(rt_rq);
332 int more; 452 if (runtime == RUNTIME_INF)
333 453 return 0;
334 spin_unlock(&rt_rq->rt_runtime_lock);
335 more = balance_runtime(rt_rq);
336 spin_lock(&rt_rq->rt_runtime_lock);
337
338 if (more)
339 runtime = sched_rt_runtime(rt_rq);
340 }
341#endif
342 454
343 if (rt_rq->rt_time > runtime) { 455 if (rt_rq->rt_time > runtime) {
344 rt_rq->rt_throttled = 1; 456 rt_rq->rt_throttled = 1;
@@ -392,12 +504,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
392 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 504 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
393 rt_rq->rt_nr_running++; 505 rt_rq->rt_nr_running++;
394#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 506#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
395 if (rt_se_prio(rt_se) < rt_rq->highest_prio) 507 if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
508 struct rq *rq = rq_of_rt_rq(rt_rq);
509
396 rt_rq->highest_prio = rt_se_prio(rt_se); 510 rt_rq->highest_prio = rt_se_prio(rt_se);
511#ifdef CONFIG_SMP
512 if (rq->online)
513 cpupri_set(&rq->rd->cpupri, rq->cpu,
514 rt_se_prio(rt_se));
515#endif
516 }
397#endif 517#endif
398#ifdef CONFIG_SMP 518#ifdef CONFIG_SMP
399 if (rt_se->nr_cpus_allowed > 1) { 519 if (rt_se->nr_cpus_allowed > 1) {
400 struct rq *rq = rq_of_rt_rq(rt_rq); 520 struct rq *rq = rq_of_rt_rq(rt_rq);
521
401 rq->rt.rt_nr_migratory++; 522 rq->rt.rt_nr_migratory++;
402 } 523 }
403 524
@@ -417,6 +538,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
417static inline 538static inline
418void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 539void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
419{ 540{
541#ifdef CONFIG_SMP
542 int highest_prio = rt_rq->highest_prio;
543#endif
544
420 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 545 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
421 WARN_ON(!rt_rq->rt_nr_running); 546 WARN_ON(!rt_rq->rt_nr_running);
422 rt_rq->rt_nr_running--; 547 rt_rq->rt_nr_running--;
@@ -440,6 +565,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
440 rq->rt.rt_nr_migratory--; 565 rq->rt.rt_nr_migratory--;
441 } 566 }
442 567
568 if (rt_rq->highest_prio != highest_prio) {
569 struct rq *rq = rq_of_rt_rq(rt_rq);
570
571 if (rq->online)
572 cpupri_set(&rq->rd->cpupri, rq->cpu,
573 rt_rq->highest_prio);
574 }
575
443 update_rt_migration(rq_of_rt_rq(rt_rq)); 576 update_rt_migration(rq_of_rt_rq(rt_rq));
444#endif /* CONFIG_SMP */ 577#endif /* CONFIG_SMP */
445#ifdef CONFIG_RT_GROUP_SCHED 578#ifdef CONFIG_RT_GROUP_SCHED
@@ -455,6 +588,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
455 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 588 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
456 struct rt_prio_array *array = &rt_rq->active; 589 struct rt_prio_array *array = &rt_rq->active;
457 struct rt_rq *group_rq = group_rt_rq(rt_se); 590 struct rt_rq *group_rq = group_rt_rq(rt_se);
591 struct list_head *queue = array->queue + rt_se_prio(rt_se);
458 592
459 /* 593 /*
460 * Don't enqueue the group if its throttled, or when empty. 594 * Don't enqueue the group if its throttled, or when empty.
@@ -465,7 +599,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
465 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 599 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
466 return; 600 return;
467 601
468 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); 602 if (rt_se->nr_cpus_allowed == 1)
603 list_add(&rt_se->run_list, queue);
604 else
605 list_add_tail(&rt_se->run_list, queue);
606
469 __set_bit(rt_se_prio(rt_se), array->bitmap); 607 __set_bit(rt_se_prio(rt_se), array->bitmap);
470 608
471 inc_rt_tasks(rt_se, rt_rq); 609 inc_rt_tasks(rt_se, rt_rq);
@@ -532,6 +670,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
532 rt_se->timeout = 0; 670 rt_se->timeout = 0;
533 671
534 enqueue_rt_entity(rt_se); 672 enqueue_rt_entity(rt_se);
673
674 inc_cpu_load(rq, p->se.load.weight);
535} 675}
536 676
537static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 677static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -540,6 +680,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
540 680
541 update_curr_rt(rq); 681 update_curr_rt(rq);
542 dequeue_rt_entity(rt_se); 682 dequeue_rt_entity(rt_se);
683
684 dec_cpu_load(rq, p->se.load.weight);
543} 685}
544 686
545/* 687/*
@@ -550,10 +692,12 @@ static
550void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 692void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
551{ 693{
552 struct rt_prio_array *array = &rt_rq->active; 694 struct rt_prio_array *array = &rt_rq->active;
553 struct list_head *queue = array->queue + rt_se_prio(rt_se);
554 695
555 if (on_rt_rq(rt_se)) 696 if (on_rt_rq(rt_se)) {
556 list_move_tail(&rt_se->run_list, queue); 697 list_del_init(&rt_se->run_list);
698 list_add_tail(&rt_se->run_list,
699 array->queue + rt_se_prio(rt_se));
700 }
557} 701}
558 702
559static void requeue_task_rt(struct rq *rq, struct task_struct *p) 703static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -616,8 +760,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
616 */ 760 */
617static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) 761static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
618{ 762{
619 if (p->prio < rq->curr->prio) 763 if (p->prio < rq->curr->prio) {
620 resched_task(rq->curr); 764 resched_task(rq->curr);
765 return;
766 }
767
768#ifdef CONFIG_SMP
769 /*
770 * If:
771 *
772 * - the newly woken task is of equal priority to the current task
773 * - the newly woken task is non-migratable while current is migratable
774 * - current will be preempted on the next reschedule
775 *
776 * we should check to see if current can readily move to a different
777 * cpu. If so, we will reschedule to allow the push logic to try
778 * to move current somewhere else, making room for our non-migratable
779 * task.
780 */
781 if((p->prio == rq->curr->prio)
782 && p->rt.nr_cpus_allowed == 1
783 && rq->curr->rt.nr_cpus_allowed != 1) {
784 cpumask_t mask;
785
786 if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
787 /*
788 * There appears to be other cpus that can accept
789 * current, so lets reschedule to try and push it away
790 */
791 resched_task(rq->curr);
792 }
793#endif
621} 794}
622 795
623static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 796static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -720,73 +893,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
720 893
721static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); 894static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
722 895
723static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
724{
725 int lowest_prio = -1;
726 int lowest_cpu = -1;
727 int count = 0;
728 int cpu;
729
730 cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
731
732 /*
733 * Scan each rq for the lowest prio.
734 */
735 for_each_cpu_mask(cpu, *lowest_mask) {
736 struct rq *rq = cpu_rq(cpu);
737
738 /* We look for lowest RT prio or non-rt CPU */
739 if (rq->rt.highest_prio >= MAX_RT_PRIO) {
740 /*
741 * if we already found a low RT queue
742 * and now we found this non-rt queue
743 * clear the mask and set our bit.
744 * Otherwise just return the queue as is
745 * and the count==1 will cause the algorithm
746 * to use the first bit found.
747 */
748 if (lowest_cpu != -1) {
749 cpus_clear(*lowest_mask);
750 cpu_set(rq->cpu, *lowest_mask);
751 }
752 return 1;
753 }
754
755 /* no locking for now */
756 if ((rq->rt.highest_prio > task->prio)
757 && (rq->rt.highest_prio >= lowest_prio)) {
758 if (rq->rt.highest_prio > lowest_prio) {
759 /* new low - clear old data */
760 lowest_prio = rq->rt.highest_prio;
761 lowest_cpu = cpu;
762 count = 0;
763 }
764 count++;
765 } else
766 cpu_clear(cpu, *lowest_mask);
767 }
768
769 /*
770 * Clear out all the set bits that represent
771 * runqueues that were of higher prio than
772 * the lowest_prio.
773 */
774 if (lowest_cpu > 0) {
775 /*
776 * Perhaps we could add another cpumask op to
777 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
778 * Then that could be optimized to use memset and such.
779 */
780 for_each_cpu_mask(cpu, *lowest_mask) {
781 if (cpu >= lowest_cpu)
782 break;
783 cpu_clear(cpu, *lowest_mask);
784 }
785 }
786
787 return count;
788}
789
790static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) 896static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
791{ 897{
792 int first; 898 int first;
@@ -808,17 +914,12 @@ static int find_lowest_rq(struct task_struct *task)
808 cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); 914 cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
809 int this_cpu = smp_processor_id(); 915 int this_cpu = smp_processor_id();
810 int cpu = task_cpu(task); 916 int cpu = task_cpu(task);
811 int count = find_lowest_cpus(task, lowest_mask);
812 917
813 if (!count) 918 if (task->rt.nr_cpus_allowed == 1)
814 return -1; /* No targets found */ 919 return -1; /* No other targets possible */
815 920
816 /* 921 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
817 * There is no sense in performing an optimal search if only one 922 return -1; /* No targets found */
818 * target is found.
819 */
820 if (count == 1)
821 return first_cpu(*lowest_mask);
822 923
823 /* 924 /*
824 * At this point we have built a mask of cpus representing the 925 * At this point we have built a mask of cpus representing the
@@ -1163,17 +1264,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1163} 1264}
1164 1265
1165/* Assumes rq->lock is held */ 1266/* Assumes rq->lock is held */
1166static void join_domain_rt(struct rq *rq) 1267static void rq_online_rt(struct rq *rq)
1167{ 1268{
1168 if (rq->rt.overloaded) 1269 if (rq->rt.overloaded)
1169 rt_set_overload(rq); 1270 rt_set_overload(rq);
1271
1272 __enable_runtime(rq);
1273
1274 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
1170} 1275}
1171 1276
1172/* Assumes rq->lock is held */ 1277/* Assumes rq->lock is held */
1173static void leave_domain_rt(struct rq *rq) 1278static void rq_offline_rt(struct rq *rq)
1174{ 1279{
1175 if (rq->rt.overloaded) 1280 if (rq->rt.overloaded)
1176 rt_clear_overload(rq); 1281 rt_clear_overload(rq);
1282
1283 __disable_runtime(rq);
1284
1285 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
1177} 1286}
1178 1287
1179/* 1288/*
@@ -1336,8 +1445,8 @@ static const struct sched_class rt_sched_class = {
1336 .load_balance = load_balance_rt, 1445 .load_balance = load_balance_rt,
1337 .move_one_task = move_one_task_rt, 1446 .move_one_task = move_one_task_rt,
1338 .set_cpus_allowed = set_cpus_allowed_rt, 1447 .set_cpus_allowed = set_cpus_allowed_rt,
1339 .join_domain = join_domain_rt, 1448 .rq_online = rq_online_rt,
1340 .leave_domain = leave_domain_rt, 1449 .rq_offline = rq_offline_rt,
1341 .pre_schedule = pre_schedule_rt, 1450 .pre_schedule = pre_schedule_rt,
1342 .post_schedule = post_schedule_rt, 1451 .post_schedule = post_schedule_rt,
1343 .task_wake_up = task_wake_up_rt, 1452 .task_wake_up = task_wake_up_rt,
@@ -1350,3 +1459,17 @@ static const struct sched_class rt_sched_class = {
1350 .prio_changed = prio_changed_rt, 1459 .prio_changed = prio_changed_rt,
1351 .switched_to = switched_to_rt, 1460 .switched_to = switched_to_rt,
1352}; 1461};
1462
1463#ifdef CONFIG_SCHED_DEBUG
1464extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
1465
1466static void print_rt_stats(struct seq_file *m, int cpu)
1467{
1468 struct rt_rq *rt_rq;
1469
1470 rcu_read_lock();
1471 for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
1472 print_rt_rq(m, cpu, rt_rq);
1473 rcu_read_unlock();
1474}
1475#endif /* CONFIG_SCHED_DEBUG */
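
Note on the sched_rt.c hunks above: do_balance_runtime() lets a throttled rt_rq borrow spare runtime from its peers, splitting each peer's unused budget by the number of CPUs in the domain and never exceeding the period. The toy program below sketches only that proportional transfer; the rt_rq/rt_bandwidth locking, RUNTIME_INF handling and the __disable_runtime()/__enable_runtime() paths are omitted, and all toy_* names are illustrative.

/* Userspace sketch of the proportional borrowing in do_balance_runtime(). */
#include <stdint.h>
#include <stdio.h>

struct toy_rt_rq {
	int64_t rt_runtime;	/* budget this CPU may spend per period */
	int64_t rt_time;	/* budget already spent this period */
};

/* Borrow spare runtime for *dst from its peers, never exceeding rt_period. */
static void toy_balance_runtime(struct toy_rt_rq *dst, struct toy_rt_rq *peers,
				int nr_peers, int64_t rt_period)
{
	int weight = nr_peers + 1;	/* CPUs in the root domain span */

	for (int i = 0; i < nr_peers; i++) {
		int64_t diff = peers[i].rt_runtime - peers[i].rt_time;

		if (diff <= 0)
			continue;
		diff /= weight;				/* take only a fair share */
		if (dst->rt_runtime + diff > rt_period)
			diff = rt_period - dst->rt_runtime;
		peers[i].rt_runtime -= diff;
		dst->rt_runtime += diff;
		if (dst->rt_runtime == rt_period)
			break;				/* cannot use more than that */
	}
}

int main(void)
{
	struct toy_rt_rq dst = { .rt_runtime = 950000, .rt_time = 950000 };
	struct toy_rt_rq peers[2] = {
		{ .rt_runtime = 950000, .rt_time = 100000 },
		{ .rt_runtime = 950000, .rt_time = 900000 },
	};

	toy_balance_runtime(&dst, peers, 2, 1000000);
	printf("dst runtime after borrowing: %lld\n", (long long)dst.rt_runtime);
	return 0;
}
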
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 80179ef7450e..8385d43987e2 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -118,6 +118,13 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
118 if (rq) 118 if (rq)
119 rq->rq_sched_info.cpu_time += delta; 119 rq->rq_sched_info.cpu_time += delta;
120} 120}
121
122static inline void
123rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
124{
125 if (rq)
126 rq->rq_sched_info.run_delay += delta;
127}
121# define schedstat_inc(rq, field) do { (rq)->field++; } while (0) 128# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
122# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) 129# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
123# define schedstat_set(var, val) do { var = (val); } while (0) 130# define schedstat_set(var, val) do { var = (val); } while (0)
@@ -126,6 +133,9 @@ static inline void
126rq_sched_info_arrive(struct rq *rq, unsigned long long delta) 133rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
127{} 134{}
128static inline void 135static inline void
136rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
137{}
138static inline void
129rq_sched_info_depart(struct rq *rq, unsigned long long delta) 139rq_sched_info_depart(struct rq *rq, unsigned long long delta)
130{} 140{}
131# define schedstat_inc(rq, field) do { } while (0) 141# define schedstat_inc(rq, field) do { } while (0)
@@ -134,6 +144,11 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
134#endif 144#endif
135 145
136#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 146#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
147static inline void sched_info_reset_dequeued(struct task_struct *t)
148{
149 t->sched_info.last_queued = 0;
150}
151
137/* 152/*
138 * Called when a process is dequeued from the active array and given 153 * Called when a process is dequeued from the active array and given
139 * the cpu. We should note that with the exception of interactive 154 * the cpu. We should note that with the exception of interactive
@@ -143,15 +158,22 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
143 * active queue, thus delaying tasks in the expired queue from running; 158 * active queue, thus delaying tasks in the expired queue from running;
144 * see scheduler_tick()). 159 * see scheduler_tick()).
145 * 160 *
146 * This function is only called from sched_info_arrive(), rather than 161 * Though we are interested in knowing how long it was from the *first* time a
147 * dequeue_task(). Even though a task may be queued and dequeued multiple 162 * task was queued to the time that it finally hit a cpu, we call this routine
148 * times as it is shuffled about, we're really interested in knowing how 163 * from dequeue_task() to account for possible rq->clock skew across cpus. The
149 * long it was from the *first* time it was queued to the time that it 164 * delta taken on each cpu would annul the skew.
150 * finally hit a cpu.
151 */ 165 */
152static inline void sched_info_dequeued(struct task_struct *t) 166static inline void sched_info_dequeued(struct task_struct *t)
153{ 167{
154 t->sched_info.last_queued = 0; 168 unsigned long long now = task_rq(t)->clock, delta = 0;
169
170 if (unlikely(sched_info_on()))
171 if (t->sched_info.last_queued)
172 delta = now - t->sched_info.last_queued;
173 sched_info_reset_dequeued(t);
174 t->sched_info.run_delay += delta;
175
176 rq_sched_info_dequeued(task_rq(t), delta);
155} 177}
156 178
157/* 179/*
@@ -165,7 +187,7 @@ static void sched_info_arrive(struct task_struct *t)
165 187
166 if (t->sched_info.last_queued) 188 if (t->sched_info.last_queued)
167 delta = now - t->sched_info.last_queued; 189 delta = now - t->sched_info.last_queued;
168 sched_info_dequeued(t); 190 sched_info_reset_dequeued(t);
169 t->sched_info.run_delay += delta; 191 t->sched_info.run_delay += delta;
170 t->sched_info.last_arrival = now; 192 t->sched_info.last_arrival = now;
171 t->sched_info.pcount++; 193 t->sched_info.pcount++;
@@ -242,7 +264,9 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
242 __sched_info_switch(prev, next); 264 __sched_info_switch(prev, next);
243} 265}
244#else 266#else
245#define sched_info_queued(t) do { } while (0) 267#define sched_info_queued(t) do { } while (0)
246#define sched_info_switch(t, next) do { } while (0) 268#define sched_info_reset_dequeued(t) do { } while (0)
269#define sched_info_dequeued(t) do { } while (0)
270#define sched_info_switch(t, next) do { } while (0)
247#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ 271#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
248 272
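
Note on the sched_stats.h hunk above: sched_info_dequeued() now charges queue-wait time on the runqueue the task is leaving, so per-CPU clock skew cancels out. A hedged sketch of just that bookkeeping follows; the per-rq clock is modelled as a plain counter passed by the caller and the toy_* names are made up.

/* Userspace sketch of the run_delay bookkeeping added above. */
#include <stdio.h>

struct toy_sched_info {
	unsigned long long last_queued;	/* rq clock when the task was queued */
	unsigned long long run_delay;	/* total time spent waiting on a runqueue */
};

static void toy_sched_info_queued(struct toy_sched_info *si, unsigned long long now)
{
	if (!si->last_queued)
		si->last_queued = now;
}

/* Called when the task leaves the runqueue, with that runqueue's clock. */
static void toy_sched_info_dequeued(struct toy_sched_info *si, unsigned long long now)
{
	unsigned long long delta = 0;

	if (si->last_queued)
		delta = now - si->last_queued;
	si->last_queued = 0;		/* sched_info_reset_dequeued() */
	si->run_delay += delta;		/* also fed to rq_sched_info_dequeued() */
}

int main(void)
{
	struct toy_sched_info si = { 0, 0 };

	toy_sched_info_queued(&si, 100);
	toy_sched_info_dequeued(&si, 160);	/* waited 60 "ticks" on this rq */
	printf("run_delay = %llu\n", si.run_delay);
	return 0;
}
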
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..fe8cdc80ff02 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -266,6 +266,14 @@ static struct ctl_table kern_table[] = {
266 }, 266 },
267 { 267 {
268 .ctl_name = CTL_UNNUMBERED, 268 .ctl_name = CTL_UNNUMBERED,
269 .procname = "sched_shares_ratelimit",
270 .data = &sysctl_sched_shares_ratelimit,
271 .maxlen = sizeof(unsigned int),
272 .mode = 0644,
273 .proc_handler = &proc_dointvec,
274 },
275 {
276 .ctl_name = CTL_UNNUMBERED,
269 .procname = "sched_child_runs_first", 277 .procname = "sched_child_runs_first",
270 .data = &sysctl_sched_child_runs_first, 278 .data = &sysctl_sched_child_runs_first,
271 .maxlen = sizeof(unsigned int), 279 .maxlen = sizeof(unsigned int),
diff --git a/mm/slub.c b/mm/slub.c
index 1a427c0ae83b..315c392253c7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1628,9 +1628,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1628 void **object; 1628 void **object;
1629 struct kmem_cache_cpu *c; 1629 struct kmem_cache_cpu *c;
1630 unsigned long flags; 1630 unsigned long flags;
1631 unsigned int objsize;
1631 1632
1632 local_irq_save(flags); 1633 local_irq_save(flags);
1633 c = get_cpu_slab(s, smp_processor_id()); 1634 c = get_cpu_slab(s, smp_processor_id());
1635 objsize = c->objsize;
1634 if (unlikely(!c->freelist || !node_match(c, node))) 1636 if (unlikely(!c->freelist || !node_match(c, node)))
1635 1637
1636 object = __slab_alloc(s, gfpflags, node, addr, c); 1638 object = __slab_alloc(s, gfpflags, node, addr, c);
@@ -1643,7 +1645,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1643 local_irq_restore(flags); 1645 local_irq_restore(flags);
1644 1646
1645 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1647 if (unlikely((gfpflags & __GFP_ZERO) && object))
1646 memset(object, 0, c->objsize); 1648 memset(object, 0, objsize);
1647 1649
1648 return object; 1650 return object;
1649} 1651}
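
Note on the slub hunk above: c->objsize is snapshotted while interrupts are still disabled, so the later memset() no longer dereferences a per-CPU structure the task may no longer own. The sketch below shows the general pattern only; it is a userspace stand-in with made-up toy_* names, not real per-CPU data.

/* Pattern: copy the protected field into a local, use only the local later. */
#include <string.h>
#include <stdio.h>

struct toy_cpu_cache {
	unsigned int objsize;
	char slab[256];
};

static void *toy_alloc_zeroed(struct toy_cpu_cache *c)
{
	unsigned int objsize;
	void *object;

	/* --- protected region (local_irq_save() in the kernel) --- */
	objsize = c->objsize;		/* snapshot before leaving the region */
	object = c->slab;
	/* --- protection dropped (local_irq_restore()) ---
	 * From here on, 'c' may belong to a different CPU; only the local
	 * 'objsize' snapshot is safe to use. */

	memset(object, 0, objsize);
	return object;
}

int main(void)
{
	struct toy_cpu_cache c = { .objsize = 64 };

	printf("zeroed %u bytes at %p\n", c.objsize, toy_alloc_zeroed(&c));
	return 0;
}
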
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4b02d14e7ab9..e1600ad8fb0e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1359,17 +1359,17 @@ static int check_leaf(struct trie *t, struct leaf *l,
1359 t->stats.semantic_match_miss++; 1359 t->stats.semantic_match_miss++;
1360#endif 1360#endif
1361 if (err <= 0) 1361 if (err <= 0)
1362 return plen; 1362 return err;
1363 } 1363 }
1364 1364
1365 return -1; 1365 return 1;
1366} 1366}
1367 1367
1368static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, 1368static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1369 struct fib_result *res) 1369 struct fib_result *res)
1370{ 1370{
1371 struct trie *t = (struct trie *) tb->tb_data; 1371 struct trie *t = (struct trie *) tb->tb_data;
1372 int plen, ret = 0; 1372 int ret;
1373 struct node *n; 1373 struct node *n;
1374 struct tnode *pn; 1374 struct tnode *pn;
1375 int pos, bits; 1375 int pos, bits;
@@ -1393,10 +1393,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1393 1393
1394 /* Just a leaf? */ 1394 /* Just a leaf? */
1395 if (IS_LEAF(n)) { 1395 if (IS_LEAF(n)) {
1396 plen = check_leaf(t, (struct leaf *)n, key, flp, res); 1396 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1397 if (plen < 0)
1398 goto failed;
1399 ret = 0;
1400 goto found; 1397 goto found;
1401 } 1398 }
1402 1399
@@ -1421,11 +1418,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1421 } 1418 }
1422 1419
1423 if (IS_LEAF(n)) { 1420 if (IS_LEAF(n)) {
1424 plen = check_leaf(t, (struct leaf *)n, key, flp, res); 1421 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1425 if (plen < 0) 1422 if (ret > 0)
1426 goto backtrace; 1423 goto backtrace;
1427
1428 ret = 0;
1429 goto found; 1424 goto found;
1430 } 1425 }
1431 1426
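
Note on the fib_trie hunk above: check_leaf() now returns the semantic-match result directly, so a value <= 0 is a final answer (0 = match, negative = error) and 1 means "keep backtracking". The toy below only illustrates that return convention; no real trie is built and every name is made up.

/* Toy illustration of the <=0 / >0 return convention. */
#include <stdio.h>

static int toy_check_leaf(int leaf_key, int wanted_key)
{
	if (leaf_key == wanted_key)
		return 0;	/* match: result filled in, stop here */
	return 1;		/* no match: caller should backtrack */
}

static int toy_lookup(const int *leaves, int n, int wanted_key)
{
	for (int i = 0; i < n; i++) {
		int ret = toy_check_leaf(leaves[i], wanted_key);

		if (ret > 0)
			continue;	/* "goto backtrace" in the hunk */
		return ret;		/* 0 or a negative error */
	}
	return -1;	/* nothing matched anywhere */
}

int main(void)
{
	int leaves[] = { 10, 20, 30 };

	printf("lookup(20) = %d\n", toy_lookup(leaves, 3, 20));
	printf("lookup(99) = %d\n", toy_lookup(leaves, 3, 99));
	return 0;
}
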
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7750c97fde7b..ffeaffc3fffe 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
439 unsigned int *len) 439 unsigned int *len)
440{ 440{
441 unsigned long subid; 441 unsigned long subid;
442 unsigned int size;
443 unsigned long *optr; 442 unsigned long *optr;
443 size_t size;
444 444
445 size = eoc - ctx->pointer + 1; 445 size = eoc - ctx->pointer + 1;
446 446
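
Note on the snmp hunk above: the length of the span "eoc - ctx->pointer + 1" is now a size_t, the unsigned type sized for object lengths, rather than a plain unsigned int. A minimal, self-contained illustration:

/* size_t holds a pointer-span length without the truncation risk a 32-bit
 * unsigned int would have for large buffers on 64-bit hosts. */
#include <stddef.h>
#include <stdio.h>

int main(void)
{
	unsigned char buf[64];
	unsigned char *ptr = buf + 4;	/* plays the role of ctx->pointer */
	unsigned char *eoc = buf + 63;	/* end of contents */

	size_t size = (size_t)(eoc - ptr + 1);
	printf("span covers %zu bytes\n", size);
	return 0;
}
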
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 5ff0ce6e9d39..7ddc30f0744f 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -224,7 +224,7 @@ static __init int tcpprobe_init(void)
224 if (bufsize < 0) 224 if (bufsize < 0)
225 return -EINVAL; 225 return -EINVAL;
226 226
227 tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); 227 tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
228 if (!tcp_probe.log) 228 if (!tcp_probe.log)
229 goto err0; 229 goto err0;
230 230
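
Note on the tcp_probe hunk above: the fix restores kcalloc()'s documented argument order, element count first and element size second. The userspace calloc() analogue below follows the same convention; struct toy_log_entry is a made-up stand-in for struct tcp_log.

/* calloc() takes (count, element size), just like kcalloc() with a gfp flag
 * appended; swapping the two still allocates the right number of bytes but
 * defeats the intent of the interface. */
#include <stdio.h>
#include <stdlib.h>

struct toy_log_entry { unsigned int seq, len; };

int main(void)
{
	size_t bufsize = 128;	/* number of entries, as in tcp_probe */
	struct toy_log_entry *log = calloc(bufsize, sizeof(struct toy_log_entry));

	if (!log)
		return 1;
	printf("allocated %zu entries of %zu bytes\n",
	       bufsize, sizeof(struct toy_log_entry));
	free(log);
	return 0;
}
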
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 147588f4c7c0..ff61a5cdb0b3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -749,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
749 } 749 }
750 write_unlock_bh(&idev->lock); 750 write_unlock_bh(&idev->lock);
751 751
752 addrconf_del_timer(ifp);
753
752 ipv6_ifa_notify(RTM_DELADDR, ifp); 754 ipv6_ifa_notify(RTM_DELADDR, ifp);
753 755
754 atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); 756 atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
755 757
756 addrconf_del_timer(ifp);
757
758 /* 758 /*
759 * Purge or update corresponding prefix 759 * Purge or update corresponding prefix
760 * 760 *
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cd1c993d52b..dcf94fdfb863 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -445,7 +445,7 @@ looped_back:
445 kfree_skb(skb); 445 kfree_skb(skb);
446 return -1; 446 return -1;
447 } 447 }
448 if (!ipv6_chk_home_addr(&init_net, addr)) { 448 if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) {
449 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), 449 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
450 IPSTATS_MIB_INADDRERRORS); 450 IPSTATS_MIB_INADDRERRORS);
451 kfree_skb(skb); 451 kfree_skb(skb);
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index 9e1fb82e3220..2f05ec1037ab 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
101 101
102 hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, 102 hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
103 &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); 103 &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE);
104 if (IS_ERR(hdr)) { 104 if (hdr == NULL) {
105 ret = PTR_ERR(hdr); 105 ret = -EMSGSIZE;
106 goto err_out; 106 goto err_out;
107 } 107 }
108 108
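
Note on the irnetlink hunk above: genlmsg_put() reports failure with a NULL pointer rather than an ERR_PTR()-encoded error, so the caller must test for NULL and choose its own error code. The toy below models only that calling convention; toy_put_header() is a made-up stand-in, not the genetlink API.

/* NULL-on-failure convention: the caller supplies the errno. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static char msg_buf[32];

/* Stand-in for genlmsg_put(): NULL means "no room in the message". */
static void *toy_put_header(size_t need)
{
	if (need > sizeof(msg_buf))
		return NULL;
	return memset(msg_buf, 0, need);
}

static int toy_build_reply(size_t need)
{
	void *hdr = toy_put_header(need);

	if (hdr == NULL)
		return -EMSGSIZE;	/* caller chooses the error code */
	return 0;
}

int main(void)
{
	printf("small reply: %d\n", toy_build_reply(16));
	printf("oversized reply: %d\n", toy_build_reply(64));
	return 0;
}
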
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 98c0b5e56ecc..df0836ff1a20 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -530,8 +530,6 @@ static int ieee80211_stop(struct net_device *dev)
530 local->sta_hw_scanning = 0; 530 local->sta_hw_scanning = 0;
531 } 531 }
532 532
533 flush_workqueue(local->hw.workqueue);
534
535 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 533 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
536 kfree(sdata->u.sta.extra_ie); 534 kfree(sdata->u.sta.extra_ie);
537 sdata->u.sta.extra_ie = NULL; 535 sdata->u.sta.extra_ie = NULL;
@@ -555,6 +553,8 @@ static int ieee80211_stop(struct net_device *dev)
555 553
556 ieee80211_led_radio(local, 0); 554 ieee80211_led_radio(local, 0);
557 555
556 flush_workqueue(local->hw.workqueue);
557
558 tasklet_disable(&local->tx_pending_tasklet); 558 tasklet_disable(&local->tx_pending_tasklet);
559 tasklet_disable(&local->tasklet); 559 tasklet_disable(&local->tasklet);
560 } 560 }
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4d2b582dd055..b404537c0bcd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -547,15 +547,14 @@ static void ieee80211_set_associated(struct net_device *dev,
547 sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; 547 sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
548 } 548 }
549 549
550 netif_carrier_on(dev);
551 ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; 550 ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
552 memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); 551 memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
553 memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); 552 memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
554 ieee80211_sta_send_associnfo(dev, ifsta); 553 ieee80211_sta_send_associnfo(dev, ifsta);
555 } else { 554 } else {
555 netif_carrier_off(dev);
556 ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); 556 ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid);
557 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; 557 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
558 netif_carrier_off(dev);
559 ieee80211_reset_erp_info(dev); 558 ieee80211_reset_erp_info(dev);
560 559
561 sdata->bss_conf.assoc_ht = 0; 560 sdata->bss_conf.assoc_ht = 0;
@@ -569,6 +568,10 @@ static void ieee80211_set_associated(struct net_device *dev,
569 568
570 sdata->bss_conf.assoc = assoc; 569 sdata->bss_conf.assoc = assoc;
571 ieee80211_bss_info_change_notify(sdata, changed); 570 ieee80211_bss_info_change_notify(sdata, changed);
571
572 if (assoc)
573 netif_carrier_on(dev);
574
572 wrqu.ap_addr.sa_family = ARPHRD_ETHER; 575 wrqu.ap_addr.sa_family = ARPHRD_ETHER;
573 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); 576 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
574} 577}
@@ -3611,8 +3614,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
3611 spin_unlock_bh(&local->sta_bss_lock); 3614 spin_unlock_bh(&local->sta_bss_lock);
3612 3615
3613#ifdef CONFIG_MAC80211_IBSS_DEBUG 3616#ifdef CONFIG_MAC80211_IBSS_DEBUG
3614 printk(KERN_DEBUG " sta_find_ibss: selected %s current " 3617 if (found)
3615 "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); 3618 printk(KERN_DEBUG " sta_find_ibss: selected %s current "
3619 "%s\n", print_mac(mac, bssid),
3620 print_mac(mac2, ifsta->bssid));
3616#endif /* CONFIG_MAC80211_IBSS_DEBUG */ 3621#endif /* CONFIG_MAC80211_IBSS_DEBUG */
3617 if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && 3622 if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
3618 (bss = ieee80211_rx_bss_get(dev, bssid, 3623 (bss = ieee80211_rx_bss_get(dev, bssid,
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 04afc13ed825..4ea7b97d1af1 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -141,7 +141,6 @@ struct rc_pid_events_file_info {
141 * rate behaviour values (lower means we should trust more what we learnt 141 * rate behaviour values (lower means we should trust more what we learnt
142 * about behaviour of rates, higher means we should trust more the natural 142 * about behaviour of rates, higher means we should trust more the natural
143 * ordering of rates) 143 * ordering of rates)
144 * @fast_start: if Y, push high rates right after initialization
145 */ 144 */
146struct rc_pid_debugfs_entries { 145struct rc_pid_debugfs_entries {
147 struct dentry *dir; 146 struct dentry *dir;
@@ -154,7 +153,6 @@ struct rc_pid_debugfs_entries {
154 struct dentry *sharpen_factor; 153 struct dentry *sharpen_factor;
155 struct dentry *sharpen_duration; 154 struct dentry *sharpen_duration;
156 struct dentry *norm_offset; 155 struct dentry *norm_offset;
157 struct dentry *fast_start;
158}; 156};
159 157
160void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, 158void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
@@ -267,9 +265,6 @@ struct rc_pid_info {
267 /* Normalization offset. */ 265 /* Normalization offset. */
268 unsigned int norm_offset; 266 unsigned int norm_offset;
269 267
270 /* Fast starst parameter. */
271 unsigned int fast_start;
272
273 /* Rates information. */ 268 /* Rates information. */
274 struct rc_pid_rateinfo *rinfo; 269 struct rc_pid_rateinfo *rinfo;
275 270
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index a849b745bdb5..bcd27c1d7594 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
398 return NULL; 398 return NULL;
399 } 399 }
400 400
401 pinfo->target = RC_PID_TARGET_PF;
402 pinfo->sampling_period = RC_PID_INTERVAL;
403 pinfo->coeff_p = RC_PID_COEFF_P;
404 pinfo->coeff_i = RC_PID_COEFF_I;
405 pinfo->coeff_d = RC_PID_COEFF_D;
406 pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
407 pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
408 pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
409 pinfo->norm_offset = RC_PID_NORM_OFFSET;
410 pinfo->rinfo = rinfo;
411 pinfo->oldrate = 0;
412
401 /* Sort the rates. This is optimized for the most common case (i.e. 413 /* Sort the rates. This is optimized for the most common case (i.e.
402 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed 414 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
403 * mapping too. */ 415 * mapping too. */
404 for (i = 0; i < sband->n_bitrates; i++) { 416 for (i = 0; i < sband->n_bitrates; i++) {
405 rinfo[i].index = i; 417 rinfo[i].index = i;
406 rinfo[i].rev_index = i; 418 rinfo[i].rev_index = i;
407 if (pinfo->fast_start) 419 if (RC_PID_FAST_START)
408 rinfo[i].diff = 0; 420 rinfo[i].diff = 0;
409 else 421 else
410 rinfo[i].diff = i * pinfo->norm_offset; 422 rinfo[i].diff = i * pinfo->norm_offset;
@@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
425 break; 437 break;
426 } 438 }
427 439
428 pinfo->target = RC_PID_TARGET_PF;
429 pinfo->sampling_period = RC_PID_INTERVAL;
430 pinfo->coeff_p = RC_PID_COEFF_P;
431 pinfo->coeff_i = RC_PID_COEFF_I;
432 pinfo->coeff_d = RC_PID_COEFF_D;
433 pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
434 pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
435 pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
436 pinfo->norm_offset = RC_PID_NORM_OFFSET;
437 pinfo->fast_start = RC_PID_FAST_START;
438 pinfo->rinfo = rinfo;
439 pinfo->oldrate = 0;
440
441#ifdef CONFIG_MAC80211_DEBUGFS 440#ifdef CONFIG_MAC80211_DEBUGFS
442 de = &pinfo->dentries; 441 de = &pinfo->dentries;
443 de->dir = debugfs_create_dir("rc80211_pid", 442 de->dir = debugfs_create_dir("rc80211_pid",
@@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
465 de->norm_offset = debugfs_create_u32("norm_offset", 464 de->norm_offset = debugfs_create_u32("norm_offset",
466 S_IRUSR | S_IWUSR, de->dir, 465 S_IRUSR | S_IWUSR, de->dir,
467 &pinfo->norm_offset); 466 &pinfo->norm_offset);
468 de->fast_start = debugfs_create_bool("fast_start",
469 S_IRUSR | S_IWUSR, de->dir,
470 &pinfo->fast_start);
471#endif 467#endif
472 468
473 return pinfo; 469 return pinfo;
@@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv)
479#ifdef CONFIG_MAC80211_DEBUGFS 475#ifdef CONFIG_MAC80211_DEBUGFS
480 struct rc_pid_debugfs_entries *de = &pinfo->dentries; 476 struct rc_pid_debugfs_entries *de = &pinfo->dentries;
481 477
482 debugfs_remove(de->fast_start);
483 debugfs_remove(de->norm_offset); 478 debugfs_remove(de->norm_offset);
484 debugfs_remove(de->sharpen_duration); 479 debugfs_remove(de->sharpen_duration);
485 debugfs_remove(de->sharpen_factor); 480 debugfs_remove(de->sharpen_factor);
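
Note on the rc80211_pid hunk above: the pinfo field assignments move above the rate-sorting loop because that loop already reads pinfo->norm_offset (and, before its removal, fast_start). A minimal initialise-before-use sketch, with made-up toy_* names:

/* The field a loop reads must be set before the loop runs. */
#include <stdio.h>

struct toy_pinfo { unsigned int norm_offset; };

int main(void)
{
	struct toy_pinfo pinfo = { .norm_offset = 3 };	/* set before the loop */
	unsigned int diff[4];

	for (unsigned int i = 0; i < 4; i++)
		diff[i] = i * pinfo.norm_offset;	/* would read garbage if
							 * norm_offset were still
							 * uninitialised */
	printf("diff[3] = %u\n", diff[3]);
	return 0;
}
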
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 271cd01d57ae..dd28fb239a60 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -844,9 +844,15 @@ static int tcp_packet(struct nf_conn *ct,
844 /* Attempt to reopen a closed/aborted connection. 844 /* Attempt to reopen a closed/aborted connection.
845 * Delete this connection and look up again. */ 845 * Delete this connection and look up again. */
846 write_unlock_bh(&tcp_lock); 846 write_unlock_bh(&tcp_lock);
847 if (del_timer(&ct->timeout)) 847 /* Only repeat if we can actually remove the timer.
848 * Destruction may already be in progress in process
849 * context and we must give it a chance to terminate.
850 */
851 if (del_timer(&ct->timeout)) {
848 ct->timeout.function((unsigned long)ct); 852 ct->timeout.function((unsigned long)ct);
849 return -NF_REPEAT; 853 return -NF_REPEAT;
854 }
855 return -NF_DROP;
850 } 856 }
851 /* Fall through */ 857 /* Fall through */
852 case TCP_CONNTRACK_IGNORE: 858 case TCP_CONNTRACK_IGNORE:
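
Note on the conntrack hunk above: the timeout handler is re-run (and -NF_REPEAT returned) only when del_timer() confirms the timer was still pending; if destruction is already underway elsewhere, the packet is dropped instead. The userspace model below uses an atomic exchange as a stand-in for that "was it still pending?" test; it models the ownership rule, not kernel timers, and all toy_* names are invented.

/* Only the path that actually cancels the pending teardown may run it. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int teardown_pending = 1;

/* Returns 1 if we cancelled the pending teardown, 0 if someone beat us. */
static int toy_del_timer(void)
{
	return atomic_exchange(&teardown_pending, 0);
}

static int toy_reopen_connection(void)
{
	if (toy_del_timer()) {
		/* we own the teardown now: run it and ask for a retry */
		printf("teardown run here, repeat the packet\n");
		return 1;	/* -NF_REPEAT in the hunk */
	}
	/* destruction already in progress elsewhere: just drop */
	printf("teardown already claimed, drop\n");
	return 0;		/* -NF_DROP in the hunk */
}

int main(void)
{
	toy_reopen_connection();	/* first caller wins */
	toy_reopen_connection();	/* second caller must drop */
	return 0;
}
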
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index fdc14a0d21af..9080c61b71a5 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -584,12 +584,7 @@ list_start:
584 rcu_read_unlock(); 584 rcu_read_unlock();
585 585
586 genlmsg_end(ans_skb, data); 586 genlmsg_end(ans_skb, data);
587 587 return genlmsg_reply(ans_skb, info);
588 ret_val = genlmsg_reply(ans_skb, info);
589 if (ret_val != 0)
590 goto list_failure;
591
592 return 0;
593 588
594list_retry: 589list_retry:
595 /* XXX - this limit is a guesstimate */ 590 /* XXX - this limit is a guesstimate */
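
Note on the netlabel hunks (this file and the two below): once the reply is finalised, the handlers now return the send helper's result directly instead of checking it and jumping to a failure label meant for errors that happen before the message is handed off. A tiny sketch of that shape, with made-up toy_* names:

/* Propagate the callee's return value as-is from the tail of the handler. */
#include <stdio.h>

static int toy_send_reply(int msg)
{
	return msg < 0 ? msg : 0;	/* stand-in for the send helper */
}

static int toy_list_handler(int msg)
{
	/* ... build and finalise the message ... */
	return toy_send_reply(msg);	/* its error code reaches the caller */
}

int main(void)
{
	printf("ok path: %d, error path: %d\n",
	       toy_list_handler(1), toy_list_handler(-22));
	return 0;
}
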
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 22c191267808..44be5d5261f4 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
386 rcu_read_unlock(); 386 rcu_read_unlock();
387 387
388 genlmsg_end(ans_skb, data); 388 genlmsg_end(ans_skb, data);
389 389 return genlmsg_reply(ans_skb, info);
390 ret_val = genlmsg_reply(ans_skb, info);
391 if (ret_val != 0)
392 goto listdef_failure;
393 return 0;
394 390
395listdef_failure_lock: 391listdef_failure_lock:
396 rcu_read_unlock(); 392 rcu_read_unlock();
@@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
501 goto version_failure; 497 goto version_failure;
502 498
503 genlmsg_end(ans_skb, data); 499 genlmsg_end(ans_skb, data);
504 500 return genlmsg_reply(ans_skb, info);
505 ret_val = genlmsg_reply(ans_skb, info);
506 if (ret_val != 0)
507 goto version_failure;
508 return 0;
509 501
510version_failure: 502version_failure:
511 kfree_skb(ans_skb); 503 kfree_skb(ans_skb);
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 52b2611a6eb6..56f80872924e 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
1107 goto list_failure; 1107 goto list_failure;
1108 1108
1109 genlmsg_end(ans_skb, data); 1109 genlmsg_end(ans_skb, data);
1110 1110 return genlmsg_reply(ans_skb, info);
1111 ret_val = genlmsg_reply(ans_skb, info);
1112 if (ret_val != 0)
1113 goto list_failure;
1114 return 0;
1115 1111
1116list_failure: 1112list_failure:
1117 kfree_skb(ans_skb); 1113 kfree_skb(ans_skb);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0c9d5a6950fe..fcdb45d1071b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5899,12 +5899,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5899 return SCTP_IERROR_NO_DATA; 5899 return SCTP_IERROR_NO_DATA;
5900 } 5900 }
5901 5901
5902 /* If definately accepting the DATA chunk, record its TSN, otherwise
5903 * wait for renege processing.
5904 */
5905 if (SCTP_CMD_CHUNK_ULP == deliver)
5906 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
5907
5908 chunk->data_accepted = 1; 5902 chunk->data_accepted = 1;
5909 5903
5910 /* Note: Some chunks may get overcounted (if we drop) or overcounted 5904 /* Note: Some chunks may get overcounted (if we drop) or overcounted
@@ -5924,6 +5918,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5924 * and discard the DATA chunk. 5918 * and discard the DATA chunk.
5925 */ 5919 */
5926 if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { 5920 if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) {
5921 /* Mark tsn as received even though we drop it */
5922 sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
5923
5927 err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, 5924 err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM,
5928 &data_hdr->stream, 5925 &data_hdr->stream,
5929 sizeof(data_hdr->stream)); 5926 sizeof(data_hdr->stream));
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index ce6cda6b6994..a1f654aea268 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
710 if (!skb) 710 if (!skb)
711 goto fail; 711 goto fail;
712 712
713 /* Now that all memory allocations for this chunk succeeded, we
714 * can mark it as received so the tsn_map is updated correctly.
715 */
716 sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn));
717
713 /* First calculate the padding, so we don't inadvertently 718 /* First calculate the padding, so we don't inadvertently
714 * pass up the wrong length to the user. 719 * pass up the wrong length to the user.
715 * 720 *
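
Note on the two SCTP hunks (sm_statefuns.c above and this file): the TSN is now marked as received only after every allocation for the event has succeeded, with the invalid-stream error branch marking it explicitly before dropping. The sketch below illustrates that "commit shared state only after allocations succeed" ordering; the bitmask is a made-up stand-in for the peer tsn_map.

/* Allocate first, mark received second, so a failed allocation never leaves
 * a TSN recorded for data that was never delivered. */
#include <stdlib.h>
#include <stdio.h>

static unsigned int received_map;	/* stand-in for the peer tsn_map */

static int toy_deliver_chunk(unsigned int tsn, size_t payload_len)
{
	char *copy = malloc(payload_len);	/* all allocations first */

	if (!copy)
		return -1;			/* nothing was marked yet */

	received_map |= 1u << (tsn % 32);	/* now it is safe to mark */
	/* ... hand 'copy' up to the user ... */
	free(copy);
	return 0;
}

int main(void)
{
	int ret = toy_deliver_chunk(5, 128);

	printf("deliver: %d, map: %#x\n", ret, received_map);
	return 0;
}
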
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b976d9ed10e4..04c41504f84c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -277,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
277 memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); 277 memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
278 x->props.flags = p->flags; 278 x->props.flags = p->flags;
279 279
280 if (!x->sel.family) 280 if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC))
281 x->sel.family = p->family; 281 x->sel.family = p->family;
282
283} 282}
284 283
285/* 284/*
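
Note on the xfrm_user hunk above: the selector family is defaulted from the request's family only when the caller did not set XFRM_STATE_AF_UNSPEC. A minimal sketch of gating a default on a flag; TOY_AF_UNSPEC_FLAG and the toy_* names are invented for illustration.

/* Apply the "inherit from the request" default only without the flag. */
#include <stdio.h>

#define TOY_AF_UNSPEC_FLAG 0x10	/* stands in for XFRM_STATE_AF_UNSPEC */

static int toy_pick_family(int sel_family, int req_family, unsigned int flags)
{
	if (!sel_family && !(flags & TOY_AF_UNSPEC_FLAG))
		sel_family = req_family;
	return sel_family;
}

int main(void)
{
	printf("default applied: %d\n", toy_pick_family(0, 2, 0));
	printf("flag set, stays unspecified: %d\n",
	       toy_pick_family(0, 2, TOY_AF_UNSPEC_FLAG));
	return 0;
}
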