-rw-r--r--  Documentation/x86/boot.txt               |   2
-rw-r--r--  arch/arm/mach-davinci/board-dm365-evm.c  |   4
-rw-r--r--  arch/arm/mach-davinci/gpio.c             |  21
-rw-r--r--  arch/arm/mach-davinci/irq.c              |   6
-rw-r--r--  arch/x86/Kconfig                         |   2
-rw-r--r--  arch/x86/kernel/reboot.c                 |  24
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h          |   5
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c          |  71
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c   |   4
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c     |  15
-rw-r--r--  fs/cifs/cifsfs.c                         | 100
-rw-r--r--  fs/cifs/file.c                           |   2
-rw-r--r--  fs/dcache.c                              |   2
-rw-r--r--  fs/fscache/page.c                        |  14
-rw-r--r--  fs/libfs.c                               |   2
-rw-r--r--  fs/namei.c                               |   7
-rw-r--r--  include/linux/sched.h                    |  17
-rw-r--r--  kernel/rcutree_plugin.h                  |  53
-rw-r--r--  kernel/sched.c                           | 233
-rw-r--r--  kernel/sched_fair.c                      |  46
-rw-r--r--  kernel/sched_features.h                  |   2
-rw-r--r--  kernel/signal.c                          |  19
-rw-r--r--  kernel/softirq.c                         |  12
-rw-r--r--  mm/vmscan.c                              |   3
-rw-r--r--  net/ceph/ceph_fs.c                       |  17
25 files changed, 453 insertions, 230 deletions
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9b7221a86df2..7c3a8801b7ce 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -674,7 +674,7 @@ Protocol: 2.10+
 
 Field name:	init_size
 Type:		read
-Offset/size:	0x25c/4
+Offset/size:	0x260/4
 
 This field indicates the amount of linear contiguous memory starting
 at the kernel runtime start address that the kernel needs before it
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index c67f684ee3e5..09a87e61ffcf 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -520,7 +520,7 @@ fail:
 	 */
 	if (have_imager()) {
 		label = "HD imager";
-		mux |= 1;
+		mux |= 2;
 
 		/* externally mux MMC1/ENET/AIC33 to imager */
 		mux |= BIT(6) | BIT(5) | BIT(3);
@@ -540,7 +540,7 @@ fail:
 		resets &= ~BIT(1);
 
 	if (have_tvp7002()) {
-		mux |= 2;
+		mux |= 1;
 		resets &= ~BIT(2);
 		label = "tvp7002 HD";
 	} else {
diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c
index e7221398e5af..cafbe13a82a5 100644
--- a/arch/arm/mach-davinci/gpio.c
+++ b/arch/arm/mach-davinci/gpio.c
@@ -254,8 +254,10 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 {
 	struct davinci_gpio_regs __iomem *g;
 	u32 mask = 0xffff;
+	struct davinci_gpio_controller *d;
 
-	g = (__force struct davinci_gpio_regs __iomem *) irq_desc_get_handler_data(desc);
+	d = (struct davinci_gpio_controller *)irq_desc_get_handler_data(desc);
+	g = (struct davinci_gpio_regs __iomem *)d->regs;
 
 	/* we only care about one bank */
 	if (irq & 1)
@@ -274,11 +276,14 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 		if (!status)
 			break;
 		__raw_writel(status, &g->intstat);
-		if (irq & 1)
-			status >>= 16;
 
 		/* now demux them to the right lowlevel handler */
-		n = (int)irq_get_handler_data(irq);
+		n = d->irq_base;
+		if (irq & 1) {
+			n += 16;
+			status >>= 16;
+		}
+
 		while (status) {
 			res = ffs(status);
 			n += res;
@@ -424,7 +429,13 @@ static int __init davinci_gpio_irq_setup(void)
 
 		/* set up all irqs in this bank */
 		irq_set_chained_handler(bank_irq, gpio_irq_handler);
-		irq_set_handler_data(bank_irq, (__force void *)g);
+
+		/*
+		 * Each chip handles 32 gpios, and each irq bank consists of 16
+		 * gpio irqs. Pass the irq bank's corresponding controller to
+		 * the chained irq handler.
+		 */
+		irq_set_handler_data(bank_irq, &chips[gpio / 32]);
 
 		for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) {
 			irq_set_chip(irq, &gpio_irqchip);
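
The chained-handler change above is easier to see in isolation. Below is a minimal userspace sketch of the same demux logic, with hypothetical stand-ins (a plain pointer instead of irq_set_handler_data(), __builtin_ffs() instead of the kernel's ffs()); it illustrates the pattern, not the actual mach-davinci code:

    #include <stdio.h>

    /* Stand-in for struct davinci_gpio_controller. */
    struct gpio_controller {
        int irq_base;
    };

    static void *handler_data;  /* stand-in for irq handler data */

    /* Derive the irq base from the controller, then use the upper
     * status half for odd bank irqs, as the patched handler does. */
    static void demux(unsigned int bank_irq, unsigned int status)
    {
        struct gpio_controller *d = handler_data;
        int n = d->irq_base;

        if (bank_irq & 1) {
            n += 16;
            status >>= 16;
        }
        while (status) {
            int res = __builtin_ffs(status);  /* 1-based first set bit */
            n += res;
            status >>= res;
            printf("dispatch gpio irq %d\n", n - 1);
        }
    }

    int main(void)
    {
        struct gpio_controller ctl = { .irq_base = 100 };

        handler_data = &ctl;
        demux(1, 0x00050000);  /* bits 16 and 18 -> irqs 116 and 118 */
        return 0;
    }
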
diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c
index d8c1af025931..952dc126c390 100644
--- a/arch/arm/mach-davinci/irq.c
+++ b/arch/arm/mach-davinci/irq.c
@@ -52,6 +52,12 @@ davinci_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num)
 	struct irq_chip_type *ct;
 
 	gc = irq_alloc_generic_chip("AINTC", 1, irq_start, base, handle_edge_irq);
+	if (!gc) {
+		pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n",
+			__func__, irq_start);
+		return;
+	}
+
 	ct = gc->chip_types;
 	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index da349723d411..37357a599dca 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1170,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on NUMA && PCI
+	depends on X86_64 && NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection. You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4f0d46fefa7f..9242436e9937 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -419,6 +419,30 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
 		},
 	},
+	{ /* Handle problems with rebooting on the Latitude E6320. */
+		.callback = set_pci_reboot,
+		.ident = "Dell Latitude E6320",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+		},
+	},
+	{ /* Handle problems with rebooting on the Latitude E5420. */
+		.callback = set_pci_reboot,
+		.ident = "Dell Latitude E5420",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+		},
+	},
+	{ /* Handle problems with rebooting on the Latitude E6420. */
+		.callback = set_pci_reboot,
+		.ident = "Dell Latitude E6420",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f245c588ae95..ce7914c4c044 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -262,6 +262,7 @@ enum intel_pch {
 };
 
 #define QUIRK_PIPEA_FORCE (1<<0)
+#define QUIRK_LVDS_SSC_DISABLE (1<<1)
 
 struct intel_fbdev;
 
@@ -1194,7 +1195,9 @@ void i915_gem_free_all_phys_object(struct drm_device *dev);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+				    uint32_t size,
+				    int tiling_mode);
 
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5c0d1247f453..a087e1bf0c2f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1374,25 +1374,24 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
 }
 
 static uint32_t
-i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t size;
+	uint32_t gtt_size;
 
 	if (INTEL_INFO(dev)->gen >= 4 ||
-	    obj->tiling_mode == I915_TILING_NONE)
-		return obj->base.size;
+	    tiling_mode == I915_TILING_NONE)
+		return size;
 
 	/* Previous chips need a power-of-two fence region when tiling */
 	if (INTEL_INFO(dev)->gen == 3)
-		size = 1024*1024;
+		gtt_size = 1024*1024;
 	else
-		size = 512*1024;
+		gtt_size = 512*1024;
 
-	while (size < obj->base.size)
-		size <<= 1;
+	while (gtt_size < size)
+		gtt_size <<= 1;
 
-	return size;
+	return gtt_size;
 }
 
 /**
@@ -1403,59 +1402,52 @@ i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
  * potential fence register mapping.
  */
 static uint32_t
-i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_device *dev,
+			   uint32_t size,
+			   int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
 	 */
 	if (INTEL_INFO(dev)->gen >= 4 ||
-	    obj->tiling_mode == I915_TILING_NONE)
+	    tiling_mode == I915_TILING_NONE)
 		return 4096;
 
 	/*
 	 * Previous chips need to be aligned to the size of the smallest
 	 * fence register that can contain the object.
 	 */
-	return i915_gem_get_gtt_size(obj);
+	return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 /**
  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
  * unfenced object
- * @obj: object to check
+ * @dev: the device
+ * @size: size of the object
+ * @tiling_mode: tiling mode of the object
  *
  * Return the required GTT alignment for an object, only taking into account
  * unfenced tiled surface requirements.
  */
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+				    uint32_t size,
+				    int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-	int tile_height;
-
 	/*
 	 * Minimum alignment is 4k (GTT page size) for sane hw.
 	 */
 	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
-	    obj->tiling_mode == I915_TILING_NONE)
+	    tiling_mode == I915_TILING_NONE)
 		return 4096;
 
-	/*
-	 * Older chips need unfenced tiled buffers to be aligned to the left
-	 * edge of an even tile row (where tile rows are counted as if the bo is
-	 * placed in a fenced gtt region).
+	/* Previous hardware however needs to be aligned to a power-of-two
+	 * tile height. The simplest method for determining this is to reuse
+	 * the power-of-tile object size.
 	 */
-	if (IS_GEN2(dev))
-		tile_height = 16;
-	else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-		tile_height = 32;
-	else
-		tile_height = 8;
-
-	return tile_height * obj->stride * 2;
+	return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 int
@@ -2744,9 +2736,16 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
-	fence_size = i915_gem_get_gtt_size(obj);
-	fence_alignment = i915_gem_get_gtt_alignment(obj);
-	unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
+	fence_size = i915_gem_get_gtt_size(dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(dev,
+						     obj->base.size,
+						     obj->tiling_mode);
+	unfenced_alignment =
+		i915_gem_get_unfenced_gtt_alignment(dev,
+						    obj->base.size,
+						    obj->tiling_mode);
 
 	if (alignment == 0)
 		alignment = map_and_fenceable ? fence_alignment :
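
For reference, the fence-region sizing that i915_gem_get_gtt_size() performs on pre-gen4 hardware is a plain round-up to a power of two, starting at 1MB on gen3. A self-contained sketch with plain C types standing in for the driver's (the numbers in the comments are just worked examples):

    #include <stdint.h>
    #include <stdio.h>

    /* Same rounding as the patched i915_gem_get_gtt_size(). */
    static uint32_t gtt_size_for(uint32_t size, int gen)
    {
        uint32_t gtt_size = (gen == 3) ? 1024 * 1024 : 512 * 1024;

        while (gtt_size < size)  /* round up to the next power of two */
            gtt_size <<= 1;
        return gtt_size;
    }

    int main(void)
    {
        printf("%u\n", gtt_size_for(700 * 1024, 3));   /* 1048576 (1MB) */
        printf("%u\n", gtt_size_for(1536 * 1024, 3));  /* 2097152 (2MB) */
        return 0;
    }
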
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 82d70fd9e933..99c4faa59d8f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -348,7 +348,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		/* Rebind if we need a change of alignment */
 		if (!obj->map_and_fenceable) {
 			u32 unfenced_alignment =
-				i915_gem_get_unfenced_gtt_alignment(obj);
+				i915_gem_get_unfenced_gtt_alignment(dev,
+								    obj->base.size,
+								    args->tiling_mode);
 			if (obj->gtt_offset & (unfenced_alignment - 1))
 				ret = i915_gem_object_unbind(obj);
 		}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 21b6f93fe919..0f1c799afea1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4305,7 +4305,8 @@ static void intel_update_watermarks(struct drm_device *dev)
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
+	return dev_priv->lvds_use_ssc && i915_panel_use_ssc
+		&& !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE);
 }
 
 static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
@@ -7810,6 +7811,15 @@ static void quirk_pipea_force (struct drm_device *dev)
 	DRM_DEBUG_DRIVER("applying pipe a force quirk\n");
 }
 
+/*
+ * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
+ */
+static void quirk_ssc_force_disable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE;
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -7838,6 +7848,9 @@ struct intel_quirk intel_quirks[] = {
 	/* 855 & before need to leave pipe A & dpll A up */
 	{ 0x3582, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
 	{ 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
+
+	/* Lenovo U160 cannot use SSC on LVDS */
+	{ 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3e2989976297..bc4b12ca537b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/namei.h>
 #include <net/ipv6.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -542,14 +543,12 @@ static const struct super_operations cifs_super_ops = {
 static struct dentry *
 cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 {
-	int xid, rc;
-	struct inode *inode;
-	struct qstr name;
-	struct dentry *dparent = NULL, *dchild = NULL, *alias;
+	struct dentry *dentry;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-	unsigned int i, full_len, len;
-	char *full_path = NULL, *pstart;
+	char *full_path = NULL;
+	char *s, *p;
 	char sep;
+	int xid;
 
 	full_path = cifs_build_path_to_root(vol, cifs_sb,
 					    cifs_sb_master_tcon(cifs_sb));
@@ -560,73 +559,32 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 
 	xid = GetXid();
 	sep = CIFS_DIR_SEP(cifs_sb);
-	dparent = dget(sb->s_root);
-	full_len = strlen(full_path);
-	full_path[full_len] = sep;
-	pstart = full_path + 1;
-
-	for (i = 1, len = 0; i <= full_len; i++) {
-		if (full_path[i] != sep || !len) {
-			len++;
-			continue;
-		}
-
-		full_path[i] = 0;
-		cFYI(1, "get dentry for %s", pstart);
-
-		name.name = pstart;
-		name.len = len;
-		name.hash = full_name_hash(pstart, len);
-		dchild = d_lookup(dparent, &name);
-		if (dchild == NULL) {
-			cFYI(1, "not exists");
-			dchild = d_alloc(dparent, &name);
-			if (dchild == NULL) {
-				dput(dparent);
-				dparent = ERR_PTR(-ENOMEM);
-				goto out;
-			}
-		}
-
-		cFYI(1, "get inode");
-		if (dchild->d_inode == NULL) {
-			cFYI(1, "not exists");
-			inode = NULL;
-			if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
-				rc = cifs_get_inode_info_unix(&inode, full_path,
-							      sb, xid);
-			else
-				rc = cifs_get_inode_info(&inode, full_path,
-							 NULL, sb, xid, NULL);
-			if (rc) {
-				dput(dchild);
-				dput(dparent);
-				dparent = ERR_PTR(rc);
-				goto out;
-			}
-			alias = d_materialise_unique(dchild, inode);
-			if (alias != NULL) {
-				dput(dchild);
-				if (IS_ERR(alias)) {
-					dput(dparent);
-					dparent = ERR_PTR(-EINVAL); /* XXX */
-					goto out;
-				}
-				dchild = alias;
-			}
-		}
-		cFYI(1, "parent %p, child %p", dparent, dchild);
-
-		dput(dparent);
-		dparent = dchild;
-		len = 0;
-		pstart = full_path + i + 1;
-		full_path[i] = sep;
-	}
-out:
+	dentry = dget(sb->s_root);
+	p = s = full_path;
+
+	do {
+		struct inode *dir = dentry->d_inode;
+		struct dentry *child;
+
+		/* skip separators */
+		while (*s == sep)
+			s++;
+		if (!*s)
+			break;
+		p = s++;
+		/* next separator */
+		while (*s && *s != sep)
+			s++;
+
+		mutex_lock(&dir->i_mutex);
+		child = lookup_one_len(p, dentry, s - p);
+		mutex_unlock(&dir->i_mutex);
+		dput(dentry);
+		dentry = child;
+	} while (!IS_ERR(dentry));
 	_FreeXid(xid);
 	kfree(full_path);
-	return dparent;
+	return dentry;
 }
 
 static int cifs_set_super(struct super_block *sb, void *data)
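
The new loop in cifs_get_root() is a classic separator-driven component walk. A hedged userspace sketch of just the string handling, with lookup_one_len() replaced by a printf (names here are illustrative, not CIFS APIs):

    #include <stdio.h>

    static void walk(const char *full_path, char sep)
    {
        const char *s = full_path, *p;

        do {
            while (*s == sep)        /* skip separators */
                s++;
            if (!*s)
                break;
            p = s++;
            while (*s && *s != sep)  /* find the next separator */
                s++;
            /* the kernel hands [p, s) to lookup_one_len() here */
            printf("component: %.*s\n", (int)(s - p), p);
        } while (1);
    }

    int main(void)
    {
        walk("\\srv\\share\\dir", '\\');  /* srv, share, dir */
        return 0;
    }
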
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bb71471a4d9d..a9b4a24f2a16 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1737,7 +1737,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 			io_parms.pid = pid;
 			io_parms.tcon = pTcon;
 			io_parms.offset = *poffset;
-			io_parms.length = len;
+			io_parms.length = cur_len;
 			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
 					 &read_data, &buf_type);
 			pSMBr = (struct smb_com_read_rsp *)read_data;
diff --git a/fs/dcache.c b/fs/dcache.c
index 6e4ea6d87774..fbdcbca40725 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1813,8 +1813,6 @@ seqretry:
 		tname = dentry->d_name.name;
 		i = dentry->d_inode;
 		prefetch(tname);
-		if (i)
-			prefetch(i);
 		/*
 		 * This seqcount check is required to ensure name and
 		 * len are loaded atomically, so as not to walk off the
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 2f343b4d7a7d..3f7a59bfa7ad 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -976,16 +976,12 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
 
 	pagevec_init(&pvec, 0);
 	next = 0;
-	while (next <= (loff_t)-1 &&
-	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)
-	       ) {
+	do {
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
+			break;
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-
-			ASSERTCMP(page_index, >=, next);
-			next = page_index + 1;
-
+			next = page->index;
 			if (PageFsCache(page)) {
 				__fscache_wait_on_page_write(cookie, page);
 				__fscache_uncache_page(cookie, page);
@@ -993,7 +989,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
 		}
 		pagevec_release(&pvec);
 		cond_resched();
-	}
+	} while (++next);
 
 	_leave("");
 }
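
The `} while (++next)` termination is the subtle part: `next` holds the index of the last page seen, and the increment both advances the search and drops to zero, ending the loop, exactly when that index was the maximum possible one. A tiny sketch of the wrap-around, using unsigned long as a stand-in for pgoff_t:

    #include <stdio.h>

    int main(void)
    {
        unsigned long next = ~0UL;  /* last page seen had the maximum index */

        if (++next)
            printf("keep scanning from index %lu\n", next);
        else
            printf("index wrapped to zero: stop\n");  /* this branch runs */
        return 0;
    }
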
diff --git a/fs/libfs.c b/fs/libfs.c
index c88eab55aec9..275ca4749a2e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -822,7 +822,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 		goto out;
 
 	attr->set_buf[size] = '\0';
-	val = simple_strtol(attr->set_buf, NULL, 0);
+	val = simple_strtoll(attr->set_buf, NULL, 0);
 	ret = attr->set(attr->data, val);
 	if (ret == 0)
 		ret = len; /* on success, claim we got the whole input */
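
The one-word change matters on 32-bit builds, where `long` is 32 bits: simple_strtol() would clamp 64-bit attribute values. A userspace illustration with the standard strtol()/strtoll() (the behavior noted in the comments assumes a 32-bit long):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const char *buf = "0x123456789";  /* needs more than 32 bits */

        long v = strtol(buf, NULL, 0);          /* clamps to LONG_MAX if long is 32-bit */
        long long vll = strtoll(buf, NULL, 0);  /* 4886718345 everywhere */
        printf("strtol=%ld strtoll=%lld\n", v, vll);
        return 0;
    }
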
diff --git a/fs/namei.c b/fs/namei.c
index 5c867dd1c0b3..14ab8d3f2f0c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -942,7 +942,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 	 * Don't forget we might have a non-mountpoint managed dentry
 	 * that wants to block transit.
 	 */
-	*inode = path->dentry->d_inode;
 	if (unlikely(managed_dentry_might_block(path->dentry)))
 		return false;
 
@@ -955,6 +954,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 		path->mnt = mounted;
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+		/*
+		 * Update the inode too. We don't need to re-check the
+		 * dentry sequence number here after this d_inode read,
+		 * because a mount-point is always pinned.
+		 */
+		*inode = path->dentry->d_inode;
 	}
 	return true;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a96487..14a6c7b545de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -844,6 +844,7 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
 enum powersavings_balance_level {
 	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
@@ -893,16 +894,21 @@ static inline int sd_power_saving_flags(void)
 	return 0;
 }
 
-struct sched_group {
-	struct sched_group *next;	/* Must be a circular list */
+struct sched_group_power {
 	atomic_t ref;
-
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power, cpu_power_orig;
+	unsigned int power, power_orig;
+};
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
+
 	unsigned int group_weight;
+	struct sched_group_power *sgp;
 
 	/*
 	 * The CPUs this group covers.
@@ -1254,6 +1260,9 @@ struct task_struct {
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
 	char rcu_read_unlock_special;
+#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
+	int rcu_boosted;
+#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
 	struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 75113cb7c4fb..8aafbb80b8b0 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
+static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
 /*
@@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
-	if (t->rcu_read_lock_nesting &&
+	if (t->rcu_read_lock_nesting > 0 &&
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
@@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu)
 			rnp->gp_tasks = &t->rcu_node_entry;
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	} else if (t->rcu_read_lock_nesting < 0 &&
+		   t->rcu_read_unlock_special) {
+
+		/*
+		 * Complete exit from RCU read-side critical section on
+		 * behalf of preempted instance of __rcu_read_unlock().
+		 */
+		rcu_read_unlock_special(t);
 	}
 
 	/*
@@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static noinline void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
 	int empty_exp;
@@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	}
 
 	/* Hardware IRQ handlers cannot block. */
-	if (in_irq()) {
+	if (in_irq() || in_serving_softirq()) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 		if (&t->rcu_node_entry == rnp->boost_tasks)
 			rnp->boost_tasks = np;
+		/* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+		if (t->rcu_boosted) {
+			special |= RCU_READ_UNLOCK_BOOSTED;
+			t->rcu_boosted = 0;
+		}
 #endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
@@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 	/* Unboost if we were boosted. */
 	if (special & RCU_READ_UNLOCK_BOOSTED) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
 		rt_mutex_unlock(t->rcu_boost_mutex);
 		t->rcu_boost_mutex = NULL;
 	}
@@ -387,13 +400,22 @@ void __rcu_read_unlock(void)
 	struct task_struct *t = current;
 
 	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
-	--t->rcu_read_lock_nesting;
-	barrier();  /* decrement before load of ->rcu_read_unlock_special */
-	if (t->rcu_read_lock_nesting == 0 &&
-	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-		rcu_read_unlock_special(t);
+	if (t->rcu_read_lock_nesting != 1)
+		--t->rcu_read_lock_nesting;
+	else {
+		t->rcu_read_lock_nesting = INT_MIN;
+		barrier();  /* assign before ->rcu_read_unlock_special load */
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+			rcu_read_unlock_special(t);
+		barrier();  /* ->rcu_read_unlock_special load before assign */
+		t->rcu_read_lock_nesting = 0;
+	}
 #ifdef CONFIG_PROVE_LOCKING
-	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+	{
+		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+	}
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu)
 		rcu_preempt_qs(cpu);
 		return;
 	}
-	if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+	if (t->rcu_read_lock_nesting > 0 &&
+	    per_cpu(rcu_preempt_data, cpu).qs_pending)
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -695,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	for (;;) {
-		if (!sync_rcu_preempt_exp_done(rnp))
+		if (!sync_rcu_preempt_exp_done(rnp)) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			break;
+		}
 		if (rnp->parent == NULL) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			wake_up(&sync_rcu_preempt_exp_wq);
 			break;
 		}
@@ -707,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 		raw_spin_lock(&rnp->lock); /* irqs already disabled */
 		rnp->expmask &= ~mask;
 	}
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -1174,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&mtx, t);
 	t->rcu_boost_mutex = &mtx;
-	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	t->rcu_boosted = 1;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
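
The INT_MIN trick in the new __rcu_read_unlock() above deserves a worked example: the outermost unlock parks ->rcu_read_lock_nesting at a large negative value, so anything that interrupts the unlock sees a negative count and stays out of the slow path, and the new `nesting < 0` branch in rcu_preempt_note_context_switch() can finish the job for a preempted unlock. A single-threaded, hedged sketch of just the counter protocol, with the special work reduced to a printf and all barriers omitted:

    #include <limits.h>
    #include <stdio.h>

    static int nesting;   /* stands in for ->rcu_read_lock_nesting */
    static int special;   /* stands in for ->rcu_read_unlock_special */

    static void read_unlock(void)
    {
        if (nesting != 1) {
            --nesting;                /* inner unlock: cheap path */
        } else {
            nesting = INT_MIN;        /* mark "outermost unlock in progress" */
            if (special) {
                printf("slow path runs exactly once\n");
                special = 0;
            }
            nesting = 0;              /* only now fully outside */
        }
    }

    int main(void)
    {
        nesting = 2;                  /* two nested read-side sections */
        special = 1;
        read_unlock();                /* inner */
        read_unlock();                /* outermost: handles `special' */
        return 0;
    }
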
diff --git a/kernel/sched.c b/kernel/sched.c
index 3dc716f6d8ad..fde6ff903525 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+static void sched_ttwu_do_pending(struct task_struct *list)
 {
 	struct rq *rq = this_rq();
-	struct task_struct *list = xchg(&rq->wake_list, NULL);
-
-	if (!list)
-		return;
 
 	raw_spin_lock(&rq->lock);
 
@@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
 	raw_spin_unlock(&rq->lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	sched_ttwu_do_pending(list);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
 void scheduler_ipi(void)
 {
-	sched_ttwu_pending();
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	/*
+	 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+	 * traditionally all their work was done from the interrupt return
+	 * path. Now that we actually do some work, we need to make sure
+	 * we do call them.
+	 *
+	 * Some archs already do call them, luckily irq_enter/exit nest
+	 * properly.
+	 *
+	 * Arguably we should visit all archs and update all handlers,
+	 * however a fair share of IPIs are still resched only so this would
+	 * somewhat pessimize the simple resched case.
+	 */
+	irq_enter();
+	sched_ttwu_do_pending(list);
+	irq_exit();
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
@@ -6557,7 +6589,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->sgp->power) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -6581,9 +6613,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->cpu_power != SCHED_POWER_SCALE) {
+		if (group->sgp->power != SCHED_POWER_SCALE) {
 			printk(KERN_CONT " (cpu_power = %d)",
-				group->cpu_power);
+				group->sgp->power);
 		}
 
 		group = group->next;
@@ -6774,11 +6806,39 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+	struct sched_group *tmp, *first;
+
+	if (!sg)
+		return;
+
+	first = sg;
+	do {
+		tmp = sg->next;
+
+		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+			kfree(sg->sgp);
+
+		kfree(sg);
+		sg = tmp;
+	} while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref))
+
+	/*
+	 * If its an overlapping domain it has private groups, iterate and
+	 * nuke them all.
+	 */
+	if (sd->flags & SD_OVERLAP) {
+		free_sched_groups(sd->groups, 1);
+	} else if (atomic_dec_and_test(&sd->groups->ref)) {
+		kfree(sd->groups->sgp);
 		kfree(sd->groups);
+	}
 	kfree(sd);
 }
 
@@ -6945,6 +7005,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6964,15 +7025,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP	0x01
+
 struct sched_domain_topology_level {
 	sched_domain_init_f init;
 	sched_domain_mask_f mask;
+	int		    flags;
 	struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered = sched_domains_tmpmask;
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *child;
+	int i;
+
+	cpumask_clear(covered);
+
+	for_each_cpu(i, span) {
+		struct cpumask *sg_span;
+
+		if (cpumask_test_cpu(i, covered))
+			continue;
+
+		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				GFP_KERNEL, cpu_to_node(i));
+
+		if (!sg)
+			goto fail;
+
+		sg_span = sched_group_cpus(sg);
+
+		child = *per_cpu_ptr(sdd->sd, i);
+		if (child->child) {
+			child = child->child;
+			cpumask_copy(sg_span, sched_domain_span(child));
+		} else
+			cpumask_set_cpu(i, sg_span);
+
+		cpumask_or(covered, covered, sg_span);
+
+		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+		atomic_inc(&sg->sgp->ref);
+
+		if (cpumask_test_cpu(cpu, sg_span))
+			groups = sg;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+		last->next = first;
+	}
+	sd->groups = groups;
+
+	return 0;
+
+fail:
+	free_sched_groups(first, 0);
+
+	return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6981,24 +7100,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg)
+	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
+		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+	}
 
 	return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
  */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	struct sd_data *sdd = sd->private;
@@ -7006,6 +7125,12 @@ build_sched_groups(struct sched_domain *sd)
 	struct cpumask *covered;
 	int i;
 
+	get_group(cpu, sdd, &sd->groups);
+	atomic_inc(&sd->groups->ref);
+
+	if (cpu != cpumask_first(sched_domain_span(sd)))
+		return 0;
+
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
@@ -7020,7 +7145,7 @@ build_sched_groups(struct sched_domain *sd)
 			continue;
 
 		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
+		sg->sgp->power = 0;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7162,8 @@ build_sched_groups(struct sched_domain *sd)
 		last = sg;
 	}
 	last->next = first;
+
+	return 0;
 }
 
 /*
@@ -7051,12 +7178,17 @@ build_sched_groups(struct sched_domain *sd)
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	WARN_ON(!sd || !sd->groups);
+	struct sched_group *sg = sd->groups;
 
-	if (cpu != group_first_cpu(sd->groups))
-		return;
+	WARN_ON(!sd || !sg);
 
-	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+	do {
+		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg = sg->next;
+	} while (sg != sd->groups);
+
+	if (cpu != group_first_cpu(sg))
+		return;
 
 	update_group_power(sd, cpu);
 }
@@ -7177,15 +7309,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
 	struct sd_data *sdd = sd->private;
-	struct sched_group *sg = sd->groups;
 
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-	if (cpu == cpumask_first(sched_group_cpus(sg))) {
-		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
-	}
+
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7342,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
 	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
 	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
 	{ NULL, },
@@ -7234,9 +7366,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
+		sdd->sgp = alloc_percpu(struct sched_group_power *);
+		if (!sdd->sgp)
+			return -ENOMEM;
+
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
+			struct sched_group_power *sgp;
 
 			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7388,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 				return -ENOMEM;
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
+
+			sgp = kzalloc_node(sizeof(struct sched_group_power),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sgp)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sgp, j) = sgp;
 		}
 	}
 
@@ -7266,11 +7410,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		struct sd_data *sdd = &tl->data;
 
 		for_each_cpu(j, cpu_map) {
-			kfree(*per_cpu_ptr(sdd->sd, j));
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+			if (sd && (sd->flags & SD_OVERLAP))
+				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sg, j));
+			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
 		free_percpu(sdd->sd);
 		free_percpu(sdd->sg);
+		free_percpu(sdd->sgp);
 	}
 }
 
@@ -7316,8 +7464,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
-		for (tl = sched_domain_topology; tl->init; tl++)
+		for (tl = sched_domain_topology; tl->init; tl++) {
 			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+				sd->flags |= SD_OVERLAP;
+			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+				break;
+		}
 
 		while (sd->child)
 			sd = sd->child;
@@ -7329,13 +7482,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			get_group(i, sd->private, &sd->groups);
-			atomic_inc(&sd->groups->ref);
-
-			if (i != cpumask_first(sched_domain_span(sd)))
-				continue;
-
-			build_sched_groups(sd);
+			if (sd->flags & SD_OVERLAP) {
+				if (build_overlap_sched_groups(sd, i))
+					goto error;
+			} else {
+				if (build_sched_groups(sd, i))
+					goto error;
+			}
 		}
 	}
 
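
build_overlap_sched_groups() above keeps its group list circular at every step with the first/last splice idiom, so a partially built list is always safe to walk. A stand-alone sketch of that idiom with a pared-down struct (error handling and frees omitted for brevity):

    #include <stdio.h>
    #include <stdlib.h>

    struct group {
        int id;
        struct group *next;
    };

    int main(void)
    {
        struct group *first = NULL, *last = NULL;

        for (int i = 0; i < 3; i++) {
            struct group *sg = calloc(1, sizeof(*sg));

            sg->id = i;
            if (!first)
                first = sg;
            if (last)
                last->next = sg;
            last = sg;
            last->next = first;  /* circular after every iteration */
        }

        struct group *sg = first;
        do {
            printf("group %d\n", sg->id);
            sg = sg->next;
        } while (sg != first);
        return 0;
    }
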
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 433491c2dc8f..c768588e180b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1585,7 +1585,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 	}
 
 	/* Adjust by relative CPU power of the group */
-	avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+	avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
 	if (local_group) {
 		this_load = avg_load;
@@ -2631,7 +2631,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->cpu_power_orig = power;
+	sdg->sgp->power_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -2647,7 +2647,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2665,11 +2665,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 	group = child->groups;
 	do {
-		power += group->cpu_power;
+		power += group->sgp->power;
 		group = group->next;
 	} while (group != child->groups);
 
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 /*
@@ -2691,7 +2691,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * If ~90% of the cpu_power is still there, we're good.
 	 */
-	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
 		return 1;
 
 	return 0;
@@ -2771,7 +2771,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	}
 
 	/* Adjust by relative CPU power of the group */
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
@@ -2788,7 +2788,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
 		sgs->group_imb = 1;
 
-	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2877,7 +2877,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 		return;
 
 	sds->total_load += sgs.group_load;
-	sds->total_pwr += sg->cpu_power;
+	sds->total_pwr += sg->sgp->power;
 
 	/*
 	 * In case the child domain prefers tasks go to siblings
@@ -2962,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd,
 	if (this_cpu > busiest_cpu)
 		return 0;
 
-	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
 				       SCHED_POWER_SCALE);
 	return 1;
 }
@@ -2993,7 +2993,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
 	scaled_busy_load_per_task = sds->busiest_load_per_task
 					 * SCHED_POWER_SCALE;
-	scaled_busy_load_per_task /= sds->busiest->cpu_power;
+	scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
 	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
 			(scaled_busy_load_per_task * imbn)) {
@@ -3007,28 +3007,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 	 * moving them.
 	 */
 
-	pwr_now += sds->busiest->cpu_power *
+	pwr_now += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load);
-	pwr_now += sds->this->cpu_power *
+	pwr_now += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load);
 	pwr_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-		sds->busiest->cpu_power;
+		sds->busiest->sgp->power;
 	if (sds->max_load > tmp)
-		pwr_move += sds->busiest->cpu_power *
+		pwr_move += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load - tmp);
 
 	/* Amount of load we'd add */
-	if (sds->max_load * sds->busiest->cpu_power <
+	if (sds->max_load * sds->busiest->sgp->power <
 		sds->busiest_load_per_task * SCHED_POWER_SCALE)
-		tmp = (sds->max_load * sds->busiest->cpu_power) /
-			sds->this->cpu_power;
+		tmp = (sds->max_load * sds->busiest->sgp->power) /
+			sds->this->sgp->power;
 	else
 		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-			sds->this->cpu_power;
-	pwr_move += sds->this->cpu_power *
+			sds->this->sgp->power;
+	pwr_move += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load + tmp);
 	pwr_move /= SCHED_POWER_SCALE;
 
@@ -3074,7 +3074,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-		load_above_capacity /= sds->busiest->cpu_power;
+		load_above_capacity /= sds->busiest->sgp->power;
 	}
 
 	/*
@@ -3090,8 +3090,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * sds->busiest->cpu_power,
-		(sds->avg_load - sds->this_load) * sds->this->cpu_power)
+	*imbalance = min(max_pull * sds->busiest->sgp->power,
+		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
 			/ SCHED_POWER_SCALE;
 
 	/*
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f7371ee1..1e7066d76c26 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@ SCHED_FEAT(NONIRQ_POWER, 1)
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, 1)
+
+SCHED_FEAT(FORCE_SD_OVERLAP, 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index ff7678603328..415d85d6f6c6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1178,18 +1178,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 {
 	struct sighand_struct *sighand;
 
-	rcu_read_lock();
 	for (;;) {
+		local_irq_save(*flags);
+		rcu_read_lock();
 		sighand = rcu_dereference(tsk->sighand);
-		if (unlikely(sighand == NULL))
+		if (unlikely(sighand == NULL)) {
+			rcu_read_unlock();
+			local_irq_restore(*flags);
 			break;
+		}
 
-		spin_lock_irqsave(&sighand->siglock, *flags);
-		if (likely(sighand == tsk->sighand))
+		spin_lock(&sighand->siglock);
+		if (likely(sighand == tsk->sighand)) {
+			rcu_read_unlock();
 			break;
-		spin_unlock_irqrestore(&sighand->siglock, *flags);
+		}
+		spin_unlock(&sighand->siglock);
+		rcu_read_unlock();
+		local_irq_restore(*flags);
 	}
-	rcu_read_unlock();
 
 	return sighand;
 }
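
The rework above is an instance of the lock-and-revalidate retry pattern: take the lock through a pointer, then confirm the pointer is still current before trusting it. A hedged pthreads sketch (a mutex standing in for siglock, the RCU section elided):

    #include <pthread.h>
    #include <stddef.h>

    struct sighand {
        pthread_mutex_t lock;
    };

    /* Lock through *slot, then confirm *slot still points at what we
     * locked; otherwise drop it and retry (cf. __lock_task_sighand()). */
    static struct sighand *lock_sighand(struct sighand *volatile *slot)
    {
        struct sighand *sh;

        for (;;) {
            sh = *slot;                 /* rcu_dereference() in the kernel */
            if (sh == NULL)
                break;                  /* task exited: nothing to lock */
            pthread_mutex_lock(&sh->lock);
            if (sh == *slot)
                break;                  /* still current: return locked */
            pthread_mutex_unlock(&sh->lock);  /* raced: retry */
        }
        return sh;
    }

    int main(void)
    {
        static struct sighand sh = { PTHREAD_MUTEX_INITIALIZER };
        struct sighand *volatile slot = &sh;
        struct sighand *locked = lock_sighand(&slot);

        if (locked)
            pthread_mutex_unlock(&locked->lock);
        return 0;
    }
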
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 40cf63ddd4b3..fca82c32042b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,16 +315,24 @@ static inline void invoke_softirq(void)
 {
 	if (!force_irqthreads)
 		__do_softirq();
-	else
+	else {
+		__local_bh_disable((unsigned long)__builtin_return_address(0),
+				SOFTIRQ_OFFSET);
 		wakeup_softirqd();
+		__local_bh_enable(SOFTIRQ_OFFSET);
+	}
 }
 #else
 static inline void invoke_softirq(void)
 {
 	if (!force_irqthreads)
 		do_softirq();
-	else
+	else {
+		__local_bh_disable((unsigned long)__builtin_return_address(0),
+				SOFTIRQ_OFFSET);
 		wakeup_softirqd();
+		__local_bh_enable(SOFTIRQ_OFFSET);
+	}
 }
 #endif
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ed24b94c5e6..d036e59d302b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2310,7 +2310,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 	for (i = 0; i <= classzone_idx; i++)
 		present_pages += pgdat->node_zones[i].present_pages;
 
-	return balanced_pages > (present_pages >> 2);
+	/* A special case here: if zone has no page, we think it's balanced */
+	return balanced_pages >= (present_pages >> 2);
 }
 
 /* is kswapd sleeping prematurely? */
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index a3a3a31d3c37..41466ccb972a 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -36,16 +36,19 @@ int ceph_flags_to_mode(int flags)
 	if ((flags & O_DIRECTORY) == O_DIRECTORY)
 		return CEPH_FILE_MODE_PIN;
 #endif
-	if ((flags & O_APPEND) == O_APPEND)
-		flags |= O_WRONLY;
 
-	if ((flags & O_ACCMODE) == O_RDWR)
-		mode = CEPH_FILE_MODE_RDWR;
-	else if ((flags & O_ACCMODE) == O_WRONLY)
+	switch (flags & O_ACCMODE) {
+	case O_WRONLY:
 		mode = CEPH_FILE_MODE_WR;
-	else
+		break;
+	case O_RDONLY:
 		mode = CEPH_FILE_MODE_RD;
-
+		break;
+	case O_RDWR:
+	case O_ACCMODE: /* this is what the VFS does */
+		mode = CEPH_FILE_MODE_RDWR;
+		break;
+	}
 #ifdef O_LAZY
 	if (flags & O_LAZY)
 		mode |= CEPH_FILE_MODE_LAZY;
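
One worked consequence of the new switch: O_ACCMODE (value 3) is itself a valid case because that is what the VFS can pass through, and O_APPEND no longer promotes a mode to write. A userspace sketch with stand-in mode constants (not the real CEPH_FILE_MODE_* values):

    #include <fcntl.h>
    #include <stdio.h>

    enum { MODE_RD = 1, MODE_WR = 2, MODE_RDWR = 3 };  /* stand-ins */

    static int flags_to_mode(int flags)
    {
        int mode = MODE_RD;

        switch (flags & O_ACCMODE) {
        case O_WRONLY:
            mode = MODE_WR;
            break;
        case O_RDONLY:
            mode = MODE_RD;
            break;
        case O_RDWR:
        case O_ACCMODE:  /* 3: what the VFS does */
            mode = MODE_RDWR;
            break;
        }
        return mode;
    }

    int main(void)
    {
        /* O_APPEND no longer affects the result; only O_ACCMODE matters. */
        printf("%d %d\n", flags_to_mode(O_WRONLY | O_APPEND),
                          flags_to_mode(O_RDWR));  /* 2 3 */
        return 0;
    }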