aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-lm353348
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-lm353365
-rw-r--r--Documentation/cgroups/memory.txt37
-rw-r--r--Documentation/cgroups/resource_counter.txt8
-rw-r--r--Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/spear-rtc.txt17
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/proc.txt2
-rw-r--r--Documentation/filesystems/vfs.txt13
-rw-r--r--Documentation/leds/ledtrig-transient.txt152
-rw-r--r--Documentation/vm/transhuge.txt62
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/arm/configs/imx_v4_v5_defconfig2
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/cris/Kconfig1
-rw-r--r--arch/x86/include/asm/pgtable-3level.h50
-rw-r--r--arch/x86/kernel/e820.c53
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/setup.c16
-rw-r--r--arch/x86/mm/init.c16
-rw-r--r--arch/x86/mm/numa.c32
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pat.c42
-rw-r--r--arch/x86/mm/srat.c5
-rw-r--r--drivers/base/node.c8
-rw-r--r--drivers/leds/Kconfig29
-rw-r--r--drivers/leds/Makefile3
-rw-r--r--drivers/leds/led-class.c21
-rw-r--r--drivers/leds/leds-da9052.c214
-rw-r--r--drivers/leds/leds-lm3530.c100
-rw-r--r--drivers/leds/leds-lm3533.c785
-rw-r--r--drivers/leds/leds-lp5521.c12
-rw-r--r--drivers/leds/leds-mc13783.c2
-rw-r--r--drivers/leds/leds-pca955x.c95
-rw-r--r--drivers/leds/ledtrig-backlight.c4
-rw-r--r--drivers/leds/ledtrig-gpio.c4
-rw-r--r--drivers/leds/ledtrig-heartbeat.c32
-rw-r--r--drivers/leds/ledtrig-timer.c54
-rw-r--r--drivers/leds/ledtrig-transient.c237
-rw-r--r--drivers/platform/x86/toshiba_acpi.c1
-rw-r--r--drivers/rtc/Kconfig42
-rw-r--r--drivers/rtc/Makefile2
-rw-r--r--drivers/rtc/rtc-ds1307.c20
-rw-r--r--drivers/rtc/rtc-ep93xx.c24
-rw-r--r--drivers/rtc/rtc-lpc32xx.c12
-rw-r--r--drivers/rtc/rtc-m41t93.c46
-rw-r--r--drivers/rtc/rtc-pcf8563.c44
-rw-r--r--drivers/rtc/rtc-pl031.c14
-rw-r--r--drivers/rtc/rtc-s3c.c2
-rw-r--r--drivers/rtc/rtc-spear.c10
-rw-r--r--drivers/rtc/rtc-tegra.c50
-rw-r--r--drivers/staging/android/ashmem.c8
-rw-r--r--drivers/video/backlight/Kconfig12
-rw-r--r--drivers/video/backlight/Makefile1
-rw-r--r--drivers/video/backlight/adp5520_bl.c4
-rw-r--r--drivers/video/backlight/adp8860_bl.c28
-rw-r--r--drivers/video/backlight/adp8870_bl.c28
-rw-r--r--drivers/video/backlight/ams369fg06.c16
-rw-r--r--drivers/video/backlight/apple_bl.c21
-rw-r--r--drivers/video/backlight/backlight.c11
-rw-r--r--drivers/video/backlight/corgi_lcd.c12
-rw-r--r--drivers/video/backlight/cr_bllcd.c9
-rw-r--r--drivers/video/backlight/da903x_bl.c1
-rw-r--r--drivers/video/backlight/generic_bl.c6
-rw-r--r--drivers/video/backlight/ili9320.c9
-rw-r--r--drivers/video/backlight/jornada720_bl.c14
-rw-r--r--drivers/video/backlight/jornada720_lcd.c8
-rw-r--r--drivers/video/backlight/l4f00242t03.c27
-rw-r--r--drivers/video/backlight/lcd.c20
-rw-r--r--drivers/video/backlight/ld9040.c15
-rw-r--r--drivers/video/backlight/lm3533_bl.c423
-rw-r--r--drivers/video/backlight/lms283gf05.c9
-rw-r--r--drivers/video/backlight/ltv350qv.c24
-rw-r--r--drivers/video/backlight/omap1_bl.c4
-rw-r--r--drivers/video/backlight/pcf50633-backlight.c1
-rw-r--r--drivers/video/backlight/progear_bl.c6
-rw-r--r--drivers/video/backlight/s6e63m0.c16
-rw-r--r--drivers/video/backlight/tdo24m.c21
-rw-r--r--drivers/video/backlight/tosa_bl.c11
-rw-r--r--drivers/video/backlight/tosa_lcd.c8
-rw-r--r--drivers/video/backlight/wm831x_bl.c1
-rw-r--r--drivers/video/fbmem.c21
-rw-r--r--drivers/video/omap2/displays/panel-acx565akm.c1
-rw-r--r--fs/bad_inode.c1
-rw-r--r--fs/proc/base.c5
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--include/asm-generic/pgtable.h22
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/apple_bl.h2
-rw-r--r--include/linux/bootmem.h3
-rw-r--r--include/linux/bug.h7
-rw-r--r--include/linux/compaction.h19
-rw-r--r--include/linux/fb.h4
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/kallsyms.h7
-rw-r--r--include/linux/kernel-page-flags.h4
-rw-r--r--include/linux/lcd.h10
-rw-r--r--include/linux/led-lm3530.h2
-rw-r--r--include/linux/leds.h2
-rw-r--r--include/linux/memcontrol.h69
-rw-r--r--include/linux/mempolicy.h9
-rw-r--r--include/linux/mm.h6
-rw-r--r--include/linux/mm_inline.h24
-rw-r--r--include/linux/mm_types.h11
-rw-r--r--include/linux/mmdebug.h2
-rw-r--r--include/linux/mmzone.h53
-rw-r--r--include/linux/oom.h5
-rw-r--r--include/linux/pagemap.h8
-rw-r--r--include/linux/res_counter.h5
-rw-r--r--include/linux/rmap.h2
-rw-r--r--include/linux/rtc.h3
-rw-r--r--include/linux/rtc/ds1307.h22
-rw-r--r--include/linux/swap.h56
-rw-r--r--include/net/sock.h22
-rw-r--r--include/trace/events/vmscan.h122
-rw-r--r--kernel/cgroup.c20
-rw-r--r--kernel/fork.c12
-rw-r--r--kernel/kallsyms.c32
-rw-r--r--kernel/res_counter.c10
-rw-r--r--lib/bitmap.c12
-rw-r--r--lib/list_debug.c3
-rw-r--r--lib/radix-tree.c15
-rw-r--r--lib/spinlock_debug.c2
-rw-r--r--lib/string_helpers.c8
-rw-r--r--lib/swiotlb.c8
-rw-r--r--lib/test-kstrtox.c4
-rw-r--r--lib/vsprintf.c14
-rw-r--r--mm/Kconfig10
-rw-r--r--mm/Makefile9
-rw-r--r--mm/bootmem.c134
-rw-r--r--mm/compaction.c151
-rw-r--r--mm/filemap.c39
-rw-r--r--mm/huge_memory.c29
-rw-r--r--mm/hugetlb.c32
-rw-r--r--mm/internal.h14
-rw-r--r--mm/madvise.c15
-rw-r--r--mm/memblock.c42
-rw-r--r--mm/memcontrol.c642
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/memory.c20
-rw-r--r--mm/memory_hotplug.c14
-rw-r--r--mm/mempolicy.c36
-rw-r--r--mm/mmap.c53
-rw-r--r--mm/mmzone.c14
-rw-r--r--mm/nobootmem.c112
-rw-r--r--mm/oom_kill.c44
-rw-r--r--mm/page_alloc.c78
-rw-r--r--mm/readahead.c40
-rw-r--r--mm/rmap.c6
-rw-r--r--mm/shmem.c513
-rw-r--r--mm/sparse.c25
-rw-r--r--mm/swap.c129
-rw-r--r--mm/swapfile.c33
-rw-r--r--mm/thrash.c155
-rw-r--r--mm/truncate.c25
-rw-r--r--mm/vmalloc.c7
-rw-r--r--mm/vmscan.c738
-rw-r--r--mm/vmstat.c10
-rw-r--r--net/ipv4/tcp_memcontrol.c34
-rw-r--r--tools/vm/page-types.c50
161 files changed, 4693 insertions, 2548 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
new file mode 100644
index 000000000000..77cf7ac949af
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
@@ -0,0 +1,48 @@
1What: /sys/class/backlight/<backlight>/als_channel
2Date: May 2012
3KernelVersion: 3.5
4Contact: Johan Hovold <jhovold@gmail.com>
5Description:
6 Get the ALS output channel used as input in
7 ALS-current-control mode (0, 1), where
8
9 0 - out_current0 (backlight 0)
10 1 - out_current1 (backlight 1)
11
12What: /sys/class/backlight/<backlight>/als_en
13Date: May 2012
14KernelVersion: 3.5
15Contact: Johan Hovold <jhovold@gmail.com>
16Description:
17 Enable ALS-current-control mode (0, 1).
18
19What: /sys/class/backlight/<backlight>/id
20Date: April 2012
21KernelVersion: 3.5
22Contact: Johan Hovold <jhovold@gmail.com>
23Description:
24 Get the id of this backlight (0, 1).
25
26What: /sys/class/backlight/<backlight>/linear
27Date: April 2012
28KernelVersion: 3.5
29Contact: Johan Hovold <jhovold@gmail.com>
30Description:
31 Set the brightness-mapping mode (0, 1), where
32
33 0 - exponential mode
34 1 - linear mode
35
36What: /sys/class/backlight/<backlight>/pwm
37Date: April 2012
38KernelVersion: 3.5
39Contact: Johan Hovold <jhovold@gmail.com>
40Description:
41 Set the PWM-input control mask (6 bits), where
42
43 bit 5 - PWM-input enabled in Zone 4
44 bit 4 - PWM-input enabled in Zone 3
45 bit 3 - PWM-input enabled in Zone 2
46 bit 2 - PWM-input enabled in Zone 1
47 bit 1 - PWM-input enabled in Zone 0
48 bit 0 - PWM-input enabled
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
new file mode 100644
index 000000000000..620ebb3b9baa
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
@@ -0,0 +1,65 @@
1What: /sys/class/leds/<led>/als_channel
2Date: May 2012
3KernelVersion: 3.5
4Contact: Johan Hovold <jhovold@gmail.com>
5Description:
6 Set the ALS output channel to use as input in
7 ALS-current-control mode (1, 2), where
8
9 1 - out_current1
10 2 - out_current2
11
12What: /sys/class/leds/<led>/als_en
13Date: May 2012
14KernelVersion: 3.5
15Contact: Johan Hovold <jhovold@gmail.com>
16Description:
17 Enable ALS-current-control mode (0, 1).
18
19What: /sys/class/leds/<led>/falltime
20What: /sys/class/leds/<led>/risetime
21Date: April 2012
22KernelVersion: 3.5
23Contact: Johan Hovold <jhovold@gmail.com>
24Description:
25 Set the pattern generator fall and rise times (0..7), where
26
27 0 - 2048 us
28 1 - 262 ms
29 2 - 524 ms
30 3 - 1.049 s
31 4 - 2.097 s
32 5 - 4.194 s
33 6 - 8.389 s
34 7 - 16.78 s
35
36What: /sys/class/leds/<led>/id
37Date: April 2012
38KernelVersion: 3.5
39Contact: Johan Hovold <jhovold@gmail.com>
40Description:
41 Get the id of this led (0..3).
42
43What: /sys/class/leds/<led>/linear
44Date: April 2012
45KernelVersion: 3.5
46Contact: Johan Hovold <jhovold@gmail.com>
47Description:
48 Set the brightness-mapping mode (0, 1), where
49
50 0 - exponential mode
51 1 - linear mode
52
53What: /sys/class/leds/<led>/pwm
54Date: April 2012
55KernelVersion: 3.5
56Contact: Johan Hovold <jhovold@gmail.com>
57Description:
58 Set the PWM-input control mask (6 bits), where
59
60 bit 5 - PWM-input enabled in Zone 4
61 bit 4 - PWM-input enabled in Zone 3
62 bit 3 - PWM-input enabled in Zone 2
63 bit 2 - PWM-input enabled in Zone 1
64 bit 1 - PWM-input enabled in Zone 0
65 bit 0 - PWM-input enabled
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 9b1067afb224..dd88540bb995 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -184,12 +184,14 @@ behind this approach is that a cgroup that aggressively uses a shared
184page will eventually get charged for it (once it is uncharged from 184page will eventually get charged for it (once it is uncharged from
185the cgroup that brought it in -- this will happen on memory pressure). 185the cgroup that brought it in -- this will happen on memory pressure).
186 186
187But see section 8.2: when moving a task to another cgroup, its pages may
188be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
189
187Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. 190Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
188When you do swapoff and make swapped-out pages of shmem(tmpfs) to 191When you do swapoff and make swapped-out pages of shmem(tmpfs) to
189be backed into memory in force, charges for pages are accounted against the 192be backed into memory in force, charges for pages are accounted against the
190caller of swapoff rather than the users of shmem. 193caller of swapoff rather than the users of shmem.
191 194
192
1932.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) 1952.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
194 196
195Swap Extension allows you to record charge for swap. A swapped-in page is 197Swap Extension allows you to record charge for swap. A swapped-in page is
@@ -374,14 +376,15 @@ cgroup might have some charge associated with it, even though all
374tasks have migrated away from it. (because we charge against pages, not 376tasks have migrated away from it. (because we charge against pages, not
375against tasks.) 377against tasks.)
376 378
377Such charges are freed or moved to their parent. At moving, both of RSS 379We move the stats to root (if use_hierarchy==0) or parent (if
378and CACHES are moved to parent. 380use_hierarchy==1), and no change on the charge except uncharging
379rmdir() may return -EBUSY if freeing/moving fails. See 5.1 also. 381from the child.
380 382
381Charges recorded in swap information is not updated at removal of cgroup. 383Charges recorded in swap information is not updated at removal of cgroup.
382Recorded information is discarded and a cgroup which uses swap (swapcache) 384Recorded information is discarded and a cgroup which uses swap (swapcache)
383will be charged as a new owner of it. 385will be charged as a new owner of it.
384 386
387About use_hierarchy, see Section 6.
385 388
3865. Misc. interfaces. 3895. Misc. interfaces.
387 390
@@ -394,13 +397,15 @@ will be charged as a new owner of it.
394 397
395 Almost all pages tracked by this memory cgroup will be unmapped and freed. 398 Almost all pages tracked by this memory cgroup will be unmapped and freed.
396 Some pages cannot be freed because they are locked or in-use. Such pages are 399 Some pages cannot be freed because they are locked or in-use. Such pages are
397 moved to parent and this cgroup will be empty. This may return -EBUSY if 400 moved to parent(if use_hierarchy==1) or root (if use_hierarchy==0) and this
398 VM is too busy to free/move all pages immediately. 401 cgroup will be empty.
399 402
400 Typical use case of this interface is that calling this before rmdir(). 403 Typical use case of this interface is that calling this before rmdir().
401 Because rmdir() moves all pages to parent, some out-of-use page caches can be 404 Because rmdir() moves all pages to parent, some out-of-use page caches can be
402 moved to the parent. If you want to avoid that, force_empty will be useful. 405 moved to the parent. If you want to avoid that, force_empty will be useful.
403 406
407 About use_hierarchy, see Section 6.
408
4045.2 stat file 4095.2 stat file
405 410
406memory.stat file includes following statistics 411memory.stat file includes following statistics
@@ -430,17 +435,10 @@ hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
430hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to 435hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
431 hierarchy under which memory cgroup is. 436 hierarchy under which memory cgroup is.
432 437
433total_cache - sum of all children's "cache" 438total_<counter> - # hierarchical version of <counter>, which in
434total_rss - sum of all children's "rss" 439 addition to the cgroup's own value includes the
435total_mapped_file - sum of all children's "cache" 440 sum of all hierarchical children's values of
436total_pgpgin - sum of all children's "pgpgin" 441 <counter>, e.g. total_cache
437total_pgpgout - sum of all children's "pgpgout"
438total_swap - sum of all children's "swap"
439total_inactive_anon - sum of all children's "inactive_anon"
440total_active_anon - sum of all children's "active_anon"
441total_inactive_file - sum of all children's "inactive_file"
442total_active_file - sum of all children's "active_file"
443total_unevictable - sum of all children's "unevictable"
444 442
445# The following additional stats are dependent on CONFIG_DEBUG_VM. 443# The following additional stats are dependent on CONFIG_DEBUG_VM.
446 444
@@ -622,8 +620,7 @@ memory cgroup.
622 bit | what type of charges would be moved ? 620 bit | what type of charges would be moved ?
623 -----+------------------------------------------------------------------------ 621 -----+------------------------------------------------------------------------
624 0 | A charge of an anonymous page(or swap of it) used by the target task. 622 0 | A charge of an anonymous page(or swap of it) used by the target task.
625 | Those pages and swaps must be used only by the target task. You must 623 | You must enable Swap Extension(see 2.4) to enable move of swap charges.
626 | enable Swap Extension(see 2.4) to enable move of swap charges.
627 -----+------------------------------------------------------------------------ 624 -----+------------------------------------------------------------------------
628 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory) 625 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory)
629 | and swaps of tmpfs file) mmapped by the target task. Unlike the case of 626 | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
@@ -636,8 +633,6 @@ memory cgroup.
636 633
6378.3 TODO 6348.3 TODO
638 635
639- Implement madvise(2) to let users decide the vma to be moved or not to be
640 moved.
641- All of moving charge operations are done under cgroup_mutex. It's not good 636- All of moving charge operations are done under cgroup_mutex. It's not good
642 behavior to hold the mutex too long, so we may need some trick. 637 behavior to hold the mutex too long, so we may need some trick.
643 638
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index f3c4ec3626a2..0c4a344e78fa 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -92,6 +92,14 @@ to work with it.
92 92
93 The _locked routines imply that the res_counter->lock is taken. 93 The _locked routines imply that the res_counter->lock is taken.
94 94
95 f. void res_counter_uncharge_until
96 (struct res_counter *rc, struct res_counter *top,
97 unsigned long val)
98
99 Almost the same as res_counter_uncharge() but propagation of uncharge
100 stops when rc == top. This is useful when killing a res_counter in
101 a child cgroup.
102
95 2.1 Other accounting routines 103 2.1 Other accounting routines
96 104
97 There are more routines that may help you with common needs, like 105 There are more routines that may help you with common needs, like
diff --git a/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt
new file mode 100644
index 000000000000..a87a1e9bc060
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt
@@ -0,0 +1,15 @@
1* NXP LPC32xx SoC Real Time Clock controller
2
3Required properties:
4- compatible: must be "nxp,lpc3220-rtc"
5- reg: physical base address of the controller and length of memory mapped
6 region.
7- interrupts: The RTC interrupt
8
9Example:
10
11 rtc@40024000 {
12 compatible = "nxp,lpc3220-rtc";
13 reg = <0x40024000 0x1000>;
14 interrupts = <52 0>;
15 };
diff --git a/Documentation/devicetree/bindings/rtc/spear-rtc.txt b/Documentation/devicetree/bindings/rtc/spear-rtc.txt
new file mode 100644
index 000000000000..ca67ac62108e
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/spear-rtc.txt
@@ -0,0 +1,17 @@
1* SPEAr RTC
2
3Required properties:
4- compatible : "st,spear600-rtc"
5- reg : Address range of the rtc registers
6- interrupt-parent: Should be the phandle for the interrupt controller
7 that services interrupts for this device
8- interrupts: Should contain the rtc interrupt number
9
10Example:
11
12 rtc@fc000000 {
13 compatible = "st,spear600-rtc";
14 reg = <0xfc000000 0x1000>;
15 interrupt-parent = <&vic1>;
16 interrupts = <12>;
17 };
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4fca82e5276e..d449e632e6a0 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -60,7 +60,6 @@ ata *);
60 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 60 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
61 ssize_t (*listxattr) (struct dentry *, char *, size_t); 61 ssize_t (*listxattr) (struct dentry *, char *, size_t);
62 int (*removexattr) (struct dentry *, const char *); 62 int (*removexattr) (struct dentry *, const char *);
63 void (*truncate_range)(struct inode *, loff_t, loff_t);
64 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); 63 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
65 64
66locking rules: 65locking rules:
@@ -87,7 +86,6 @@ setxattr: yes
87getxattr: no 86getxattr: no
88listxattr: no 87listxattr: no
89removexattr: yes 88removexattr: yes
90truncate_range: yes
91fiemap: no 89fiemap: no
92 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 90 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
93victim. 91victim.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ef088e55ab2e..912af6ce5626 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -743,6 +743,7 @@ Committed_AS: 100056 kB
743VmallocTotal: 112216 kB 743VmallocTotal: 112216 kB
744VmallocUsed: 428 kB 744VmallocUsed: 428 kB
745VmallocChunk: 111088 kB 745VmallocChunk: 111088 kB
746AnonHugePages: 49152 kB
746 747
747 MemTotal: Total usable ram (i.e. physical ram minus a few reserved 748 MemTotal: Total usable ram (i.e. physical ram minus a few reserved
748 bits and the kernel binary code) 749 bits and the kernel binary code)
@@ -776,6 +777,7 @@ VmallocChunk: 111088 kB
776 Dirty: Memory which is waiting to get written back to the disk 777 Dirty: Memory which is waiting to get written back to the disk
777 Writeback: Memory which is actively being written back to the disk 778 Writeback: Memory which is actively being written back to the disk
778 AnonPages: Non-file backed pages mapped into userspace page tables 779 AnonPages: Non-file backed pages mapped into userspace page tables
780AnonHugePages: Non-file backed huge pages mapped into userspace page tables
779 Mapped: files which have been mmaped, such as libraries 781 Mapped: files which have been mmaped, such as libraries
780 Slab: in-kernel data structures cache 782 Slab: in-kernel data structures cache
781SReclaimable: Part of Slab, that might be reclaimed, such as caches 783SReclaimable: Part of Slab, that might be reclaimed, such as caches
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 0d0492028082..ef19f91a0f12 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -363,7 +363,6 @@ struct inode_operations {
363 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 363 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
364 ssize_t (*listxattr) (struct dentry *, char *, size_t); 364 ssize_t (*listxattr) (struct dentry *, char *, size_t);
365 int (*removexattr) (struct dentry *, const char *); 365 int (*removexattr) (struct dentry *, const char *);
366 void (*truncate_range)(struct inode *, loff_t, loff_t);
367}; 366};
368 367
369Again, all methods are called without any locks being held, unless 368Again, all methods are called without any locks being held, unless
@@ -472,9 +471,6 @@ otherwise noted.
472 removexattr: called by the VFS to remove an extended attribute from 471 removexattr: called by the VFS to remove an extended attribute from
473 a file. This method is called by removexattr(2) system call. 472 a file. This method is called by removexattr(2) system call.
474 473
475 truncate_range: a method provided by the underlying filesystem to truncate a
476 range of blocks , i.e. punch a hole somewhere in a file.
477
478 474
479The Address Space Object 475The Address Space Object
480======================== 476========================
@@ -760,7 +756,7 @@ struct file_operations
760---------------------- 756----------------------
761 757
762This describes how the VFS can manipulate an open file. As of kernel 758This describes how the VFS can manipulate an open file. As of kernel
7632.6.22, the following members are defined: 7593.5, the following members are defined:
764 760
765struct file_operations { 761struct file_operations {
766 struct module *owner; 762 struct module *owner;
@@ -790,6 +786,8 @@ struct file_operations {
790 int (*flock) (struct file *, int, struct file_lock *); 786 int (*flock) (struct file *, int, struct file_lock *);
791 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); 787 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
792 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); 788 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
789 int (*setlease)(struct file *, long arg, struct file_lock **);
790 long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
793}; 791};
794 792
795Again, all methods are called without any locks being held, unless 793Again, all methods are called without any locks being held, unless
@@ -858,6 +856,11 @@ otherwise noted.
858 splice_read: called by the VFS to splice data from file to a pipe. This 856 splice_read: called by the VFS to splice data from file to a pipe. This
859 method is used by the splice(2) system call 857 method is used by the splice(2) system call
860 858
859 setlease: called by the VFS to set or release a file lock lease.
860 setlease has the file_lock_lock held and must not sleep.
861
862 fallocate: called by the VFS to preallocate blocks or punch a hole.
863
861Note that the file operations are implemented by the specific 864Note that the file operations are implemented by the specific
862filesystem in which the inode resides. When opening a device node 865filesystem in which the inode resides. When opening a device node
863(character or block special) most filesystems will call special 866(character or block special) most filesystems will call special
diff --git a/Documentation/leds/ledtrig-transient.txt b/Documentation/leds/ledtrig-transient.txt
new file mode 100644
index 000000000000..3bd38b487df1
--- /dev/null
+++ b/Documentation/leds/ledtrig-transient.txt
@@ -0,0 +1,152 @@
1LED Transient Trigger
2=====================
3
4The leds timer trigger does not currently have an interface to activate
5a one shot timer. The current support allows for setting two timers, one for
6specifying how long a state is to be on, and the second for how long the state
7is to be off. The delay_on value specifies the time period an LED should stay
8in on state, followed by a delay_off value that specifies how long the LED
9should stay in off state. The on and off cycle repeats until the trigger
10gets deactivated. There is no provision for one time activation to implement
11features that require an on or off state to be held just once and then stay in
12the original state forever.
13
14Without one shot timer interface, user space can still use timer trigger to
15set a timer to hold a state, however when user space application crashes or
16goes away without deactivating the timer, the hardware will be left in that
17state permanently.
18
19As a specific example of this use-case, let's look at vibrate feature on
20phones. Vibrate function on phones is implemented using PWM pins on SoC or
21PMIC. There is a need to activate one shot timer to control the vibrate
22feature, to prevent user space crashes leaving the phone in vibrate mode
23permanently causing the battery to drain.
24
25Transient trigger addresses the need for one shot timer activation. The
26transient trigger can be enabled and disabled just like the other leds
27triggers.
28
29When an led class device driver registers itself, it can specify all leds
30triggers it supports and a default trigger. During registration, activation
31routine for the default trigger gets called. During registration of an led
32class device, the LED state does not change.
33
34When the driver unregisters, deactivation routine for the currently active
35trigger will be called, and LED state is changed to LED_OFF.
36
37Driver suspend changes the LED state to LED_OFF and resume doesn't change
38the state. Please note that there is no explicit interaction between the
39suspend and resume actions and the currently enabled trigger. LED state
40changes are suspended while the driver is in suspend state. Any timers
41that are active at the time driver gets suspended, continue to run, without
42being able to actually change the LED state. Once driver is resumed, triggers
43start functioning again.
44
45LED state changes are controlled using brightness which is a common led
46class device property. When brightness is set to 0 from user space via
47echo 0 > brightness, it will result in deactivating the current trigger.
48
49Transient trigger uses standard register and unregister interfaces. During
50trigger registration, for each led class device that specifies this trigger
51as its default trigger, trigger activation routine will get called. During
52registration, the LED state does not change, unless there is another trigger
53active, in which case LED state changes to LED_OFF.
54
55During trigger unregistration, LED state gets changed to LED_OFF.
56
57Transient trigger activation routine doesn't change the LED state. It
58creates its properties and does its initialization. Transient trigger
59deactivation routine, will cancel any timer that is active before it cleans
60up and removes the properties it created. It will restore the LED state to
61non-transient state. When driver gets suspended, irrespective of the transient
62state, the LED state changes to LED_OFF.
63
64Transient trigger can be enabled and disabled from user space on led class
65devices, that support this trigger as shown below:
66
67echo transient > trigger
68echo none > trigger
69
70NOTE: Add a new property trigger state to control the state.
71
72This trigger exports three properties, activate, state, and duration. When
73transient trigger is activated these properties are set to default values.
74
75- duration allows setting timer value in msecs. The initial value is 0.
76- activate allows activating and deactivating the timer specified by
77 duration as needed. The initial and default value is 0. This will allow
78 duration to be set after trigger activation.
79- state allows user to specify a transient state to be held for the specified
80 duration.
81
82 activate - one shot timer activate mechanism.
83 1 when activated, 0 when deactivated.
84 default value is zero when transient trigger is enabled,
85 to allow duration to be set.
86
87 activate state indicates a timer with a value of specified
88 duration running.
89 deactivated state indicates that there is no active timer
90 running.
91
92 duration - one shot timer value. When activate is set, duration value
93 is used to start a timer that runs once. This value doesn't
94 get changed by the trigger unless user does a set via
95 echo new_value > duration
96
97 state - transient state to be held. It has two values 0 or 1. 0 maps
98 to LED_OFF and 1 maps to LED_FULL. The specified state is
99 held for the duration of the one shot timer and then the
100 state gets changed to the non-transient state which is the
101 inverse of transient state.
102 If state = LED_FULL, when the timer runs out the state will
103 go back to LED_OFF.
104 If state = LED_OFF, when the timer runs out the state will
105 go back to LED_FULL.
106 Please note that current LED state is not checked prior to
107 changing the state to the specified state.
108 Driver could map these values to inverted depending on the
109 default states it defines for the LED in its brightness_set()
110 interface which is called from the led brightness_set()
111 interfaces to control the LED state.
112
113When timer expires activate goes back to deactivated state, duration is left
114at the set value to be used when activate is set at a future time. This will
115allow user app to set the time once and activate it to run it once for the
116specified value as needed. When timer expires, state is restored to the
117non-transient state which is the inverse of the transient state.
118
119 echo 1 > activate - starts timer = duration when duration is not 0.
120 echo 0 > activate - cancels currently running timer.
121 echo n > duration - stores timer value to be used upon next
122 activate. Currently active timer if
123 any, continues to run for the specified time.
124 echo 0 > duration - stores timer value to be used upon next
125 activate. Currently active timer if any,
126 continues to run for the specified time.
127 echo 1 > state - stores desired transient state LED_FULL to be
128 held for the specified duration.
129 echo 0 > state - stores desired transient state LED_OFF to be
130 held for the specified duration.
131
132What is not supported:
133======================
134- Timer activation is one shot and extending and/or shortening the timer
135 is not supported.
136
137Example use-case 1:
138 echo transient > trigger
139 echo n > duration
140 echo 1 > state
141repeat the following step as needed:
142 echo 1 > activate - start timer = duration to run once
143 echo 1 > activate - start timer = duration to run once
144 echo none > trigger
145
146This trigger is intended to be used for the following example use cases:
147 - Control of vibrate (phones, tablets etc.) hardware by user space app.
148 - Use of LED by user space app as activity indicator.
149 - Use of LED by user space app as a kind of watchdog indicator -- as
150 long as the app is alive, it can keep the LED illuminated, if it dies
151 the LED will be extinguished automatically.
152 - Use by any user space app that needs a transient GPIO output.
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 29bdf62aac09..f734bb2a78dc 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -166,6 +166,68 @@ behavior. So to make them effective you need to restart any
166application that could have been using hugepages. This also applies to 166application that could have been using hugepages. This also applies to
167the regions registered in khugepaged. 167the regions registered in khugepaged.
168 168
169== Monitoring usage ==
170
171The number of transparent huge pages currently used by the system is
172available by reading the AnonHugePages field in /proc/meminfo. To
173identify what applications are using transparent huge pages, it is
174necessary to read /proc/PID/smaps and count the AnonHugePages fields
175for each mapping. Note that reading the smaps file is expensive and
176reading it frequently will incur overhead.
177
178There are a number of counters in /proc/vmstat that may be used to
179monitor how successfully the system is providing huge pages for use.
180
181thp_fault_alloc is incremented every time a huge page is successfully
182 allocated to handle a page fault. This applies to both the
183 first time a page is faulted and for COW faults.
184
185thp_collapse_alloc is incremented by khugepaged when it has found
186 a range of pages to collapse into one huge page and has
187 successfully allocated a new huge page to store the data.
188
189thp_fault_fallback is incremented if a page fault fails to allocate
190 a huge page and instead falls back to using small pages.
191
192thp_collapse_alloc_failed is incremented if khugepaged found a range
193 of pages that should be collapsed into one huge page but failed
194 the allocation.
195
196thp_split is incremented every time a huge page is split into base
197 pages. This can happen for a variety of reasons but a common
198 reason is that a huge page is old and is being reclaimed.
199
200As the system ages, allocating huge pages may be expensive as the
201system uses memory compaction to copy data around memory to free a
202huge page for use. There are some counters in /proc/vmstat to help
203monitor this overhead.
204
205compact_stall is incremented every time a process stalls to run
206 memory compaction so that a huge page is free for use.
207
208compact_success is incremented if the system compacted memory and
209 freed a huge page for use.
210
211compact_fail is incremented if the system tries to compact memory
212 but fails.
213
214compact_pages_moved is incremented each time a page is moved. If
215 this value is increasing rapidly, it implies that the system
216 is copying a lot of data to satisfy the huge page allocation.
217 It is possible that the cost of copying exceeds any savings
218 from reduced TLB misses.
219
220compact_pagemigrate_failed is incremented when the underlying mechanism
221 for moving a page failed.
222
223compact_blocks_moved is incremented each time memory compaction examines
224 a huge page aligned range of pages.
225
226It is possible to establish how long the stalls were using the function
227tracer to record how long was spent in __alloc_pages_nodemask and
228using the mm_page_alloc tracepoint to identify which allocations were
229for huge pages.
230
169== get_user_pages and follow_page == 231== get_user_pages and follow_page ==
170 232
171get_user_pages and follow_page if run on a hugepage, will return the 233get_user_pages and follow_page if run on a hugepage, will return the
diff --git a/MAINTAINERS b/MAINTAINERS
index cc710d2ef009..a246490c95eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3232,10 +3232,8 @@ F: include/linux/clockchips.h
3232F: include/linux/hrtimer.h 3232F: include/linux/hrtimer.h
3233 3233
3234HIGH-SPEED SCC DRIVER FOR AX.25 3234HIGH-SPEED SCC DRIVER FOR AX.25
3235M: Klaus Kudielka <klaus.kudielka@ieee.org>
3236L: linux-hams@vger.kernel.org 3235L: linux-hams@vger.kernel.org
3237W: http://www.nt.tuwien.ac.at/~kkudielk/Linux/ 3236S: Orphan
3238S: Maintained
3239F: drivers/net/hamradio/dmascc.c 3237F: drivers/net/hamradio/dmascc.c
3240F: drivers/net/hamradio/scc.c 3238F: drivers/net/hamradio/scc.c
3241 3239
@@ -4511,12 +4509,6 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
4511S: Maintained 4509S: Maintained
4512F: drivers/mmc/host/imxmmc.* 4510F: drivers/mmc/host/imxmmc.*
4513 4511
4514MOUSE AND MISC DEVICES [GENERAL]
4515M: Alessandro Rubini <rubini@ipvvis.unipv.it>
4516S: Maintained
4517F: drivers/input/mouse/
4518F: include/linux/gpio_mouse.h
4519
4520MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD 4512MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD
4521M: Jiri Slaby <jirislaby@gmail.com> 4513M: Jiri Slaby <jirislaby@gmail.com>
4522S: Maintained 4514S: Maintained
diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index ebda45bd5763..e05a2f1665a7 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -173,7 +173,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
173CONFIG_RTC_CLASS=y 173CONFIG_RTC_CLASS=y
174CONFIG_RTC_DRV_PCF8563=y 174CONFIG_RTC_DRV_PCF8563=y
175CONFIG_RTC_DRV_IMXDI=y 175CONFIG_RTC_DRV_IMXDI=y
176CONFIG_RTC_MXC=y 176CONFIG_RTC_DRV_MXC=y
177CONFIG_DMADEVICES=y 177CONFIG_DMADEVICES=y
178CONFIG_IMX_SDMA=y 178CONFIG_IMX_SDMA=y
179CONFIG_IMX_DMA=y 179CONFIG_IMX_DMA=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 12617f7296e6..b1d3675df72c 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -178,7 +178,7 @@ CONFIG_NEW_LEDS=y
178CONFIG_LEDS_CLASS=y 178CONFIG_LEDS_CLASS=y
179CONFIG_RTC_CLASS=y 179CONFIG_RTC_CLASS=y
180CONFIG_RTC_INTF_DEV_UIE_EMUL=y 180CONFIG_RTC_INTF_DEV_UIE_EMUL=y
181CONFIG_RTC_MXC=y 181CONFIG_RTC_DRV_MXC=y
182CONFIG_DMADEVICES=y 182CONFIG_DMADEVICES=y
183CONFIG_IMX_SDMA=y 183CONFIG_IMX_SDMA=y
184CONFIG_EXT2_FS=y 184CONFIG_EXT2_FS=y
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 22d34d64cc81..bb344650a14f 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -40,6 +40,7 @@ config CRIS
40 bool 40 bool
41 default y 41 default y
42 select HAVE_IDE 42 select HAVE_IDE
43 select GENERIC_ATOMIC64
43 select HAVE_GENERIC_HARDIRQS 44 select HAVE_GENERIC_HARDIRQS
44 select GENERIC_IRQ_SHOW 45 select GENERIC_IRQ_SHOW
45 select GENERIC_IOMAP 46 select GENERIC_IOMAP
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index effff47a3c82..43876f16caf1 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
31 ptep->pte_low = pte.pte_low; 31 ptep->pte_low = pte.pte_low;
32} 32}
33 33
34#define pmd_read_atomic pmd_read_atomic
35/*
36 * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
37 * a "*pmdp" dereference done by gcc. Problem is, in certain places
38 * where pte_offset_map_lock is called, concurrent page faults are
39 * allowed, if the mmap_sem is held for reading. An example is mincore
40 * vs page faults vs MADV_DONTNEED. On the page fault side
41 * pmd_populate rightfully does a set_64bit, but if we're reading the
42 * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
43 * because gcc will not read the 64bit of the pmd atomically. To fix
44 * this, all places running pte_offset_map_lock() while holding the
45 * mmap_sem in read mode, shall read the pmdp pointer using this
46 * function to know if the pmd is null or not, and in turn to know if
47 * they can run pte_offset_map_lock or pmd_trans_huge or other pmd
48 * operations.
49 *
50 * Without THP if the mmap_sem is held for reading, the
51 * pmd can only transition from null to not null while pmd_read_atomic runs.
52 * So there's no need of literally reading it atomically.
53 *
54 * With THP if the mmap_sem is held for reading, the pmd can become
55 * THP or null or point to a pte (and in turn become "stable") at any
56 * time under pmd_read_atomic, so it's mandatory to read it atomically
57 * with cmpxchg8b.
58 */
59#ifndef CONFIG_TRANSPARENT_HUGEPAGE
60static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
61{
62 pmdval_t ret;
63 u32 *tmp = (u32 *)pmdp;
64
65 ret = (pmdval_t) (*tmp);
66 if (ret) {
67 /*
68 * If the low part is null, we must not read the high part
69 * or we can end up with a partial pmd.
70 */
71 smp_rmb();
72 ret |= ((pmdval_t)*(tmp + 1)) << 32;
73 }
74
75 return (pmd_t) { ret };
76}
77#else /* CONFIG_TRANSPARENT_HUGEPAGE */
78static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
79{
80 return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
81}
82#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
83
34static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) 84static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
35{ 85{
36 set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); 86 set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9976eb..41857970517f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
113 int x = e820x->nr_map; 113 int x = e820x->nr_map;
114 114
115 if (x >= ARRAY_SIZE(e820x->map)) { 115 if (x >= ARRAY_SIZE(e820x->map)) {
116 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 116 printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n",
117 (unsigned long long) start,
118 (unsigned long long) (start + size - 1));
117 return; 119 return;
118 } 120 }
119 121
@@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type)
133 switch (type) { 135 switch (type) {
134 case E820_RAM: 136 case E820_RAM:
135 case E820_RESERVED_KERN: 137 case E820_RESERVED_KERN:
136 printk(KERN_CONT "(usable)"); 138 printk(KERN_CONT "usable");
137 break; 139 break;
138 case E820_RESERVED: 140 case E820_RESERVED:
139 printk(KERN_CONT "(reserved)"); 141 printk(KERN_CONT "reserved");
140 break; 142 break;
141 case E820_ACPI: 143 case E820_ACPI:
142 printk(KERN_CONT "(ACPI data)"); 144 printk(KERN_CONT "ACPI data");
143 break; 145 break;
144 case E820_NVS: 146 case E820_NVS:
145 printk(KERN_CONT "(ACPI NVS)"); 147 printk(KERN_CONT "ACPI NVS");
146 break; 148 break;
147 case E820_UNUSABLE: 149 case E820_UNUSABLE:
148 printk(KERN_CONT "(unusable)"); 150 printk(KERN_CONT "unusable");
149 break; 151 break;
150 default: 152 default:
151 printk(KERN_CONT "type %u", type); 153 printk(KERN_CONT "type %u", type);
@@ -158,10 +160,10 @@ void __init e820_print_map(char *who)
158 int i; 160 int i;
159 161
160 for (i = 0; i < e820.nr_map; i++) { 162 for (i = 0; i < e820.nr_map; i++) {
161 printk(KERN_INFO " %s: %016Lx - %016Lx ", who, 163 printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
162 (unsigned long long) e820.map[i].addr, 164 (unsigned long long) e820.map[i].addr,
163 (unsigned long long) 165 (unsigned long long)
164 (e820.map[i].addr + e820.map[i].size)); 166 (e820.map[i].addr + e820.map[i].size - 1));
165 e820_print_type(e820.map[i].type); 167 e820_print_type(e820.map[i].type);
166 printk(KERN_CONT "\n"); 168 printk(KERN_CONT "\n");
167 } 169 }
@@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
428 size = ULLONG_MAX - start; 430 size = ULLONG_MAX - start;
429 431
430 end = start + size; 432 end = start + size;
431 printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", 433 printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ",
432 (unsigned long long) start, 434 (unsigned long long) start, (unsigned long long) (end - 1));
433 (unsigned long long) end);
434 e820_print_type(old_type); 435 e820_print_type(old_type);
435 printk(KERN_CONT " ==> "); 436 printk(KERN_CONT " ==> ");
436 e820_print_type(new_type); 437 e820_print_type(new_type);
@@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
509 size = ULLONG_MAX - start; 510 size = ULLONG_MAX - start;
510 511
511 end = start + size; 512 end = start + size;
512 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", 513 printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ",
513 (unsigned long long) start, 514 (unsigned long long) start, (unsigned long long) (end - 1));
514 (unsigned long long) end);
515 if (checktype) 515 if (checktype)
516 e820_print_type(old_type); 516 e820_print_type(old_type);
517 printk(KERN_CONT "\n"); 517 printk(KERN_CONT "\n");
@@ -567,7 +567,7 @@ void __init update_e820(void)
567 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) 567 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
568 return; 568 return;
569 e820.nr_map = nr_map; 569 e820.nr_map = nr_map;
570 printk(KERN_INFO "modified physical RAM map:\n"); 570 printk(KERN_INFO "e820: modified physical RAM map:\n");
571 e820_print_map("modified"); 571 e820_print_map("modified");
572} 572}
573static void __init update_e820_saved(void) 573static void __init update_e820_saved(void)
@@ -637,8 +637,8 @@ __init void e820_setup_gap(void)
637 if (!found) { 637 if (!found) {
638 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; 638 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
639 printk(KERN_ERR 639 printk(KERN_ERR
640 "PCI: Warning: Cannot find a gap in the 32bit address range\n" 640 "e820: cannot find a gap in the 32bit address range\n"
641 "PCI: Unassigned devices with 32bit resource registers may break!\n"); 641 "e820: PCI devices with unassigned 32bit BARs may break!\n");
642 } 642 }
643#endif 643#endif
644 644
@@ -648,8 +648,8 @@ __init void e820_setup_gap(void)
648 pci_mem_start = gapstart; 648 pci_mem_start = gapstart;
649 649
650 printk(KERN_INFO 650 printk(KERN_INFO
651 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", 651 "e820: [mem %#010lx-%#010lx] available for PCI devices\n",
652 pci_mem_start, gapstart, gapsize); 652 gapstart, gapstart + gapsize - 1);
653} 653}
654 654
655/** 655/**
@@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata)
667 extmap = (struct e820entry *)(sdata->data); 667 extmap = (struct e820entry *)(sdata->data);
668 __append_e820_map(extmap, entries); 668 __append_e820_map(extmap, entries);
669 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 669 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
670 printk(KERN_INFO "extended physical RAM map:\n"); 670 printk(KERN_INFO "e820: extended physical RAM map:\n");
671 e820_print_map("extended"); 671 e820_print_map("extended");
672} 672}
673 673
@@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
734 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 734 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
735 if (addr) { 735 if (addr) {
736 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); 736 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
737 printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); 737 printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n");
738 update_e820_saved(); 738 update_e820_saved();
739 } 739 }
740 740
@@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
784 if (last_pfn > max_arch_pfn) 784 if (last_pfn > max_arch_pfn)
785 last_pfn = max_arch_pfn; 785 last_pfn = max_arch_pfn;
786 786
787 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", 787 printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
788 last_pfn, max_arch_pfn); 788 last_pfn, max_arch_pfn);
789 return last_pfn; 789 return last_pfn;
790} 790}
@@ -888,7 +888,7 @@ void __init finish_e820_parsing(void)
888 early_panic("Invalid user supplied memory map"); 888 early_panic("Invalid user supplied memory map");
889 e820.nr_map = nr; 889 e820.nr_map = nr;
890 890
891 printk(KERN_INFO "user-defined physical RAM map:\n"); 891 printk(KERN_INFO "e820: user-defined physical RAM map:\n");
892 e820_print_map("user"); 892 e820_print_map("user");
893 } 893 }
894} 894}
@@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void)
996 end = MAX_RESOURCE_SIZE; 996 end = MAX_RESOURCE_SIZE;
997 if (start >= end) 997 if (start >= end)
998 continue; 998 continue;
999 printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", 999 printk(KERN_DEBUG
1000 start, end); 1000 "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n",
1001 start, end);
1001 reserve_region_with_split(&iomem_resource, start, end, 1002 reserve_region_with_split(&iomem_resource, start, end,
1002 "RAM buffer"); 1003 "RAM buffer");
1003 } 1004 }
@@ -1047,7 +1048,7 @@ void __init setup_memory_map(void)
1047 1048
1048 who = x86_init.resources.memory_setup(); 1049 who = x86_init.resources.memory_setup();
1049 memcpy(&e820_saved, &e820, sizeof(struct e820map)); 1050 memcpy(&e820_saved, &e820, sizeof(struct e820map));
1050 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 1051 printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
1051 e820_print_map(who); 1052 e820_print_map(who);
1052} 1053}
1053 1054
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b02d4dd6b8a3..fbca2e6223bf 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -568,8 +568,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
568 struct mpf_intel *mpf; 568 struct mpf_intel *mpf;
569 unsigned long mem; 569 unsigned long mem;
570 570
571 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 571 apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
572 bp, length); 572 base, base + length - 1);
573 BUILD_BUG_ON(sizeof(*mpf) != 16); 573 BUILD_BUG_ON(sizeof(*mpf) != 16);
574 574
575 while (length > 0) { 575 while (length > 0) {
@@ -584,8 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
584#endif 584#endif
585 mpf_found = mpf; 585 mpf_found = mpf;
586 586
587 printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", 587 printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
588 mpf, (u64)virt_to_phys(mpf)); 588 (unsigned long long) virt_to_phys(mpf),
589 (unsigned long long) virt_to_phys(mpf) +
590 sizeof(*mpf) - 1, mpf);
589 591
590 mem = virt_to_phys(mpf); 592 mem = virt_to_phys(mpf);
591 memblock_reserve(mem, sizeof(*mpf)); 593 memblock_reserve(mem, sizeof(*mpf));
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f2afee6a19c1..982e44f960db 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -334,8 +334,8 @@ static void __init relocate_initrd(void)
334 memblock_reserve(ramdisk_here, area_size); 334 memblock_reserve(ramdisk_here, area_size);
335 initrd_start = ramdisk_here + PAGE_OFFSET; 335 initrd_start = ramdisk_here + PAGE_OFFSET;
336 initrd_end = initrd_start + ramdisk_size; 336 initrd_end = initrd_start + ramdisk_size;
337 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 337 printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
338 ramdisk_here, ramdisk_here + ramdisk_size); 338 ramdisk_here, ramdisk_here + ramdisk_size - 1);
339 339
340 q = (char *)initrd_start; 340 q = (char *)initrd_start;
341 341
@@ -366,8 +366,8 @@ static void __init relocate_initrd(void)
366 /* high pages is not converted by early_res_to_bootmem */ 366 /* high pages is not converted by early_res_to_bootmem */
367 ramdisk_image = boot_params.hdr.ramdisk_image; 367 ramdisk_image = boot_params.hdr.ramdisk_image;
368 ramdisk_size = boot_params.hdr.ramdisk_size; 368 ramdisk_size = boot_params.hdr.ramdisk_size;
369 printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" 369 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
370 " %08llx - %08llx\n", 370 " [mem %#010llx-%#010llx]\n",
371 ramdisk_image, ramdisk_image + ramdisk_size - 1, 371 ramdisk_image, ramdisk_image + ramdisk_size - 1,
372 ramdisk_here, ramdisk_here + ramdisk_size - 1); 372 ramdisk_here, ramdisk_here + ramdisk_size - 1);
373} 373}
@@ -392,8 +392,8 @@ static void __init reserve_initrd(void)
392 ramdisk_size, end_of_lowmem>>1); 392 ramdisk_size, end_of_lowmem>>1);
393 } 393 }
394 394
395 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, 395 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
396 ramdisk_end); 396 ramdisk_end - 1);
397 397
398 398
399 if (ramdisk_end <= end_of_lowmem) { 399 if (ramdisk_end <= end_of_lowmem) {
@@ -906,8 +906,8 @@ void __init setup_arch(char **cmdline_p)
906 setup_bios_corruption_check(); 906 setup_bios_corruption_check();
907#endif 907#endif
908 908
909 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", 909 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
910 max_pfn_mapped<<PAGE_SHIFT); 910 (max_pfn_mapped<<PAGE_SHIFT) - 1);
911 911
912 setup_trampolines(); 912 setup_trampolines();
913 913
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 319b6f2fb8b9..97141c26a13a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -84,8 +84,9 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
84 pgt_buf_end = pgt_buf_start; 84 pgt_buf_end = pgt_buf_start;
85 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); 85 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
86 86
87 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", 87 printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
88 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); 88 end - 1, pgt_buf_start << PAGE_SHIFT,
89 (pgt_buf_top << PAGE_SHIFT) - 1);
89} 90}
90 91
91void __init native_pagetable_reserve(u64 start, u64 end) 92void __init native_pagetable_reserve(u64 start, u64 end)
@@ -132,7 +133,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
132 int nr_range, i; 133 int nr_range, i;
133 int use_pse, use_gbpages; 134 int use_pse, use_gbpages;
134 135
135 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); 136 printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
137 start, end - 1);
136 138
137#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) 139#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
138 /* 140 /*
@@ -251,8 +253,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
251 } 253 }
252 254
253 for (i = 0; i < nr_range; i++) 255 for (i = 0; i < nr_range; i++)
254 printk(KERN_DEBUG " %010lx - %010lx page %s\n", 256 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
255 mr[i].start, mr[i].end, 257 mr[i].start, mr[i].end - 1,
256 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 258 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
257 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); 259 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
258 260
@@ -350,8 +352,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
350 * create a kernel page fault: 352 * create a kernel page fault:
351 */ 353 */
352#ifdef CONFIG_DEBUG_PAGEALLOC 354#ifdef CONFIG_DEBUG_PAGEALLOC
353 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", 355 printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
354 begin, end); 356 begin, end - 1);
355 set_memory_np(begin, (end - begin) >> PAGE_SHIFT); 357 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
356#else 358#else
357 /* 359 /*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 19d3fa08b119..2d125be1bae9 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
141 141
142 /* whine about and ignore invalid blks */ 142 /* whine about and ignore invalid blks */
143 if (start > end || nid < 0 || nid >= MAX_NUMNODES) { 143 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
144 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", 144 pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
145 nid, start, end); 145 nid, start, end - 1);
146 return 0; 146 return 0;
147 } 147 }
148 148
@@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
210 210
211 start = roundup(start, ZONE_ALIGN); 211 start = roundup(start, ZONE_ALIGN);
212 212
213 printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", 213 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
214 nid, start, end); 214 nid, start, end - 1);
215 215
216 /* 216 /*
217 * Allocate node data. Try remap allocator first, node-local 217 * Allocate node data. Try remap allocator first, node-local
@@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
232 } 232 }
233 233
234 /* report and initialize */ 234 /* report and initialize */
235 printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", 235 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); 236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
238 if (!remapped && tnid != nid) 238 if (!remapped && tnid != nid)
@@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
291 */ 291 */
292 if (bi->end > bj->start && bi->start < bj->end) { 292 if (bi->end > bj->start && bi->start < bj->end) {
293 if (bi->nid != bj->nid) { 293 if (bi->nid != bj->nid) {
294 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", 294 pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
295 bi->nid, bi->start, bi->end, 295 bi->nid, bi->start, bi->end - 1,
296 bj->nid, bj->start, bj->end); 296 bj->nid, bj->start, bj->end - 1);
297 return -EINVAL; 297 return -EINVAL;
298 } 298 }
299 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", 299 pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
300 bi->nid, bi->start, bi->end, 300 bi->nid, bi->start, bi->end - 1,
301 bj->start, bj->end); 301 bj->start, bj->end - 1);
302 } 302 }
303 303
304 /* 304 /*
@@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
320 } 320 }
321 if (k < mi->nr_blks) 321 if (k < mi->nr_blks)
322 continue; 322 continue;
323 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", 323 printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
324 bi->nid, bi->start, bi->end, bj->start, bj->end, 324 bi->nid, bi->start, bi->end - 1, bj->start,
325 start, end); 325 bj->end - 1, start, end - 1);
326 bi->start = start; 326 bi->start = start;
327 bi->end = end; 327 bi->end = end;
328 numa_remove_memblk_from(j--, mi); 328 numa_remove_memblk_from(j--, mi);
@@ -616,8 +616,8 @@ static int __init dummy_numa_init(void)
616{ 616{
617 printk(KERN_INFO "%s\n", 617 printk(KERN_INFO "%s\n",
618 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 618 numa_off ? "NUMA turned off" : "No NUMA configuration found");
619 printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", 619 printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n",
620 0LLU, PFN_PHYS(max_pfn)); 620 0LLU, PFN_PHYS(max_pfn) - 1);
621 621
622 node_set(0, numa_nodes_parsed); 622 node_set(0, numa_nodes_parsed);
623 numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); 623 numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 871dd8868170..dbbbb47260cc 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
68 numa_remove_memblk_from(phys_blk, pi); 68 numa_remove_memblk_from(phys_blk, pi);
69 } 69 }
70 70
71 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 71 printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
72 eb->start, eb->end, (eb->end - eb->start) >> 20); 72 nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
73 return 0; 73 return 0;
74} 74}
75 75
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f6ff57b7efa5..f11729fd019c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -209,9 +209,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
209 page = pfn_to_page(pfn); 209 page = pfn_to_page(pfn);
210 type = get_page_memtype(page); 210 type = get_page_memtype(page);
211 if (type != -1) { 211 if (type != -1) {
212 printk(KERN_INFO "reserve_ram_pages_type failed " 212 printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n",
213 "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", 213 start, end - 1, type, req_type);
214 start, end, type, req_type);
215 if (new_type) 214 if (new_type)
216 *new_type = type; 215 *new_type = type;
217 216
@@ -314,9 +313,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
314 313
315 err = rbt_memtype_check_insert(new, new_type); 314 err = rbt_memtype_check_insert(new, new_type);
316 if (err) { 315 if (err) {
317 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " 316 printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
318 "track %s, req %s\n", 317 start, end - 1,
319 start, end, cattr_name(new->type), cattr_name(req_type)); 318 cattr_name(new->type), cattr_name(req_type));
320 kfree(new); 319 kfree(new);
321 spin_unlock(&memtype_lock); 320 spin_unlock(&memtype_lock);
322 321
@@ -325,8 +324,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
325 324
326 spin_unlock(&memtype_lock); 325 spin_unlock(&memtype_lock);
327 326
328 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 327 dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
329 start, end, cattr_name(new->type), cattr_name(req_type), 328 start, end - 1, cattr_name(new->type), cattr_name(req_type),
330 new_type ? cattr_name(*new_type) : "-"); 329 new_type ? cattr_name(*new_type) : "-");
331 330
332 return err; 331 return err;
@@ -360,14 +359,14 @@ int free_memtype(u64 start, u64 end)
360 spin_unlock(&memtype_lock); 359 spin_unlock(&memtype_lock);
361 360
362 if (!entry) { 361 if (!entry) {
363 printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", 362 printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
364 current->comm, current->pid, start, end); 363 current->comm, current->pid, start, end - 1);
365 return -EINVAL; 364 return -EINVAL;
366 } 365 }
367 366
368 kfree(entry); 367 kfree(entry);
369 368
370 dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); 369 dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
371 370
372 return 0; 371 return 0;
373} 372}
@@ -491,9 +490,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
491 490
492 while (cursor < to) { 491 while (cursor < to) {
493 if (!devmem_is_allowed(pfn)) { 492 if (!devmem_is_allowed(pfn)) {
494 printk(KERN_INFO 493 printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
495 "Program %s tried to access /dev/mem between %Lx->%Lx.\n", 494 current->comm, from, to - 1);
496 current->comm, from, to);
497 return 0; 495 return 0;
498 } 496 }
499 cursor += PAGE_SIZE; 497 cursor += PAGE_SIZE;
@@ -554,12 +552,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
554 size; 552 size;
555 553
556 if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { 554 if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
557 printk(KERN_INFO 555 printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
558 "%s:%d ioremap_change_attr failed %s " 556 "for [mem %#010Lx-%#010Lx]\n",
559 "for %Lx-%Lx\n",
560 current->comm, current->pid, 557 current->comm, current->pid,
561 cattr_name(flags), 558 cattr_name(flags),
562 base, (unsigned long long)(base + size)); 559 base, (unsigned long long)(base + size-1));
563 return -EINVAL; 560 return -EINVAL;
564 } 561 }
565 return 0; 562 return 0;
@@ -591,12 +588,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
591 588
592 flags = lookup_memtype(paddr); 589 flags = lookup_memtype(paddr);
593 if (want_flags != flags) { 590 if (want_flags != flags) {
594 printk(KERN_WARNING 591 printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
595 "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
596 current->comm, current->pid, 592 current->comm, current->pid,
597 cattr_name(want_flags), 593 cattr_name(want_flags),
598 (unsigned long long)paddr, 594 (unsigned long long)paddr,
599 (unsigned long long)(paddr + size), 595 (unsigned long long)(paddr + size - 1),
600 cattr_name(flags)); 596 cattr_name(flags));
601 *vma_prot = __pgprot((pgprot_val(*vma_prot) & 597 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
602 (~_PAGE_CACHE_MASK)) | 598 (~_PAGE_CACHE_MASK)) |
@@ -614,11 +610,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
614 !is_new_memtype_allowed(paddr, size, want_flags, flags)) { 610 !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
615 free_memtype(paddr, paddr + size); 611 free_memtype(paddr, paddr + size);
616 printk(KERN_ERR "%s:%d map pfn expected mapping type %s" 612 printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
617 " for %Lx-%Lx, got %s\n", 613 " for [mem %#010Lx-%#010Lx], got %s\n",
618 current->comm, current->pid, 614 current->comm, current->pid,
619 cattr_name(want_flags), 615 cattr_name(want_flags),
620 (unsigned long long)paddr, 616 (unsigned long long)paddr,
621 (unsigned long long)(paddr + size), 617 (unsigned long long)(paddr + size - 1),
622 cattr_name(flags)); 618 cattr_name(flags));
623 return -EINVAL; 619 return -EINVAL;
624 } 620 }
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index efb5b4b93711..732af3a96183 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -176,8 +176,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
176 return; 176 return;
177 } 177 }
178 178
179 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 179 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
180 start, end); 180 node, pxm,
181 (unsigned long long) start, (unsigned long long) end - 1);
181} 182}
182 183
183void __init acpi_numa_arch_fixup(void) {} 184void __init acpi_numa_arch_fixup(void) {}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 90aa2a11a933..af1a177216f1 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -592,11 +592,9 @@ static ssize_t print_nodes_state(enum node_states state, char *buf)
592{ 592{
593 int n; 593 int n;
594 594
595 n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]); 595 n = nodelist_scnprintf(buf, PAGE_SIZE-2, node_states[state]);
596 if (n > 0 && PAGE_SIZE > n + 1) { 596 buf[n++] = '\n';
597 *(buf + n++) = '\n'; 597 buf[n] = '\0';
598 *(buf + n++) = '\0';
599 }
600 return n; 598 return n;
601} 599}
602 600
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index ff4b8cfda585..04cb8c88d74b 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -50,6 +50,19 @@ config LEDS_LM3530
50 controlled manually or using PWM input or using ambient 50 controlled manually or using PWM input or using ambient
51 light automatically. 51 light automatically.
52 52
53config LEDS_LM3533
54 tristate "LED support for LM3533"
55 depends on LEDS_CLASS
56 depends on MFD_LM3533
57 help
58 This option enables support for the LEDs on National Semiconductor /
59 TI LM3533 Lighting Power chips.
60
61 The LEDs can be controlled directly, through PWM input, or by the
62 ambient-light-sensor interface. The chip supports
63 hardware-accelerated blinking with maximum on and off periods of 9.8
64 and 77 seconds respectively.
65
53config LEDS_LOCOMO 66config LEDS_LOCOMO
54 tristate "LED Support for Locomo device" 67 tristate "LED Support for Locomo device"
55 depends on LEDS_CLASS 68 depends on LEDS_CLASS
@@ -259,6 +272,14 @@ config LEDS_DA903X
259 This option enables support for on-chip LED drivers found 272 This option enables support for on-chip LED drivers found
260 on Dialog Semiconductor DA9030/DA9034 PMICs. 273 on Dialog Semiconductor DA9030/DA9034 PMICs.
261 274
275config LEDS_DA9052
276 tristate "Dialog DA9052/DA9053 LEDS"
277 depends on LEDS_CLASS
278 depends on PMIC_DA9052
279 help
280 This option enables support for on-chip LED drivers found
281 on Dialog Semiconductor DA9052-BC and DA9053-AA/Bx PMICs.
282
262config LEDS_DAC124S085 283config LEDS_DAC124S085
263 tristate "LED Support for DAC124S085 SPI DAC" 284 tristate "LED Support for DAC124S085 SPI DAC"
264 depends on LEDS_CLASS 285 depends on LEDS_CLASS
@@ -471,4 +492,12 @@ config LEDS_TRIGGER_DEFAULT_ON
471comment "iptables trigger is under Netfilter config (LED target)" 492comment "iptables trigger is under Netfilter config (LED target)"
472 depends on LEDS_TRIGGERS 493 depends on LEDS_TRIGGERS
473 494
495config LEDS_TRIGGER_TRANSIENT
496 tristate "LED Transient Trigger"
497 depends on LEDS_TRIGGERS
498 help
499 This allows one-time activation of a transient state on
500 GPIO/PWM based hardware.
501 If unsure, say Y.
502
474endif # NEW_LEDS 503endif # NEW_LEDS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 890481cb09f6..f8958cd6cf6e 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_LEDS_ATMEL_PWM) += leds-atmel-pwm.o
10obj-$(CONFIG_LEDS_BD2802) += leds-bd2802.o 10obj-$(CONFIG_LEDS_BD2802) += leds-bd2802.o
11obj-$(CONFIG_LEDS_LOCOMO) += leds-locomo.o 11obj-$(CONFIG_LEDS_LOCOMO) += leds-locomo.o
12obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o 12obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o
13obj-$(CONFIG_LEDS_LM3533) += leds-lm3533.o
13obj-$(CONFIG_LEDS_MIKROTIK_RB532) += leds-rb532.o 14obj-$(CONFIG_LEDS_MIKROTIK_RB532) += leds-rb532.o
14obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o 15obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o
15obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o 16obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o
@@ -31,6 +32,7 @@ obj-$(CONFIG_LEDS_FSG) += leds-fsg.o
31obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o 32obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o
32obj-$(CONFIG_LEDS_PCA9633) += leds-pca9633.o 33obj-$(CONFIG_LEDS_PCA9633) += leds-pca9633.o
33obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o 34obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o
35obj-$(CONFIG_LEDS_DA9052) += leds-da9052.o
34obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o 36obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o
35obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o 37obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o
36obj-$(CONFIG_LEDS_PWM) += leds-pwm.o 38obj-$(CONFIG_LEDS_PWM) += leds-pwm.o
@@ -56,3 +58,4 @@ obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o
56obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o 58obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o
57obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o 59obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o
58obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o 60obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o
61obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 5bff8439dc68..8ee92c81aec2 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -44,23 +44,18 @@ static ssize_t led_brightness_store(struct device *dev,
44 struct device_attribute *attr, const char *buf, size_t size) 44 struct device_attribute *attr, const char *buf, size_t size)
45{ 45{
46 struct led_classdev *led_cdev = dev_get_drvdata(dev); 46 struct led_classdev *led_cdev = dev_get_drvdata(dev);
47 unsigned long state;
47 ssize_t ret = -EINVAL; 48 ssize_t ret = -EINVAL;
48 char *after;
49 unsigned long state = simple_strtoul(buf, &after, 10);
50 size_t count = after - buf;
51 49
52 if (isspace(*after)) 50 ret = kstrtoul(buf, 10, &state);
53 count++; 51 if (ret)
52 return ret;
54 53
55 if (count == size) { 54 if (state == LED_OFF)
56 ret = count; 55 led_trigger_remove(led_cdev);
56 led_set_brightness(led_cdev, state);
57 57
58 if (state == LED_OFF) 58 return size;
59 led_trigger_remove(led_cdev);
60 led_set_brightness(led_cdev, state);
61 }
62
63 return ret;
64} 59}
65 60
66static ssize_t led_max_brightness_show(struct device *dev, 61static ssize_t led_max_brightness_show(struct device *dev,
diff --git a/drivers/leds/leds-da9052.c b/drivers/leds/leds-da9052.c
new file mode 100644
index 000000000000..58a5244c437e
--- /dev/null
+++ b/drivers/leds/leds-da9052.c
@@ -0,0 +1,214 @@
1/*
2 * LED Driver for Dialog DA9052 PMICs.
3 *
4 * Copyright(c) 2012 Dialog Semiconductor Ltd.
5 *
6 * Author: David Dajun Chen <dchen@diasemi.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/platform_device.h>
19#include <linux/leds.h>
20#include <linux/workqueue.h>
21#include <linux/slab.h>
22
23#include <linux/mfd/da9052/reg.h>
24#include <linux/mfd/da9052/da9052.h>
25#include <linux/mfd/da9052/pdata.h>
26
27#define DA9052_OPENDRAIN_OUTPUT 2
28#define DA9052_SET_HIGH_LVL_OUTPUT (1 << 3)
29#define DA9052_MASK_UPPER_NIBBLE 0xF0
30#define DA9052_MASK_LOWER_NIBBLE 0x0F
31#define DA9052_NIBBLE_SHIFT 4
32#define DA9052_MAX_BRIGHTNESS 0x5f
33
34struct da9052_led {
35 struct led_classdev cdev;
36 struct work_struct work;
37 struct da9052 *da9052;
38 unsigned char led_index;
39 unsigned char id;
40 int brightness;
41};
42
43static unsigned char led_reg[] = {
44 DA9052_LED_CONT_4_REG,
45 DA9052_LED_CONT_5_REG,
46};
47
48static int da9052_set_led_brightness(struct da9052_led *led)
49{
50 u8 val;
51 int error;
52
53 val = (led->brightness & 0x7f) | DA9052_LED_CONT_DIM;
54
55 error = da9052_reg_write(led->da9052, led_reg[led->led_index], val);
56 if (error < 0)
57 dev_err(led->da9052->dev, "Failed to set led brightness, %d\n",
58 error);
59 return error;
60}
61
62static void da9052_led_work(struct work_struct *work)
63{
64 struct da9052_led *led = container_of(work, struct da9052_led, work);
65
66 da9052_set_led_brightness(led);
67}
68
69static void da9052_led_set(struct led_classdev *led_cdev,
70 enum led_brightness value)
71{
72 struct da9052_led *led;
73
74 led = container_of(led_cdev, struct da9052_led, cdev);
75 led->brightness = value;
76 schedule_work(&led->work);
77}
78
79static int da9052_configure_leds(struct da9052 *da9052)
80{
81 int error;
82 unsigned char register_value = DA9052_OPENDRAIN_OUTPUT
83 | DA9052_SET_HIGH_LVL_OUTPUT;
84
85 error = da9052_reg_update(da9052, DA9052_GPIO_14_15_REG,
86 DA9052_MASK_LOWER_NIBBLE,
87 register_value);
88
89 if (error < 0) {
90 dev_err(da9052->dev, "Failed to write GPIO 14-15 reg, %d\n",
91 error);
92 return error;
93 }
94
95 error = da9052_reg_update(da9052, DA9052_GPIO_14_15_REG,
96 DA9052_MASK_UPPER_NIBBLE,
97 register_value << DA9052_NIBBLE_SHIFT);
98 if (error < 0)
99 dev_err(da9052->dev, "Failed to write GPIO 14-15 reg, %d\n",
100 error);
101
102 return error;
103}
104
105static int __devinit da9052_led_probe(struct platform_device *pdev)
106{
107 struct da9052_pdata *pdata;
108 struct da9052 *da9052;
109 struct led_platform_data *pled;
110 struct da9052_led *led = NULL;
111 int error = -ENODEV;
112 int i;
113
114 da9052 = dev_get_drvdata(pdev->dev.parent);
115 pdata = da9052->dev->platform_data;
116 if (pdata == NULL) {
117 dev_err(&pdev->dev, "No platform data\n");
118 goto err;
119 }
120
121 pled = pdata->pled;
122 if (pled == NULL) {
123 dev_err(&pdev->dev, "No platform data for LED\n");
124 goto err;
125 }
126
127 led = devm_kzalloc(&pdev->dev,
128 sizeof(struct da9052_led) * pled->num_leds,
129 GFP_KERNEL);
130 if (led == NULL) {
131 dev_err(&pdev->dev, "Failed to alloc memory\n");
132 error = -ENOMEM;
133 goto err;
134 }
135
136 for (i = 0; i < pled->num_leds; i++) {
137 led[i].cdev.name = pled->leds[i].name;
138 led[i].cdev.brightness_set = da9052_led_set;
139 led[i].cdev.brightness = LED_OFF;
140 led[i].cdev.max_brightness = DA9052_MAX_BRIGHTNESS;
141 led[i].brightness = LED_OFF;
142 led[i].led_index = pled->leds[i].flags;
143 led[i].da9052 = dev_get_drvdata(pdev->dev.parent);
144 INIT_WORK(&led[i].work, da9052_led_work);
145
146 error = led_classdev_register(pdev->dev.parent, &led[i].cdev);
147 if (error) {
148 dev_err(&pdev->dev, "Failed to register led %d\n",
149 led[i].led_index);
150 goto err_register;
151 }
152
153 error = da9052_set_led_brightness(&led[i]);
154 if (error) {
155 dev_err(&pdev->dev, "Unable to init led %d\n",
156 led[i].led_index);
157 continue;
158 }
159 }
160 error = da9052_configure_leds(led->da9052);
161 if (error) {
162 dev_err(&pdev->dev, "Failed to configure GPIO LED%d\n", error);
163 goto err_register;
164 }
165
166 platform_set_drvdata(pdev, led);
167
168 return 0;
169
170err_register:
171 for (i = i - 1; i >= 0; i--) {
172 led_classdev_unregister(&led[i].cdev);
173 cancel_work_sync(&led[i].work);
174 }
175err:
176 return error;
177}
178
179static int __devexit da9052_led_remove(struct platform_device *pdev)
180{
181 struct da9052_led *led = platform_get_drvdata(pdev);
182 struct da9052_pdata *pdata;
183 struct da9052 *da9052;
184 struct led_platform_data *pled;
185 int i;
186
187 da9052 = dev_get_drvdata(pdev->dev.parent);
188 pdata = da9052->dev->platform_data;
189 pled = pdata->pled;
190
191 for (i = 0; i < pled->num_leds; i++) {
192 led[i].brightness = 0;
193 da9052_set_led_brightness(&led[i]);
194 led_classdev_unregister(&led[i].cdev);
195 cancel_work_sync(&led[i].work);
196 }
197
198 return 0;
199}
200
201static struct platform_driver da9052_led_driver = {
202 .driver = {
203 .name = "da9052-leds",
204 .owner = THIS_MODULE,
205 },
206 .probe = da9052_led_probe,
207 .remove = __devexit_p(da9052_led_remove),
208};
209
210module_platform_driver(da9052_led_driver);
211
212MODULE_AUTHOR("Dialog Semiconductor Ltd <dchen@diasemi.com>");
213MODULE_DESCRIPTION("LED driver for Dialog DA9052 PMIC");
214MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c
index 968fd5fef4fc..84ba6de8039c 100644
--- a/drivers/leds/leds-lm3530.c
+++ b/drivers/leds/leds-lm3530.c
@@ -113,6 +113,18 @@ struct lm3530_data {
113 bool enable; 113 bool enable;
114}; 114};
115 115
116/*
117 * struct lm3530_als_data
118 * @config : value of ALS configuration register
119 * @imp_sel : value of ALS resistor select register
120 * @zones : values of ALS ZB (Zone Boundary) registers
121 */
122struct lm3530_als_data {
123 u8 config;
124 u8 imp_sel;
125 u8 zones[LM3530_ALS_ZB_MAX];
126};
127
116static const u8 lm3530_reg[LM3530_REG_MAX] = { 128static const u8 lm3530_reg[LM3530_REG_MAX] = {
117 LM3530_GEN_CONFIG, 129 LM3530_GEN_CONFIG,
118 LM3530_ALS_CONFIG, 130 LM3530_ALS_CONFIG,
@@ -141,29 +153,65 @@ static int lm3530_get_mode_from_str(const char *str)
141 return -1; 153 return -1;
142} 154}
143 155
156static void lm3530_als_configure(struct lm3530_platform_data *pdata,
157 struct lm3530_als_data *als)
158{
159 int i;
160 u32 als_vmin, als_vmax, als_vstep;
161
162 if (pdata->als_vmax == 0) {
163 pdata->als_vmin = 0;
164 pdata->als_vmax = LM3530_ALS_WINDOW_mV;
165 }
166
167 als_vmin = pdata->als_vmin;
168 als_vmax = pdata->als_vmax;
169
170 if ((als_vmax - als_vmin) > LM3530_ALS_WINDOW_mV)
171 pdata->als_vmax = als_vmax = als_vmin + LM3530_ALS_WINDOW_mV;
172
173 /* n zone boundaries make n+1 zones */
174 als_vstep = (als_vmax - als_vmin) / (LM3530_ALS_ZB_MAX + 1);
175
176 for (i = 0; i < LM3530_ALS_ZB_MAX; i++)
177 als->zones[i] = (((als_vmin + LM3530_ALS_OFFSET_mV) +
178 als_vstep + (i * als_vstep)) * LED_FULL) / 1000;
179
180 als->config =
181 (pdata->als_avrg_time << LM3530_ALS_AVG_TIME_SHIFT) |
182 (LM3530_ENABLE_ALS) |
183 (pdata->als_input_mode << LM3530_ALS_SEL_SHIFT);
184
185 als->imp_sel =
186 (pdata->als1_resistor_sel << LM3530_ALS1_IMP_SHIFT) |
187 (pdata->als2_resistor_sel << LM3530_ALS2_IMP_SHIFT);
188}
189
144static int lm3530_init_registers(struct lm3530_data *drvdata) 190static int lm3530_init_registers(struct lm3530_data *drvdata)
145{ 191{
146 int ret = 0; 192 int ret = 0;
147 int i; 193 int i;
148 u8 gen_config; 194 u8 gen_config;
149 u8 als_config = 0;
150 u8 brt_ramp; 195 u8 brt_ramp;
151 u8 als_imp_sel = 0;
152 u8 brightness; 196 u8 brightness;
153 u8 reg_val[LM3530_REG_MAX]; 197 u8 reg_val[LM3530_REG_MAX];
154 u8 zones[LM3530_ALS_ZB_MAX];
155 u32 als_vmin, als_vmax, als_vstep;
156 struct lm3530_platform_data *pdata = drvdata->pdata; 198 struct lm3530_platform_data *pdata = drvdata->pdata;
157 struct i2c_client *client = drvdata->client; 199 struct i2c_client *client = drvdata->client;
158 struct lm3530_pwm_data *pwm = &pdata->pwm_data; 200 struct lm3530_pwm_data *pwm = &pdata->pwm_data;
201 struct lm3530_als_data als;
202
203 memset(&als, 0, sizeof(struct lm3530_als_data));
159 204
160 gen_config = (pdata->brt_ramp_law << LM3530_RAMP_LAW_SHIFT) | 205 gen_config = (pdata->brt_ramp_law << LM3530_RAMP_LAW_SHIFT) |
161 ((pdata->max_current & 7) << LM3530_MAX_CURR_SHIFT); 206 ((pdata->max_current & 7) << LM3530_MAX_CURR_SHIFT);
162 207
163 switch (drvdata->mode) { 208 switch (drvdata->mode) {
164 case LM3530_BL_MODE_MANUAL: 209 case LM3530_BL_MODE_MANUAL:
210 gen_config |= LM3530_ENABLE_I2C;
211 break;
165 case LM3530_BL_MODE_ALS: 212 case LM3530_BL_MODE_ALS:
166 gen_config |= LM3530_ENABLE_I2C; 213 gen_config |= LM3530_ENABLE_I2C;
214 lm3530_als_configure(pdata, &als);
167 break; 215 break;
168 case LM3530_BL_MODE_PWM: 216 case LM3530_BL_MODE_PWM:
169 gen_config |= LM3530_ENABLE_PWM | LM3530_ENABLE_PWM_SIMPLE | 217 gen_config |= LM3530_ENABLE_PWM | LM3530_ENABLE_PWM_SIMPLE |
@@ -171,38 +219,6 @@ static int lm3530_init_registers(struct lm3530_data *drvdata)
171 break; 219 break;
172 } 220 }
173 221
174 if (drvdata->mode == LM3530_BL_MODE_ALS) {
175 if (pdata->als_vmax == 0) {
176 pdata->als_vmin = 0;
177 pdata->als_vmax = LM3530_ALS_WINDOW_mV;
178 }
179
180 als_vmin = pdata->als_vmin;
181 als_vmax = pdata->als_vmax;
182
183 if ((als_vmax - als_vmin) > LM3530_ALS_WINDOW_mV)
184 pdata->als_vmax = als_vmax =
185 als_vmin + LM3530_ALS_WINDOW_mV;
186
187 /* n zone boundary makes n+1 zones */
188 als_vstep = (als_vmax - als_vmin) / (LM3530_ALS_ZB_MAX + 1);
189
190 for (i = 0; i < LM3530_ALS_ZB_MAX; i++)
191 zones[i] = (((als_vmin + LM3530_ALS_OFFSET_mV) +
192 als_vstep + (i * als_vstep)) * LED_FULL)
193 / 1000;
194
195 als_config =
196 (pdata->als_avrg_time << LM3530_ALS_AVG_TIME_SHIFT) |
197 (LM3530_ENABLE_ALS) |
198 (pdata->als_input_mode << LM3530_ALS_SEL_SHIFT);
199
200 als_imp_sel =
201 (pdata->als1_resistor_sel << LM3530_ALS1_IMP_SHIFT) |
202 (pdata->als2_resistor_sel << LM3530_ALS2_IMP_SHIFT);
203
204 }
205
206 brt_ramp = (pdata->brt_ramp_fall << LM3530_BRT_RAMP_FALL_SHIFT) | 222 brt_ramp = (pdata->brt_ramp_fall << LM3530_BRT_RAMP_FALL_SHIFT) |
207 (pdata->brt_ramp_rise << LM3530_BRT_RAMP_RISE_SHIFT); 223 (pdata->brt_ramp_rise << LM3530_BRT_RAMP_RISE_SHIFT);
208 224
@@ -215,14 +231,14 @@ static int lm3530_init_registers(struct lm3530_data *drvdata)
215 brightness = drvdata->led_dev.max_brightness; 231 brightness = drvdata->led_dev.max_brightness;
216 232
217 reg_val[0] = gen_config; /* LM3530_GEN_CONFIG */ 233 reg_val[0] = gen_config; /* LM3530_GEN_CONFIG */
218 reg_val[1] = als_config; /* LM3530_ALS_CONFIG */ 234 reg_val[1] = als.config; /* LM3530_ALS_CONFIG */
219 reg_val[2] = brt_ramp; /* LM3530_BRT_RAMP_RATE */ 235 reg_val[2] = brt_ramp; /* LM3530_BRT_RAMP_RATE */
220 reg_val[3] = als_imp_sel; /* LM3530_ALS_IMP_SELECT */ 236 reg_val[3] = als.imp_sel; /* LM3530_ALS_IMP_SELECT */
221 reg_val[4] = brightness; /* LM3530_BRT_CTRL_REG */ 237 reg_val[4] = brightness; /* LM3530_BRT_CTRL_REG */
222 reg_val[5] = zones[0]; /* LM3530_ALS_ZB0_REG */ 238 reg_val[5] = als.zones[0]; /* LM3530_ALS_ZB0_REG */
223 reg_val[6] = zones[1]; /* LM3530_ALS_ZB1_REG */ 239 reg_val[6] = als.zones[1]; /* LM3530_ALS_ZB1_REG */
224 reg_val[7] = zones[2]; /* LM3530_ALS_ZB2_REG */ 240 reg_val[7] = als.zones[2]; /* LM3530_ALS_ZB2_REG */
225 reg_val[8] = zones[3]; /* LM3530_ALS_ZB3_REG */ 241 reg_val[8] = als.zones[3]; /* LM3530_ALS_ZB3_REG */
226 reg_val[9] = LM3530_DEF_ZT_0; /* LM3530_ALS_Z0T_REG */ 242 reg_val[9] = LM3530_DEF_ZT_0; /* LM3530_ALS_Z0T_REG */
227 reg_val[10] = LM3530_DEF_ZT_1; /* LM3530_ALS_Z1T_REG */ 243 reg_val[10] = LM3530_DEF_ZT_1; /* LM3530_ALS_Z1T_REG */
228 reg_val[11] = LM3530_DEF_ZT_2; /* LM3530_ALS_Z2T_REG */ 244 reg_val[11] = LM3530_DEF_ZT_2; /* LM3530_ALS_Z2T_REG */
diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c
new file mode 100644
index 000000000000..f56b6e7ffdac
--- /dev/null
+++ b/drivers/leds/leds-lm3533.c
@@ -0,0 +1,785 @@
1/*
2 * leds-lm3533.c -- LM3533 LED driver
3 *
4 * Copyright (C) 2011-2012 Texas Instruments
5 *
6 * Author: Johan Hovold <jhovold@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/leds.h>
17#include <linux/mfd/core.h>
18#include <linux/mutex.h>
19#include <linux/platform_device.h>
20#include <linux/slab.h>
21#include <linux/workqueue.h>
22
23#include <linux/mfd/lm3533.h>
24
25
26#define LM3533_LVCTRLBANK_MIN 2
27#define LM3533_LVCTRLBANK_MAX 5
28#define LM3533_LVCTRLBANK_COUNT 4
29#define LM3533_RISEFALLTIME_MAX 7
30#define LM3533_ALS_CHANNEL_LV_MIN 1
31#define LM3533_ALS_CHANNEL_LV_MAX 2
32
33#define LM3533_REG_CTRLBANK_BCONF_BASE 0x1b
34#define LM3533_REG_PATTERN_ENABLE 0x28
35#define LM3533_REG_PATTERN_LOW_TIME_BASE 0x71
36#define LM3533_REG_PATTERN_HIGH_TIME_BASE 0x72
37#define LM3533_REG_PATTERN_RISETIME_BASE 0x74
38#define LM3533_REG_PATTERN_FALLTIME_BASE 0x75
39
40#define LM3533_REG_PATTERN_STEP 0x10
41
42#define LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK 0x04
43#define LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK 0x02
44#define LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK 0x01
45
46#define LM3533_LED_FLAG_PATTERN_ENABLE 1
47
48
49struct lm3533_led {
50 struct lm3533 *lm3533;
51 struct lm3533_ctrlbank cb;
52 struct led_classdev cdev;
53 int id;
54
55 struct mutex mutex;
56 unsigned long flags;
57
58 struct work_struct work;
59 u8 new_brightness;
60};
61
62
63static inline struct lm3533_led *to_lm3533_led(struct led_classdev *cdev)
64{
65 return container_of(cdev, struct lm3533_led, cdev);
66}
67
68static inline int lm3533_led_get_ctrlbank_id(struct lm3533_led *led)
69{
70 return led->id + 2;
71}
72
73static inline u8 lm3533_led_get_lv_reg(struct lm3533_led *led, u8 base)
74{
75 return base + led->id;
76}
77
78static inline u8 lm3533_led_get_pattern(struct lm3533_led *led)
79{
80 return led->id;
81}
82
83static inline u8 lm3533_led_get_pattern_reg(struct lm3533_led *led,
84 u8 base)
85{
86 return base + lm3533_led_get_pattern(led) * LM3533_REG_PATTERN_STEP;
87}
88
89static int lm3533_led_pattern_enable(struct lm3533_led *led, int enable)
90{
91 u8 mask;
92 u8 val;
93 int pattern;
94 int state;
95 int ret = 0;
96
97 dev_dbg(led->cdev.dev, "%s - %d\n", __func__, enable);
98
99 mutex_lock(&led->mutex);
100
101 state = test_bit(LM3533_LED_FLAG_PATTERN_ENABLE, &led->flags);
102 if ((enable && state) || (!enable && !state))
103 goto out;
104
105 pattern = lm3533_led_get_pattern(led);
106 mask = 1 << (2 * pattern);
107
108 if (enable)
109 val = mask;
110 else
111 val = 0;
112
113 ret = lm3533_update(led->lm3533, LM3533_REG_PATTERN_ENABLE, val, mask);
114 if (ret) {
115 dev_err(led->cdev.dev, "failed to enable pattern %d (%d)\n",
116 pattern, enable);
117 goto out;
118 }
119
120 __change_bit(LM3533_LED_FLAG_PATTERN_ENABLE, &led->flags);
121out:
122 mutex_unlock(&led->mutex);
123
124 return ret;
125}
126
127static void lm3533_led_work(struct work_struct *work)
128{
129 struct lm3533_led *led = container_of(work, struct lm3533_led, work);
130
131 dev_dbg(led->cdev.dev, "%s - %u\n", __func__, led->new_brightness);
132
133 if (led->new_brightness == 0)
134 lm3533_led_pattern_enable(led, 0); /* disable blink */
135
136 lm3533_ctrlbank_set_brightness(&led->cb, led->new_brightness);
137}
138
139static void lm3533_led_set(struct led_classdev *cdev,
140 enum led_brightness value)
141{
142 struct lm3533_led *led = to_lm3533_led(cdev);
143
144 dev_dbg(led->cdev.dev, "%s - %d\n", __func__, value);
145
146 led->new_brightness = value;
147 schedule_work(&led->work);
148}
149
150static enum led_brightness lm3533_led_get(struct led_classdev *cdev)
151{
152 struct lm3533_led *led = to_lm3533_led(cdev);
153 u8 val;
154 int ret;
155
156 ret = lm3533_ctrlbank_get_brightness(&led->cb, &val);
157 if (ret)
158 return ret;
159
160 dev_dbg(led->cdev.dev, "%s - %u\n", __func__, val);
161
162 return val;
163}
164
165/* Pattern generator defines (delays in us). */
166#define LM3533_LED_DELAY1_VMIN 0x00
167#define LM3533_LED_DELAY2_VMIN 0x3d
168#define LM3533_LED_DELAY3_VMIN 0x80
169
170#define LM3533_LED_DELAY1_VMAX (LM3533_LED_DELAY2_VMIN - 1)
171#define LM3533_LED_DELAY2_VMAX (LM3533_LED_DELAY3_VMIN - 1)
172#define LM3533_LED_DELAY3_VMAX 0xff
173
174#define LM3533_LED_DELAY1_TMIN 16384U
175#define LM3533_LED_DELAY2_TMIN 1130496U
176#define LM3533_LED_DELAY3_TMIN 10305536U
177
178#define LM3533_LED_DELAY1_TMAX 999424U
179#define LM3533_LED_DELAY2_TMAX 9781248U
180#define LM3533_LED_DELAY3_TMAX 76890112U
181
182/* t_step = (t_max - t_min) / (v_max - v_min) */
183#define LM3533_LED_DELAY1_TSTEP 16384
184#define LM3533_LED_DELAY2_TSTEP 131072
185#define LM3533_LED_DELAY3_TSTEP 524288
186
187/* Delay limits for hardware accelerated blinking (in ms). */
188#define LM3533_LED_DELAY_ON_MAX \
189 ((LM3533_LED_DELAY2_TMAX + LM3533_LED_DELAY2_TSTEP / 2) / 1000)
190#define LM3533_LED_DELAY_OFF_MAX \
191 ((LM3533_LED_DELAY3_TMAX + LM3533_LED_DELAY3_TSTEP / 2) / 1000)
192
193/*
194 * Returns linear map of *t from [t_min,t_max] to [v_min,v_max] with a step
195 * size of t_step, where
196 *
197 * t_step = (t_max - t_min) / (v_max - v_min)
198 *
199 * and updates *t to reflect the mapped value.
200 */
201static u8 time_to_val(unsigned *t, unsigned t_min, unsigned t_step,
202 u8 v_min, u8 v_max)
203{
204 unsigned val;
205
206 val = (*t + t_step / 2 - t_min) / t_step + v_min;
207
208 *t = t_step * (val - v_min) + t_min;
209
210 return (u8)val;
211}
212
213/*
214 * Returns time code corresponding to *delay (in ms) and updates *delay to
215 * reflect actual hardware delay.
216 *
217 * Hardware supports 256 discrete delay times, divided into three groups with
218 * the following ranges and step-sizes:
219 *
220 * [ 16, 999] [0x00, 0x3c] step 16 ms
221 * [ 1130, 9781] [0x3d, 0x7f] step 131 ms
222 * [10306, 76890] [0x80, 0xff] step 524 ms
223 *
224 * Note that delay group 3 is only available for delay_off.
225 */
226static u8 lm3533_led_get_hw_delay(unsigned *delay)
227{
228 unsigned t;
229 u8 val;
230
231 t = *delay * 1000;
232
233 if (t >= (LM3533_LED_DELAY2_TMAX + LM3533_LED_DELAY3_TMIN) / 2) {
234 t = clamp(t, LM3533_LED_DELAY3_TMIN, LM3533_LED_DELAY3_TMAX);
235 val = time_to_val(&t, LM3533_LED_DELAY3_TMIN,
236 LM3533_LED_DELAY3_TSTEP,
237 LM3533_LED_DELAY3_VMIN,
238 LM3533_LED_DELAY3_VMAX);
239 } else if (t >= (LM3533_LED_DELAY1_TMAX + LM3533_LED_DELAY2_TMIN) / 2) {
240 t = clamp(t, LM3533_LED_DELAY2_TMIN, LM3533_LED_DELAY2_TMAX);
241 val = time_to_val(&t, LM3533_LED_DELAY2_TMIN,
242 LM3533_LED_DELAY2_TSTEP,
243 LM3533_LED_DELAY2_VMIN,
244 LM3533_LED_DELAY2_VMAX);
245 } else {
246 t = clamp(t, LM3533_LED_DELAY1_TMIN, LM3533_LED_DELAY1_TMAX);
247 val = time_to_val(&t, LM3533_LED_DELAY1_TMIN,
248 LM3533_LED_DELAY1_TSTEP,
249 LM3533_LED_DELAY1_VMIN,
250 LM3533_LED_DELAY1_VMAX);
251 }
252
253 *delay = (t + 500) / 1000;
254
255 return val;
256}
257
258/*
259 * Set delay register base to *delay (in ms) and update *delay to reflect
260 * actual hardware delay used.
261 */
262static u8 lm3533_led_delay_set(struct lm3533_led *led, u8 base,
263 unsigned long *delay)
264{
265 unsigned t;
266 u8 val;
267 u8 reg;
268 int ret;
269
270 t = (unsigned)*delay;
271
272 /* Delay group 3 is only available for low time (delay off). */
273 if (base != LM3533_REG_PATTERN_LOW_TIME_BASE)
274 t = min(t, LM3533_LED_DELAY2_TMAX / 1000);
275
276 val = lm3533_led_get_hw_delay(&t);
277
278 dev_dbg(led->cdev.dev, "%s - %lu: %u (0x%02x)\n", __func__,
279 *delay, t, val);
280 reg = lm3533_led_get_pattern_reg(led, base);
281 ret = lm3533_write(led->lm3533, reg, val);
282 if (ret)
283 dev_err(led->cdev.dev, "failed to set delay (%02x)\n", reg);
284
285 *delay = t;
286
287 return ret;
288}
289
290static int lm3533_led_delay_on_set(struct lm3533_led *led, unsigned long *t)
291{
292 return lm3533_led_delay_set(led, LM3533_REG_PATTERN_HIGH_TIME_BASE, t);
293}
294
295static int lm3533_led_delay_off_set(struct lm3533_led *led, unsigned long *t)
296{
297 return lm3533_led_delay_set(led, LM3533_REG_PATTERN_LOW_TIME_BASE, t);
298}
299
300static int lm3533_led_blink_set(struct led_classdev *cdev,
301 unsigned long *delay_on,
302 unsigned long *delay_off)
303{
304 struct lm3533_led *led = to_lm3533_led(cdev);
305 int ret;
306
307 dev_dbg(led->cdev.dev, "%s - on = %lu, off = %lu\n", __func__,
308 *delay_on, *delay_off);
309
310 if (*delay_on > LM3533_LED_DELAY_ON_MAX ||
311 *delay_off > LM3533_LED_DELAY_OFF_MAX)
312 return -EINVAL;
313
314 if (*delay_on == 0 && *delay_off == 0) {
315 *delay_on = 500;
316 *delay_off = 500;
317 }
318
319 ret = lm3533_led_delay_on_set(led, delay_on);
320 if (ret)
321 return ret;
322
323 ret = lm3533_led_delay_off_set(led, delay_off);
324 if (ret)
325 return ret;
326
327 return lm3533_led_pattern_enable(led, 1);
328}
329
330static ssize_t show_id(struct device *dev,
331 struct device_attribute *attr, char *buf)
332{
333 struct led_classdev *led_cdev = dev_get_drvdata(dev);
334 struct lm3533_led *led = to_lm3533_led(led_cdev);
335
336 return scnprintf(buf, PAGE_SIZE, "%d\n", led->id);
337}
338
339/*
340 * Pattern generator rise/fall times:
341 *
342 * 0 - 2048 us (default)
343 * 1 - 262 ms
344 * 2 - 524 ms
345 * 3 - 1.049 s
346 * 4 - 2.097 s
347 * 5 - 4.194 s
348 * 6 - 8.389 s
349 * 7 - 16.78 s
350 */
351static ssize_t show_risefalltime(struct device *dev,
352 struct device_attribute *attr,
353 char *buf, u8 base)
354{
355 struct led_classdev *led_cdev = dev_get_drvdata(dev);
356 struct lm3533_led *led = to_lm3533_led(led_cdev);
357 ssize_t ret;
358 u8 reg;
359 u8 val;
360
361 reg = lm3533_led_get_pattern_reg(led, base);
362 ret = lm3533_read(led->lm3533, reg, &val);
363 if (ret)
364 return ret;
365
366 return scnprintf(buf, PAGE_SIZE, "%x\n", val);
367}
368
369static ssize_t show_risetime(struct device *dev,
370 struct device_attribute *attr, char *buf)
371{
372 return show_risefalltime(dev, attr, buf,
373 LM3533_REG_PATTERN_RISETIME_BASE);
374}
375
376static ssize_t show_falltime(struct device *dev,
377 struct device_attribute *attr, char *buf)
378{
379 return show_risefalltime(dev, attr, buf,
380 LM3533_REG_PATTERN_FALLTIME_BASE);
381}
382
383static ssize_t store_risefalltime(struct device *dev,
384 struct device_attribute *attr,
385 const char *buf, size_t len, u8 base)
386{
387 struct led_classdev *led_cdev = dev_get_drvdata(dev);
388 struct lm3533_led *led = to_lm3533_led(led_cdev);
389 u8 val;
390 u8 reg;
391 int ret;
392
393 if (kstrtou8(buf, 0, &val) || val > LM3533_RISEFALLTIME_MAX)
394 return -EINVAL;
395
396 reg = lm3533_led_get_pattern_reg(led, base);
397 ret = lm3533_write(led->lm3533, reg, val);
398 if (ret)
399 return ret;
400
401 return len;
402}
403
404static ssize_t store_risetime(struct device *dev,
405 struct device_attribute *attr,
406 const char *buf, size_t len)
407{
408 return store_risefalltime(dev, attr, buf, len,
409 LM3533_REG_PATTERN_RISETIME_BASE);
410}
411
412static ssize_t store_falltime(struct device *dev,
413 struct device_attribute *attr,
414 const char *buf, size_t len)
415{
416 return store_risefalltime(dev, attr, buf, len,
417 LM3533_REG_PATTERN_FALLTIME_BASE);
418}
419
420static ssize_t show_als_channel(struct device *dev,
421 struct device_attribute *attr, char *buf)
422{
423 struct led_classdev *led_cdev = dev_get_drvdata(dev);
424 struct lm3533_led *led = to_lm3533_led(led_cdev);
425 unsigned channel;
426 u8 reg;
427 u8 val;
428 int ret;
429
430 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
431 ret = lm3533_read(led->lm3533, reg, &val);
432 if (ret)
433 return ret;
434
435 channel = (val & LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK) + 1;
436
437 return scnprintf(buf, PAGE_SIZE, "%u\n", channel);
438}
439
440static ssize_t store_als_channel(struct device *dev,
441 struct device_attribute *attr,
442 const char *buf, size_t len)
443{
444 struct led_classdev *led_cdev = dev_get_drvdata(dev);
445 struct lm3533_led *led = to_lm3533_led(led_cdev);
446 unsigned channel;
447 u8 reg;
448 u8 val;
449 u8 mask;
450 int ret;
451
452 if (kstrtouint(buf, 0, &channel))
453 return -EINVAL;
454
455 if (channel < LM3533_ALS_CHANNEL_LV_MIN ||
456 channel > LM3533_ALS_CHANNEL_LV_MAX)
457 return -EINVAL;
458
459 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
460 mask = LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK;
461 val = channel - 1;
462
463 ret = lm3533_update(led->lm3533, reg, val, mask);
464 if (ret)
465 return ret;
466
467 return len;
468}
469
470static ssize_t show_als_en(struct device *dev,
471 struct device_attribute *attr, char *buf)
472{
473 struct led_classdev *led_cdev = dev_get_drvdata(dev);
474 struct lm3533_led *led = to_lm3533_led(led_cdev);
475 bool enable;
476 u8 reg;
477 u8 val;
478 int ret;
479
480 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
481 ret = lm3533_read(led->lm3533, reg, &val);
482 if (ret)
483 return ret;
484
485 enable = val & LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK;
486
487 return scnprintf(buf, PAGE_SIZE, "%d\n", enable);
488}
489
490static ssize_t store_als_en(struct device *dev,
491 struct device_attribute *attr,
492 const char *buf, size_t len)
493{
494 struct led_classdev *led_cdev = dev_get_drvdata(dev);
495 struct lm3533_led *led = to_lm3533_led(led_cdev);
496 unsigned enable;
497 u8 reg;
498 u8 mask;
499 u8 val;
500 int ret;
501
502 if (kstrtouint(buf, 0, &enable))
503 return -EINVAL;
504
505 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
506 mask = LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK;
507
508 if (enable)
509 val = mask;
510 else
511 val = 0;
512
513 ret = lm3533_update(led->lm3533, reg, val, mask);
514 if (ret)
515 return ret;
516
517 return len;
518}
519
520static ssize_t show_linear(struct device *dev,
521 struct device_attribute *attr, char *buf)
522{
523 struct led_classdev *led_cdev = dev_get_drvdata(dev);
524 struct lm3533_led *led = to_lm3533_led(led_cdev);
525 u8 reg;
526 u8 val;
527 int linear;
528 int ret;
529
530 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
531 ret = lm3533_read(led->lm3533, reg, &val);
532 if (ret)
533 return ret;
534
535 if (val & LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK)
536 linear = 1;
537 else
538 linear = 0;
539
540 return scnprintf(buf, PAGE_SIZE, "%x\n", linear);
541}
542
543static ssize_t store_linear(struct device *dev,
544 struct device_attribute *attr,
545 const char *buf, size_t len)
546{
547 struct led_classdev *led_cdev = dev_get_drvdata(dev);
548 struct lm3533_led *led = to_lm3533_led(led_cdev);
549 unsigned long linear;
550 u8 reg;
551 u8 mask;
552 u8 val;
553 int ret;
554
555 if (kstrtoul(buf, 0, &linear))
556 return -EINVAL;
557
558 reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE);
559 mask = LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK;
560
561 if (linear)
562 val = mask;
563 else
564 val = 0;
565
566 ret = lm3533_update(led->lm3533, reg, val, mask);
567 if (ret)
568 return ret;
569
570 return len;
571}
572
573static ssize_t show_pwm(struct device *dev,
574 struct device_attribute *attr,
575 char *buf)
576{
577 struct led_classdev *led_cdev = dev_get_drvdata(dev);
578 struct lm3533_led *led = to_lm3533_led(led_cdev);
579 u8 val;
580 int ret;
581
582 ret = lm3533_ctrlbank_get_pwm(&led->cb, &val);
583 if (ret)
584 return ret;
585
586 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
587}
588
589static ssize_t store_pwm(struct device *dev,
590 struct device_attribute *attr,
591 const char *buf, size_t len)
592{
593 struct led_classdev *led_cdev = dev_get_drvdata(dev);
594 struct lm3533_led *led = to_lm3533_led(led_cdev);
595 u8 val;
596 int ret;
597
598 if (kstrtou8(buf, 0, &val))
599 return -EINVAL;
600
601 ret = lm3533_ctrlbank_set_pwm(&led->cb, val);
602 if (ret)
603 return ret;
604
605 return len;
606}
607
608static LM3533_ATTR_RW(als_channel);
609static LM3533_ATTR_RW(als_en);
610static LM3533_ATTR_RW(falltime);
611static LM3533_ATTR_RO(id);
612static LM3533_ATTR_RW(linear);
613static LM3533_ATTR_RW(pwm);
614static LM3533_ATTR_RW(risetime);
615
616static struct attribute *lm3533_led_attributes[] = {
617 &dev_attr_als_channel.attr,
618 &dev_attr_als_en.attr,
619 &dev_attr_falltime.attr,
620 &dev_attr_id.attr,
621 &dev_attr_linear.attr,
622 &dev_attr_pwm.attr,
623 &dev_attr_risetime.attr,
624 NULL,
625};
626
627static umode_t lm3533_led_attr_is_visible(struct kobject *kobj,
628 struct attribute *attr, int n)
629{
630 struct device *dev = container_of(kobj, struct device, kobj);
631 struct led_classdev *led_cdev = dev_get_drvdata(dev);
632 struct lm3533_led *led = to_lm3533_led(led_cdev);
633 umode_t mode = attr->mode;
634
635 if (attr == &dev_attr_als_channel.attr ||
636 attr == &dev_attr_als_en.attr) {
637 if (!led->lm3533->have_als)
638 mode = 0;
639 }
640
641 return mode;
642};
643
644static struct attribute_group lm3533_led_attribute_group = {
645 .is_visible = lm3533_led_attr_is_visible,
646 .attrs = lm3533_led_attributes
647};
648
649static int __devinit lm3533_led_setup(struct lm3533_led *led,
650 struct lm3533_led_platform_data *pdata)
651{
652 int ret;
653
654 ret = lm3533_ctrlbank_set_max_current(&led->cb, pdata->max_current);
655 if (ret)
656 return ret;
657
658 return lm3533_ctrlbank_set_pwm(&led->cb, pdata->pwm);
659}
660
661static int __devinit lm3533_led_probe(struct platform_device *pdev)
662{
663 struct lm3533 *lm3533;
664 struct lm3533_led_platform_data *pdata;
665 struct lm3533_led *led;
666 int ret;
667
668 dev_dbg(&pdev->dev, "%s\n", __func__);
669
670 lm3533 = dev_get_drvdata(pdev->dev.parent);
671 if (!lm3533)
672 return -EINVAL;
673
674 pdata = pdev->dev.platform_data;
675 if (!pdata) {
676 dev_err(&pdev->dev, "no platform data\n");
677 return -EINVAL;
678 }
679
680 if (pdev->id < 0 || pdev->id >= LM3533_LVCTRLBANK_COUNT) {
681 dev_err(&pdev->dev, "illegal LED id %d\n", pdev->id);
682 return -EINVAL;
683 }
684
685 led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL);
686 if (!led)
687 return -ENOMEM;
688
689 led->lm3533 = lm3533;
690 led->cdev.name = pdata->name;
691 led->cdev.default_trigger = pdata->default_trigger;
692 led->cdev.brightness_set = lm3533_led_set;
693 led->cdev.brightness_get = lm3533_led_get;
694 led->cdev.blink_set = lm3533_led_blink_set;
695 led->cdev.brightness = LED_OFF;
696 led->id = pdev->id;
697
698 mutex_init(&led->mutex);
699 INIT_WORK(&led->work, lm3533_led_work);
700
701 /* The class framework makes a callback to get brightness during
702 * registration so use parent device (for error reporting) until
703 * registered.
704 */
705 led->cb.lm3533 = lm3533;
706 led->cb.id = lm3533_led_get_ctrlbank_id(led);
707 led->cb.dev = lm3533->dev;
708
709 platform_set_drvdata(pdev, led);
710
711 ret = led_classdev_register(pdev->dev.parent, &led->cdev);
712 if (ret) {
713 dev_err(&pdev->dev, "failed to register LED %d\n", pdev->id);
714 return ret;
715 }
716
717 led->cb.dev = led->cdev.dev;
718
719 ret = sysfs_create_group(&led->cdev.dev->kobj,
720 &lm3533_led_attribute_group);
721 if (ret < 0) {
722 dev_err(&pdev->dev, "failed to create sysfs attributes\n");
723 goto err_unregister;
724 }
725
726 ret = lm3533_led_setup(led, pdata);
727 if (ret)
728 goto err_sysfs_remove;
729
730 ret = lm3533_ctrlbank_enable(&led->cb);
731 if (ret)
732 goto err_sysfs_remove;
733
734 return 0;
735
736err_sysfs_remove:
737 sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group);
738err_unregister:
739 led_classdev_unregister(&led->cdev);
740 flush_work_sync(&led->work);
741
742 return ret;
743}
744
745static int __devexit lm3533_led_remove(struct platform_device *pdev)
746{
747 struct lm3533_led *led = platform_get_drvdata(pdev);
748
749 dev_dbg(&pdev->dev, "%s\n", __func__);
750
751 lm3533_ctrlbank_disable(&led->cb);
752 sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group);
753 led_classdev_unregister(&led->cdev);
754 flush_work_sync(&led->work);
755
756 return 0;
757}
758
759static void lm3533_led_shutdown(struct platform_device *pdev)
760{
761
762 struct lm3533_led *led = platform_get_drvdata(pdev);
763
764 dev_dbg(&pdev->dev, "%s\n", __func__);
765
766 lm3533_ctrlbank_disable(&led->cb);
767 lm3533_led_set(&led->cdev, LED_OFF); /* disable blink */
768 flush_work_sync(&led->work);
769}
770
771static struct platform_driver lm3533_led_driver = {
772 .driver = {
773 .name = "lm3533-leds",
774 .owner = THIS_MODULE,
775 },
776 .probe = lm3533_led_probe,
777 .remove = __devexit_p(lm3533_led_remove),
778 .shutdown = lm3533_led_shutdown,
779};
780module_platform_driver(lm3533_led_driver);
781
782MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
783MODULE_DESCRIPTION("LM3533 LED driver");
784MODULE_LICENSE("GPL");
785MODULE_ALIAS("platform:lm3533-leds");
diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c
index 410a723b8691..23815624f35e 100644
--- a/drivers/leds/leds-lp5521.c
+++ b/drivers/leds/leds-lp5521.c
@@ -193,9 +193,14 @@ static int lp5521_load_program(struct lp5521_engine *eng, const u8 *pattern)
193 193
194 /* move current engine to direct mode and remember the state */ 194 /* move current engine to direct mode and remember the state */
195 ret = lp5521_set_engine_mode(eng, LP5521_CMD_DIRECT); 195 ret = lp5521_set_engine_mode(eng, LP5521_CMD_DIRECT);
196 if (ret)
197 return ret;
198
196 /* Mode change requires min 500 us delay. 1 - 2 ms with margin */ 199 /* Mode change requires min 500 us delay. 1 - 2 ms with margin */
197 usleep_range(1000, 2000); 200 usleep_range(1000, 2000);
198 ret |= lp5521_read(client, LP5521_REG_OP_MODE, &mode); 201 ret = lp5521_read(client, LP5521_REG_OP_MODE, &mode);
202 if (ret)
203 return ret;
199 204
200 /* For loading, all the engines to load mode */ 205 /* For loading, all the engines to load mode */
201 lp5521_write(client, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); 206 lp5521_write(client, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT);
@@ -211,8 +216,7 @@ static int lp5521_load_program(struct lp5521_engine *eng, const u8 *pattern)
211 LP5521_PROG_MEM_SIZE, 216 LP5521_PROG_MEM_SIZE,
212 pattern); 217 pattern);
213 218
214 ret |= lp5521_write(client, LP5521_REG_OP_MODE, mode); 219 return lp5521_write(client, LP5521_REG_OP_MODE, mode);
215 return ret;
216} 220}
217 221
218static int lp5521_set_led_current(struct lp5521_chip *chip, int led, u8 curr) 222static int lp5521_set_led_current(struct lp5521_chip *chip, int led, u8 curr)
@@ -785,7 +789,7 @@ static int __devinit lp5521_probe(struct i2c_client *client,
785 * LP5521_REG_ENABLE register will not have any effect - strange! 789 * LP5521_REG_ENABLE register will not have any effect - strange!
786 */ 790 */
787 ret = lp5521_read(client, LP5521_REG_R_CURRENT, &buf); 791 ret = lp5521_read(client, LP5521_REG_R_CURRENT, &buf);
788 if (buf != LP5521_REG_R_CURR_DEFAULT) { 792 if (ret || buf != LP5521_REG_R_CURR_DEFAULT) {
789 dev_err(&client->dev, "error in resetting chip\n"); 793 dev_err(&client->dev, "error in resetting chip\n");
790 goto fail2; 794 goto fail2;
791 } 795 }
diff --git a/drivers/leds/leds-mc13783.c b/drivers/leds/leds-mc13783.c
index 8bc491541550..4cc6a2e3df34 100644
--- a/drivers/leds/leds-mc13783.c
+++ b/drivers/leds/leds-mc13783.c
@@ -280,7 +280,7 @@ static int __devinit mc13783_led_probe(struct platform_device *pdev)
280 return -EINVAL; 280 return -EINVAL;
281 } 281 }
282 282
283 led = kzalloc(sizeof(*led) * pdata->num_leds, GFP_KERNEL); 283 led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL);
284 if (led == NULL) { 284 if (led == NULL) {
285 dev_err(&pdev->dev, "failed to alloc memory\n"); 285 dev_err(&pdev->dev, "failed to alloc memory\n");
286 return -ENOMEM; 286 return -ENOMEM;
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index dcc3bc3d38db..5f462dbf0dbb 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -101,11 +101,16 @@ static const struct i2c_device_id pca955x_id[] = {
101}; 101};
102MODULE_DEVICE_TABLE(i2c, pca955x_id); 102MODULE_DEVICE_TABLE(i2c, pca955x_id);
103 103
104struct pca955x_led { 104struct pca955x {
105 struct mutex lock;
106 struct pca955x_led *leds;
105 struct pca955x_chipdef *chipdef; 107 struct pca955x_chipdef *chipdef;
106 struct i2c_client *client; 108 struct i2c_client *client;
109};
110
111struct pca955x_led {
112 struct pca955x *pca955x;
107 struct work_struct work; 113 struct work_struct work;
108 spinlock_t lock;
109 enum led_brightness brightness; 114 enum led_brightness brightness;
110 struct led_classdev led_cdev; 115 struct led_classdev led_cdev;
111 int led_num; /* 0 .. 15 potentially */ 116 int led_num; /* 0 .. 15 potentially */
@@ -140,7 +145,7 @@ static inline u8 pca955x_ledsel(u8 oldval, int led_num, int state)
140 */ 145 */
141static void pca955x_write_psc(struct i2c_client *client, int n, u8 val) 146static void pca955x_write_psc(struct i2c_client *client, int n, u8 val)
142{ 147{
143 struct pca955x_led *pca955x = i2c_get_clientdata(client); 148 struct pca955x *pca955x = i2c_get_clientdata(client);
144 149
145 i2c_smbus_write_byte_data(client, 150 i2c_smbus_write_byte_data(client,
146 pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n, 151 pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n,
@@ -156,7 +161,7 @@ static void pca955x_write_psc(struct i2c_client *client, int n, u8 val)
156 */ 161 */
157static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val) 162static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
158{ 163{
159 struct pca955x_led *pca955x = i2c_get_clientdata(client); 164 struct pca955x *pca955x = i2c_get_clientdata(client);
160 165
161 i2c_smbus_write_byte_data(client, 166 i2c_smbus_write_byte_data(client,
162 pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n, 167 pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n,
@@ -169,7 +174,7 @@ static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
169 */ 174 */
170static void pca955x_write_ls(struct i2c_client *client, int n, u8 val) 175static void pca955x_write_ls(struct i2c_client *client, int n, u8 val)
171{ 176{
172 struct pca955x_led *pca955x = i2c_get_clientdata(client); 177 struct pca955x *pca955x = i2c_get_clientdata(client);
173 178
174 i2c_smbus_write_byte_data(client, 179 i2c_smbus_write_byte_data(client,
175 pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n, 180 pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n,
@@ -182,7 +187,7 @@ static void pca955x_write_ls(struct i2c_client *client, int n, u8 val)
182 */ 187 */
183static u8 pca955x_read_ls(struct i2c_client *client, int n) 188static u8 pca955x_read_ls(struct i2c_client *client, int n)
184{ 189{
185 struct pca955x_led *pca955x = i2c_get_clientdata(client); 190 struct pca955x *pca955x = i2c_get_clientdata(client);
186 191
187 return (u8) i2c_smbus_read_byte_data(client, 192 return (u8) i2c_smbus_read_byte_data(client,
188 pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n); 193 pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n);
@@ -190,18 +195,23 @@ static u8 pca955x_read_ls(struct i2c_client *client, int n)
190 195
191static void pca955x_led_work(struct work_struct *work) 196static void pca955x_led_work(struct work_struct *work)
192{ 197{
193 struct pca955x_led *pca955x; 198 struct pca955x_led *pca955x_led;
199 struct pca955x *pca955x;
194 u8 ls; 200 u8 ls;
195 int chip_ls; /* which LSx to use (0-3 potentially) */ 201 int chip_ls; /* which LSx to use (0-3 potentially) */
196 int ls_led; /* which set of bits within LSx to use (0-3) */ 202 int ls_led; /* which set of bits within LSx to use (0-3) */
197 203
198 pca955x = container_of(work, struct pca955x_led, work); 204 pca955x_led = container_of(work, struct pca955x_led, work);
199 chip_ls = pca955x->led_num / 4; 205 pca955x = pca955x_led->pca955x;
200 ls_led = pca955x->led_num % 4; 206
207 chip_ls = pca955x_led->led_num / 4;
208 ls_led = pca955x_led->led_num % 4;
209
210 mutex_lock(&pca955x->lock);
201 211
202 ls = pca955x_read_ls(pca955x->client, chip_ls); 212 ls = pca955x_read_ls(pca955x->client, chip_ls);
203 213
204 switch (pca955x->brightness) { 214 switch (pca955x_led->brightness) {
205 case LED_FULL: 215 case LED_FULL:
206 ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON); 216 ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON);
207 break; 217 break;
@@ -219,12 +229,15 @@ static void pca955x_led_work(struct work_struct *work)
219 * OFF, HALF, or FULL. But, this is probably better than 229 * OFF, HALF, or FULL. But, this is probably better than
220 * just turning off for all other values. 230 * just turning off for all other values.
221 */ 231 */
222 pca955x_write_pwm(pca955x->client, 1, 255-pca955x->brightness); 232 pca955x_write_pwm(pca955x->client, 1,
233 255 - pca955x_led->brightness);
223 ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK1); 234 ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK1);
224 break; 235 break;
225 } 236 }
226 237
227 pca955x_write_ls(pca955x->client, chip_ls, ls); 238 pca955x_write_ls(pca955x->client, chip_ls, ls);
239
240 mutex_unlock(&pca955x->lock);
228} 241}
229 242
230static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness value) 243static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness value)
@@ -233,7 +246,6 @@ static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness v
233 246
234 pca955x = container_of(led_cdev, struct pca955x_led, led_cdev); 247 pca955x = container_of(led_cdev, struct pca955x_led, led_cdev);
235 248
236 spin_lock(&pca955x->lock);
237 pca955x->brightness = value; 249 pca955x->brightness = value;
238 250
239 /* 251 /*
@@ -241,14 +253,13 @@ static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness v
241 * can sleep. 253 * can sleep.
242 */ 254 */
243 schedule_work(&pca955x->work); 255 schedule_work(&pca955x->work);
244
245 spin_unlock(&pca955x->lock);
246} 256}
247 257
248static int __devinit pca955x_probe(struct i2c_client *client, 258static int __devinit pca955x_probe(struct i2c_client *client,
249 const struct i2c_device_id *id) 259 const struct i2c_device_id *id)
250{ 260{
251 struct pca955x_led *pca955x; 261 struct pca955x *pca955x;
262 struct pca955x_led *pca955x_led;
252 struct pca955x_chipdef *chip; 263 struct pca955x_chipdef *chip;
253 struct i2c_adapter *adapter; 264 struct i2c_adapter *adapter;
254 struct led_platform_data *pdata; 265 struct led_platform_data *pdata;
@@ -282,39 +293,48 @@ static int __devinit pca955x_probe(struct i2c_client *client,
282 } 293 }
283 } 294 }
284 295
285 pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL); 296 pca955x = kzalloc(sizeof(*pca955x), GFP_KERNEL);
286 if (!pca955x) 297 if (!pca955x)
287 return -ENOMEM; 298 return -ENOMEM;
288 299
300 pca955x->leds = kzalloc(sizeof(*pca955x_led) * chip->bits, GFP_KERNEL);
301 if (!pca955x->leds) {
302 err = -ENOMEM;
303 goto exit_nomem;
304 }
305
289 i2c_set_clientdata(client, pca955x); 306 i2c_set_clientdata(client, pca955x);
290 307
308 mutex_init(&pca955x->lock);
309 pca955x->client = client;
310 pca955x->chipdef = chip;
311
291 for (i = 0; i < chip->bits; i++) { 312 for (i = 0; i < chip->bits; i++) {
292 pca955x[i].chipdef = chip; 313 pca955x_led = &pca955x->leds[i];
293 pca955x[i].client = client; 314 pca955x_led->led_num = i;
294 pca955x[i].led_num = i; 315 pca955x_led->pca955x = pca955x;
295 316
296 /* Platform data can specify LED names and default triggers */ 317 /* Platform data can specify LED names and default triggers */
297 if (pdata) { 318 if (pdata) {
298 if (pdata->leds[i].name) 319 if (pdata->leds[i].name)
299 snprintf(pca955x[i].name, 320 snprintf(pca955x_led->name,
300 sizeof(pca955x[i].name), "pca955x:%s", 321 sizeof(pca955x_led->name), "pca955x:%s",
301 pdata->leds[i].name); 322 pdata->leds[i].name);
302 if (pdata->leds[i].default_trigger) 323 if (pdata->leds[i].default_trigger)
303 pca955x[i].led_cdev.default_trigger = 324 pca955x_led->led_cdev.default_trigger =
304 pdata->leds[i].default_trigger; 325 pdata->leds[i].default_trigger;
305 } else { 326 } else {
306 snprintf(pca955x[i].name, sizeof(pca955x[i].name), 327 snprintf(pca955x_led->name, sizeof(pca955x_led->name),
307 "pca955x:%d", i); 328 "pca955x:%d", i);
308 } 329 }
309 330
310 spin_lock_init(&pca955x[i].lock); 331 pca955x_led->led_cdev.name = pca955x_led->name;
311 332 pca955x_led->led_cdev.brightness_set = pca955x_led_set;
312 pca955x[i].led_cdev.name = pca955x[i].name;
313 pca955x[i].led_cdev.brightness_set = pca955x_led_set;
314 333
315 INIT_WORK(&pca955x[i].work, pca955x_led_work); 334 INIT_WORK(&pca955x_led->work, pca955x_led_work);
316 335
317 err = led_classdev_register(&client->dev, &pca955x[i].led_cdev); 336 err = led_classdev_register(&client->dev,
337 &pca955x_led->led_cdev);
318 if (err < 0) 338 if (err < 0)
319 goto exit; 339 goto exit;
320 } 340 }
@@ -337,10 +357,12 @@ static int __devinit pca955x_probe(struct i2c_client *client,
337 357
338exit: 358exit:
339 while (i--) { 359 while (i--) {
340 led_classdev_unregister(&pca955x[i].led_cdev); 360 led_classdev_unregister(&pca955x->leds[i].led_cdev);
341 cancel_work_sync(&pca955x[i].work); 361 cancel_work_sync(&pca955x->leds[i].work);
342 } 362 }
343 363
364 kfree(pca955x->leds);
365exit_nomem:
344 kfree(pca955x); 366 kfree(pca955x);
345 367
346 return err; 368 return err;
@@ -348,14 +370,15 @@ exit:
348 370
349static int __devexit pca955x_remove(struct i2c_client *client) 371static int __devexit pca955x_remove(struct i2c_client *client)
350{ 372{
351 struct pca955x_led *pca955x = i2c_get_clientdata(client); 373 struct pca955x *pca955x = i2c_get_clientdata(client);
352 int i; 374 int i;
353 375
354 for (i = 0; i < pca955x->chipdef->bits; i++) { 376 for (i = 0; i < pca955x->chipdef->bits; i++) {
355 led_classdev_unregister(&pca955x[i].led_cdev); 377 led_classdev_unregister(&pca955x->leds[i].led_cdev);
356 cancel_work_sync(&pca955x[i].work); 378 cancel_work_sync(&pca955x->leds[i].work);
357 } 379 }
358 380
381 kfree(pca955x->leds);
359 kfree(pca955x); 382 kfree(pca955x);
360 383
361 return 0; 384 return 0;
diff --git a/drivers/leds/ledtrig-backlight.c b/drivers/leds/ledtrig-backlight.c
index 2b513a2ad7de..e2726867c5d4 100644
--- a/drivers/leds/ledtrig-backlight.c
+++ b/drivers/leds/ledtrig-backlight.c
@@ -120,6 +120,7 @@ static void bl_trig_activate(struct led_classdev *led)
120 ret = fb_register_client(&n->notifier); 120 ret = fb_register_client(&n->notifier);
121 if (ret) 121 if (ret)
122 dev_err(led->dev, "unable to register backlight trigger\n"); 122 dev_err(led->dev, "unable to register backlight trigger\n");
123 led->activated = true;
123 124
124 return; 125 return;
125 126
@@ -133,10 +134,11 @@ static void bl_trig_deactivate(struct led_classdev *led)
133 struct bl_trig_notifier *n = 134 struct bl_trig_notifier *n =
134 (struct bl_trig_notifier *) led->trigger_data; 135 (struct bl_trig_notifier *) led->trigger_data;
135 136
136 if (n) { 137 if (led->activated) {
137 device_remove_file(led->dev, &dev_attr_inverted); 138 device_remove_file(led->dev, &dev_attr_inverted);
138 fb_unregister_client(&n->notifier); 139 fb_unregister_client(&n->notifier);
139 kfree(n); 140 kfree(n);
141 led->activated = false;
140 } 142 }
141} 143}
142 144
diff --git a/drivers/leds/ledtrig-gpio.c b/drivers/leds/ledtrig-gpio.c
index ecc4bf3f37a9..f057c101b896 100644
--- a/drivers/leds/ledtrig-gpio.c
+++ b/drivers/leds/ledtrig-gpio.c
@@ -200,6 +200,7 @@ static void gpio_trig_activate(struct led_classdev *led)
200 gpio_data->led = led; 200 gpio_data->led = led;
201 led->trigger_data = gpio_data; 201 led->trigger_data = gpio_data;
202 INIT_WORK(&gpio_data->work, gpio_trig_work); 202 INIT_WORK(&gpio_data->work, gpio_trig_work);
203 led->activated = true;
203 204
204 return; 205 return;
205 206
@@ -217,7 +218,7 @@ static void gpio_trig_deactivate(struct led_classdev *led)
217{ 218{
218 struct gpio_trig_data *gpio_data = led->trigger_data; 219 struct gpio_trig_data *gpio_data = led->trigger_data;
219 220
220 if (gpio_data) { 221 if (led->activated) {
221 device_remove_file(led->dev, &dev_attr_gpio); 222 device_remove_file(led->dev, &dev_attr_gpio);
222 device_remove_file(led->dev, &dev_attr_inverted); 223 device_remove_file(led->dev, &dev_attr_inverted);
223 device_remove_file(led->dev, &dev_attr_desired_brightness); 224 device_remove_file(led->dev, &dev_attr_desired_brightness);
@@ -225,6 +226,7 @@ static void gpio_trig_deactivate(struct led_classdev *led)
225 if (gpio_data->gpio != 0) 226 if (gpio_data->gpio != 0)
226 free_irq(gpio_to_irq(gpio_data->gpio), led); 227 free_irq(gpio_to_irq(gpio_data->gpio), led);
227 kfree(gpio_data); 228 kfree(gpio_data);
229 led->activated = false;
228 } 230 }
229} 231}
230 232
diff --git a/drivers/leds/ledtrig-heartbeat.c b/drivers/leds/ledtrig-heartbeat.c
index 759c0bba4a8f..41dc76db4311 100644
--- a/drivers/leds/ledtrig-heartbeat.c
+++ b/drivers/leds/ledtrig-heartbeat.c
@@ -18,6 +18,7 @@
18#include <linux/timer.h> 18#include <linux/timer.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/leds.h> 20#include <linux/leds.h>
21#include <linux/reboot.h>
21#include "leds.h" 22#include "leds.h"
22 23
23struct heartbeat_trig_data { 24struct heartbeat_trig_data {
@@ -83,15 +84,17 @@ static void heartbeat_trig_activate(struct led_classdev *led_cdev)
83 led_heartbeat_function, (unsigned long) led_cdev); 84 led_heartbeat_function, (unsigned long) led_cdev);
84 heartbeat_data->phase = 0; 85 heartbeat_data->phase = 0;
85 led_heartbeat_function(heartbeat_data->timer.data); 86 led_heartbeat_function(heartbeat_data->timer.data);
87 led_cdev->activated = true;
86} 88}
87 89
88static void heartbeat_trig_deactivate(struct led_classdev *led_cdev) 90static void heartbeat_trig_deactivate(struct led_classdev *led_cdev)
89{ 91{
90 struct heartbeat_trig_data *heartbeat_data = led_cdev->trigger_data; 92 struct heartbeat_trig_data *heartbeat_data = led_cdev->trigger_data;
91 93
92 if (heartbeat_data) { 94 if (led_cdev->activated) {
93 del_timer_sync(&heartbeat_data->timer); 95 del_timer_sync(&heartbeat_data->timer);
94 kfree(heartbeat_data); 96 kfree(heartbeat_data);
97 led_cdev->activated = false;
95 } 98 }
96} 99}
97 100
@@ -101,13 +104,38 @@ static struct led_trigger heartbeat_led_trigger = {
101 .deactivate = heartbeat_trig_deactivate, 104 .deactivate = heartbeat_trig_deactivate,
102}; 105};
103 106
107static int heartbeat_reboot_notifier(struct notifier_block *nb,
108 unsigned long code, void *unused)
109{
110 led_trigger_unregister(&heartbeat_led_trigger);
111 return NOTIFY_DONE;
112}
113
114static struct notifier_block heartbeat_reboot_nb = {
115 .notifier_call = heartbeat_reboot_notifier,
116};
117
118static struct notifier_block heartbeat_panic_nb = {
119 .notifier_call = heartbeat_reboot_notifier,
120};
121
104static int __init heartbeat_trig_init(void) 122static int __init heartbeat_trig_init(void)
105{ 123{
106 return led_trigger_register(&heartbeat_led_trigger); 124 int rc = led_trigger_register(&heartbeat_led_trigger);
125
126 if (!rc) {
127 atomic_notifier_chain_register(&panic_notifier_list,
128 &heartbeat_panic_nb);
129 register_reboot_notifier(&heartbeat_reboot_nb);
130 }
131 return rc;
107} 132}
108 133
109static void __exit heartbeat_trig_exit(void) 134static void __exit heartbeat_trig_exit(void)
110{ 135{
136 unregister_reboot_notifier(&heartbeat_reboot_nb);
137 atomic_notifier_chain_unregister(&panic_notifier_list,
138 &heartbeat_panic_nb);
111 led_trigger_unregister(&heartbeat_led_trigger); 139 led_trigger_unregister(&heartbeat_led_trigger);
112} 140}
113 141
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index 328c64c0841c..9010f7abaf2c 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -31,21 +31,17 @@ static ssize_t led_delay_on_store(struct device *dev,
31 struct device_attribute *attr, const char *buf, size_t size) 31 struct device_attribute *attr, const char *buf, size_t size)
32{ 32{
33 struct led_classdev *led_cdev = dev_get_drvdata(dev); 33 struct led_classdev *led_cdev = dev_get_drvdata(dev);
34 int ret = -EINVAL; 34 unsigned long state;
35 char *after; 35 ssize_t ret = -EINVAL;
36 unsigned long state = simple_strtoul(buf, &after, 10); 36
37 size_t count = after - buf; 37 ret = kstrtoul(buf, 10, &state);
38 38 if (ret)
39 if (isspace(*after)) 39 return ret;
40 count++;
41
42 if (count == size) {
43 led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off);
44 led_cdev->blink_delay_on = state;
45 ret = count;
46 }
47 40
48 return ret; 41 led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off);
42 led_cdev->blink_delay_on = state;
43
44 return size;
49} 45}
50 46
51static ssize_t led_delay_off_show(struct device *dev, 47static ssize_t led_delay_off_show(struct device *dev,
@@ -60,21 +56,17 @@ static ssize_t led_delay_off_store(struct device *dev,
60 struct device_attribute *attr, const char *buf, size_t size) 56 struct device_attribute *attr, const char *buf, size_t size)
61{ 57{
62 struct led_classdev *led_cdev = dev_get_drvdata(dev); 58 struct led_classdev *led_cdev = dev_get_drvdata(dev);
63 int ret = -EINVAL; 59 unsigned long state;
64 char *after; 60 ssize_t ret = -EINVAL;
65 unsigned long state = simple_strtoul(buf, &after, 10);
66 size_t count = after - buf;
67
68 if (isspace(*after))
69 count++;
70
71 if (count == size) {
72 led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state);
73 led_cdev->blink_delay_off = state;
74 ret = count;
75 }
76 61
77 return ret; 62 ret = kstrtoul(buf, 10, &state);
63 if (ret)
64 return ret;
65
66 led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state);
67 led_cdev->blink_delay_off = state;
68
69 return size;
78} 70}
79 71
80static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store); 72static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store);
@@ -95,8 +87,7 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
95 87
96 led_blink_set(led_cdev, &led_cdev->blink_delay_on, 88 led_blink_set(led_cdev, &led_cdev->blink_delay_on,
97 &led_cdev->blink_delay_off); 89 &led_cdev->blink_delay_off);
98 90 led_cdev->activated = true;
99 led_cdev->trigger_data = (void *)1;
100 91
101 return; 92 return;
102 93
@@ -106,9 +97,10 @@ err_out_delayon:
106 97
107static void timer_trig_deactivate(struct led_classdev *led_cdev) 98static void timer_trig_deactivate(struct led_classdev *led_cdev)
108{ 99{
109 if (led_cdev->trigger_data) { 100 if (led_cdev->activated) {
110 device_remove_file(led_cdev->dev, &dev_attr_delay_on); 101 device_remove_file(led_cdev->dev, &dev_attr_delay_on);
111 device_remove_file(led_cdev->dev, &dev_attr_delay_off); 102 device_remove_file(led_cdev->dev, &dev_attr_delay_off);
103 led_cdev->activated = false;
112 } 104 }
113 105
114 /* Stop blinking */ 106 /* Stop blinking */
diff --git a/drivers/leds/ledtrig-transient.c b/drivers/leds/ledtrig-transient.c
new file mode 100644
index 000000000000..83179f435e1e
--- /dev/null
+++ b/drivers/leds/ledtrig-transient.c
@@ -0,0 +1,237 @@
1/*
2 * LED Kernel Transient Trigger
3 *
4 * Copyright (C) 2012 Shuah Khan <shuahkhan@gmail.com>
5 *
6 * Based on Richard Purdie's ledtrig-timer.c and Atsushi Nemoto's
7 * ledtrig-heartbeat.c
8 * Design and use-case input from Jonas Bonn <jonas@southpole.se> and
9 * Neil Brown <neilb@suse.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 *
15 */
16/*
17 * Transient trigger allows one shot timer activation. Please refer to
18 * Documentation/leds/ledtrig-transient.txt for details
19*/
20
21#include <linux/module.h>
22#include <linux/kernel.h>
23#include <linux/init.h>
24#include <linux/device.h>
25#include <linux/slab.h>
26#include <linux/timer.h>
27#include <linux/leds.h>
28#include "leds.h"
29
/* Per-LED bookkeeping for the transient trigger, stored in led_cdev->trigger_data. */
30struct transient_trig_data {
31 int activate; /* 1 while the one-shot timer is armed, 0 when idle */
32 int state; /* brightness applied on activation: LED_FULL or LED_OFF */
33 int restore_state; /* brightness applied when the timer expires or is cancelled */
34 unsigned long duration; /* timer length, set through the "duration" attribute */
35 struct timer_list timer; /* one-shot timer running transient_timer_function() */
36};
37
38static void transient_timer_function(unsigned long data)
39{
40 struct led_classdev *led_cdev = (struct led_classdev *) data;
41 struct transient_trig_data *transient_data = led_cdev->trigger_data;
42
43 transient_data->activate = 0;
44 led_set_brightness(led_cdev, transient_data->restore_state);
45}
46
47static ssize_t transient_activate_show(struct device *dev,
48 struct device_attribute *attr, char *buf)
49{
50 struct led_classdev *led_cdev = dev_get_drvdata(dev);
51 struct transient_trig_data *transient_data = led_cdev->trigger_data;
52
53 return sprintf(buf, "%d\n", transient_data->activate);
54}
55
56static ssize_t transient_activate_store(struct device *dev,
57 struct device_attribute *attr, const char *buf, size_t size)
58{
59 struct led_classdev *led_cdev = dev_get_drvdata(dev);
60 struct transient_trig_data *transient_data = led_cdev->trigger_data;
61 unsigned long state;
62 ssize_t ret;
63
64 ret = kstrtoul(buf, 10, &state);
65 if (ret)
66 return ret;
67
68 if (state != 1 && state != 0)
69 return -EINVAL;
70
71 /* cancel the running timer */
72 if (state == 0 && transient_data->activate == 1) {
73 del_timer(&transient_data->timer);
74 transient_data->activate = state;
75 led_set_brightness(led_cdev, transient_data->restore_state);
76 return size;
77 }
78
79 /* start timer if there is no active timer */
80 if (state == 1 && transient_data->activate == 0 &&
81 transient_data->duration != 0) {
82 transient_data->activate = state;
83 led_set_brightness(led_cdev, transient_data->state);
84 transient_data->restore_state =
85 (transient_data->state == LED_FULL) ? LED_OFF : LED_FULL;
86 mod_timer(&transient_data->timer,
87 jiffies + transient_data->duration);
88 }
89
90 /* state == 0 && transient_data->activate == 0
91 timer is not active - just return */
92 /* state == 1 && transient_data->activate == 1
93 timer is already active - just return */
94
95 return size;
96}
97
98static ssize_t transient_duration_show(struct device *dev,
99 struct device_attribute *attr, char *buf)
100{
101 struct led_classdev *led_cdev = dev_get_drvdata(dev);
102 struct transient_trig_data *transient_data = led_cdev->trigger_data;
103
104 return sprintf(buf, "%lu\n", transient_data->duration);
105}
106
107static ssize_t transient_duration_store(struct device *dev,
108 struct device_attribute *attr, const char *buf, size_t size)
109{
110 struct led_classdev *led_cdev = dev_get_drvdata(dev);
111 struct transient_trig_data *transient_data = led_cdev->trigger_data;
112 unsigned long state;
113 ssize_t ret;
114
115 ret = kstrtoul(buf, 10, &state);
116 if (ret)
117 return ret;
118
119 transient_data->duration = state;
120 return size;
121}
122
123static ssize_t transient_state_show(struct device *dev,
124 struct device_attribute *attr, char *buf)
125{
126 struct led_classdev *led_cdev = dev_get_drvdata(dev);
127 struct transient_trig_data *transient_data = led_cdev->trigger_data;
128 int state;
129
130 state = (transient_data->state == LED_FULL) ? 1 : 0;
131 return sprintf(buf, "%d\n", state);
132}
133
134static ssize_t transient_state_store(struct device *dev,
135 struct device_attribute *attr, const char *buf, size_t size)
136{
137 struct led_classdev *led_cdev = dev_get_drvdata(dev);
138 struct transient_trig_data *transient_data = led_cdev->trigger_data;
139 unsigned long state;
140 ssize_t ret;
141
142 ret = kstrtoul(buf, 10, &state);
143 if (ret)
144 return ret;
145
146 if (state != 1 && state != 0)
147 return -EINVAL;
148
149 transient_data->state = (state == 1) ? LED_FULL : LED_OFF;
150 return size;
151}
152
/*
 * sysfs attributes exposed on the LED device while this trigger is
 * selected; all three are mode 0644 and backed by the show/store
 * handlers above.
 */
153static DEVICE_ATTR(activate, 0644, transient_activate_show,
154 transient_activate_store);
155static DEVICE_ATTR(duration, 0644, transient_duration_show,
156 transient_duration_store);
157static DEVICE_ATTR(state, 0644, transient_state_show, transient_state_store);
158
159static void transient_trig_activate(struct led_classdev *led_cdev)
160{
161 int rc;
162 struct transient_trig_data *tdata;
163
164 tdata = kzalloc(sizeof(struct transient_trig_data), GFP_KERNEL);
165 if (!tdata) {
166 dev_err(led_cdev->dev,
167 "unable to allocate transient trigger\n");
168 return;
169 }
170 led_cdev->trigger_data = tdata;
171
172 rc = device_create_file(led_cdev->dev, &dev_attr_activate);
173 if (rc)
174 goto err_out;
175
176 rc = device_create_file(led_cdev->dev, &dev_attr_duration);
177 if (rc)
178 goto err_out_duration;
179
180 rc = device_create_file(led_cdev->dev, &dev_attr_state);
181 if (rc)
182 goto err_out_state;
183
184 setup_timer(&tdata->timer, transient_timer_function,
185 (unsigned long) led_cdev);
186 led_cdev->activated = true;
187
188 return;
189
190err_out_state:
191 device_remove_file(led_cdev->dev, &dev_attr_duration);
192err_out_duration:
193 device_remove_file(led_cdev->dev, &dev_attr_activate);
194err_out:
195 dev_err(led_cdev->dev, "unable to register transient trigger\n");
196 led_cdev->trigger_data = NULL;
197 kfree(tdata);
198}
199
200static void transient_trig_deactivate(struct led_classdev *led_cdev)
201{
202 struct transient_trig_data *transient_data = led_cdev->trigger_data;
203
204 if (led_cdev->activated) {
205 del_timer_sync(&transient_data->timer);
206 led_set_brightness(led_cdev, transient_data->restore_state);
207 device_remove_file(led_cdev->dev, &dev_attr_activate);
208 device_remove_file(led_cdev->dev, &dev_attr_duration);
209 device_remove_file(led_cdev->dev, &dev_attr_state);
210 led_cdev->trigger_data = NULL;
211 led_cdev->activated = false;
212 kfree(transient_data);
213 }
214}
215
/* Trigger descriptor registered with the LED core under the name "transient". */
216static struct led_trigger transient_trigger = {
217 .name = "transient",
218 .activate = transient_trig_activate,
219 .deactivate = transient_trig_deactivate,
220};
221
222static int __init transient_trig_init(void)
223{
224 return led_trigger_register(&transient_trigger);
225}
226
/* Module exit point: unregisters the transient trigger. */
227static void __exit transient_trig_exit(void)
228{
229 led_trigger_unregister(&transient_trigger);
230}
231
/* Hook the init/exit functions into module load/unload and declare module metadata. */
232module_init(transient_trig_init);
233module_exit(transient_trig_exit);
234
235MODULE_AUTHOR("Shuah Khan <shuahkhan@gmail.com>");
236MODULE_DESCRIPTION("Transient LED trigger");
237MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index ee79ce64d9df..57787d87d9a4 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -1104,6 +1104,7 @@ static int __devinit toshiba_acpi_add(struct acpi_device *acpi_dev)
1104 1104
1105 mutex_init(&dev->mutex); 1105 mutex_init(&dev->mutex);
1106 1106
1107 memset(&props, 0, sizeof(props));
1107 props.type = BACKLIGHT_PLATFORM; 1108 props.type = BACKLIGHT_PLATFORM;
1108 props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1; 1109 props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
1109 dev->backlight_dev = backlight_device_register("toshiba", 1110 dev->backlight_dev = backlight_device_register("toshiba",
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 4161bfe462cd..08cbdb900a18 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -620,27 +620,6 @@ config RTC_DRV_MSM6242
620 This driver can also be built as a module. If so, the module 620 This driver can also be built as a module. If so, the module
621 will be called rtc-msm6242. 621 will be called rtc-msm6242.
622 622
623config RTC_DRV_IMXDI
624 tristate "Freescale IMX DryIce Real Time Clock"
625 depends on ARCH_MX25
626 depends on RTC_CLASS
627 help
628 Support for Freescale IMX DryIce RTC
629
630 This driver can also be built as a module, if so, the module
631 will be called "rtc-imxdi".
632
633config RTC_MXC
634 tristate "Freescale MXC Real Time Clock"
635 depends on ARCH_MXC
636 depends on RTC_CLASS
637 help
638 If you say yes here you get support for the Freescale MXC
639 RTC module.
640
641 This driver can also be built as a module, if so, the module
642 will be called "rtc-mxc".
643
644config RTC_DRV_BQ4802 623config RTC_DRV_BQ4802
645 tristate "TI BQ4802" 624 tristate "TI BQ4802"
646 help 625 help
@@ -738,6 +717,16 @@ config RTC_DRV_DAVINCI
738 This driver can also be built as a module. If so, the module 717 This driver can also be built as a module. If so, the module
739 will be called rtc-davinci. 718 will be called rtc-davinci.
740 719
720config RTC_DRV_IMXDI
721 tristate "Freescale IMX DryIce Real Time Clock"
722 depends on SOC_IMX25
723 depends on RTC_CLASS
724 help
725 Support for Freescale IMX DryIce RTC
726
727 This driver can also be built as a module, if so, the module
728 will be called "rtc-imxdi".
729
741config RTC_DRV_OMAP 730config RTC_DRV_OMAP
742 tristate "TI OMAP1" 731 tristate "TI OMAP1"
743 depends on ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 || ARCH_DAVINCI_DA8XX 732 depends on ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 || ARCH_DAVINCI_DA8XX
@@ -1087,4 +1076,15 @@ config RTC_DRV_LOONGSON1
1087 This driver can also be built as a module. If so, the module 1076 This driver can also be built as a module. If so, the module
1088 will be called rtc-ls1x. 1077 will be called rtc-ls1x.
1089 1078
1079config RTC_DRV_MXC
1080 tristate "Freescale MXC Real Time Clock"
1081 depends on ARCH_MXC
1082 depends on RTC_CLASS
1083 help
1084 If you say yes here you get support for the Freescale MXC
1085 RTC module.
1086
1087 This driver can also be built as a module, if so, the module
1088 will be called "rtc-mxc".
1089
1090endif # RTC_CLASS 1090endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 727ae7786e6c..2973921c30d8 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o
61obj-$(CONFIG_RTC_DRV_M48T35) += rtc-m48t35.o 61obj-$(CONFIG_RTC_DRV_M48T35) += rtc-m48t35.o
62obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o 62obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o
63obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o 63obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
64obj-$(CONFIG_RTC_MXC) += rtc-mxc.o 64obj-$(CONFIG_RTC_DRV_MXC) += rtc-mxc.o
65obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o 65obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
66obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o 66obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o
67obj-$(CONFIG_RTC_DRV_MAX8998) += rtc-max8998.o 67obj-$(CONFIG_RTC_DRV_MAX8998) += rtc-max8998.o
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index c293d0cdb104..836710ce750e 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -17,8 +17,7 @@
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/rtc.h> 18#include <linux/rtc.h>
19#include <linux/bcd.h> 19#include <linux/bcd.h>
20 20#include <linux/rtc/ds1307.h>
21
22 21
23/* 22/*
24 * We can't determine type by probing, but if we expect pre-Linux code 23 * We can't determine type by probing, but if we expect pre-Linux code
@@ -92,7 +91,8 @@ enum ds_type {
92# define DS1337_BIT_A2I 0x02 91# define DS1337_BIT_A2I 0x02
93# define DS1337_BIT_A1I 0x01 92# define DS1337_BIT_A1I 0x01
94#define DS1339_REG_ALARM1_SECS 0x07 93#define DS1339_REG_ALARM1_SECS 0x07
95#define DS1339_REG_TRICKLE 0x10 94
95#define DS13XX_TRICKLE_CHARGER_MAGIC 0xa0
96 96
97#define RX8025_REG_CTRL1 0x0e 97#define RX8025_REG_CTRL1 0x0e
98# define RX8025_BIT_2412 0x20 98# define RX8025_BIT_2412 0x20
@@ -124,6 +124,7 @@ struct chip_desc {
124 unsigned alarm:1; 124 unsigned alarm:1;
125 u16 nvram_offset; 125 u16 nvram_offset;
126 u16 nvram_size; 126 u16 nvram_size;
127 u16 trickle_charger_reg;
127}; 128};
128 129
129static const struct chip_desc chips[last_ds_type] = { 130static const struct chip_desc chips[last_ds_type] = {
@@ -140,6 +141,13 @@ static const struct chip_desc chips[last_ds_type] = {
140 }, 141 },
141 [ds_1339] = { 142 [ds_1339] = {
142 .alarm = 1, 143 .alarm = 1,
144 .trickle_charger_reg = 0x10,
145 },
146 [ds_1340] = {
147 .trickle_charger_reg = 0x08,
148 },
149 [ds_1388] = {
150 .trickle_charger_reg = 0x0a,
143 }, 151 },
144 [ds_3231] = { 152 [ds_3231] = {
145 .alarm = 1, 153 .alarm = 1,
@@ -619,6 +627,7 @@ static int __devinit ds1307_probe(struct i2c_client *client,
619 struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); 627 struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
620 int want_irq = false; 628 int want_irq = false;
621 unsigned char *buf; 629 unsigned char *buf;
630 struct ds1307_platform_data *pdata = client->dev.platform_data;
622 static const int bbsqi_bitpos[] = { 631 static const int bbsqi_bitpos[] = {
623 [ds_1337] = 0, 632 [ds_1337] = 0,
624 [ds_1339] = DS1339_BIT_BBSQI, 633 [ds_1339] = DS1339_BIT_BBSQI,
@@ -637,7 +646,10 @@ static int __devinit ds1307_probe(struct i2c_client *client,
637 646
638 ds1307->client = client; 647 ds1307->client = client;
639 ds1307->type = id->driver_data; 648 ds1307->type = id->driver_data;
640 ds1307->offset = 0; 649
650 if (pdata && pdata->trickle_charger_setup && chip->trickle_charger_reg)
651 i2c_smbus_write_byte_data(client, chip->trickle_charger_reg,
652 DS13XX_TRICKLE_CHARGER_MAGIC | pdata->trickle_charger_setup);
641 653
642 buf = ds1307->regs; 654 buf = ds1307->regs;
643 if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { 655 if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 14a42a1edc66..9602278ff988 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -127,7 +127,7 @@ static const struct attribute_group ep93xx_rtc_sysfs_files = {
127 .attrs = ep93xx_rtc_attrs, 127 .attrs = ep93xx_rtc_attrs,
128}; 128};
129 129
130static int __init ep93xx_rtc_probe(struct platform_device *pdev) 130static int __devinit ep93xx_rtc_probe(struct platform_device *pdev)
131{ 131{
132 struct ep93xx_rtc *ep93xx_rtc; 132 struct ep93xx_rtc *ep93xx_rtc;
133 struct resource *res; 133 struct resource *res;
@@ -174,7 +174,7 @@ exit:
174 return err; 174 return err;
175} 175}
176 176
177static int __exit ep93xx_rtc_remove(struct platform_device *pdev) 177static int __devexit ep93xx_rtc_remove(struct platform_device *pdev)
178{ 178{
179 struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); 179 struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev);
180 180
@@ -186,31 +186,19 @@ static int __exit ep93xx_rtc_remove(struct platform_device *pdev)
186 return 0; 186 return 0;
187} 187}
188 188
189/* work with hotplug and coldplug */
190MODULE_ALIAS("platform:ep93xx-rtc");
191
192static struct platform_driver ep93xx_rtc_driver = { 189static struct platform_driver ep93xx_rtc_driver = {
193 .driver = { 190 .driver = {
194 .name = "ep93xx-rtc", 191 .name = "ep93xx-rtc",
195 .owner = THIS_MODULE, 192 .owner = THIS_MODULE,
196 }, 193 },
197 .remove = __exit_p(ep93xx_rtc_remove), 194 .probe = ep93xx_rtc_probe,
195 .remove = __devexit_p(ep93xx_rtc_remove),
198}; 196};
199 197
200static int __init ep93xx_rtc_init(void) 198module_platform_driver(ep93xx_rtc_driver);
201{
202 return platform_driver_probe(&ep93xx_rtc_driver, ep93xx_rtc_probe);
203}
204
205static void __exit ep93xx_rtc_exit(void)
206{
207 platform_driver_unregister(&ep93xx_rtc_driver);
208}
209 199
210MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>"); 200MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
211MODULE_DESCRIPTION("EP93XX RTC driver"); 201MODULE_DESCRIPTION("EP93XX RTC driver");
212MODULE_LICENSE("GPL"); 202MODULE_LICENSE("GPL");
213MODULE_VERSION(DRV_VERSION); 203MODULE_VERSION(DRV_VERSION);
214 204MODULE_ALIAS("platform:ep93xx-rtc");
215module_init(ep93xx_rtc_init);
216module_exit(ep93xx_rtc_exit);
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c
index 63c72189c64b..d5218553741f 100644
--- a/drivers/rtc/rtc-lpc32xx.c
+++ b/drivers/rtc/rtc-lpc32xx.c
@@ -19,6 +19,7 @@
19#include <linux/rtc.h> 19#include <linux/rtc.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/of.h>
22 23
23/* 24/*
24 * Clock and Power control register offsets 25 * Clock and Power control register offsets
@@ -386,13 +387,22 @@ static const struct dev_pm_ops lpc32xx_rtc_pm_ops = {
386#define LPC32XX_RTC_PM_OPS NULL 387#define LPC32XX_RTC_PM_OPS NULL
387#endif 388#endif
388 389
390#ifdef CONFIG_OF
/* Device-tree match table: one compatible string, terminated by an empty entry. */
391static const struct of_device_id lpc32xx_rtc_match[] = {
392 { .compatible = "nxp,lpc3220-rtc" },
393 { }
394};
395MODULE_DEVICE_TABLE(of, lpc32xx_rtc_match);
396#endif
397
389static struct platform_driver lpc32xx_rtc_driver = { 398static struct platform_driver lpc32xx_rtc_driver = {
390 .probe = lpc32xx_rtc_probe, 399 .probe = lpc32xx_rtc_probe,
391 .remove = __devexit_p(lpc32xx_rtc_remove), 400 .remove = __devexit_p(lpc32xx_rtc_remove),
392 .driver = { 401 .driver = {
393 .name = RTC_NAME, 402 .name = RTC_NAME,
394 .owner = THIS_MODULE, 403 .owner = THIS_MODULE,
395 .pm = LPC32XX_RTC_PM_OPS 404 .pm = LPC32XX_RTC_PM_OPS,
405 .of_match_table = of_match_ptr(lpc32xx_rtc_match),
396 }, 406 },
397}; 407};
398 408
diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c
index 10f1c29436ec..efab3d48cb15 100644
--- a/drivers/rtc/rtc-m41t93.c
+++ b/drivers/rtc/rtc-m41t93.c
@@ -48,6 +48,7 @@ static inline int m41t93_set_reg(struct spi_device *spi, u8 addr, u8 data)
48static int m41t93_set_time(struct device *dev, struct rtc_time *tm) 48static int m41t93_set_time(struct device *dev, struct rtc_time *tm)
49{ 49{
50 struct spi_device *spi = to_spi_device(dev); 50 struct spi_device *spi = to_spi_device(dev);
51 int tmp;
51 u8 buf[9] = {0x80}; /* write cmd + 8 data bytes */ 52 u8 buf[9] = {0x80}; /* write cmd + 8 data bytes */
52 u8 * const data = &buf[1]; /* ptr to first data byte */ 53 u8 * const data = &buf[1]; /* ptr to first data byte */
53 54
@@ -62,6 +63,30 @@ static int m41t93_set_time(struct device *dev, struct rtc_time *tm)
62 return -EINVAL; 63 return -EINVAL;
63 } 64 }
64 65
66 tmp = spi_w8r8(spi, M41T93_REG_FLAGS);
67 if (tmp < 0)
68 return tmp;
69
70 if (tmp & M41T93_FLAG_OF) {
71 dev_warn(&spi->dev, "OF bit is set, resetting.\n");
72 m41t93_set_reg(spi, M41T93_REG_FLAGS, tmp & ~M41T93_FLAG_OF);
73
74 tmp = spi_w8r8(spi, M41T93_REG_FLAGS);
75 if (tmp < 0) {
76 return tmp;
77 } else if (tmp & M41T93_FLAG_OF) {
78 /* OF cannot be immediately reset: oscillator has to be
79 * restarted. */
80 u8 reset_osc = buf[M41T93_REG_ST_SEC] | M41T93_FLAG_ST;
81
82 dev_warn(&spi->dev,
83 "OF bit is still set, kickstarting clock.\n");
84 m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc);
85 reset_osc &= ~M41T93_FLAG_ST;
86 m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc);
87 }
88 }
89
65 data[M41T93_REG_SSEC] = 0; 90 data[M41T93_REG_SSEC] = 0;
66 data[M41T93_REG_ST_SEC] = bin2bcd(tm->tm_sec); 91 data[M41T93_REG_ST_SEC] = bin2bcd(tm->tm_sec);
67 data[M41T93_REG_MIN] = bin2bcd(tm->tm_min); 92 data[M41T93_REG_MIN] = bin2bcd(tm->tm_min);
@@ -89,10 +114,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm)
89 1. halt bit (HT) is set: the clock is running but update of readout 114 1. halt bit (HT) is set: the clock is running but update of readout
90 registers has been disabled due to power failure. This is normal 115 registers has been disabled due to power failure. This is normal
91 case after poweron. Time is valid after resetting HT bit. 116 case after poweron. Time is valid after resetting HT bit.
92 2. oscillator fail bit (OF) is set. Oscillator has be stopped and 117 2. oscillator fail bit (OF) is set: time is invalid.
93 time is invalid:
94 a) OF can be immeditely reset.
95 b) OF cannot be immediately reset: oscillator has to be restarted.
96 */ 118 */
97 tmp = spi_w8r8(spi, M41T93_REG_ALM_HOUR_HT); 119 tmp = spi_w8r8(spi, M41T93_REG_ALM_HOUR_HT);
98 if (tmp < 0) 120 if (tmp < 0)
@@ -110,21 +132,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm)
110 132
111 if (tmp & M41T93_FLAG_OF) { 133 if (tmp & M41T93_FLAG_OF) {
112 ret = -EINVAL; 134 ret = -EINVAL;
113 dev_warn(&spi->dev, "OF bit is set, resetting.\n"); 135 dev_warn(&spi->dev, "OF bit is set, write time to restart.\n");
114 m41t93_set_reg(spi, M41T93_REG_FLAGS, tmp & ~M41T93_FLAG_OF);
115
116 tmp = spi_w8r8(spi, M41T93_REG_FLAGS);
117 if (tmp < 0)
118 return tmp;
119 else if (tmp & M41T93_FLAG_OF) {
120 u8 reset_osc = buf[M41T93_REG_ST_SEC] | M41T93_FLAG_ST;
121
122 dev_warn(&spi->dev,
123 "OF bit is still set, kickstarting clock.\n");
124 m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc);
125 reset_osc &= ~M41T93_FLAG_ST;
126 m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc);
127 }
128 } 136 }
129 137
130 if (tmp & M41T93_FLAG_BL) 138 if (tmp & M41T93_FLAG_BL)
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index bc0677de1996..97a3284bb7c6 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -64,6 +64,7 @@ struct pcf8563 {
64 * 1970...2069. 64 * 1970...2069.
65 */ 65 */
66 int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */ 66 int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
67 int voltage_low; /* indicates if a low_voltage was detected */
67}; 68};
68 69
69/* 70/*
@@ -86,9 +87,11 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
86 return -EIO; 87 return -EIO;
87 } 88 }
88 89
89 if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) 90 if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) {
91 pcf8563->voltage_low = 1;
90 dev_info(&client->dev, 92 dev_info(&client->dev,
91 "low voltage detected, date/time is not reliable.\n"); 93 "low voltage detected, date/time is not reliable.\n");
94 }
92 95
93 dev_dbg(&client->dev, 96 dev_dbg(&client->dev,
94 "%s: raw data is st1=%02x, st2=%02x, sec=%02x, min=%02x, hr=%02x, " 97 "%s: raw data is st1=%02x, st2=%02x, sec=%02x, min=%02x, hr=%02x, "
@@ -173,6 +176,44 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
173 return 0; 176 return 0;
174} 177}
175 178
179#ifdef CONFIG_RTC_INTF_DEV
180static int pcf8563_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
181{
182 struct pcf8563 *pcf8563 = i2c_get_clientdata(to_i2c_client(dev));
183 struct rtc_time tm;
184
185 switch (cmd) {
186 case RTC_VL_READ:
187 if (pcf8563->voltage_low)
188 dev_info(dev, "low voltage detected, date/time is not reliable.\n");
189
190 if (copy_to_user((void __user *)arg, &pcf8563->voltage_low,
191 sizeof(int)))
192 return -EFAULT;
193 return 0;
194 case RTC_VL_CLR:
195 /*
196 * Clear the VL bit in the seconds register in case
197 * the time has not been set already (which would
198 * have cleared it). This does not really matter
199 * because of the cached voltage_low value but do it
200 * anyway for consistency.
201 */
202 if (pcf8563_get_datetime(to_i2c_client(dev), &tm))
203 pcf8563_set_datetime(to_i2c_client(dev), &tm);
204
205 /* Clear the cached value. */
206 pcf8563->voltage_low = 0;
207
208 return 0;
209 default:
210 return -ENOIOCTLCMD;
211 }
212}
213#else
214#define pcf8563_rtc_ioctl NULL
215#endif
216
176static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm) 217static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm)
177{ 218{
178 return pcf8563_get_datetime(to_i2c_client(dev), tm); 219 return pcf8563_get_datetime(to_i2c_client(dev), tm);
@@ -184,6 +225,7 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
184} 225}
185 226
186static const struct rtc_class_ops pcf8563_rtc_ops = { 227static const struct rtc_class_ops pcf8563_rtc_ops = {
228 .ioctl = pcf8563_rtc_ioctl,
187 .read_time = pcf8563_rtc_read_time, 229 .read_time = pcf8563_rtc_read_time,
188 .set_time = pcf8563_rtc_set_time, 230 .set_time = pcf8563_rtc_set_time,
189}; 231};
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index f027c063fb20..cc0533994f6e 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -220,17 +220,9 @@ static irqreturn_t pl031_interrupt(int irq, void *dev_id)
220 unsigned long events = 0; 220 unsigned long events = 0;
221 221
222 rtcmis = readl(ldata->base + RTC_MIS); 222 rtcmis = readl(ldata->base + RTC_MIS);
223 if (rtcmis) { 223 if (rtcmis & RTC_BIT_AI) {
224 writel(rtcmis, ldata->base + RTC_ICR); 224 writel(RTC_BIT_AI, ldata->base + RTC_ICR);
225 225 events |= (RTC_AF | RTC_IRQF);
226 if (rtcmis & RTC_BIT_AI)
227 events |= (RTC_AF | RTC_IRQF);
228
229 /* Timer interrupt is only available in ST variants */
230 if ((rtcmis & RTC_BIT_PI) &&
231 (ldata->hw_designer == AMBA_VENDOR_ST))
232 events |= (RTC_PF | RTC_IRQF);
233
234 rtc_update_irq(ldata->rtc, 1, events); 226 rtc_update_irq(ldata->rtc, 1, events);
235 227
236 return IRQ_HANDLED; 228 return IRQ_HANDLED;
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 3f3a29752369..7e6af0b22f17 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -670,6 +670,7 @@ static int s3c_rtc_resume(struct platform_device *pdev)
670#define s3c_rtc_resume NULL 670#define s3c_rtc_resume NULL
671#endif 671#endif
672 672
673#ifdef CONFIG_OF
673static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = { 674static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = {
674 [TYPE_S3C2410] = { TYPE_S3C2410 }, 675 [TYPE_S3C2410] = { TYPE_S3C2410 },
675 [TYPE_S3C2416] = { TYPE_S3C2416 }, 676 [TYPE_S3C2416] = { TYPE_S3C2416 },
@@ -677,7 +678,6 @@ static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = {
677 [TYPE_S3C64XX] = { TYPE_S3C64XX }, 678 [TYPE_S3C64XX] = { TYPE_S3C64XX },
678}; 679};
679 680
680#ifdef CONFIG_OF
681static const struct of_device_id s3c_rtc_dt_match[] = { 681static const struct of_device_id s3c_rtc_dt_match[] = {
682 { 682 {
683 .compatible = "samsung,s3c2410-rtc", 683 .compatible = "samsung,s3c2410-rtc",
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index e38da0dc4187..1f76320e545b 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -16,6 +16,7 @@
16#include <linux/io.h> 16#include <linux/io.h>
17#include <linux/irq.h> 17#include <linux/irq.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/of.h>
19#include <linux/platform_device.h> 20#include <linux/platform_device.h>
20#include <linux/rtc.h> 21#include <linux/rtc.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
@@ -519,6 +520,14 @@ static void spear_rtc_shutdown(struct platform_device *pdev)
519 clk_disable(config->clk); 520 clk_disable(config->clk);
520} 521}
521 522
523#ifdef CONFIG_OF
/* Device-tree match table: one compatible string, terminated by an empty entry. */
524static const struct of_device_id spear_rtc_id_table[] = {
525 { .compatible = "st,spear600-rtc" },
526 {}
527};
528MODULE_DEVICE_TABLE(of, spear_rtc_id_table);
529#endif
530
522static struct platform_driver spear_rtc_driver = { 531static struct platform_driver spear_rtc_driver = {
523 .probe = spear_rtc_probe, 532 .probe = spear_rtc_probe,
524 .remove = __devexit_p(spear_rtc_remove), 533 .remove = __devexit_p(spear_rtc_remove),
@@ -527,6 +536,7 @@ static struct platform_driver spear_rtc_driver = {
527 .shutdown = spear_rtc_shutdown, 536 .shutdown = spear_rtc_shutdown,
528 .driver = { 537 .driver = {
529 .name = "rtc-spear", 538 .name = "rtc-spear",
539 .of_match_table = of_match_ptr(spear_rtc_id_table),
530 }, 540 },
531}; 541};
532 542
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index 75259fe38602..c006025cecc8 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -309,7 +309,8 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev)
309 struct resource *res; 309 struct resource *res;
310 int ret; 310 int ret;
311 311
312 info = kzalloc(sizeof(struct tegra_rtc_info), GFP_KERNEL); 312 info = devm_kzalloc(&pdev->dev, sizeof(struct tegra_rtc_info),
313 GFP_KERNEL);
313 if (!info) 314 if (!info)
314 return -ENOMEM; 315 return -ENOMEM;
315 316
@@ -317,29 +318,18 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev)
317 if (!res) { 318 if (!res) {
318 dev_err(&pdev->dev, 319 dev_err(&pdev->dev,
319 "Unable to allocate resources for device.\n"); 320 "Unable to allocate resources for device.\n");
320 ret = -EBUSY; 321 return -EBUSY;
321 goto err_free_info;
322 } 322 }
323 323
324 if (!request_mem_region(res->start, resource_size(res), pdev->name)) { 324 info->rtc_base = devm_request_and_ioremap(&pdev->dev, res);
325 dev_err(&pdev->dev, 325 if (!info->rtc_base) {
326 "Unable to request mem region for device.\n"); 326 dev_err(&pdev->dev, "Unable to request mem region and grab IOs for device.\n");
327 ret = -EBUSY; 327 return -EBUSY;
328 goto err_free_info;
329 } 328 }
330 329
331 info->tegra_rtc_irq = platform_get_irq(pdev, 0); 330 info->tegra_rtc_irq = platform_get_irq(pdev, 0);
332 if (info->tegra_rtc_irq <= 0) { 331 if (info->tegra_rtc_irq <= 0)
333 ret = -EBUSY; 332 return -EBUSY;
334 goto err_release_mem_region;
335 }
336
337 info->rtc_base = ioremap_nocache(res->start, resource_size(res));
338 if (!info->rtc_base) {
339 dev_err(&pdev->dev, "Unable to grab IOs for device.\n");
340 ret = -EBUSY;
341 goto err_release_mem_region;
342 }
343 333
344 /* set context info. */ 334 /* set context info. */
345 info->pdev = pdev; 335 info->pdev = pdev;
@@ -362,11 +352,12 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev)
362 dev_err(&pdev->dev, 352 dev_err(&pdev->dev,
363 "Unable to register device (err=%d).\n", 353 "Unable to register device (err=%d).\n",
364 ret); 354 ret);
365 goto err_iounmap; 355 return ret;
366 } 356 }
367 357
368 ret = request_irq(info->tegra_rtc_irq, tegra_rtc_irq_handler, 358 ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq,
369 IRQF_TRIGGER_HIGH, "rtc alarm", &pdev->dev); 359 tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH,
360 "rtc alarm", &pdev->dev);
370 if (ret) { 361 if (ret) {
371 dev_err(&pdev->dev, 362 dev_err(&pdev->dev,
372 "Unable to request interrupt for device (err=%d).\n", 363 "Unable to request interrupt for device (err=%d).\n",
@@ -380,12 +371,6 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev)
380 371
381err_dev_unreg: 372err_dev_unreg:
382 rtc_device_unregister(info->rtc_dev); 373 rtc_device_unregister(info->rtc_dev);
383err_iounmap:
384 iounmap(info->rtc_base);
385err_release_mem_region:
386 release_mem_region(res->start, resource_size(res));
387err_free_info:
388 kfree(info);
389 374
390 return ret; 375 return ret;
391} 376}
@@ -393,17 +378,8 @@ err_free_info:
393static int __devexit tegra_rtc_remove(struct platform_device *pdev) 378static int __devexit tegra_rtc_remove(struct platform_device *pdev)
394{ 379{
395 struct tegra_rtc_info *info = platform_get_drvdata(pdev); 380 struct tegra_rtc_info *info = platform_get_drvdata(pdev);
396 struct resource *res;
397
398 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
399 if (!res)
400 return -EBUSY;
401 381
402 free_irq(info->tegra_rtc_irq, &pdev->dev);
403 rtc_device_unregister(info->rtc_dev); 382 rtc_device_unregister(info->rtc_dev);
404 iounmap(info->rtc_base);
405 release_mem_region(res->start, resource_size(res));
406 kfree(info);
407 383
408 platform_set_drvdata(pdev, NULL); 384 platform_set_drvdata(pdev, NULL);
409 385
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 4511420849bc..e84dbecd0991 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/falloc.h>
22#include <linux/miscdevice.h> 23#include <linux/miscdevice.h>
23#include <linux/security.h> 24#include <linux/security.h>
24#include <linux/mm.h> 25#include <linux/mm.h>
@@ -363,11 +364,12 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
363 364
364 mutex_lock(&ashmem_mutex); 365 mutex_lock(&ashmem_mutex);
365 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { 366 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
366 struct inode *inode = range->asma->file->f_dentry->d_inode;
367 loff_t start = range->pgstart * PAGE_SIZE; 367 loff_t start = range->pgstart * PAGE_SIZE;
368 loff_t end = (range->pgend + 1) * PAGE_SIZE - 1; 368 loff_t end = (range->pgend + 1) * PAGE_SIZE;
369 369
370 vmtruncate_range(inode, start, end); 370 do_fallocate(range->asma->file,
371 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
372 start, end - start);
371 range->purged = ASHMEM_WAS_PURGED; 373 range->purged = ASHMEM_WAS_PURGED;
372 lru_del(range); 374 lru_del(range);
373 375
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index af16884491ed..fa2b03750316 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -184,6 +184,18 @@ config BACKLIGHT_GENERIC
184 known as the Corgi backlight driver. If you have a Sharp Zaurus 184 known as the Corgi backlight driver. If you have a Sharp Zaurus
185 SL-C7xx, SL-Cxx00 or SL-6000x say y. 185 SL-C7xx, SL-Cxx00 or SL-6000x say y.
186 186
187config BACKLIGHT_LM3533
188 tristate "Backlight Driver for LM3533"
189 depends on BACKLIGHT_CLASS_DEVICE
190 depends on MFD_LM3533
191 help
192 Say Y to enable the backlight driver for National Semiconductor / TI
193 LM3533 Lighting Power chips.
194
195 The backlights can be controlled directly, through PWM input, or by
196 the ambient-light-sensor interface. The chip supports 256 brightness
197 levels.
198
187config BACKLIGHT_LOCOMO 199config BACKLIGHT_LOCOMO
188 tristate "Sharp LOCOMO LCD/Backlight Driver" 200 tristate "Sharp LOCOMO LCD/Backlight Driver"
189 depends on SHARP_LOCOMO 201 depends on SHARP_LOCOMO
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 36855ae887d6..a2ac9cfbaf6b 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_BACKLIGHT_EP93XX) += ep93xx_bl.o
21obj-$(CONFIG_BACKLIGHT_GENERIC) += generic_bl.o 21obj-$(CONFIG_BACKLIGHT_GENERIC) += generic_bl.o
22obj-$(CONFIG_BACKLIGHT_HP700) += jornada720_bl.o 22obj-$(CONFIG_BACKLIGHT_HP700) += jornada720_bl.o
23obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o 23obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o
24obj-$(CONFIG_BACKLIGHT_LM3533) += lm3533_bl.o
24obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o 25obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o
25obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o 26obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o
26obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o 27obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o
diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index 4911ea7989c8..df5db99af23d 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -160,7 +160,7 @@ static ssize_t adp5520_store(struct device *dev, const char *buf,
160 unsigned long val; 160 unsigned long val;
161 int ret; 161 int ret;
162 162
163 ret = strict_strtoul(buf, 10, &val); 163 ret = kstrtoul(buf, 10, &val);
164 if (ret) 164 if (ret)
165 return ret; 165 return ret;
166 166
@@ -214,7 +214,7 @@ static ssize_t adp5520_bl_daylight_max_store(struct device *dev,
214 struct adp5520_bl *data = dev_get_drvdata(dev); 214 struct adp5520_bl *data = dev_get_drvdata(dev);
215 int ret; 215 int ret;
216 216
217 ret = strict_strtoul(buf, 10, &data->cached_daylight_max); 217 ret = kstrtoul(buf, 10, &data->cached_daylight_max);
218 if (ret < 0) 218 if (ret < 0)
219 return ret; 219 return ret;
220 220
diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c
index 550dbf0bb896..77d1fdba597f 100644
--- a/drivers/video/backlight/adp8860_bl.c
+++ b/drivers/video/backlight/adp8860_bl.c
@@ -222,7 +222,8 @@ static int __devinit adp8860_led_probe(struct i2c_client *client)
222 struct led_info *cur_led; 222 struct led_info *cur_led;
223 int ret, i; 223 int ret, i;
224 224
225 led = kzalloc(sizeof(*led) * pdata->num_leds, GFP_KERNEL); 225 led = devm_kzalloc(&client->dev, sizeof(*led) * pdata->num_leds,
226 GFP_KERNEL);
226 if (led == NULL) { 227 if (led == NULL) {
227 dev_err(&client->dev, "failed to alloc memory\n"); 228 dev_err(&client->dev, "failed to alloc memory\n");
228 return -ENOMEM; 229 return -ENOMEM;
@@ -236,7 +237,7 @@ static int __devinit adp8860_led_probe(struct i2c_client *client)
236 237
237 if (ret) { 238 if (ret) {
238 dev_err(&client->dev, "failed to write\n"); 239 dev_err(&client->dev, "failed to write\n");
239 goto err_free; 240 return ret;
240 } 241 }
241 242
242 for (i = 0; i < pdata->num_leds; ++i) { 243 for (i = 0; i < pdata->num_leds; ++i) {
@@ -291,9 +292,6 @@ static int __devinit adp8860_led_probe(struct i2c_client *client)
291 cancel_work_sync(&led[i].work); 292 cancel_work_sync(&led[i].work);
292 } 293 }
293 294
294 err_free:
295 kfree(led);
296
297 return ret; 295 return ret;
298} 296}
299 297
@@ -309,7 +307,6 @@ static int __devexit adp8860_led_remove(struct i2c_client *client)
309 cancel_work_sync(&data->led[i].work); 307 cancel_work_sync(&data->led[i].work);
310 } 308 }
311 309
312 kfree(data->led);
313 return 0; 310 return 0;
314} 311}
315#else 312#else
@@ -451,7 +448,7 @@ static ssize_t adp8860_store(struct device *dev, const char *buf,
451 unsigned long val; 448 unsigned long val;
452 int ret; 449 int ret;
453 450
454 ret = strict_strtoul(buf, 10, &val); 451 ret = kstrtoul(buf, 10, &val);
455 if (ret) 452 if (ret)
456 return ret; 453 return ret;
457 454
@@ -501,7 +498,7 @@ static ssize_t adp8860_bl_l1_daylight_max_store(struct device *dev,
501 struct device_attribute *attr, const char *buf, size_t count) 498 struct device_attribute *attr, const char *buf, size_t count)
502{ 499{
503 struct adp8860_bl *data = dev_get_drvdata(dev); 500 struct adp8860_bl *data = dev_get_drvdata(dev);
504 int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); 501 int ret = kstrtoul(buf, 10, &data->cached_daylight_max);
505 if (ret) 502 if (ret)
506 return ret; 503 return ret;
507 504
@@ -608,7 +605,7 @@ static ssize_t adp8860_bl_ambient_light_zone_store(struct device *dev,
608 uint8_t reg_val; 605 uint8_t reg_val;
609 int ret; 606 int ret;
610 607
611 ret = strict_strtoul(buf, 10, &val); 608 ret = kstrtoul(buf, 10, &val);
612 if (ret) 609 if (ret)
613 return ret; 610 return ret;
614 611
@@ -675,13 +672,13 @@ static int __devinit adp8860_probe(struct i2c_client *client,
675 return -EINVAL; 672 return -EINVAL;
676 } 673 }
677 674
678 data = kzalloc(sizeof(*data), GFP_KERNEL); 675 data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
679 if (data == NULL) 676 if (data == NULL)
680 return -ENOMEM; 677 return -ENOMEM;
681 678
682 ret = adp8860_read(client, ADP8860_MFDVID, &reg_val); 679 ret = adp8860_read(client, ADP8860_MFDVID, &reg_val);
683 if (ret < 0) 680 if (ret < 0)
684 goto out2; 681 return ret;
685 682
686 switch (ADP8860_MANID(reg_val)) { 683 switch (ADP8860_MANID(reg_val)) {
687 case ADP8863_MANUFID: 684 case ADP8863_MANUFID:
@@ -694,8 +691,7 @@ static int __devinit adp8860_probe(struct i2c_client *client,
694 break; 691 break;
695 default: 692 default:
696 dev_err(&client->dev, "failed to probe\n"); 693 dev_err(&client->dev, "failed to probe\n");
697 ret = -ENODEV; 694 return -ENODEV;
698 goto out2;
699 } 695 }
700 696
701 /* It's confirmed that the DEVID field is actually a REVID */ 697 /* It's confirmed that the DEVID field is actually a REVID */
@@ -717,8 +713,7 @@ static int __devinit adp8860_probe(struct i2c_client *client,
717 &client->dev, data, &adp8860_bl_ops, &props); 713 &client->dev, data, &adp8860_bl_ops, &props);
718 if (IS_ERR(bl)) { 714 if (IS_ERR(bl)) {
719 dev_err(&client->dev, "failed to register backlight\n"); 715 dev_err(&client->dev, "failed to register backlight\n");
720 ret = PTR_ERR(bl); 716 return PTR_ERR(bl);
721 goto out2;
722 } 717 }
723 718
724 bl->props.brightness = ADP8860_MAX_BRIGHTNESS; 719 bl->props.brightness = ADP8860_MAX_BRIGHTNESS;
@@ -756,8 +751,6 @@ out:
756 &adp8860_bl_attr_group); 751 &adp8860_bl_attr_group);
757out1: 752out1:
758 backlight_device_unregister(bl); 753 backlight_device_unregister(bl);
759out2:
760 kfree(data);
761 754
762 return ret; 755 return ret;
763} 756}
@@ -776,7 +769,6 @@ static int __devexit adp8860_remove(struct i2c_client *client)
776 &adp8860_bl_attr_group); 769 &adp8860_bl_attr_group);
777 770
778 backlight_device_unregister(data->bl); 771 backlight_device_unregister(data->bl);
779 kfree(data);
780 772
781 return 0; 773 return 0;
782} 774}
diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c
index 9be58c6f18f1..edf7f91c8e61 100644
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -244,8 +244,8 @@ static int __devinit adp8870_led_probe(struct i2c_client *client)
244 struct led_info *cur_led; 244 struct led_info *cur_led;
245 int ret, i; 245 int ret, i;
246 246
247 247 led = devm_kzalloc(&client->dev, pdata->num_leds * sizeof(*led),
248 led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); 248 GFP_KERNEL);
249 if (led == NULL) { 249 if (led == NULL) {
250 dev_err(&client->dev, "failed to alloc memory\n"); 250 dev_err(&client->dev, "failed to alloc memory\n");
251 return -ENOMEM; 251 return -ENOMEM;
@@ -253,17 +253,17 @@ static int __devinit adp8870_led_probe(struct i2c_client *client)
253 253
254 ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law); 254 ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law);
255 if (ret) 255 if (ret)
256 goto err_free; 256 return ret;
257 257
258 ret = adp8870_write(client, ADP8870_ISCT1, 258 ret = adp8870_write(client, ADP8870_ISCT1,
259 (pdata->led_on_time & 0x3) << 6); 259 (pdata->led_on_time & 0x3) << 6);
260 if (ret) 260 if (ret)
261 goto err_free; 261 return ret;
262 262
263 ret = adp8870_write(client, ADP8870_ISCF, 263 ret = adp8870_write(client, ADP8870_ISCF,
264 FADE_VAL(pdata->led_fade_in, pdata->led_fade_out)); 264 FADE_VAL(pdata->led_fade_in, pdata->led_fade_out));
265 if (ret) 265 if (ret)
266 goto err_free; 266 return ret;
267 267
268 for (i = 0; i < pdata->num_leds; ++i) { 268 for (i = 0; i < pdata->num_leds; ++i) {
269 cur_led = &pdata->leds[i]; 269 cur_led = &pdata->leds[i];
@@ -317,9 +317,6 @@ static int __devinit adp8870_led_probe(struct i2c_client *client)
317 cancel_work_sync(&led[i].work); 317 cancel_work_sync(&led[i].work);
318 } 318 }
319 319
320 err_free:
321 kfree(led);
322
323 return ret; 320 return ret;
324} 321}
325 322
@@ -335,7 +332,6 @@ static int __devexit adp8870_led_remove(struct i2c_client *client)
335 cancel_work_sync(&data->led[i].work); 332 cancel_work_sync(&data->led[i].work);
336 } 333 }
337 334
338 kfree(data->led);
339 return 0; 335 return 0;
340} 336}
341#else 337#else
@@ -572,7 +568,7 @@ static ssize_t adp8870_store(struct device *dev, const char *buf,
572 unsigned long val; 568 unsigned long val;
573 int ret; 569 int ret;
574 570
575 ret = strict_strtoul(buf, 10, &val); 571 ret = kstrtoul(buf, 10, &val);
576 if (ret) 572 if (ret)
577 return ret; 573 return ret;
578 574
@@ -652,7 +648,7 @@ static ssize_t adp8870_bl_l1_daylight_max_store(struct device *dev,
652 struct device_attribute *attr, const char *buf, size_t count) 648 struct device_attribute *attr, const char *buf, size_t count)
653{ 649{
654 struct adp8870_bl *data = dev_get_drvdata(dev); 650 struct adp8870_bl *data = dev_get_drvdata(dev);
655 int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); 651 int ret = kstrtoul(buf, 10, &data->cached_daylight_max);
656 if (ret) 652 if (ret)
657 return ret; 653 return ret;
658 654
@@ -794,7 +790,7 @@ static ssize_t adp8870_bl_ambient_light_zone_store(struct device *dev,
794 uint8_t reg_val; 790 uint8_t reg_val;
795 int ret; 791 int ret;
796 792
797 ret = strict_strtoul(buf, 10, &val); 793 ret = kstrtoul(buf, 10, &val);
798 if (ret) 794 if (ret)
799 return ret; 795 return ret;
800 796
@@ -874,7 +870,7 @@ static int __devinit adp8870_probe(struct i2c_client *client,
874 return -ENODEV; 870 return -ENODEV;
875 } 871 }
876 872
877 data = kzalloc(sizeof(*data), GFP_KERNEL); 873 data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
878 if (data == NULL) 874 if (data == NULL)
879 return -ENOMEM; 875 return -ENOMEM;
880 876
@@ -894,8 +890,7 @@ static int __devinit adp8870_probe(struct i2c_client *client,
894 &client->dev, data, &adp8870_bl_ops, &props); 890 &client->dev, data, &adp8870_bl_ops, &props);
895 if (IS_ERR(bl)) { 891 if (IS_ERR(bl)) {
896 dev_err(&client->dev, "failed to register backlight\n"); 892 dev_err(&client->dev, "failed to register backlight\n");
897 ret = PTR_ERR(bl); 893 return PTR_ERR(bl);
898 goto out2;
899 } 894 }
900 895
901 data->bl = bl; 896 data->bl = bl;
@@ -930,8 +925,6 @@ out:
930 &adp8870_bl_attr_group); 925 &adp8870_bl_attr_group);
931out1: 926out1:
932 backlight_device_unregister(bl); 927 backlight_device_unregister(bl);
933out2:
934 kfree(data);
935 928
936 return ret; 929 return ret;
937} 930}
@@ -950,7 +943,6 @@ static int __devexit adp8870_remove(struct i2c_client *client)
950 &adp8870_bl_attr_group); 943 &adp8870_bl_attr_group);
951 944
952 backlight_device_unregister(data->bl); 945 backlight_device_unregister(data->bl);
953 kfree(data);
954 946
955 return 0; 947 return 0;
956} 948}
diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c
index 7bdadc790117..3729238e7096 100644
--- a/drivers/video/backlight/ams369fg06.c
+++ b/drivers/video/backlight/ams369fg06.c
@@ -482,7 +482,7 @@ static int __devinit ams369fg06_probe(struct spi_device *spi)
482 struct backlight_device *bd = NULL; 482 struct backlight_device *bd = NULL;
483 struct backlight_properties props; 483 struct backlight_properties props;
484 484
485 lcd = kzalloc(sizeof(struct ams369fg06), GFP_KERNEL); 485 lcd = devm_kzalloc(&spi->dev, sizeof(struct ams369fg06), GFP_KERNEL);
486 if (!lcd) 486 if (!lcd)
487 return -ENOMEM; 487 return -ENOMEM;
488 488
@@ -492,7 +492,7 @@ static int __devinit ams369fg06_probe(struct spi_device *spi)
492 ret = spi_setup(spi); 492 ret = spi_setup(spi);
493 if (ret < 0) { 493 if (ret < 0) {
494 dev_err(&spi->dev, "spi setup failed.\n"); 494 dev_err(&spi->dev, "spi setup failed.\n");
495 goto out_free_lcd; 495 return ret;
496 } 496 }
497 497
498 lcd->spi = spi; 498 lcd->spi = spi;
@@ -501,15 +501,13 @@ static int __devinit ams369fg06_probe(struct spi_device *spi)
501 lcd->lcd_pd = spi->dev.platform_data; 501 lcd->lcd_pd = spi->dev.platform_data;
502 if (!lcd->lcd_pd) { 502 if (!lcd->lcd_pd) {
503 dev_err(&spi->dev, "platform data is NULL\n"); 503 dev_err(&spi->dev, "platform data is NULL\n");
504 goto out_free_lcd; 504 return -EFAULT;
505 } 505 }
506 506
507 ld = lcd_device_register("ams369fg06", &spi->dev, lcd, 507 ld = lcd_device_register("ams369fg06", &spi->dev, lcd,
508 &ams369fg06_lcd_ops); 508 &ams369fg06_lcd_ops);
509 if (IS_ERR(ld)) { 509 if (IS_ERR(ld))
510 ret = PTR_ERR(ld); 510 return PTR_ERR(ld);
511 goto out_free_lcd;
512 }
513 511
514 lcd->ld = ld; 512 lcd->ld = ld;
515 513
@@ -547,8 +545,6 @@ static int __devinit ams369fg06_probe(struct spi_device *spi)
547 545
548out_lcd_unregister: 546out_lcd_unregister:
549 lcd_device_unregister(ld); 547 lcd_device_unregister(ld);
550out_free_lcd:
551 kfree(lcd);
552 return ret; 548 return ret;
553} 549}
554 550
@@ -559,7 +555,6 @@ static int __devexit ams369fg06_remove(struct spi_device *spi)
559 ams369fg06_power(lcd, FB_BLANK_POWERDOWN); 555 ams369fg06_power(lcd, FB_BLANK_POWERDOWN);
560 backlight_device_unregister(lcd->bd); 556 backlight_device_unregister(lcd->bd);
561 lcd_device_unregister(lcd->ld); 557 lcd_device_unregister(lcd->ld);
562 kfree(lcd);
563 558
564 return 0; 559 return 0;
565} 560}
@@ -619,7 +614,6 @@ static void ams369fg06_shutdown(struct spi_device *spi)
619static struct spi_driver ams369fg06_driver = { 614static struct spi_driver ams369fg06_driver = {
620 .driver = { 615 .driver = {
621 .name = "ams369fg06", 616 .name = "ams369fg06",
622 .bus = &spi_bus_type,
623 .owner = THIS_MODULE, 617 .owner = THIS_MODULE,
624 }, 618 },
625 .probe = ams369fg06_probe, 619 .probe = ams369fg06_probe,
diff --git a/drivers/video/backlight/apple_bl.c b/drivers/video/backlight/apple_bl.c
index a523b255e124..9dc73ac3709a 100644
--- a/drivers/video/backlight/apple_bl.c
+++ b/drivers/video/backlight/apple_bl.c
@@ -16,6 +16,8 @@
16 * get at the firmware code in order to figure out what it's actually doing. 16 * get at the firmware code in order to figure out what it's actually doing.
17 */ 17 */
18 18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
19#include <linux/module.h> 21#include <linux/module.h>
20#include <linux/kernel.h> 22#include <linux/kernel.h>
21#include <linux/init.h> 23#include <linux/init.h>
@@ -25,6 +27,7 @@
25#include <linux/pci.h> 27#include <linux/pci.h>
26#include <linux/acpi.h> 28#include <linux/acpi.h>
27#include <linux/atomic.h> 29#include <linux/atomic.h>
30#include <linux/apple_bl.h>
28 31
29static struct backlight_device *apple_backlight_device; 32static struct backlight_device *apple_backlight_device;
30 33
@@ -39,8 +42,6 @@ struct hw_data {
39 42
40static const struct hw_data *hw_data; 43static const struct hw_data *hw_data;
41 44
42#define DRIVER "apple_backlight: "
43
44/* Module parameters. */ 45/* Module parameters. */
45static int debug; 46static int debug;
46module_param_named(debug, debug, int, 0644); 47module_param_named(debug, debug, int, 0644);
@@ -60,8 +61,7 @@ static int intel_chipset_send_intensity(struct backlight_device *bd)
60 int intensity = bd->props.brightness; 61 int intensity = bd->props.brightness;
61 62
62 if (debug) 63 if (debug)
63 printk(KERN_DEBUG DRIVER "setting brightness to %d\n", 64 pr_debug("setting brightness to %d\n", intensity);
64 intensity);
65 65
66 intel_chipset_set_brightness(intensity); 66 intel_chipset_set_brightness(intensity);
67 return 0; 67 return 0;
@@ -76,8 +76,7 @@ static int intel_chipset_get_intensity(struct backlight_device *bd)
76 intensity = inb(0xb3) >> 4; 76 intensity = inb(0xb3) >> 4;
77 77
78 if (debug) 78 if (debug)
79 printk(KERN_DEBUG DRIVER "read brightness of %d\n", 79 pr_debug("read brightness of %d\n", intensity);
80 intensity);
81 80
82 return intensity; 81 return intensity;
83} 82}
@@ -107,8 +106,7 @@ static int nvidia_chipset_send_intensity(struct backlight_device *bd)
107 int intensity = bd->props.brightness; 106 int intensity = bd->props.brightness;
108 107
109 if (debug) 108 if (debug)
110 printk(KERN_DEBUG DRIVER "setting brightness to %d\n", 109 pr_debug("setting brightness to %d\n", intensity);
111 intensity);
112 110
113 nvidia_chipset_set_brightness(intensity); 111 nvidia_chipset_set_brightness(intensity);
114 return 0; 112 return 0;
@@ -123,8 +121,7 @@ static int nvidia_chipset_get_intensity(struct backlight_device *bd)
123 intensity = inb(0x52f) >> 4; 121 intensity = inb(0x52f) >> 4;
124 122
125 if (debug) 123 if (debug)
126 printk(KERN_DEBUG DRIVER "read brightness of %d\n", 124 pr_debug("read brightness of %d\n", intensity);
127 intensity);
128 125
129 return intensity; 126 return intensity;
130} 127}
@@ -149,7 +146,7 @@ static int __devinit apple_bl_add(struct acpi_device *dev)
149 host = pci_get_bus_and_slot(0, 0); 146 host = pci_get_bus_and_slot(0, 0);
150 147
151 if (!host) { 148 if (!host) {
152 printk(KERN_ERR DRIVER "unable to find PCI host\n"); 149 pr_err("unable to find PCI host\n");
153 return -ENODEV; 150 return -ENODEV;
154 } 151 }
155 152
@@ -161,7 +158,7 @@ static int __devinit apple_bl_add(struct acpi_device *dev)
161 pci_dev_put(host); 158 pci_dev_put(host);
162 159
163 if (!hw_data) { 160 if (!hw_data) {
164 printk(KERN_ERR DRIVER "unknown hardware\n"); 161 pr_err("unknown hardware\n");
165 return -ENODEV; 162 return -ENODEV;
166 } 163 }
167 164
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bf5b1ece7160..297db2fa91f5 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -5,6 +5,8 @@
5 * 5 *
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/init.h> 11#include <linux/init.h>
10#include <linux/device.h> 12#include <linux/device.h>
@@ -123,7 +125,7 @@ static ssize_t backlight_store_power(struct device *dev,
123 rc = -ENXIO; 125 rc = -ENXIO;
124 mutex_lock(&bd->ops_lock); 126 mutex_lock(&bd->ops_lock);
125 if (bd->ops) { 127 if (bd->ops) {
126 pr_debug("backlight: set power to %lu\n", power); 128 pr_debug("set power to %lu\n", power);
127 if (bd->props.power != power) { 129 if (bd->props.power != power) {
128 bd->props.power = power; 130 bd->props.power = power;
129 backlight_update_status(bd); 131 backlight_update_status(bd);
@@ -161,8 +163,7 @@ static ssize_t backlight_store_brightness(struct device *dev,
161 if (brightness > bd->props.max_brightness) 163 if (brightness > bd->props.max_brightness)
162 rc = -EINVAL; 164 rc = -EINVAL;
163 else { 165 else {
164 pr_debug("backlight: set brightness to %lu\n", 166 pr_debug("set brightness to %lu\n", brightness);
165 brightness);
166 bd->props.brightness = brightness; 167 bd->props.brightness = brightness;
167 backlight_update_status(bd); 168 backlight_update_status(bd);
168 rc = count; 169 rc = count;
@@ -378,8 +379,8 @@ static int __init backlight_class_init(void)
378{ 379{
379 backlight_class = class_create(THIS_MODULE, "backlight"); 380 backlight_class = class_create(THIS_MODULE, "backlight");
380 if (IS_ERR(backlight_class)) { 381 if (IS_ERR(backlight_class)) {
381 printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", 382 pr_warn("Unable to create backlight class; errno = %ld\n",
382 PTR_ERR(backlight_class)); 383 PTR_ERR(backlight_class));
383 return PTR_ERR(backlight_class); 384 return PTR_ERR(backlight_class);
384 } 385 }
385 386
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index 6dab13fe562e..23d732677ba1 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -544,7 +544,7 @@ static int __devinit corgi_lcd_probe(struct spi_device *spi)
544 return -EINVAL; 544 return -EINVAL;
545 } 545 }
546 546
547 lcd = kzalloc(sizeof(struct corgi_lcd), GFP_KERNEL); 547 lcd = devm_kzalloc(&spi->dev, sizeof(struct corgi_lcd), GFP_KERNEL);
548 if (!lcd) { 548 if (!lcd) {
549 dev_err(&spi->dev, "failed to allocate memory\n"); 549 dev_err(&spi->dev, "failed to allocate memory\n");
550 return -ENOMEM; 550 return -ENOMEM;
@@ -554,10 +554,9 @@ static int __devinit corgi_lcd_probe(struct spi_device *spi)
554 554
555 lcd->lcd_dev = lcd_device_register("corgi_lcd", &spi->dev, 555 lcd->lcd_dev = lcd_device_register("corgi_lcd", &spi->dev,
556 lcd, &corgi_lcd_ops); 556 lcd, &corgi_lcd_ops);
557 if (IS_ERR(lcd->lcd_dev)) { 557 if (IS_ERR(lcd->lcd_dev))
558 ret = PTR_ERR(lcd->lcd_dev); 558 return PTR_ERR(lcd->lcd_dev);
559 goto err_free_lcd; 559
560 }
561 lcd->power = FB_BLANK_POWERDOWN; 560 lcd->power = FB_BLANK_POWERDOWN;
562 lcd->mode = (pdata) ? pdata->init_mode : CORGI_LCD_MODE_VGA; 561 lcd->mode = (pdata) ? pdata->init_mode : CORGI_LCD_MODE_VGA;
563 562
@@ -591,8 +590,6 @@ err_unregister_bl:
591 backlight_device_unregister(lcd->bl_dev); 590 backlight_device_unregister(lcd->bl_dev);
592err_unregister_lcd: 591err_unregister_lcd:
593 lcd_device_unregister(lcd->lcd_dev); 592 lcd_device_unregister(lcd->lcd_dev);
594err_free_lcd:
595 kfree(lcd);
596 return ret; 593 return ret;
597} 594}
598 595
@@ -613,7 +610,6 @@ static int __devexit corgi_lcd_remove(struct spi_device *spi)
613 610
614 corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN); 611 corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN);
615 lcd_device_unregister(lcd->lcd_dev); 612 lcd_device_unregister(lcd->lcd_dev);
616 kfree(lcd);
617 613
618 return 0; 614 return 0;
619} 615}
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index 22489eb5f3e0..37bae801e23b 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -27,6 +27,8 @@
27 * Alan Hourihane <alanh-at-tungstengraphics-dot-com> 27 * Alan Hourihane <alanh-at-tungstengraphics-dot-com>
28 */ 28 */
29 29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
30#include <linux/module.h> 32#include <linux/module.h>
31#include <linux/kernel.h> 33#include <linux/kernel.h>
32#include <linux/init.h> 34#include <linux/init.h>
@@ -180,14 +182,13 @@ static int cr_backlight_probe(struct platform_device *pdev)
180 lpc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 182 lpc_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
181 CRVML_DEVICE_LPC, NULL); 183 CRVML_DEVICE_LPC, NULL);
182 if (!lpc_dev) { 184 if (!lpc_dev) {
183 printk("INTEL CARILLO RANCH LPC not found.\n"); 185 pr_err("INTEL CARILLO RANCH LPC not found.\n");
184 return -ENODEV; 186 return -ENODEV;
185 } 187 }
186 188
187 pci_read_config_byte(lpc_dev, CRVML_REG_GPIOEN, &dev_en); 189 pci_read_config_byte(lpc_dev, CRVML_REG_GPIOEN, &dev_en);
188 if (!(dev_en & CRVML_GPIOEN_BIT)) { 190 if (!(dev_en & CRVML_GPIOEN_BIT)) {
189 printk(KERN_ERR 191 pr_err("Carillo Ranch GPIO device was not enabled.\n");
190 "Carillo Ranch GPIO device was not enabled.\n");
191 pci_dev_put(lpc_dev); 192 pci_dev_put(lpc_dev);
192 return -ENODEV; 193 return -ENODEV;
193 } 194 }
@@ -270,7 +271,7 @@ static int __init cr_backlight_init(void)
270 return PTR_ERR(crp); 271 return PTR_ERR(crp);
271 } 272 }
272 273
273 printk("Carillo Ranch Backlight Driver Initialized.\n"); 274 pr_info("Carillo Ranch Backlight Driver Initialized.\n");
274 275
275 return 0; 276 return 0;
276} 277}
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 30e19681a30b..573c7ece0fde 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -136,6 +136,7 @@ static int da903x_backlight_probe(struct platform_device *pdev)
136 da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2, 136 da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2,
137 DA9034_WLED_ISET(pdata->output_current)); 137 DA9034_WLED_ISET(pdata->output_current));
138 138
139 memset(&props, 0, sizeof(props));
139 props.type = BACKLIGHT_RAW; 140 props.type = BACKLIGHT_RAW;
140 props.max_brightness = max_brightness; 141 props.max_brightness = max_brightness;
141 bl = backlight_device_register(pdev->name, data->da903x_dev, data, 142 bl = backlight_device_register(pdev->name, data->da903x_dev, data,
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index 9ce6170c1860..8c660fcd250d 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/module.h> 14#include <linux/module.h>
13#include <linux/kernel.h> 15#include <linux/kernel.h>
14#include <linux/init.h> 16#include <linux/init.h>
@@ -106,7 +108,7 @@ static int genericbl_probe(struct platform_device *pdev)
106 108
107 generic_backlight_device = bd; 109 generic_backlight_device = bd;
108 110
109 printk("Generic Backlight Driver Initialized.\n"); 111 pr_info("Generic Backlight Driver Initialized.\n");
110 return 0; 112 return 0;
111} 113}
112 114
@@ -120,7 +122,7 @@ static int genericbl_remove(struct platform_device *pdev)
120 122
121 backlight_device_unregister(bd); 123 backlight_device_unregister(bd);
122 124
123 printk("Generic Backlight Driver Unloaded\n"); 125 pr_info("Generic Backlight Driver Unloaded\n");
124 return 0; 126 return 0;
125} 127}
126 128
diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c
index 5118a9f029ab..6c9399341bcf 100644
--- a/drivers/video/backlight/ili9320.c
+++ b/drivers/video/backlight/ili9320.c
@@ -220,7 +220,7 @@ int __devinit ili9320_probe_spi(struct spi_device *spi,
220 220
221 /* allocate and initialse our state */ 221 /* allocate and initialse our state */
222 222
223 ili = kzalloc(sizeof(struct ili9320), GFP_KERNEL); 223 ili = devm_kzalloc(&spi->dev, sizeof(struct ili9320), GFP_KERNEL);
224 if (ili == NULL) { 224 if (ili == NULL) {
225 dev_err(dev, "no memory for device\n"); 225 dev_err(dev, "no memory for device\n");
226 return -ENOMEM; 226 return -ENOMEM;
@@ -240,8 +240,7 @@ int __devinit ili9320_probe_spi(struct spi_device *spi,
240 lcd = lcd_device_register("ili9320", dev, ili, &ili9320_ops); 240 lcd = lcd_device_register("ili9320", dev, ili, &ili9320_ops);
241 if (IS_ERR(lcd)) { 241 if (IS_ERR(lcd)) {
242 dev_err(dev, "failed to register lcd device\n"); 242 dev_err(dev, "failed to register lcd device\n");
243 ret = PTR_ERR(lcd); 243 return PTR_ERR(lcd);
244 goto err_free;
245 } 244 }
246 245
247 ili->lcd = lcd; 246 ili->lcd = lcd;
@@ -259,9 +258,6 @@ int __devinit ili9320_probe_spi(struct spi_device *spi,
259 err_unregister: 258 err_unregister:
260 lcd_device_unregister(lcd); 259 lcd_device_unregister(lcd);
261 260
262 err_free:
263 kfree(ili);
264
265 return ret; 261 return ret;
266} 262}
267 263
@@ -272,7 +268,6 @@ int __devexit ili9320_remove(struct ili9320 *ili)
272 ili9320_power(ili, FB_BLANK_POWERDOWN); 268 ili9320_power(ili, FB_BLANK_POWERDOWN);
273 269
274 lcd_device_unregister(ili->lcd); 270 lcd_device_unregister(ili->lcd);
275 kfree(ili);
276 271
277 return 0; 272 return 0;
278} 273}
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index 2f8af5d786ab..16f593b64427 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/backlight.h> 14#include <linux/backlight.h>
13#include <linux/device.h> 15#include <linux/device.h>
14#include <linux/fb.h> 16#include <linux/fb.h>
@@ -38,7 +40,7 @@ static int jornada_bl_get_brightness(struct backlight_device *bd)
38 ret = jornada_ssp_byte(GETBRIGHTNESS); 40 ret = jornada_ssp_byte(GETBRIGHTNESS);
39 41
40 if (jornada_ssp_byte(GETBRIGHTNESS) != TXDUMMY) { 42 if (jornada_ssp_byte(GETBRIGHTNESS) != TXDUMMY) {
41 printk(KERN_ERR "bl : get brightness timeout\n"); 43 pr_err("get brightness timeout\n");
42 jornada_ssp_end(); 44 jornada_ssp_end();
43 return -ETIMEDOUT; 45 return -ETIMEDOUT;
44 } else /* exchange txdummy for value */ 46 } else /* exchange txdummy for value */
@@ -59,7 +61,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
59 if ((bd->props.power != FB_BLANK_UNBLANK) || (bd->props.fb_blank != FB_BLANK_UNBLANK)) { 61 if ((bd->props.power != FB_BLANK_UNBLANK) || (bd->props.fb_blank != FB_BLANK_UNBLANK)) {
60 ret = jornada_ssp_byte(BRIGHTNESSOFF); 62 ret = jornada_ssp_byte(BRIGHTNESSOFF);
61 if (ret != TXDUMMY) { 63 if (ret != TXDUMMY) {
62 printk(KERN_INFO "bl : brightness off timeout\n"); 64 pr_info("brightness off timeout\n");
63 /* turn off backlight */ 65 /* turn off backlight */
64 PPSR &= ~PPC_LDD1; 66 PPSR &= ~PPC_LDD1;
65 PPDR |= PPC_LDD1; 67 PPDR |= PPC_LDD1;
@@ -70,7 +72,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
70 72
71 /* send command to our mcu */ 73 /* send command to our mcu */
72 if (jornada_ssp_byte(SETBRIGHTNESS) != TXDUMMY) { 74 if (jornada_ssp_byte(SETBRIGHTNESS) != TXDUMMY) {
73 printk(KERN_INFO "bl : failed to set brightness\n"); 75 pr_info("failed to set brightness\n");
74 ret = -ETIMEDOUT; 76 ret = -ETIMEDOUT;
75 goto out; 77 goto out;
76 } 78 }
@@ -81,7 +83,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
81 but due to physical layout it is equal to 0, so we simply 83 but due to physical layout it is equal to 0, so we simply
82 invert the value (MAX VALUE - NEW VALUE). */ 84 invert the value (MAX VALUE - NEW VALUE). */
83 if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness) != TXDUMMY) { 85 if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness) != TXDUMMY) {
84 printk(KERN_ERR "bl : set brightness failed\n"); 86 pr_err("set brightness failed\n");
85 ret = -ETIMEDOUT; 87 ret = -ETIMEDOUT;
86 } 88 }
87 89
@@ -113,7 +115,7 @@ static int jornada_bl_probe(struct platform_device *pdev)
113 115
114 if (IS_ERR(bd)) { 116 if (IS_ERR(bd)) {
115 ret = PTR_ERR(bd); 117 ret = PTR_ERR(bd);
116 printk(KERN_ERR "bl : failed to register device, err=%x\n", ret); 118 pr_err("failed to register device, err=%x\n", ret);
117 return ret; 119 return ret;
118 } 120 }
119 121
@@ -125,7 +127,7 @@ static int jornada_bl_probe(struct platform_device *pdev)
125 jornada_bl_update_status(bd); 127 jornada_bl_update_status(bd);
126 128
127 platform_set_drvdata(pdev, bd); 129 platform_set_drvdata(pdev, bd);
128 printk(KERN_INFO "HP Jornada 700 series backlight driver\n"); 130 pr_info("HP Jornada 700 series backlight driver\n");
129 131
130 return 0; 132 return 0;
131} 133}
diff --git a/drivers/video/backlight/jornada720_lcd.c b/drivers/video/backlight/jornada720_lcd.c
index 22d231a17e3c..635b30523fd5 100644
--- a/drivers/video/backlight/jornada720_lcd.c
+++ b/drivers/video/backlight/jornada720_lcd.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/device.h> 14#include <linux/device.h>
13#include <linux/fb.h> 15#include <linux/fb.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -44,7 +46,7 @@ static int jornada_lcd_get_contrast(struct lcd_device *dev)
44 jornada_ssp_start(); 46 jornada_ssp_start();
45 47
46 if (jornada_ssp_byte(GETCONTRAST) != TXDUMMY) { 48 if (jornada_ssp_byte(GETCONTRAST) != TXDUMMY) {
47 printk(KERN_ERR "lcd: get contrast failed\n"); 49 pr_err("get contrast failed\n");
48 jornada_ssp_end(); 50 jornada_ssp_end();
49 return -ETIMEDOUT; 51 return -ETIMEDOUT;
50 } else { 52 } else {
@@ -65,7 +67,7 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value)
65 67
66 /* push the new value */ 68 /* push the new value */
67 if (jornada_ssp_byte(value) != TXDUMMY) { 69 if (jornada_ssp_byte(value) != TXDUMMY) {
68 printk(KERN_ERR "lcd : set contrast failed\n"); 70 pr_err("set contrast failed\n");
69 jornada_ssp_end(); 71 jornada_ssp_end();
70 return -ETIMEDOUT; 72 return -ETIMEDOUT;
71 } 73 }
@@ -103,7 +105,7 @@ static int jornada_lcd_probe(struct platform_device *pdev)
103 105
104 if (IS_ERR(lcd_device)) { 106 if (IS_ERR(lcd_device)) {
105 ret = PTR_ERR(lcd_device); 107 ret = PTR_ERR(lcd_device);
106 printk(KERN_ERR "lcd : failed to register device\n"); 108 pr_err("failed to register device\n");
107 return ret; 109 return ret;
108 } 110 }
109 111
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index 6022b67285ec..40f606a86093 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -11,6 +11,8 @@
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13 13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
14#include <linux/device.h> 16#include <linux/device.h>
15#include <linux/kernel.h> 17#include <linux/kernel.h>
16#include <linux/delay.h> 18#include <linux/delay.h>
@@ -159,7 +161,8 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
159 return -EINVAL; 161 return -EINVAL;
160 } 162 }
161 163
162 priv = kzalloc(sizeof(struct l4f00242t03_priv), GFP_KERNEL); 164 priv = devm_kzalloc(&spi->dev, sizeof(struct l4f00242t03_priv),
165 GFP_KERNEL);
163 166
164 if (priv == NULL) { 167 if (priv == NULL) {
165 dev_err(&spi->dev, "No memory for this device.\n"); 168 dev_err(&spi->dev, "No memory for this device.\n");
@@ -177,7 +180,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
177 if (ret) { 180 if (ret) {
178 dev_err(&spi->dev, 181 dev_err(&spi->dev,
179 "Unable to get the lcd l4f00242t03 reset gpio.\n"); 182 "Unable to get the lcd l4f00242t03 reset gpio.\n");
180 goto err; 183 return ret;
181 } 184 }
182 185
183 ret = gpio_request_one(pdata->data_enable_gpio, GPIOF_OUT_INIT_LOW, 186 ret = gpio_request_one(pdata->data_enable_gpio, GPIOF_OUT_INIT_LOW,
@@ -185,7 +188,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
185 if (ret) { 188 if (ret) {
186 dev_err(&spi->dev, 189 dev_err(&spi->dev,
187 "Unable to get the lcd l4f00242t03 data en gpio.\n"); 190 "Unable to get the lcd l4f00242t03 data en gpio.\n");
188 goto err2; 191 goto err;
189 } 192 }
190 193
191 priv->io_reg = regulator_get(&spi->dev, "vdd"); 194 priv->io_reg = regulator_get(&spi->dev, "vdd");
@@ -193,7 +196,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
193 ret = PTR_ERR(priv->io_reg); 196 ret = PTR_ERR(priv->io_reg);
194 dev_err(&spi->dev, "%s: Unable to get the IO regulator\n", 197 dev_err(&spi->dev, "%s: Unable to get the IO regulator\n",
195 __func__); 198 __func__);
196 goto err3; 199 goto err2;
197 } 200 }
198 201
199 priv->core_reg = regulator_get(&spi->dev, "vcore"); 202 priv->core_reg = regulator_get(&spi->dev, "vcore");
@@ -201,14 +204,14 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
201 ret = PTR_ERR(priv->core_reg); 204 ret = PTR_ERR(priv->core_reg);
202 dev_err(&spi->dev, "%s: Unable to get the core regulator\n", 205 dev_err(&spi->dev, "%s: Unable to get the core regulator\n",
203 __func__); 206 __func__);
204 goto err4; 207 goto err3;
205 } 208 }
206 209
207 priv->ld = lcd_device_register("l4f00242t03", 210 priv->ld = lcd_device_register("l4f00242t03",
208 &spi->dev, priv, &l4f_ops); 211 &spi->dev, priv, &l4f_ops);
209 if (IS_ERR(priv->ld)) { 212 if (IS_ERR(priv->ld)) {
210 ret = PTR_ERR(priv->ld); 213 ret = PTR_ERR(priv->ld);
211 goto err5; 214 goto err4;
212 } 215 }
213 216
214 /* Init the LCD */ 217 /* Init the LCD */
@@ -220,16 +223,14 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
220 223
221 return 0; 224 return 0;
222 225
223err5:
224 regulator_put(priv->core_reg);
225err4: 226err4:
226 regulator_put(priv->io_reg); 227 regulator_put(priv->core_reg);
227err3: 228err3:
228 gpio_free(pdata->data_enable_gpio); 229 regulator_put(priv->io_reg);
229err2: 230err2:
230 gpio_free(pdata->reset_gpio); 231 gpio_free(pdata->data_enable_gpio);
231err: 232err:
232 kfree(priv); 233 gpio_free(pdata->reset_gpio);
233 234
234 return ret; 235 return ret;
235} 236}
@@ -250,8 +251,6 @@ static int __devexit l4f00242t03_remove(struct spi_device *spi)
250 regulator_put(priv->io_reg); 251 regulator_put(priv->io_reg);
251 regulator_put(priv->core_reg); 252 regulator_put(priv->core_reg);
252 253
253 kfree(priv);
254
255 return 0; 254 return 0;
256} 255}
257 256
diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 79c1b0d609a8..a5d0d024bb92 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -5,6 +5,8 @@
5 * 5 *
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/init.h> 11#include <linux/init.h>
10#include <linux/device.h> 12#include <linux/device.h>
@@ -32,6 +34,8 @@ static int fb_notifier_callback(struct notifier_block *self,
32 case FB_EVENT_BLANK: 34 case FB_EVENT_BLANK:
33 case FB_EVENT_MODE_CHANGE: 35 case FB_EVENT_MODE_CHANGE:
34 case FB_EVENT_MODE_CHANGE_ALL: 36 case FB_EVENT_MODE_CHANGE_ALL:
37 case FB_EARLY_EVENT_BLANK:
38 case FB_R_EARLY_EVENT_BLANK:
35 break; 39 break;
36 default: 40 default:
37 return 0; 41 return 0;
@@ -46,6 +50,14 @@ static int fb_notifier_callback(struct notifier_block *self,
46 if (event == FB_EVENT_BLANK) { 50 if (event == FB_EVENT_BLANK) {
47 if (ld->ops->set_power) 51 if (ld->ops->set_power)
48 ld->ops->set_power(ld, *(int *)evdata->data); 52 ld->ops->set_power(ld, *(int *)evdata->data);
53 } else if (event == FB_EARLY_EVENT_BLANK) {
54 if (ld->ops->early_set_power)
55 ld->ops->early_set_power(ld,
56 *(int *)evdata->data);
57 } else if (event == FB_R_EARLY_EVENT_BLANK) {
58 if (ld->ops->r_early_set_power)
59 ld->ops->r_early_set_power(ld,
60 *(int *)evdata->data);
49 } else { 61 } else {
50 if (ld->ops->set_mode) 62 if (ld->ops->set_mode)
51 ld->ops->set_mode(ld, evdata->data); 63 ld->ops->set_mode(ld, evdata->data);
@@ -106,7 +118,7 @@ static ssize_t lcd_store_power(struct device *dev,
106 118
107 mutex_lock(&ld->ops_lock); 119 mutex_lock(&ld->ops_lock);
108 if (ld->ops && ld->ops->set_power) { 120 if (ld->ops && ld->ops->set_power) {
109 pr_debug("lcd: set power to %lu\n", power); 121 pr_debug("set power to %lu\n", power);
110 ld->ops->set_power(ld, power); 122 ld->ops->set_power(ld, power);
111 rc = count; 123 rc = count;
112 } 124 }
@@ -142,7 +154,7 @@ static ssize_t lcd_store_contrast(struct device *dev,
142 154
143 mutex_lock(&ld->ops_lock); 155 mutex_lock(&ld->ops_lock);
144 if (ld->ops && ld->ops->set_contrast) { 156 if (ld->ops && ld->ops->set_contrast) {
145 pr_debug("lcd: set contrast to %lu\n", contrast); 157 pr_debug("set contrast to %lu\n", contrast);
146 ld->ops->set_contrast(ld, contrast); 158 ld->ops->set_contrast(ld, contrast);
147 rc = count; 159 rc = count;
148 } 160 }
@@ -253,8 +265,8 @@ static int __init lcd_class_init(void)
253{ 265{
254 lcd_class = class_create(THIS_MODULE, "lcd"); 266 lcd_class = class_create(THIS_MODULE, "lcd");
255 if (IS_ERR(lcd_class)) { 267 if (IS_ERR(lcd_class)) {
256 printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", 268 pr_warn("Unable to create backlight class; errno = %ld\n",
257 PTR_ERR(lcd_class)); 269 PTR_ERR(lcd_class));
258 return PTR_ERR(lcd_class); 270 return PTR_ERR(lcd_class);
259 } 271 }
260 272
diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c
index efd352be21ae..58f517fb7d40 100644
--- a/drivers/video/backlight/ld9040.c
+++ b/drivers/video/backlight/ld9040.c
@@ -707,7 +707,7 @@ static int ld9040_probe(struct spi_device *spi)
707 struct backlight_device *bd = NULL; 707 struct backlight_device *bd = NULL;
708 struct backlight_properties props; 708 struct backlight_properties props;
709 709
710 lcd = kzalloc(sizeof(struct ld9040), GFP_KERNEL); 710 lcd = devm_kzalloc(&spi->dev, sizeof(struct ld9040), GFP_KERNEL);
711 if (!lcd) 711 if (!lcd)
712 return -ENOMEM; 712 return -ENOMEM;
713 713
@@ -717,7 +717,7 @@ static int ld9040_probe(struct spi_device *spi)
717 ret = spi_setup(spi); 717 ret = spi_setup(spi);
718 if (ret < 0) { 718 if (ret < 0) {
719 dev_err(&spi->dev, "spi setup failed.\n"); 719 dev_err(&spi->dev, "spi setup failed.\n");
720 goto out_free_lcd; 720 return ret;
721 } 721 }
722 722
723 lcd->spi = spi; 723 lcd->spi = spi;
@@ -726,7 +726,7 @@ static int ld9040_probe(struct spi_device *spi)
726 lcd->lcd_pd = spi->dev.platform_data; 726 lcd->lcd_pd = spi->dev.platform_data;
727 if (!lcd->lcd_pd) { 727 if (!lcd->lcd_pd) {
728 dev_err(&spi->dev, "platform data is NULL.\n"); 728 dev_err(&spi->dev, "platform data is NULL.\n");
729 goto out_free_lcd; 729 return -EFAULT;
730 } 730 }
731 731
732 mutex_init(&lcd->lock); 732 mutex_init(&lcd->lock);
@@ -734,13 +734,13 @@ static int ld9040_probe(struct spi_device *spi)
734 ret = regulator_bulk_get(lcd->dev, ARRAY_SIZE(supplies), supplies); 734 ret = regulator_bulk_get(lcd->dev, ARRAY_SIZE(supplies), supplies);
735 if (ret) { 735 if (ret) {
736 dev_err(lcd->dev, "Failed to get regulators: %d\n", ret); 736 dev_err(lcd->dev, "Failed to get regulators: %d\n", ret);
737 goto out_free_lcd; 737 return ret;
738 } 738 }
739 739
740 ld = lcd_device_register("ld9040", &spi->dev, lcd, &ld9040_lcd_ops); 740 ld = lcd_device_register("ld9040", &spi->dev, lcd, &ld9040_lcd_ops);
741 if (IS_ERR(ld)) { 741 if (IS_ERR(ld)) {
742 ret = PTR_ERR(ld); 742 ret = PTR_ERR(ld);
743 goto out_free_lcd; 743 goto out_free_regulator;
744 } 744 }
745 745
746 lcd->ld = ld; 746 lcd->ld = ld;
@@ -782,10 +782,9 @@ static int ld9040_probe(struct spi_device *spi)
782 782
783out_unregister_lcd: 783out_unregister_lcd:
784 lcd_device_unregister(lcd->ld); 784 lcd_device_unregister(lcd->ld);
785out_free_lcd: 785out_free_regulator:
786 regulator_bulk_free(ARRAY_SIZE(supplies), supplies); 786 regulator_bulk_free(ARRAY_SIZE(supplies), supplies);
787 787
788 kfree(lcd);
789 return ret; 788 return ret;
790} 789}
791 790
@@ -797,7 +796,6 @@ static int __devexit ld9040_remove(struct spi_device *spi)
797 backlight_device_unregister(lcd->bd); 796 backlight_device_unregister(lcd->bd);
798 lcd_device_unregister(lcd->ld); 797 lcd_device_unregister(lcd->ld);
799 regulator_bulk_free(ARRAY_SIZE(supplies), supplies); 798 regulator_bulk_free(ARRAY_SIZE(supplies), supplies);
800 kfree(lcd);
801 799
802 return 0; 800 return 0;
803} 801}
@@ -846,7 +844,6 @@ static void ld9040_shutdown(struct spi_device *spi)
846static struct spi_driver ld9040_driver = { 844static struct spi_driver ld9040_driver = {
847 .driver = { 845 .driver = {
848 .name = "ld9040", 846 .name = "ld9040",
849 .bus = &spi_bus_type,
850 .owner = THIS_MODULE, 847 .owner = THIS_MODULE,
851 }, 848 },
852 .probe = ld9040_probe, 849 .probe = ld9040_probe,
diff --git a/drivers/video/backlight/lm3533_bl.c b/drivers/video/backlight/lm3533_bl.c
new file mode 100644
index 000000000000..bebeb63607db
--- /dev/null
+++ b/drivers/video/backlight/lm3533_bl.c
@@ -0,0 +1,423 @@
1/*
2 * lm3533-bl.c -- LM3533 Backlight driver
3 *
4 * Copyright (C) 2011-2012 Texas Instruments
5 *
6 * Author: Johan Hovold <jhovold@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/platform_device.h>
17#include <linux/backlight.h>
18#include <linux/fb.h>
19#include <linux/slab.h>
20
21#include <linux/mfd/lm3533.h>
22
23
/* Number of control banks this driver accepts; probe rejects ids outside 0..COUNT-1. */
#define LM3533_HVCTRLBANK_COUNT		2
/* Value reported as the backlight's max_brightness at registration. */
#define LM3533_BL_MAX_BRIGHTNESS	255

/*
 * Register carrying two configuration bits per control bank:
 * bit (2 * id) is the ALS enable, bit (2 * id + 1) selects linear mapping.
 */
#define LM3533_REG_CTRLBANK_AB_BCONF	0x1a
28
29
30struct lm3533_bl {
31 struct lm3533 *lm3533;
32 struct lm3533_ctrlbank cb;
33 struct backlight_device *bd;
34 int id;
35};
36
37
38static inline int lm3533_bl_get_ctrlbank_id(struct lm3533_bl *bl)
39{
40 return bl->id;
41}
42
43static int lm3533_bl_update_status(struct backlight_device *bd)
44{
45 struct lm3533_bl *bl = bl_get_data(bd);
46 int brightness = bd->props.brightness;
47
48 if (bd->props.power != FB_BLANK_UNBLANK)
49 brightness = 0;
50 if (bd->props.fb_blank != FB_BLANK_UNBLANK)
51 brightness = 0;
52
53 return lm3533_ctrlbank_set_brightness(&bl->cb, (u8)brightness);
54}
55
56static int lm3533_bl_get_brightness(struct backlight_device *bd)
57{
58 struct lm3533_bl *bl = bl_get_data(bd);
59 u8 val;
60 int ret;
61
62 ret = lm3533_ctrlbank_get_brightness(&bl->cb, &val);
63 if (ret)
64 return ret;
65
66 return val;
67}
68
69static const struct backlight_ops lm3533_bl_ops = {
70 .get_brightness = lm3533_bl_get_brightness,
71 .update_status = lm3533_bl_update_status,
72};
73
74static ssize_t show_id(struct device *dev,
75 struct device_attribute *attr, char *buf)
76{
77 struct lm3533_bl *bl = dev_get_drvdata(dev);
78
79 return scnprintf(buf, PAGE_SIZE, "%d\n", bl->id);
80}
81
82static ssize_t show_als_channel(struct device *dev,
83 struct device_attribute *attr, char *buf)
84{
85 struct lm3533_bl *bl = dev_get_drvdata(dev);
86 unsigned channel = lm3533_bl_get_ctrlbank_id(bl);
87
88 return scnprintf(buf, PAGE_SIZE, "%u\n", channel);
89}
90
91static ssize_t show_als_en(struct device *dev,
92 struct device_attribute *attr, char *buf)
93{
94 struct lm3533_bl *bl = dev_get_drvdata(dev);
95 int ctrlbank = lm3533_bl_get_ctrlbank_id(bl);
96 u8 val;
97 u8 mask;
98 bool enable;
99 int ret;
100
101 ret = lm3533_read(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, &val);
102 if (ret)
103 return ret;
104
105 mask = 1 << (2 * ctrlbank);
106 enable = val & mask;
107
108 return scnprintf(buf, PAGE_SIZE, "%d\n", enable);
109}
110
111static ssize_t store_als_en(struct device *dev,
112 struct device_attribute *attr,
113 const char *buf, size_t len)
114{
115 struct lm3533_bl *bl = dev_get_drvdata(dev);
116 int ctrlbank = lm3533_bl_get_ctrlbank_id(bl);
117 int enable;
118 u8 val;
119 u8 mask;
120 int ret;
121
122 if (kstrtoint(buf, 0, &enable))
123 return -EINVAL;
124
125 mask = 1 << (2 * ctrlbank);
126
127 if (enable)
128 val = mask;
129 else
130 val = 0;
131
132 ret = lm3533_update(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, val,
133 mask);
134 if (ret)
135 return ret;
136
137 return len;
138}
139
140static ssize_t show_linear(struct device *dev,
141 struct device_attribute *attr, char *buf)
142{
143 struct lm3533_bl *bl = dev_get_drvdata(dev);
144 u8 val;
145 u8 mask;
146 int linear;
147 int ret;
148
149 ret = lm3533_read(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, &val);
150 if (ret)
151 return ret;
152
153 mask = 1 << (2 * lm3533_bl_get_ctrlbank_id(bl) + 1);
154
155 if (val & mask)
156 linear = 1;
157 else
158 linear = 0;
159
160 return scnprintf(buf, PAGE_SIZE, "%x\n", linear);
161}
162
163static ssize_t store_linear(struct device *dev,
164 struct device_attribute *attr,
165 const char *buf, size_t len)
166{
167 struct lm3533_bl *bl = dev_get_drvdata(dev);
168 unsigned long linear;
169 u8 mask;
170 u8 val;
171 int ret;
172
173 if (kstrtoul(buf, 0, &linear))
174 return -EINVAL;
175
176 mask = 1 << (2 * lm3533_bl_get_ctrlbank_id(bl) + 1);
177
178 if (linear)
179 val = mask;
180 else
181 val = 0;
182
183 ret = lm3533_update(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, val,
184 mask);
185 if (ret)
186 return ret;
187
188 return len;
189}
190
191static ssize_t show_pwm(struct device *dev,
192 struct device_attribute *attr,
193 char *buf)
194{
195 struct lm3533_bl *bl = dev_get_drvdata(dev);
196 u8 val;
197 int ret;
198
199 ret = lm3533_ctrlbank_get_pwm(&bl->cb, &val);
200 if (ret)
201 return ret;
202
203 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
204}
205
206static ssize_t store_pwm(struct device *dev,
207 struct device_attribute *attr,
208 const char *buf, size_t len)
209{
210 struct lm3533_bl *bl = dev_get_drvdata(dev);
211 u8 val;
212 int ret;
213
214 if (kstrtou8(buf, 0, &val))
215 return -EINVAL;
216
217 ret = lm3533_ctrlbank_set_pwm(&bl->cb, val);
218 if (ret)
219 return ret;
220
221 return len;
222}
223
224static LM3533_ATTR_RO(als_channel);
225static LM3533_ATTR_RW(als_en);
226static LM3533_ATTR_RO(id);
227static LM3533_ATTR_RW(linear);
228static LM3533_ATTR_RW(pwm);
229
230static struct attribute *lm3533_bl_attributes[] = {
231 &dev_attr_als_channel.attr,
232 &dev_attr_als_en.attr,
233 &dev_attr_id.attr,
234 &dev_attr_linear.attr,
235 &dev_attr_pwm.attr,
236 NULL,
237};
238
239static umode_t lm3533_bl_attr_is_visible(struct kobject *kobj,
240 struct attribute *attr, int n)
241{
242 struct device *dev = container_of(kobj, struct device, kobj);
243 struct lm3533_bl *bl = dev_get_drvdata(dev);
244 umode_t mode = attr->mode;
245
246 if (attr == &dev_attr_als_channel.attr ||
247 attr == &dev_attr_als_en.attr) {
248 if (!bl->lm3533->have_als)
249 mode = 0;
250 }
251
252 return mode;
253};
254
255static struct attribute_group lm3533_bl_attribute_group = {
256 .is_visible = lm3533_bl_attr_is_visible,
257 .attrs = lm3533_bl_attributes
258};
259
260static int __devinit lm3533_bl_setup(struct lm3533_bl *bl,
261 struct lm3533_bl_platform_data *pdata)
262{
263 int ret;
264
265 ret = lm3533_ctrlbank_set_max_current(&bl->cb, pdata->max_current);
266 if (ret)
267 return ret;
268
269 return lm3533_ctrlbank_set_pwm(&bl->cb, pdata->pwm);
270}
271
272static int __devinit lm3533_bl_probe(struct platform_device *pdev)
273{
274 struct lm3533 *lm3533;
275 struct lm3533_bl_platform_data *pdata;
276 struct lm3533_bl *bl;
277 struct backlight_device *bd;
278 struct backlight_properties props;
279 int ret;
280
281 dev_dbg(&pdev->dev, "%s\n", __func__);
282
283 lm3533 = dev_get_drvdata(pdev->dev.parent);
284 if (!lm3533)
285 return -EINVAL;
286
287 pdata = pdev->dev.platform_data;
288 if (!pdata) {
289 dev_err(&pdev->dev, "no platform data\n");
290 return -EINVAL;
291 }
292
293 if (pdev->id < 0 || pdev->id >= LM3533_HVCTRLBANK_COUNT) {
294 dev_err(&pdev->dev, "illegal backlight id %d\n", pdev->id);
295 return -EINVAL;
296 }
297
298 bl = kzalloc(sizeof(*bl), GFP_KERNEL);
299 if (!bl) {
300 dev_err(&pdev->dev,
301 "failed to allocate memory for backlight\n");
302 return -ENOMEM;
303 }
304
305 bl->lm3533 = lm3533;
306 bl->id = pdev->id;
307
308 bl->cb.lm3533 = lm3533;
309 bl->cb.id = lm3533_bl_get_ctrlbank_id(bl);
310 bl->cb.dev = NULL; /* until registered */
311
312 memset(&props, 0, sizeof(props));
313 props.type = BACKLIGHT_RAW;
314 props.max_brightness = LM3533_BL_MAX_BRIGHTNESS;
315 props.brightness = pdata->default_brightness;
316 bd = backlight_device_register(pdata->name, pdev->dev.parent, bl,
317 &lm3533_bl_ops, &props);
318 if (IS_ERR(bd)) {
319 dev_err(&pdev->dev, "failed to register backlight device\n");
320 ret = PTR_ERR(bd);
321 goto err_free;
322 }
323
324 bl->bd = bd;
325 bl->cb.dev = &bl->bd->dev;
326
327 platform_set_drvdata(pdev, bl);
328
329 ret = sysfs_create_group(&bd->dev.kobj, &lm3533_bl_attribute_group);
330 if (ret < 0) {
331 dev_err(&pdev->dev, "failed to create sysfs attributes\n");
332 goto err_unregister;
333 }
334
335 backlight_update_status(bd);
336
337 ret = lm3533_bl_setup(bl, pdata);
338 if (ret)
339 goto err_sysfs_remove;
340
341 ret = lm3533_ctrlbank_enable(&bl->cb);
342 if (ret)
343 goto err_sysfs_remove;
344
345 return 0;
346
347err_sysfs_remove:
348 sysfs_remove_group(&bd->dev.kobj, &lm3533_bl_attribute_group);
349err_unregister:
350 backlight_device_unregister(bd);
351err_free:
352 kfree(bl);
353
354 return ret;
355}
356
357static int __devexit lm3533_bl_remove(struct platform_device *pdev)
358{
359 struct lm3533_bl *bl = platform_get_drvdata(pdev);
360 struct backlight_device *bd = bl->bd;
361
362 dev_dbg(&bd->dev, "%s\n", __func__);
363
364 bd->props.power = FB_BLANK_POWERDOWN;
365 bd->props.brightness = 0;
366
367 lm3533_ctrlbank_disable(&bl->cb);
368 sysfs_remove_group(&bd->dev.kobj, &lm3533_bl_attribute_group);
369 backlight_device_unregister(bd);
370 kfree(bl);
371
372 return 0;
373}
374
#ifdef CONFIG_PM
/* Suspend: disable the control bank; returns lm3533_ctrlbank_disable()'s result. */
static int lm3533_bl_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct lm3533_bl *bl = platform_get_drvdata(pdev);
	struct lm3533_ctrlbank *cb = &bl->cb;

	dev_dbg(&pdev->dev, "%s\n", __func__);

	return lm3533_ctrlbank_disable(cb);
}

/* Resume: re-enable the control bank; returns lm3533_ctrlbank_enable()'s result. */
static int lm3533_bl_resume(struct platform_device *pdev)
{
	struct lm3533_bl *bl = platform_get_drvdata(pdev);
	struct lm3533_ctrlbank *cb = &bl->cb;

	dev_dbg(&pdev->dev, "%s\n", __func__);

	return lm3533_ctrlbank_enable(cb);
}
#else
/* Without CONFIG_PM the driver registers no suspend/resume callbacks. */
#define lm3533_bl_suspend	NULL
#define lm3533_bl_resume	NULL
#endif
397
398static void lm3533_bl_shutdown(struct platform_device *pdev)
399{
400 struct lm3533_bl *bl = platform_get_drvdata(pdev);
401
402 dev_dbg(&pdev->dev, "%s\n", __func__);
403
404 lm3533_ctrlbank_disable(&bl->cb);
405}
406
407static struct platform_driver lm3533_bl_driver = {
408 .driver = {
409 .name = "lm3533-backlight",
410 .owner = THIS_MODULE,
411 },
412 .probe = lm3533_bl_probe,
413 .remove = __devexit_p(lm3533_bl_remove),
414 .shutdown = lm3533_bl_shutdown,
415 .suspend = lm3533_bl_suspend,
416 .resume = lm3533_bl_resume,
417};
418module_platform_driver(lm3533_bl_driver);
419
420MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
421MODULE_DESCRIPTION("LM3533 Backlight driver");
422MODULE_LICENSE("GPL");
423MODULE_ALIAS("platform:lm3533-backlight");
diff --git a/drivers/video/backlight/lms283gf05.c b/drivers/video/backlight/lms283gf05.c
index 4161f9e3982a..a9f2c36966f1 100644
--- a/drivers/video/backlight/lms283gf05.c
+++ b/drivers/video/backlight/lms283gf05.c
@@ -168,7 +168,8 @@ static int __devinit lms283gf05_probe(struct spi_device *spi)
168 goto err; 168 goto err;
169 } 169 }
170 170
171 st = kzalloc(sizeof(struct lms283gf05_state), GFP_KERNEL); 171 st = devm_kzalloc(&spi->dev, sizeof(struct lms283gf05_state),
172 GFP_KERNEL);
172 if (st == NULL) { 173 if (st == NULL) {
173 dev_err(&spi->dev, "No memory for device state\n"); 174 dev_err(&spi->dev, "No memory for device state\n");
174 ret = -ENOMEM; 175 ret = -ENOMEM;
@@ -178,7 +179,7 @@ static int __devinit lms283gf05_probe(struct spi_device *spi)
178 ld = lcd_device_register("lms283gf05", &spi->dev, st, &lms_ops); 179 ld = lcd_device_register("lms283gf05", &spi->dev, st, &lms_ops);
179 if (IS_ERR(ld)) { 180 if (IS_ERR(ld)) {
180 ret = PTR_ERR(ld); 181 ret = PTR_ERR(ld);
181 goto err2; 182 goto err;
182 } 183 }
183 184
184 st->spi = spi; 185 st->spi = spi;
@@ -193,8 +194,6 @@ static int __devinit lms283gf05_probe(struct spi_device *spi)
193 194
194 return 0; 195 return 0;
195 196
196err2:
197 kfree(st);
198err: 197err:
199 if (pdata != NULL) 198 if (pdata != NULL)
200 gpio_free(pdata->reset_gpio); 199 gpio_free(pdata->reset_gpio);
@@ -212,8 +211,6 @@ static int __devexit lms283gf05_remove(struct spi_device *spi)
212 if (pdata != NULL) 211 if (pdata != NULL)
213 gpio_free(pdata->reset_gpio); 212 gpio_free(pdata->reset_gpio);
214 213
215 kfree(st);
216
217 return 0; 214 return 0;
218} 215}
219 216
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 333949ff3265..6c0f1ac0d32a 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -232,23 +232,20 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
232 struct lcd_device *ld; 232 struct lcd_device *ld;
233 int ret; 233 int ret;
234 234
235 lcd = kzalloc(sizeof(struct ltv350qv), GFP_KERNEL); 235 lcd = devm_kzalloc(&spi->dev, sizeof(struct ltv350qv), GFP_KERNEL);
236 if (!lcd) 236 if (!lcd)
237 return -ENOMEM; 237 return -ENOMEM;
238 238
239 lcd->spi = spi; 239 lcd->spi = spi;
240 lcd->power = FB_BLANK_POWERDOWN; 240 lcd->power = FB_BLANK_POWERDOWN;
241 lcd->buffer = kzalloc(8, GFP_KERNEL); 241 lcd->buffer = devm_kzalloc(&spi->dev, 8, GFP_KERNEL);
242 if (!lcd->buffer) { 242 if (!lcd->buffer)
243 ret = -ENOMEM; 243 return -ENOMEM;
244 goto out_free_lcd;
245 }
246 244
247 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops); 245 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops);
248 if (IS_ERR(ld)) { 246 if (IS_ERR(ld))
249 ret = PTR_ERR(ld); 247 return PTR_ERR(ld);
250 goto out_free_buffer; 248
251 }
252 lcd->ld = ld; 249 lcd->ld = ld;
253 250
254 ret = ltv350qv_power(lcd, FB_BLANK_UNBLANK); 251 ret = ltv350qv_power(lcd, FB_BLANK_UNBLANK);
@@ -261,10 +258,6 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
261 258
262out_unregister: 259out_unregister:
263 lcd_device_unregister(ld); 260 lcd_device_unregister(ld);
264out_free_buffer:
265 kfree(lcd->buffer);
266out_free_lcd:
267 kfree(lcd);
268 return ret; 261 return ret;
269} 262}
270 263
@@ -274,8 +267,6 @@ static int __devexit ltv350qv_remove(struct spi_device *spi)
274 267
275 ltv350qv_power(lcd, FB_BLANK_POWERDOWN); 268 ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
276 lcd_device_unregister(lcd->ld); 269 lcd_device_unregister(lcd->ld);
277 kfree(lcd->buffer);
278 kfree(lcd);
279 270
280 return 0; 271 return 0;
281} 272}
@@ -310,7 +301,6 @@ static void ltv350qv_shutdown(struct spi_device *spi)
310static struct spi_driver ltv350qv_driver = { 301static struct spi_driver ltv350qv_driver = {
311 .driver = { 302 .driver = {
312 .name = "ltv350qv", 303 .name = "ltv350qv",
313 .bus = &spi_bus_type,
314 .owner = THIS_MODULE, 304 .owner = THIS_MODULE,
315 }, 305 },
316 306
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index 0175bfb08a1c..bfdc5fbeaa11 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -18,6 +18,8 @@
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */ 19 */
20 20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
21#include <linux/module.h> 23#include <linux/module.h>
22#include <linux/kernel.h> 24#include <linux/kernel.h>
23#include <linux/init.h> 25#include <linux/init.h>
@@ -168,7 +170,7 @@ static int omapbl_probe(struct platform_device *pdev)
168 dev->props.brightness = pdata->default_intensity; 170 dev->props.brightness = pdata->default_intensity;
169 omapbl_update_status(dev); 171 omapbl_update_status(dev);
170 172
171 printk(KERN_INFO "OMAP LCD backlight initialised\n"); 173 pr_info("OMAP LCD backlight initialised\n");
172 174
173 return 0; 175 return 0;
174} 176}
diff --git a/drivers/video/backlight/pcf50633-backlight.c b/drivers/video/backlight/pcf50633-backlight.c
index c65853cb9740..c092159f4383 100644
--- a/drivers/video/backlight/pcf50633-backlight.c
+++ b/drivers/video/backlight/pcf50633-backlight.c
@@ -111,6 +111,7 @@ static int __devinit pcf50633_bl_probe(struct platform_device *pdev)
111 if (!pcf_bl) 111 if (!pcf_bl)
112 return -ENOMEM; 112 return -ENOMEM;
113 113
114 memset(&bl_props, 0, sizeof(bl_props));
114 bl_props.type = BACKLIGHT_RAW; 115 bl_props.type = BACKLIGHT_RAW;
115 bl_props.max_brightness = 0x3f; 116 bl_props.max_brightness = 0x3f;
116 bl_props.power = FB_BLANK_UNBLANK; 117 bl_props.power = FB_BLANK_UNBLANK;
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 6af183d6465e..69b35f02929e 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -15,6 +15,8 @@
15 * 15 *
16 */ 16 */
17 17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
18#include <linux/module.h> 20#include <linux/module.h>
19#include <linux/kernel.h> 21#include <linux/kernel.h>
20#include <linux/init.h> 22#include <linux/init.h>
@@ -68,13 +70,13 @@ static int progearbl_probe(struct platform_device *pdev)
68 70
69 pmu_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, NULL); 71 pmu_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, NULL);
70 if (!pmu_dev) { 72 if (!pmu_dev) {
71 printk("ALI M7101 PMU not found.\n"); 73 pr_err("ALI M7101 PMU not found.\n");
72 return -ENODEV; 74 return -ENODEV;
73 } 75 }
74 76
75 sb_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); 77 sb_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
76 if (!sb_dev) { 78 if (!sb_dev) {
77 printk("ALI 1533 SB not found.\n"); 79 pr_err("ALI 1533 SB not found.\n");
78 ret = -ENODEV; 80 ret = -ENODEV;
79 goto put_pmu; 81 goto put_pmu;
80 } 82 }
diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c
index e264f55b2574..6437ae474cf2 100644
--- a/drivers/video/backlight/s6e63m0.c
+++ b/drivers/video/backlight/s6e63m0.c
@@ -741,7 +741,7 @@ static int __devinit s6e63m0_probe(struct spi_device *spi)
741 struct backlight_device *bd = NULL; 741 struct backlight_device *bd = NULL;
742 struct backlight_properties props; 742 struct backlight_properties props;
743 743
744 lcd = kzalloc(sizeof(struct s6e63m0), GFP_KERNEL); 744 lcd = devm_kzalloc(&spi->dev, sizeof(struct s6e63m0), GFP_KERNEL);
745 if (!lcd) 745 if (!lcd)
746 return -ENOMEM; 746 return -ENOMEM;
747 747
@@ -751,7 +751,7 @@ static int __devinit s6e63m0_probe(struct spi_device *spi)
751 ret = spi_setup(spi); 751 ret = spi_setup(spi);
752 if (ret < 0) { 752 if (ret < 0) {
753 dev_err(&spi->dev, "spi setup failed.\n"); 753 dev_err(&spi->dev, "spi setup failed.\n");
754 goto out_free_lcd; 754 return ret;
755 } 755 }
756 756
757 lcd->spi = spi; 757 lcd->spi = spi;
@@ -760,14 +760,12 @@ static int __devinit s6e63m0_probe(struct spi_device *spi)
760 lcd->lcd_pd = (struct lcd_platform_data *)spi->dev.platform_data; 760 lcd->lcd_pd = (struct lcd_platform_data *)spi->dev.platform_data;
761 if (!lcd->lcd_pd) { 761 if (!lcd->lcd_pd) {
762 dev_err(&spi->dev, "platform data is NULL.\n"); 762 dev_err(&spi->dev, "platform data is NULL.\n");
763 goto out_free_lcd; 763 return -EFAULT;
764 } 764 }
765 765
766 ld = lcd_device_register("s6e63m0", &spi->dev, lcd, &s6e63m0_lcd_ops); 766 ld = lcd_device_register("s6e63m0", &spi->dev, lcd, &s6e63m0_lcd_ops);
767 if (IS_ERR(ld)) { 767 if (IS_ERR(ld))
768 ret = PTR_ERR(ld); 768 return PTR_ERR(ld);
769 goto out_free_lcd;
770 }
771 769
772 lcd->ld = ld; 770 lcd->ld = ld;
773 771
@@ -824,8 +822,6 @@ static int __devinit s6e63m0_probe(struct spi_device *spi)
824 822
825out_lcd_unregister: 823out_lcd_unregister:
826 lcd_device_unregister(ld); 824 lcd_device_unregister(ld);
827out_free_lcd:
828 kfree(lcd);
829 return ret; 825 return ret;
830} 826}
831 827
@@ -838,7 +834,6 @@ static int __devexit s6e63m0_remove(struct spi_device *spi)
838 device_remove_file(&spi->dev, &dev_attr_gamma_mode); 834 device_remove_file(&spi->dev, &dev_attr_gamma_mode);
839 backlight_device_unregister(lcd->bd); 835 backlight_device_unregister(lcd->bd);
840 lcd_device_unregister(lcd->ld); 836 lcd_device_unregister(lcd->ld);
841 kfree(lcd);
842 837
843 return 0; 838 return 0;
844} 839}
@@ -899,7 +894,6 @@ static void s6e63m0_shutdown(struct spi_device *spi)
899static struct spi_driver s6e63m0_driver = { 894static struct spi_driver s6e63m0_driver = {
900 .driver = { 895 .driver = {
901 .name = "s6e63m0", 896 .name = "s6e63m0",
902 .bus = &spi_bus_type,
903 .owner = THIS_MODULE, 897 .owner = THIS_MODULE,
904 }, 898 },
905 .probe = s6e63m0_probe, 899 .probe = s6e63m0_probe,
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 2368b8e5f89e..02444d042cd5 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -349,7 +349,7 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
349 if (err) 349 if (err)
350 return err; 350 return err;
351 351
352 lcd = kzalloc(sizeof(struct tdo24m), GFP_KERNEL); 352 lcd = devm_kzalloc(&spi->dev, sizeof(struct tdo24m), GFP_KERNEL);
353 if (!lcd) 353 if (!lcd)
354 return -ENOMEM; 354 return -ENOMEM;
355 355
@@ -357,11 +357,9 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
357 lcd->power = FB_BLANK_POWERDOWN; 357 lcd->power = FB_BLANK_POWERDOWN;
358 lcd->mode = MODE_VGA; /* default to VGA */ 358 lcd->mode = MODE_VGA; /* default to VGA */
359 359
360 lcd->buf = kmalloc(TDO24M_SPI_BUFF_SIZE, GFP_KERNEL); 360 lcd->buf = devm_kzalloc(&spi->dev, TDO24M_SPI_BUFF_SIZE, GFP_KERNEL);
361 if (lcd->buf == NULL) { 361 if (lcd->buf == NULL)
362 kfree(lcd);
363 return -ENOMEM; 362 return -ENOMEM;
364 }
365 363
366 m = &lcd->msg; 364 m = &lcd->msg;
367 x = &lcd->xfer; 365 x = &lcd->xfer;
@@ -383,15 +381,13 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
383 break; 381 break;
384 default: 382 default:
385 dev_err(&spi->dev, "Unsupported model"); 383 dev_err(&spi->dev, "Unsupported model");
386 goto out_free; 384 return -EINVAL;
387 } 385 }
388 386
389 lcd->lcd_dev = lcd_device_register("tdo24m", &spi->dev, 387 lcd->lcd_dev = lcd_device_register("tdo24m", &spi->dev,
390 lcd, &tdo24m_ops); 388 lcd, &tdo24m_ops);
391 if (IS_ERR(lcd->lcd_dev)) { 389 if (IS_ERR(lcd->lcd_dev))
392 err = PTR_ERR(lcd->lcd_dev); 390 return PTR_ERR(lcd->lcd_dev);
393 goto out_free;
394 }
395 391
396 dev_set_drvdata(&spi->dev, lcd); 392 dev_set_drvdata(&spi->dev, lcd);
397 err = tdo24m_power(lcd, FB_BLANK_UNBLANK); 393 err = tdo24m_power(lcd, FB_BLANK_UNBLANK);
@@ -402,9 +398,6 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
402 398
403out_unregister: 399out_unregister:
404 lcd_device_unregister(lcd->lcd_dev); 400 lcd_device_unregister(lcd->lcd_dev);
405out_free:
406 kfree(lcd->buf);
407 kfree(lcd);
408 return err; 401 return err;
409} 402}
410 403
@@ -414,8 +407,6 @@ static int __devexit tdo24m_remove(struct spi_device *spi)
414 407
415 tdo24m_power(lcd, FB_BLANK_POWERDOWN); 408 tdo24m_power(lcd, FB_BLANK_POWERDOWN);
416 lcd_device_unregister(lcd->lcd_dev); 409 lcd_device_unregister(lcd->lcd_dev);
417 kfree(lcd->buf);
418 kfree(lcd);
419 410
420 return 0; 411 return 0;
421} 412}
diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c
index 2b241abced43..0d54e607e82d 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -82,8 +82,11 @@ static int __devinit tosa_bl_probe(struct i2c_client *client,
82 const struct i2c_device_id *id) 82 const struct i2c_device_id *id)
83{ 83{
84 struct backlight_properties props; 84 struct backlight_properties props;
85 struct tosa_bl_data *data = kzalloc(sizeof(struct tosa_bl_data), GFP_KERNEL); 85 struct tosa_bl_data *data;
86 int ret = 0; 86 int ret = 0;
87
88 data = devm_kzalloc(&client->dev, sizeof(struct tosa_bl_data),
89 GFP_KERNEL);
87 if (!data) 90 if (!data)
88 return -ENOMEM; 91 return -ENOMEM;
89 92
@@ -92,7 +95,7 @@ static int __devinit tosa_bl_probe(struct i2c_client *client,
92 ret = gpio_request(TOSA_GPIO_BL_C20MA, "backlight"); 95 ret = gpio_request(TOSA_GPIO_BL_C20MA, "backlight");
93 if (ret) { 96 if (ret) {
94 dev_dbg(&data->bl->dev, "Unable to request gpio!\n"); 97 dev_dbg(&data->bl->dev, "Unable to request gpio!\n");
95 goto err_gpio_bl; 98 return ret;
96 } 99 }
97 ret = gpio_direction_output(TOSA_GPIO_BL_C20MA, 0); 100 ret = gpio_direction_output(TOSA_GPIO_BL_C20MA, 0);
98 if (ret) 101 if (ret)
@@ -122,8 +125,6 @@ err_reg:
122 data->bl = NULL; 125 data->bl = NULL;
123err_gpio_dir: 126err_gpio_dir:
124 gpio_free(TOSA_GPIO_BL_C20MA); 127 gpio_free(TOSA_GPIO_BL_C20MA);
125err_gpio_bl:
126 kfree(data);
127 return ret; 128 return ret;
128} 129}
129 130
@@ -136,8 +137,6 @@ static int __devexit tosa_bl_remove(struct i2c_client *client)
136 137
137 gpio_free(TOSA_GPIO_BL_C20MA); 138 gpio_free(TOSA_GPIO_BL_C20MA);
138 139
139 kfree(data);
140
141 return 0; 140 return 0;
142} 141}
143 142
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 2231aec23918..47823b8efff0 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -174,7 +174,8 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi)
174 int ret; 174 int ret;
175 struct tosa_lcd_data *data; 175 struct tosa_lcd_data *data;
176 176
177 data = kzalloc(sizeof(struct tosa_lcd_data), GFP_KERNEL); 177 data = devm_kzalloc(&spi->dev, sizeof(struct tosa_lcd_data),
178 GFP_KERNEL);
178 if (!data) 179 if (!data)
179 return -ENOMEM; 180 return -ENOMEM;
180 181
@@ -187,7 +188,7 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi)
187 188
188 ret = spi_setup(spi); 189 ret = spi_setup(spi);
189 if (ret < 0) 190 if (ret < 0)
190 goto err_spi; 191 return ret;
191 192
192 data->spi = spi; 193 data->spi = spi;
193 dev_set_drvdata(&spi->dev, data); 194 dev_set_drvdata(&spi->dev, data);
@@ -224,8 +225,6 @@ err_gpio_dir:
224 gpio_free(TOSA_GPIO_TG_ON); 225 gpio_free(TOSA_GPIO_TG_ON);
225err_gpio_tg: 226err_gpio_tg:
226 dev_set_drvdata(&spi->dev, NULL); 227 dev_set_drvdata(&spi->dev, NULL);
227err_spi:
228 kfree(data);
229 return ret; 228 return ret;
230} 229}
231 230
@@ -242,7 +241,6 @@ static int __devexit tosa_lcd_remove(struct spi_device *spi)
242 241
243 gpio_free(TOSA_GPIO_TG_ON); 242 gpio_free(TOSA_GPIO_TG_ON);
244 dev_set_drvdata(&spi->dev, NULL); 243 dev_set_drvdata(&spi->dev, NULL);
245 kfree(data);
246 244
247 return 0; 245 return 0;
248} 246}
diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c
index 5d365deb5f82..9e5517a3a52b 100644
--- a/drivers/video/backlight/wm831x_bl.c
+++ b/drivers/video/backlight/wm831x_bl.c
@@ -194,6 +194,7 @@ static int wm831x_backlight_probe(struct platform_device *pdev)
194 data->current_brightness = 0; 194 data->current_brightness = 0;
195 data->isink_reg = isink_reg; 195 data->isink_reg = isink_reg;
196 196
197 memset(&props, 0, sizeof(props));
197 props.type = BACKLIGHT_RAW; 198 props.type = BACKLIGHT_RAW;
198 props.max_brightness = max_isel; 199 props.max_brightness = max_isel;
199 bl = backlight_device_register("wm831x", &pdev->dev, data, 200 bl = backlight_device_register("wm831x", &pdev->dev, data,
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index c6ce416ab587..0dff12a1daef 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1046,20 +1046,29 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
1046int 1046int
1047fb_blank(struct fb_info *info, int blank) 1047fb_blank(struct fb_info *info, int blank)
1048{ 1048{
1049 int ret = -EINVAL; 1049 struct fb_event event;
1050 int ret = -EINVAL, early_ret;
1050 1051
1051 if (blank > FB_BLANK_POWERDOWN) 1052 if (blank > FB_BLANK_POWERDOWN)
1052 blank = FB_BLANK_POWERDOWN; 1053 blank = FB_BLANK_POWERDOWN;
1053 1054
1055 event.info = info;
1056 event.data = &blank;
1057
1058 early_ret = fb_notifier_call_chain(FB_EARLY_EVENT_BLANK, &event);
1059
1054 if (info->fbops->fb_blank) 1060 if (info->fbops->fb_blank)
1055 ret = info->fbops->fb_blank(blank, info); 1061 ret = info->fbops->fb_blank(blank, info);
1056 1062
1057 if (!ret) { 1063 if (!ret)
1058 struct fb_event event;
1059
1060 event.info = info;
1061 event.data = &blank;
1062 fb_notifier_call_chain(FB_EVENT_BLANK, &event); 1064 fb_notifier_call_chain(FB_EVENT_BLANK, &event);
1065 else {
1066 /*
1067 * if fb_blank is failed then revert effects of
1068 * the early blank event.
1069 */
1070 if (!early_ret)
1071 fb_notifier_call_chain(FB_R_EARLY_EVENT_BLANK, &event);
1063 } 1072 }
1064 1073
1065 return ret; 1074 return ret;
diff --git a/drivers/video/omap2/displays/panel-acx565akm.c b/drivers/video/omap2/displays/panel-acx565akm.c
index d26f37ac69d8..74e7cf078505 100644
--- a/drivers/video/omap2/displays/panel-acx565akm.c
+++ b/drivers/video/omap2/displays/panel-acx565akm.c
@@ -532,6 +532,7 @@ static int acx_panel_probe(struct omap_dss_device *dssdev)
532 532
533 /*------- Backlight control --------*/ 533 /*------- Backlight control --------*/
534 534
535 memset(&props, 0, sizeof(props));
535 props.fb_blank = FB_BLANK_UNBLANK; 536 props.fb_blank = FB_BLANK_UNBLANK;
536 props.power = FB_BLANK_UNBLANK; 537 props.power = FB_BLANK_UNBLANK;
537 props.type = BACKLIGHT_RAW; 538 props.type = BACKLIGHT_RAW;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 37268c5bb98b..1b35d6bd06b0 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops =
292 .getxattr = bad_inode_getxattr, 292 .getxattr = bad_inode_getxattr,
293 .listxattr = bad_inode_listxattr, 293 .listxattr = bad_inode_listxattr,
294 .removexattr = bad_inode_removexattr, 294 .removexattr = bad_inode_removexattr,
295 /* truncate_range returns void */
296}; 295};
297 296
298 297
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2d3108a611c..d7d711876b6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = {
411 411
412static int proc_oom_score(struct task_struct *task, char *buffer) 412static int proc_oom_score(struct task_struct *task, char *buffer)
413{ 413{
414 unsigned long totalpages = totalram_pages + total_swap_pages;
414 unsigned long points = 0; 415 unsigned long points = 0;
415 416
416 read_lock(&tasklist_lock); 417 read_lock(&tasklist_lock);
417 if (pid_alive(task)) 418 if (pid_alive(task))
418 points = oom_badness(task, NULL, NULL, 419 points = oom_badness(task, NULL, NULL, totalpages) *
419 totalram_pages + total_swap_pages); 420 1000 / totalpages;
420 read_unlock(&tasklist_lock); 421 read_unlock(&tasklist_lock);
421 return sprintf(buffer, "%lu\n", points); 422 return sprintf(buffer, "%lu\n", points);
422} 423}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1030a716d155..7faaf2acc570 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -784,7 +784,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
784 784
785 /* find the first VMA at or above 'addr' */ 785 /* find the first VMA at or above 'addr' */
786 vma = find_vma(walk->mm, addr); 786 vma = find_vma(walk->mm, addr);
787 if (pmd_trans_huge_lock(pmd, vma) == 1) { 787 if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
788 for (; addr != end; addr += PAGE_SIZE) { 788 for (; addr != end; addr += PAGE_SIZE) {
789 unsigned long offset; 789 unsigned long offset;
790 790
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e2768f188f55..6f2b45a9b6bc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd)
445#endif /* __HAVE_ARCH_PMD_WRITE */ 445#endif /* __HAVE_ARCH_PMD_WRITE */
446#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 446#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
447 447
448#ifndef pmd_read_atomic
449static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
450{
451 /*
452 * Depend on compiler for an atomic pmd read. NOTE: this is
453 * only going to work, if the pmdval_t isn't larger than
454 * an unsigned long.
455 */
456 return *pmdp;
457}
458#endif
459
448/* 460/*
449 * This function is meant to be used by sites walking pagetables with 461 * This function is meant to be used by sites walking pagetables with
450 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and 462 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
@@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd)
458 * undefined so behaving like if the pmd was none is safe (because it 470 * undefined so behaving like if the pmd was none is safe (because it
459 * can return none anyway). The compiler level barrier() is critically 471 * can return none anyway). The compiler level barrier() is critically
460 * important to compute the two checks atomically on the same pmdval. 472 * important to compute the two checks atomically on the same pmdval.
473 *
474 * For 32bit kernels with a 64bit large pmd_t this automatically takes
475 * care of reading the pmd atomically to avoid SMP race conditions
476 * against pmd_populate() when the mmap_sem is held for reading by the
477 * caller (a special atomic read not done by "gcc" as in the generic
478 * version above, is also needed when THP is disabled because the page
479 * fault can populate the pmd from under us).
461 */ 480 */
462static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) 481static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
463{ 482{
464 /* depend on compiler for an atomic pmd read */ 483 pmd_t pmdval = pmd_read_atomic(pmd);
465 pmd_t pmdval = *pmd;
466 /* 484 /*
467 * The barrier will stabilize the pmdval in a register or on 485 * The barrier will stabilize the pmdval in a register or on
468 * the stack so that it will stop changing under the code. 486 * the stack so that it will stop changing under the code.
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 4cd59b95858f..7185b8f15ced 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -225,6 +225,7 @@ header-y += kd.h
225header-y += kdev_t.h 225header-y += kdev_t.h
226header-y += kernel.h 226header-y += kernel.h
227header-y += kernelcapi.h 227header-y += kernelcapi.h
228header-y += kernel-page-flags.h
228header-y += keyboard.h 229header-y += keyboard.h
229header-y += keyctl.h 230header-y += keyctl.h
230header-y += l2tp.h 231header-y += l2tp.h
diff --git a/include/linux/apple_bl.h b/include/linux/apple_bl.h
index 47bedc0eee69..0a95e730fcea 100644
--- a/include/linux/apple_bl.h
+++ b/include/linux/apple_bl.h
@@ -5,7 +5,7 @@
5#ifndef _LINUX_APPLE_BL_H 5#ifndef _LINUX_APPLE_BL_H
6#define _LINUX_APPLE_BL_H 6#define _LINUX_APPLE_BL_H
7 7
8#ifdef CONFIG_BACKLIGHT_APPLE 8#if defined(CONFIG_BACKLIGHT_APPLE) || defined(CONFIG_BACKLIGHT_APPLE_MODULE)
9 9
10extern int apple_bl_register(void); 10extern int apple_bl_register(void);
11extern void apple_bl_unregister(void); 11extern void apple_bl_unregister(void);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1a0cd270bb7a..324fe08ea3b1 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -135,9 +135,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
135extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, 135extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
136 int flags); 136 int flags);
137 137
138extern void *alloc_bootmem_section(unsigned long size,
139 unsigned long section_nr);
140
141#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP 138#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
142extern void *alloc_remap(int nid, unsigned long size); 139extern void *alloc_remap(int nid, unsigned long size);
143#else 140#else
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 72961c39576a..aaac4bba6f5c 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -30,6 +30,13 @@ struct pt_regs;
30#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) 30#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
31#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) 31#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
32 32
33/*
34 * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
35 * expression but avoids the generation of any code, even if that expression
36 * has side-effects.
37 */
38#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
39
33/** 40/**
34 * BUILD_BUG_ON - break compile if a condition is true. 41 * BUILD_BUG_ON - break compile if a condition is true.
35 * @condition: the condition which the compiler should know is false. 42 * @condition: the condition which the compiler should know is false.
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7f2d60..e988037abd2a 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,8 @@
1#ifndef _LINUX_COMPACTION_H 1#ifndef _LINUX_COMPACTION_H
2#define _LINUX_COMPACTION_H 2#define _LINUX_COMPACTION_H
3 3
4#include <linux/node.h>
5
4/* Return values for compact_zone() and try_to_compact_pages() */ 6/* Return values for compact_zone() and try_to_compact_pages() */
5/* compaction didn't start as it was not possible or direct reclaim was more suitable */ 7/* compaction didn't start as it was not possible or direct reclaim was more suitable */
6#define COMPACT_SKIPPED 0 8#define COMPACT_SKIPPED 0
@@ -11,6 +13,23 @@
11/* The full zone was compacted */ 13/* The full zone was compacted */
12#define COMPACT_COMPLETE 3 14#define COMPACT_COMPLETE 3
13 15
16/*
17 * compaction supports three modes
18 *
19 * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
20 * MIGRATE_MOVABLE pageblocks as migration sources and targets.
21 * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
22 * MIGRATE_MOVABLE pageblocks as migration sources.
23 * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
24 * targets and converts them to MIGRATE_MOVABLE if possible
25 * COMPACT_SYNC uses synchronous migration and scans all pageblocks
26 */
27enum compact_mode {
28 COMPACT_ASYNC_MOVABLE,
29 COMPACT_ASYNC_UNMOVABLE,
30 COMPACT_SYNC,
31};
32
14#ifdef CONFIG_COMPACTION 33#ifdef CONFIG_COMPACTION
15extern int sysctl_compact_memory; 34extern int sysctl_compact_memory;
16extern int sysctl_compaction_handler(struct ctl_table *table, int write, 35extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/include/linux/fb.h b/include/linux/fb.h
index d31cb682e173..a3229d7ab9f2 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -554,6 +554,10 @@ struct fb_cursor_user {
554#define FB_EVENT_FB_UNBIND 0x0E 554#define FB_EVENT_FB_UNBIND 0x0E
555/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */ 555/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */
556#define FB_EVENT_REMAP_ALL_CONSOLE 0x0F 556#define FB_EVENT_REMAP_ALL_CONSOLE 0x0F
557/* A hardware display blank early change occurred */
558#define FB_EARLY_EVENT_BLANK 0x10
559/* A hardware display blank revert early change occurred */
560#define FB_R_EARLY_EVENT_BLANK 0x11
557 561
558struct fb_event { 562struct fb_event {
559 struct fb_info *info; 563 struct fb_info *info;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc1a9630948..038076b27ea4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1681,7 +1681,6 @@ struct inode_operations {
1681 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1681 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1682 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1682 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1683 int (*removexattr) (struct dentry *, const char *); 1683 int (*removexattr) (struct dentry *, const char *);
1684 void (*truncate_range)(struct inode *, loff_t, loff_t);
1685 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1684 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1686 u64 len); 1685 u64 len);
1687} ____cacheline_aligned; 1686} ____cacheline_aligned;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c8af7a2efb52..4c59b1131187 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -59,6 +59,8 @@ extern pmd_t *page_check_address_pmd(struct page *page,
59#define HPAGE_PMD_MASK HPAGE_MASK 59#define HPAGE_PMD_MASK HPAGE_MASK
60#define HPAGE_PMD_SIZE HPAGE_SIZE 60#define HPAGE_PMD_SIZE HPAGE_SIZE
61 61
62extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
63
62#define transparent_hugepage_enabled(__vma) \ 64#define transparent_hugepage_enabled(__vma) \
63 ((transparent_hugepage_flags & \ 65 ((transparent_hugepage_flags & \
64 (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ 66 (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 387571959dd9..6883e197acb9 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -36,6 +36,7 @@ const char *kallsyms_lookup(unsigned long addr,
36 36
37/* Look up a kernel symbol and return it in a text buffer. */ 37/* Look up a kernel symbol and return it in a text buffer. */
38extern int sprint_symbol(char *buffer, unsigned long address); 38extern int sprint_symbol(char *buffer, unsigned long address);
39extern int sprint_symbol_no_offset(char *buffer, unsigned long address);
39extern int sprint_backtrace(char *buffer, unsigned long address); 40extern int sprint_backtrace(char *buffer, unsigned long address);
40 41
41/* Look up a kernel symbol and print it to the kernel messages. */ 42/* Look up a kernel symbol and print it to the kernel messages. */
@@ -80,6 +81,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr)
80 return 0; 81 return 0;
81} 82}
82 83
84static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr)
85{
86 *buffer = '\0';
87 return 0;
88}
89
83static inline int sprint_backtrace(char *buffer, unsigned long addr) 90static inline int sprint_backtrace(char *buffer, unsigned long addr)
84{ 91{
85 *buffer = '\0'; 92 *buffer = '\0';
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index 26a65711676f..a1bdf6966357 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -32,6 +32,8 @@
32#define KPF_KSM 21 32#define KPF_KSM 21
33#define KPF_THP 22 33#define KPF_THP 22
34 34
35#ifdef __KERNEL__
36
35/* kernel hacking assistances 37/* kernel hacking assistances
36 * WARNING: subject to change, never rely on them! 38 * WARNING: subject to change, never rely on them!
37 */ 39 */
@@ -44,4 +46,6 @@
44#define KPF_ARCH 38 46#define KPF_ARCH 38
45#define KPF_UNCACHED 39 47#define KPF_UNCACHED 39
46 48
49#endif /* __KERNEL__ */
50
47#endif /* LINUX_KERNEL_PAGE_FLAGS_H */ 51#endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 8877123f2d6e..e00c3b0ebc6b 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -40,6 +40,16 @@ struct lcd_ops {
40 /* Get the LCD panel power status (0: full on, 1..3: controller 40 /* Get the LCD panel power status (0: full on, 1..3: controller
41 power on, flat panel power off, 4: full off), see FB_BLANK_XXX */ 41 power on, flat panel power off, 4: full off), see FB_BLANK_XXX */
42 int (*get_power)(struct lcd_device *); 42 int (*get_power)(struct lcd_device *);
43 /*
44 * Enable or disable power to the LCD(0: on; 4: off, see FB_BLANK_XXX)
45 * and this callback would be called prior to fb driver's callback.
46 *
47 * P.S. note that if early_set_power is not NULL then early fb notifier
48 * would be registered.
49 */
50 int (*early_set_power)(struct lcd_device *, int power);
51 /* revert the effects of the early blank event. */
52 int (*r_early_set_power)(struct lcd_device *, int power);
43 /* Enable or disable power to the LCD (0: on; 4: off, see FB_BLANK_XXX) */ 53 /* Enable or disable power to the LCD (0: on; 4: off, see FB_BLANK_XXX) */
44 int (*set_power)(struct lcd_device *, int power); 54 int (*set_power)(struct lcd_device *, int power);
45 /* Get the current contrast setting (0-max_contrast) */ 55 /* Get the current contrast setting (0-max_contrast) */
diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h
index eeae6e742471..4b133479d6ea 100644
--- a/include/linux/led-lm3530.h
+++ b/include/linux/led-lm3530.h
@@ -92,7 +92,7 @@ struct lm3530_pwm_data {
92 * @als2_resistor_sel: internal resistance from ALS2 input to ground 92 * @als2_resistor_sel: internal resistance from ALS2 input to ground
93 * @als_vmin: als input voltage calibrated for max brightness in mV 93 * @als_vmin: als input voltage calibrated for max brightness in mV
94 * @als_vmax: als input voltage calibrated for min brightness in mV 94 * @als_vmax: als input voltage calibrated for min brightness in mV
95 * @brt_val: brightness value (0-255) 95 * @brt_val: brightness value (0-127)
96 * @pwm_data: PWM control functions (only valid when the mode is PWM) 96 * @pwm_data: PWM control functions (only valid when the mode is PWM)
97 */ 97 */
98struct lm3530_platform_data { 98struct lm3530_platform_data {
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 5884def15a24..39eee41d8c6f 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -73,6 +73,8 @@ struct led_classdev {
73 struct led_trigger *trigger; 73 struct led_trigger *trigger;
74 struct list_head trig_list; 74 struct list_head trig_list;
75 void *trigger_data; 75 void *trigger_data;
76 /* true if activated - deactivate routine uses it to do cleanup */
77 bool activated;
76#endif 78#endif
77}; 79};
78 80
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f94efd2f6c27..83e7ba90d6e5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -63,12 +63,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
63 gfp_t gfp_mask); 63 gfp_t gfp_mask);
64 64
65struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); 65struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
66struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *, 66struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
67 enum lru_list);
68void mem_cgroup_lru_del_list(struct page *, enum lru_list);
69void mem_cgroup_lru_del(struct page *);
70struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
71 enum lru_list, enum lru_list);
72 67
73/* For coalescing uncharge for reducing memcg' overhead*/ 68/* For coalescing uncharge for reducing memcg' overhead*/
74extern void mem_cgroup_uncharge_start(void); 69extern void mem_cgroup_uncharge_start(void);
@@ -79,6 +74,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page);
79 74
80extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, 75extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
81 int order); 76 int order);
77bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
78 struct mem_cgroup *memcg);
82int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); 79int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
83 80
84extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); 81extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
@@ -92,10 +89,13 @@ static inline
92int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) 89int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
93{ 90{
94 struct mem_cgroup *memcg; 91 struct mem_cgroup *memcg;
92 int match;
93
95 rcu_read_lock(); 94 rcu_read_lock();
96 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); 95 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
96 match = __mem_cgroup_same_or_subtree(cgroup, memcg);
97 rcu_read_unlock(); 97 rcu_read_unlock();
98 return cgroup == memcg; 98 return match;
99} 99}
100 100
101extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); 101extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
@@ -114,17 +114,11 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
114/* 114/*
115 * For memory reclaim. 115 * For memory reclaim.
116 */ 116 */
117int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, 117int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
118 struct zone *zone); 118int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec);
119int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
120 struct zone *zone);
121int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); 119int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
122unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, 120unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
123 int nid, int zid, unsigned int lrumask); 121void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
124struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
125 struct zone *zone);
126struct zone_reclaim_stat*
127mem_cgroup_get_reclaim_stat_from_page(struct page *page);
128extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 122extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
129 struct task_struct *p); 123 struct task_struct *p);
130extern void mem_cgroup_replace_page_cache(struct page *oldpage, 124extern void mem_cgroup_replace_page_cache(struct page *oldpage,
@@ -251,25 +245,8 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
251 return &zone->lruvec; 245 return &zone->lruvec;
252} 246}
253 247
254static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, 248static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
255 struct page *page, 249 struct zone *zone)
256 enum lru_list lru)
257{
258 return &zone->lruvec;
259}
260
261static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
262{
263}
264
265static inline void mem_cgroup_lru_del(struct page *page)
266{
267}
268
269static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
270 struct page *page,
271 enum lru_list from,
272 enum lru_list to)
273{ 250{
274 return &zone->lruvec; 251 return &zone->lruvec;
275} 252}
@@ -333,35 +310,27 @@ static inline bool mem_cgroup_disabled(void)
333} 310}
334 311
335static inline int 312static inline int
336mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) 313mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
337{ 314{
338 return 1; 315 return 1;
339} 316}
340 317
341static inline int 318static inline int
342mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) 319mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
343{ 320{
344 return 1; 321 return 1;
345} 322}
346 323
347static inline unsigned long 324static inline unsigned long
348mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, 325mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
349 unsigned int lru_mask)
350{ 326{
351 return 0; 327 return 0;
352} 328}
353 329
354 330static inline void
355static inline struct zone_reclaim_stat* 331mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
356mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) 332 int increment)
357{
358 return NULL;
359}
360
361static inline struct zone_reclaim_stat*
362mem_cgroup_get_reclaim_stat_from_page(struct page *page)
363{ 333{
364 return NULL;
365} 334}
366 335
367static inline void 336static inline void
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7c727a90d70d..4aa42732e47f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -225,8 +225,8 @@ static inline void check_highest_zone(enum zone_type k)
225 policy_zone = k; 225 policy_zone = k;
226} 226}
227 227
228int do_migrate_pages(struct mm_struct *mm, 228int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
229 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); 229 const nodemask_t *to, int flags);
230 230
231 231
232#ifdef CONFIG_TMPFS 232#ifdef CONFIG_TMPFS
@@ -354,9 +354,8 @@ static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
354 return false; 354 return false;
355} 355}
356 356
357static inline int do_migrate_pages(struct mm_struct *mm, 357static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
358 const nodemask_t *from_nodes, 358 const nodemask_t *to, int flags)
359 const nodemask_t *to_nodes, int flags)
360{ 359{
361 return 0; 360 return 0;
362} 361}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d5c37f24c63..ce26716238c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -321,6 +321,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
321static inline void compound_lock(struct page *page) 321static inline void compound_lock(struct page *page)
322{ 322{
323#ifdef CONFIG_TRANSPARENT_HUGEPAGE 323#ifdef CONFIG_TRANSPARENT_HUGEPAGE
324 VM_BUG_ON(PageSlab(page));
324 bit_spin_lock(PG_compound_lock, &page->flags); 325 bit_spin_lock(PG_compound_lock, &page->flags);
325#endif 326#endif
326} 327}
@@ -328,6 +329,7 @@ static inline void compound_lock(struct page *page)
328static inline void compound_unlock(struct page *page) 329static inline void compound_unlock(struct page *page)
329{ 330{
330#ifdef CONFIG_TRANSPARENT_HUGEPAGE 331#ifdef CONFIG_TRANSPARENT_HUGEPAGE
332 VM_BUG_ON(PageSlab(page));
331 bit_spin_unlock(PG_compound_lock, &page->flags); 333 bit_spin_unlock(PG_compound_lock, &page->flags);
332#endif 334#endif
333} 335}
@@ -871,8 +873,6 @@ extern void pagefault_out_of_memory(void);
871extern void show_free_areas(unsigned int flags); 873extern void show_free_areas(unsigned int flags);
872extern bool skip_free_areas_node(unsigned int flags, int nid); 874extern bool skip_free_areas_node(unsigned int flags, int nid);
873 875
874int shmem_lock(struct file *file, int lock, struct user_struct *user);
875struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
876int shmem_zero_setup(struct vm_area_struct *); 876int shmem_zero_setup(struct vm_area_struct *);
877 877
878extern int can_do_mlock(void); 878extern int can_do_mlock(void);
@@ -951,11 +951,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
951extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); 951extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
952extern void truncate_setsize(struct inode *inode, loff_t newsize); 952extern void truncate_setsize(struct inode *inode, loff_t newsize);
953extern int vmtruncate(struct inode *inode, loff_t offset); 953extern int vmtruncate(struct inode *inode, loff_t offset);
954extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
955void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); 954void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
956int truncate_inode_page(struct address_space *mapping, struct page *page); 955int truncate_inode_page(struct address_space *mapping, struct page *page);
957int generic_error_remove_page(struct address_space *mapping, struct page *page); 956int generic_error_remove_page(struct address_space *mapping, struct page *page);
958
959int invalidate_inode_page(struct page *page); 957int invalidate_inode_page(struct page *page);
960 958
961#ifdef CONFIG_MMU 959#ifdef CONFIG_MMU
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 227fd3e9a9c9..1397ccf81e91 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -21,22 +21,22 @@ static inline int page_is_file_cache(struct page *page)
21 return !PageSwapBacked(page); 21 return !PageSwapBacked(page);
22} 22}
23 23
24static inline void 24static __always_inline void add_page_to_lru_list(struct page *page,
25add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru) 25 struct lruvec *lruvec, enum lru_list lru)
26{ 26{
27 struct lruvec *lruvec; 27 int nr_pages = hpage_nr_pages(page);
28 28 mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
29 lruvec = mem_cgroup_lru_add_list(zone, page, lru);
30 list_add(&page->lru, &lruvec->lists[lru]); 29 list_add(&page->lru, &lruvec->lists[lru]);
31 __mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page)); 30 __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
32} 31}
33 32
34static inline void 33static __always_inline void del_page_from_lru_list(struct page *page,
35del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru) 34 struct lruvec *lruvec, enum lru_list lru)
36{ 35{
37 mem_cgroup_lru_del_list(page, lru); 36 int nr_pages = hpage_nr_pages(page);
37 mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
38 list_del(&page->lru); 38 list_del(&page->lru);
39 __mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page)); 39 __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages);
40} 40}
41 41
42/** 42/**
@@ -61,7 +61,7 @@ static inline enum lru_list page_lru_base_type(struct page *page)
61 * Returns the LRU list a page was on, as an index into the array of LRU 61 * Returns the LRU list a page was on, as an index into the array of LRU
62 * lists; and clears its Unevictable or Active flags, ready for freeing. 62 * lists; and clears its Unevictable or Active flags, ready for freeing.
63 */ 63 */
64static inline enum lru_list page_off_lru(struct page *page) 64static __always_inline enum lru_list page_off_lru(struct page *page)
65{ 65{
66 enum lru_list lru; 66 enum lru_list lru;
67 67
@@ -85,7 +85,7 @@ static inline enum lru_list page_off_lru(struct page *page)
85 * Returns the LRU list a page should be on, as an index 85 * Returns the LRU list a page should be on, as an index
86 * into the array of LRU lists. 86 * into the array of LRU lists.
87 */ 87 */
88static inline enum lru_list page_lru(struct page *page) 88static __always_inline enum lru_list page_lru(struct page *page)
89{ 89{
90 enum lru_list lru; 90 enum lru_list lru;
91 91
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 26574c726121..dad95bdd06d7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -345,17 +345,6 @@ struct mm_struct {
345 /* Architecture-specific MM context */ 345 /* Architecture-specific MM context */
346 mm_context_t context; 346 mm_context_t context;
347 347
348 /* Swap token stuff */
349 /*
350 * Last value of global fault stamp as seen by this process.
351 * In other words, this value gives an indication of how long
352 * it has been since this task got the token.
353 * Look at mm/thrash.c
354 */
355 unsigned int faultstamp;
356 unsigned int token_priority;
357 unsigned int last_interval;
358
359 unsigned long flags; /* Must use atomic bitops to access the bits */ 348 unsigned long flags; /* Must use atomic bitops to access the bits */
360 349
361 struct core_state *core_state; /* coredumping support */ 350 struct core_state *core_state; /* coredumping support */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index c04ecfe03f7f..580bd587d916 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -4,7 +4,7 @@
4#ifdef CONFIG_DEBUG_VM 4#ifdef CONFIG_DEBUG_VM
5#define VM_BUG_ON(cond) BUG_ON(cond) 5#define VM_BUG_ON(cond) BUG_ON(cond)
6#else 6#else
7#define VM_BUG_ON(cond) do { (void)(cond); } while (0) 7#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
8#endif 8#endif
9 9
10#ifdef CONFIG_DEBUG_VIRTUAL 10#ifdef CONFIG_DEBUG_VIRTUAL
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4871e31ae277..2427706f78b4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -185,8 +185,25 @@ static inline int is_unevictable_lru(enum lru_list lru)
185 return (lru == LRU_UNEVICTABLE); 185 return (lru == LRU_UNEVICTABLE);
186} 186}
187 187
188struct zone_reclaim_stat {
189 /*
190 * The pageout code in vmscan.c keeps track of how many of the
191 * mem/swap backed and file backed pages are refeferenced.
192 * The higher the rotated/scanned ratio, the more valuable
193 * that cache is.
194 *
195 * The anon LRU stats live in [0], file LRU stats in [1]
196 */
197 unsigned long recent_rotated[2];
198 unsigned long recent_scanned[2];
199};
200
188struct lruvec { 201struct lruvec {
189 struct list_head lists[NR_LRU_LISTS]; 202 struct list_head lists[NR_LRU_LISTS];
203 struct zone_reclaim_stat reclaim_stat;
204#ifdef CONFIG_CGROUP_MEM_RES_CTLR
205 struct zone *zone;
206#endif
190}; 207};
191 208
192/* Mask used at gathering information at once (see memcontrol.c) */ 209/* Mask used at gathering information at once (see memcontrol.c) */
@@ -195,16 +212,12 @@ struct lruvec {
195#define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON) 212#define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON)
196#define LRU_ALL ((1 << NR_LRU_LISTS) - 1) 213#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
197 214
198/* Isolate inactive pages */
199#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1)
200/* Isolate active pages */
201#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2)
202/* Isolate clean file */ 215/* Isolate clean file */
203#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) 216#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1)
204/* Isolate unmapped file */ 217/* Isolate unmapped file */
205#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) 218#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
206/* Isolate for asynchronous migration */ 219/* Isolate for asynchronous migration */
207#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) 220#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
208 221
209/* LRU Isolation modes. */ 222/* LRU Isolation modes. */
210typedef unsigned __bitwise__ isolate_mode_t; 223typedef unsigned __bitwise__ isolate_mode_t;
@@ -313,19 +326,6 @@ enum zone_type {
313#error ZONES_SHIFT -- too many zones configured adjust calculation 326#error ZONES_SHIFT -- too many zones configured adjust calculation
314#endif 327#endif
315 328
316struct zone_reclaim_stat {
317 /*
318 * The pageout code in vmscan.c keeps track of how many of the
319 * mem/swap backed and file backed pages are refeferenced.
320 * The higher the rotated/scanned ratio, the more valuable
321 * that cache is.
322 *
323 * The anon LRU stats live in [0], file LRU stats in [1]
324 */
325 unsigned long recent_rotated[2];
326 unsigned long recent_scanned[2];
327};
328
329struct zone { 329struct zone {
330 /* Fields commonly accessed by the page allocator */ 330 /* Fields commonly accessed by the page allocator */
331 331
@@ -407,8 +407,6 @@ struct zone {
407 spinlock_t lru_lock; 407 spinlock_t lru_lock;
408 struct lruvec lruvec; 408 struct lruvec lruvec;
409 409
410 struct zone_reclaim_stat reclaim_stat;
411
412 unsigned long pages_scanned; /* since last reclaim */ 410 unsigned long pages_scanned; /* since last reclaim */
413 unsigned long flags; /* zone flags, see below */ 411 unsigned long flags; /* zone flags, see below */
414 412
@@ -734,6 +732,17 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
734 unsigned long size, 732 unsigned long size,
735 enum memmap_context context); 733 enum memmap_context context);
736 734
735extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);
736
737static inline struct zone *lruvec_zone(struct lruvec *lruvec)
738{
739#ifdef CONFIG_CGROUP_MEM_RES_CTLR
740 return lruvec->zone;
741#else
742 return container_of(lruvec, struct zone, lruvec);
743#endif
744}
745
737#ifdef CONFIG_HAVE_MEMORY_PRESENT 746#ifdef CONFIG_HAVE_MEMORY_PRESENT
738void memory_present(int nid, unsigned long start, unsigned long end); 747void memory_present(int nid, unsigned long start, unsigned long end);
739#else 748#else
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 3d7647536b03..e4c29bc72e70 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,8 +43,9 @@ enum oom_constraint {
43extern void compare_swap_oom_score_adj(int old_val, int new_val); 43extern void compare_swap_oom_score_adj(int old_val, int new_val);
44extern int test_set_oom_score_adj(int new_val); 44extern int test_set_oom_score_adj(int new_val);
45 45
46extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, 46extern unsigned long oom_badness(struct task_struct *p,
47 const nodemask_t *nodemask, unsigned long totalpages); 47 struct mem_cgroup *memcg, const nodemask_t *nodemask,
48 unsigned long totalpages);
48extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 49extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
49extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 50extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
50 51
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index efa26b4da8d2..7cfad3bbb0cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -460,11 +460,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
460 */ 460 */
461static inline int fault_in_multipages_writeable(char __user *uaddr, int size) 461static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
462{ 462{
463 int ret; 463 int ret = 0;
464 char __user *end = uaddr + size - 1; 464 char __user *end = uaddr + size - 1;
465 465
466 if (unlikely(size == 0)) 466 if (unlikely(size == 0))
467 return 0; 467 return ret;
468 468
469 /* 469 /*
470 * Writing zeroes into userspace here is OK, because we know that if 470 * Writing zeroes into userspace here is OK, because we know that if
@@ -489,11 +489,11 @@ static inline int fault_in_multipages_readable(const char __user *uaddr,
489 int size) 489 int size)
490{ 490{
491 volatile char c; 491 volatile char c;
492 int ret; 492 int ret = 0;
493 const char __user *end = uaddr + size - 1; 493 const char __user *end = uaddr + size - 1;
494 494
495 if (unlikely(size == 0)) 495 if (unlikely(size == 0))
496 return 0; 496 return ret;
497 497
498 while (uaddr <= end) { 498 while (uaddr <= end) {
499 ret = __get_user(c, uaddr); 499 ret = __get_user(c, uaddr);
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index fb201896a8b0..7d7fbe2ef782 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -119,7 +119,7 @@ int __must_check res_counter_charge_locked(struct res_counter *counter,
119 unsigned long val, bool force); 119 unsigned long val, bool force);
120int __must_check res_counter_charge(struct res_counter *counter, 120int __must_check res_counter_charge(struct res_counter *counter,
121 unsigned long val, struct res_counter **limit_fail_at); 121 unsigned long val, struct res_counter **limit_fail_at);
122int __must_check res_counter_charge_nofail(struct res_counter *counter, 122int res_counter_charge_nofail(struct res_counter *counter,
123 unsigned long val, struct res_counter **limit_fail_at); 123 unsigned long val, struct res_counter **limit_fail_at);
124 124
125/* 125/*
@@ -135,6 +135,9 @@ int __must_check res_counter_charge_nofail(struct res_counter *counter,
135void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); 135void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
136void res_counter_uncharge(struct res_counter *counter, unsigned long val); 136void res_counter_uncharge(struct res_counter *counter, unsigned long val);
137 137
138void res_counter_uncharge_until(struct res_counter *counter,
139 struct res_counter *top,
140 unsigned long val);
138/** 141/**
139 * res_counter_margin - calculate chargeable space of a counter 142 * res_counter_margin - calculate chargeable space of a counter
140 * @cnt: the counter 143 * @cnt: the counter
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fd07c4542cee..3fce545df394 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -173,8 +173,6 @@ enum ttu_flags {
173}; 173};
174#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) 174#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
175 175
176bool is_vma_temporary_stack(struct vm_area_struct *vma);
177
178int try_to_unmap(struct page *, enum ttu_flags flags); 176int try_to_unmap(struct page *, enum ttu_flags flags);
179int try_to_unmap_one(struct page *, struct vm_area_struct *, 177int try_to_unmap_one(struct page *, struct vm_area_struct *,
180 unsigned long address, enum ttu_flags flags); 178 unsigned long address, enum ttu_flags flags);
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index fcabfb4873c8..f071b3922c67 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -91,6 +91,9 @@ struct rtc_pll_info {
91#define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ 91#define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */
92#define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ 92#define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */
93 93
94#define RTC_VL_READ _IOR('p', 0x13, int) /* Voltage low detector */
95#define RTC_VL_CLR _IO('p', 0x14) /* Clear voltage low information */
96
94/* interrupt flags */ 97/* interrupt flags */
95#define RTC_IRQF 0x80 /* Any of the following is active */ 98#define RTC_IRQF 0x80 /* Any of the following is active */
96#define RTC_PF 0x40 /* Periodic interrupt */ 99#define RTC_PF 0x40 /* Periodic interrupt */
diff --git a/include/linux/rtc/ds1307.h b/include/linux/rtc/ds1307.h
new file mode 100644
index 000000000000..291b1c490367
--- /dev/null
+++ b/include/linux/rtc/ds1307.h
@@ -0,0 +1,22 @@
1/*
2 * ds1307.h - platform_data for the ds1307 (and variants) rtc driver
3 * (C) Copyright 2012 by Wolfram Sang, Pengutronix e.K.
4 * same license as the driver
5 */
6
7#ifndef _LINUX_DS1307_H
8#define _LINUX_DS1307_H
9
10#include <linux/types.h>
11
12#define DS1307_TRICKLE_CHARGER_250_OHM 0x01
13#define DS1307_TRICKLE_CHARGER_2K_OHM 0x02
14#define DS1307_TRICKLE_CHARGER_4K_OHM 0x03
15#define DS1307_TRICKLE_CHARGER_NO_DIODE 0x04
16#define DS1307_TRICKLE_CHARGER_DIODE 0x08
17
18struct ds1307_platform_data {
19 u8 trickle_charger_setup;
20};
21
22#endif /* _LINUX_DS1307_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..b6661933e252 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -221,8 +221,8 @@ extern unsigned int nr_free_pagecache_pages(void);
221/* linux/mm/swap.c */ 221/* linux/mm/swap.c */
222extern void __lru_cache_add(struct page *, enum lru_list lru); 222extern void __lru_cache_add(struct page *, enum lru_list lru);
223extern void lru_cache_add_lru(struct page *, enum lru_list lru); 223extern void lru_cache_add_lru(struct page *, enum lru_list lru);
224extern void lru_add_page_tail(struct zone* zone, 224extern void lru_add_page_tail(struct page *page, struct page *page_tail,
225 struct page *page, struct page *page_tail); 225 struct lruvec *lruvec);
226extern void activate_page(struct page *); 226extern void activate_page(struct page *);
227extern void mark_page_accessed(struct page *); 227extern void mark_page_accessed(struct page *);
228extern void lru_add_drain(void); 228extern void lru_add_drain(void);
@@ -251,7 +251,7 @@ static inline void lru_cache_add_file(struct page *page)
251/* linux/mm/vmscan.c */ 251/* linux/mm/vmscan.c */
252extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 252extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
253 gfp_t gfp_mask, nodemask_t *mask); 253 gfp_t gfp_mask, nodemask_t *mask);
254extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); 254extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
255extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 255extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
256 gfp_t gfp_mask, bool noswap); 256 gfp_t gfp_mask, bool noswap);
257extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, 257extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
@@ -351,31 +351,14 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **);
351extern unsigned int count_swap_pages(int, int); 351extern unsigned int count_swap_pages(int, int);
352extern sector_t map_swap_page(struct page *, struct block_device **); 352extern sector_t map_swap_page(struct page *, struct block_device **);
353extern sector_t swapdev_block(int, pgoff_t); 353extern sector_t swapdev_block(int, pgoff_t);
354extern int page_swapcount(struct page *);
354extern int reuse_swap_page(struct page *); 355extern int reuse_swap_page(struct page *);
355extern int try_to_free_swap(struct page *); 356extern int try_to_free_swap(struct page *);
356struct backing_dev_info; 357struct backing_dev_info;
357 358
358/* linux/mm/thrash.c */
359extern struct mm_struct *swap_token_mm;
360extern void grab_swap_token(struct mm_struct *);
361extern void __put_swap_token(struct mm_struct *);
362extern void disable_swap_token(struct mem_cgroup *memcg);
363
364static inline int has_swap_token(struct mm_struct *mm)
365{
366 return (mm == swap_token_mm);
367}
368
369static inline void put_swap_token(struct mm_struct *mm)
370{
371 if (has_swap_token(mm))
372 __put_swap_token(mm);
373}
374
375#ifdef CONFIG_CGROUP_MEM_RES_CTLR 359#ifdef CONFIG_CGROUP_MEM_RES_CTLR
376extern void 360extern void
377mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); 361mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
378extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
379#else 362#else
380static inline void 363static inline void
381mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) 364mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
@@ -462,6 +445,11 @@ static inline void delete_from_swap_cache(struct page *page)
462{ 445{
463} 446}
464 447
448static inline int page_swapcount(struct page *page)
449{
450 return 0;
451}
452
465#define reuse_swap_page(page) (page_mapcount(page) == 1) 453#define reuse_swap_page(page) (page_mapcount(page) == 1)
466 454
467static inline int try_to_free_swap(struct page *page) 455static inline int try_to_free_swap(struct page *page)
@@ -476,37 +464,11 @@ static inline swp_entry_t get_swap_page(void)
476 return entry; 464 return entry;
477} 465}
478 466
479/* linux/mm/thrash.c */
480static inline void put_swap_token(struct mm_struct *mm)
481{
482}
483
484static inline void grab_swap_token(struct mm_struct *mm)
485{
486}
487
488static inline int has_swap_token(struct mm_struct *mm)
489{
490 return 0;
491}
492
493static inline void disable_swap_token(struct mem_cgroup *memcg)
494{
495}
496
497static inline void 467static inline void
498mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) 468mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
499{ 469{
500} 470}
501 471
502#ifdef CONFIG_CGROUP_MEM_RES_CTLR
503static inline int
504mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
505{
506 return 0;
507}
508#endif
509
510#endif /* CONFIG_SWAP */ 472#endif /* CONFIG_SWAP */
511#endif /* __KERNEL__*/ 473#endif /* __KERNEL__*/
512#endif /* _LINUX_SWAP_H */ 474#endif /* _LINUX_SWAP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index d89f0582b6b6..4a4521699563 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -46,6 +46,7 @@
46#include <linux/list_nulls.h> 46#include <linux/list_nulls.h>
47#include <linux/timer.h> 47#include <linux/timer.h>
48#include <linux/cache.h> 48#include <linux/cache.h>
49#include <linux/bitops.h>
49#include <linux/lockdep.h> 50#include <linux/lockdep.h>
50#include <linux/netdevice.h> 51#include <linux/netdevice.h>
51#include <linux/skbuff.h> /* struct sk_buff */ 52#include <linux/skbuff.h> /* struct sk_buff */
@@ -921,12 +922,23 @@ struct proto {
921#endif 922#endif
922}; 923};
923 924
925/*
926 * Bits in struct cg_proto.flags
927 */
928enum cg_proto_flags {
929 /* Currently active and new sockets should be assigned to cgroups */
930 MEMCG_SOCK_ACTIVE,
931 /* It was ever activated; we must disarm static keys on destruction */
932 MEMCG_SOCK_ACTIVATED,
933};
934
924struct cg_proto { 935struct cg_proto {
925 void (*enter_memory_pressure)(struct sock *sk); 936 void (*enter_memory_pressure)(struct sock *sk);
926 struct res_counter *memory_allocated; /* Current allocated memory. */ 937 struct res_counter *memory_allocated; /* Current allocated memory. */
927 struct percpu_counter *sockets_allocated; /* Current number of sockets. */ 938 struct percpu_counter *sockets_allocated; /* Current number of sockets. */
928 int *memory_pressure; 939 int *memory_pressure;
929 long *sysctl_mem; 940 long *sysctl_mem;
941 unsigned long flags;
930 /* 942 /*
931 * memcg field is used to find which memcg we belong directly 943 * memcg field is used to find which memcg we belong directly
932 * Each memcg struct can hold more than one cg_proto, so container_of 944 * Each memcg struct can hold more than one cg_proto, so container_of
@@ -942,6 +954,16 @@ struct cg_proto {
942extern int proto_register(struct proto *prot, int alloc_slab); 954extern int proto_register(struct proto *prot, int alloc_slab);
943extern void proto_unregister(struct proto *prot); 955extern void proto_unregister(struct proto *prot);
944 956
957static inline bool memcg_proto_active(struct cg_proto *cg_proto)
958{
959 return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
960}
961
962static inline bool memcg_proto_activated(struct cg_proto *cg_proto)
963{
964 return test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags);
965}
966
945#ifdef SOCK_REFCNT_DEBUG 967#ifdef SOCK_REFCNT_DEBUG
946static inline void sk_refcnt_debug_inc(struct sock *sk) 968static inline void sk_refcnt_debug_inc(struct sock *sk)
947{ 969{
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f64560e204bc..bab3b87e4064 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -13,7 +13,7 @@
13#define RECLAIM_WB_ANON 0x0001u 13#define RECLAIM_WB_ANON 0x0001u
14#define RECLAIM_WB_FILE 0x0002u 14#define RECLAIM_WB_FILE 0x0002u
15#define RECLAIM_WB_MIXED 0x0010u 15#define RECLAIM_WB_MIXED 0x0010u
16#define RECLAIM_WB_SYNC 0x0004u 16#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */
17#define RECLAIM_WB_ASYNC 0x0008u 17#define RECLAIM_WB_ASYNC 0x0008u
18 18
19#define show_reclaim_flags(flags) \ 19#define show_reclaim_flags(flags) \
@@ -25,15 +25,15 @@
25 {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ 25 {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \
26 ) : "RECLAIM_WB_NONE" 26 ) : "RECLAIM_WB_NONE"
27 27
28#define trace_reclaim_flags(page, sync) ( \ 28#define trace_reclaim_flags(page) ( \
29 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ 29 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
30 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 30 (RECLAIM_WB_ASYNC) \
31 ) 31 )
32 32
33#define trace_shrink_flags(file, sync) ( \ 33#define trace_shrink_flags(file) \
34 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \ 34 ( \
35 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ 35 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
36 (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 36 (RECLAIM_WB_ASYNC) \
37 ) 37 )
38 38
39TRACE_EVENT(mm_vmscan_kswapd_sleep, 39TRACE_EVENT(mm_vmscan_kswapd_sleep,
@@ -263,22 +263,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
263 unsigned long nr_requested, 263 unsigned long nr_requested,
264 unsigned long nr_scanned, 264 unsigned long nr_scanned,
265 unsigned long nr_taken, 265 unsigned long nr_taken,
266 unsigned long nr_lumpy_taken,
267 unsigned long nr_lumpy_dirty,
268 unsigned long nr_lumpy_failed,
269 isolate_mode_t isolate_mode, 266 isolate_mode_t isolate_mode,
270 int file), 267 int file),
271 268
272 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), 269 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
273 270
274 TP_STRUCT__entry( 271 TP_STRUCT__entry(
275 __field(int, order) 272 __field(int, order)
276 __field(unsigned long, nr_requested) 273 __field(unsigned long, nr_requested)
277 __field(unsigned long, nr_scanned) 274 __field(unsigned long, nr_scanned)
278 __field(unsigned long, nr_taken) 275 __field(unsigned long, nr_taken)
279 __field(unsigned long, nr_lumpy_taken)
280 __field(unsigned long, nr_lumpy_dirty)
281 __field(unsigned long, nr_lumpy_failed)
282 __field(isolate_mode_t, isolate_mode) 276 __field(isolate_mode_t, isolate_mode)
283 __field(int, file) 277 __field(int, file)
284 ), 278 ),
@@ -288,22 +282,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
288 __entry->nr_requested = nr_requested; 282 __entry->nr_requested = nr_requested;
289 __entry->nr_scanned = nr_scanned; 283 __entry->nr_scanned = nr_scanned;
290 __entry->nr_taken = nr_taken; 284 __entry->nr_taken = nr_taken;
291 __entry->nr_lumpy_taken = nr_lumpy_taken;
292 __entry->nr_lumpy_dirty = nr_lumpy_dirty;
293 __entry->nr_lumpy_failed = nr_lumpy_failed;
294 __entry->isolate_mode = isolate_mode; 285 __entry->isolate_mode = isolate_mode;
295 __entry->file = file; 286 __entry->file = file;
296 ), 287 ),
297 288
298 TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", 289 TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
299 __entry->isolate_mode, 290 __entry->isolate_mode,
300 __entry->order, 291 __entry->order,
301 __entry->nr_requested, 292 __entry->nr_requested,
302 __entry->nr_scanned, 293 __entry->nr_scanned,
303 __entry->nr_taken, 294 __entry->nr_taken,
304 __entry->nr_lumpy_taken,
305 __entry->nr_lumpy_dirty,
306 __entry->nr_lumpy_failed,
307 __entry->file) 295 __entry->file)
308); 296);
309 297
@@ -313,13 +301,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
313 unsigned long nr_requested, 301 unsigned long nr_requested,
314 unsigned long nr_scanned, 302 unsigned long nr_scanned,
315 unsigned long nr_taken, 303 unsigned long nr_taken,
316 unsigned long nr_lumpy_taken,
317 unsigned long nr_lumpy_dirty,
318 unsigned long nr_lumpy_failed,
319 isolate_mode_t isolate_mode, 304 isolate_mode_t isolate_mode,
320 int file), 305 int file),
321 306
322 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) 307 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
323 308
324); 309);
325 310
@@ -329,13 +314,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
329 unsigned long nr_requested, 314 unsigned long nr_requested,
330 unsigned long nr_scanned, 315 unsigned long nr_scanned,
331 unsigned long nr_taken, 316 unsigned long nr_taken,
332 unsigned long nr_lumpy_taken,
333 unsigned long nr_lumpy_dirty,
334 unsigned long nr_lumpy_failed,
335 isolate_mode_t isolate_mode, 317 isolate_mode_t isolate_mode,
336 int file), 318 int file),
337 319
338 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) 320 TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
339 321
340); 322);
341 323
@@ -395,88 +377,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
395 show_reclaim_flags(__entry->reclaim_flags)) 377 show_reclaim_flags(__entry->reclaim_flags))
396); 378);
397 379
398TRACE_EVENT(replace_swap_token,
399 TP_PROTO(struct mm_struct *old_mm,
400 struct mm_struct *new_mm),
401
402 TP_ARGS(old_mm, new_mm),
403
404 TP_STRUCT__entry(
405 __field(struct mm_struct*, old_mm)
406 __field(unsigned int, old_prio)
407 __field(struct mm_struct*, new_mm)
408 __field(unsigned int, new_prio)
409 ),
410
411 TP_fast_assign(
412 __entry->old_mm = old_mm;
413 __entry->old_prio = old_mm ? old_mm->token_priority : 0;
414 __entry->new_mm = new_mm;
415 __entry->new_prio = new_mm->token_priority;
416 ),
417
418 TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
419 __entry->old_mm, __entry->old_prio,
420 __entry->new_mm, __entry->new_prio)
421);
422
423DECLARE_EVENT_CLASS(put_swap_token_template,
424 TP_PROTO(struct mm_struct *swap_token_mm),
425
426 TP_ARGS(swap_token_mm),
427
428 TP_STRUCT__entry(
429 __field(struct mm_struct*, swap_token_mm)
430 ),
431
432 TP_fast_assign(
433 __entry->swap_token_mm = swap_token_mm;
434 ),
435
436 TP_printk("token_mm=%p", __entry->swap_token_mm)
437);
438
439DEFINE_EVENT(put_swap_token_template, put_swap_token,
440 TP_PROTO(struct mm_struct *swap_token_mm),
441 TP_ARGS(swap_token_mm)
442);
443
444DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
445 TP_PROTO(struct mm_struct *swap_token_mm),
446 TP_ARGS(swap_token_mm),
447 TP_CONDITION(swap_token_mm != NULL)
448);
449
450TRACE_EVENT_CONDITION(update_swap_token_priority,
451 TP_PROTO(struct mm_struct *mm,
452 unsigned int old_prio,
453 struct mm_struct *swap_token_mm),
454
455 TP_ARGS(mm, old_prio, swap_token_mm),
456
457 TP_CONDITION(mm->token_priority != old_prio),
458
459 TP_STRUCT__entry(
460 __field(struct mm_struct*, mm)
461 __field(unsigned int, old_prio)
462 __field(unsigned int, new_prio)
463 __field(struct mm_struct*, swap_token_mm)
464 __field(unsigned int, swap_token_prio)
465 ),
466
467 TP_fast_assign(
468 __entry->mm = mm;
469 __entry->old_prio = old_prio;
470 __entry->new_prio = mm->token_priority;
471 __entry->swap_token_mm = swap_token_mm;
472 __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
473 ),
474
475 TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
476 __entry->mm, __entry->old_prio, __entry->new_prio,
477 __entry->swap_token_mm, __entry->swap_token_prio)
478);
479
480#endif /* _TRACE_VMSCAN_H */ 380#endif /* _TRACE_VMSCAN_H */
481 381
482/* This part must be outside protection */ 382/* This part must be outside protection */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0c6af34d500..0f3527d6184a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth);
5132 * @root: the css supporsed to be an ancestor of the child. 5132 * @root: the css supporsed to be an ancestor of the child.
5133 * 5133 *
5134 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because 5134 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
5135 * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). 5135 * this function reads css->id, the caller must hold rcu_read_lock().
5136 * But, considering usual usage, the csses should be valid objects after test. 5136 * But, considering usual usage, the csses should be valid objects after test.
5137 * Assuming that the caller will do some action to the child if this returns 5137 * Assuming that the caller will do some action to the child if this returns
5138 * returns true, the caller must take "child";s reference count. 5138 * returns true, the caller must take "child";s reference count.
@@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
5144{ 5144{
5145 struct css_id *child_id; 5145 struct css_id *child_id;
5146 struct css_id *root_id; 5146 struct css_id *root_id;
5147 bool ret = true;
5148 5147
5149 rcu_read_lock();
5150 child_id = rcu_dereference(child->id); 5148 child_id = rcu_dereference(child->id);
5149 if (!child_id)
5150 return false;
5151 root_id = rcu_dereference(root->id); 5151 root_id = rcu_dereference(root->id);
5152 if (!child_id 5152 if (!root_id)
5153 || !root_id 5153 return false;
5154 || (child_id->depth < root_id->depth) 5154 if (child_id->depth < root_id->depth)
5155 || (child_id->stack[root_id->depth] != root_id->id)) 5155 return false;
5156 ret = false; 5156 if (child_id->stack[root_id->depth] != root_id->id)
5157 rcu_read_unlock(); 5157 return false;
5158 return ret; 5158 return true;
5159} 5159}
5160 5160
5161void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) 5161void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..017fb23d5983 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
386 } 386 }
387 charge = 0; 387 charge = 0;
388 if (mpnt->vm_flags & VM_ACCOUNT) { 388 if (mpnt->vm_flags & VM_ACCOUNT) {
389 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 389 unsigned long len;
390 len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
390 if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ 391 if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
391 goto fail_nomem; 392 goto fail_nomem;
392 charge = len; 393 charge = len;
@@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm)
614 list_del(&mm->mmlist); 615 list_del(&mm->mmlist);
615 spin_unlock(&mmlist_lock); 616 spin_unlock(&mmlist_lock);
616 } 617 }
617 put_swap_token(mm);
618 if (mm->binfmt) 618 if (mm->binfmt)
619 module_put(mm->binfmt->module); 619 module_put(mm->binfmt->module);
620 mmdrop(mm); 620 mmdrop(mm);
@@ -831,10 +831,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
831 memcpy(mm, oldmm, sizeof(*mm)); 831 memcpy(mm, oldmm, sizeof(*mm));
832 mm_init_cpumask(mm); 832 mm_init_cpumask(mm);
833 833
834 /* Initializing for Swap token stuff */
835 mm->token_priority = 0;
836 mm->last_interval = 0;
837
838#ifdef CONFIG_TRANSPARENT_HUGEPAGE 834#ifdef CONFIG_TRANSPARENT_HUGEPAGE
839 mm->pmd_huge_pte = NULL; 835 mm->pmd_huge_pte = NULL;
840#endif 836#endif
@@ -913,10 +909,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
913 goto fail_nomem; 909 goto fail_nomem;
914 910
915good_mm: 911good_mm:
916 /* Initializing for Swap token stuff */
917 mm->token_priority = 0;
918 mm->last_interval = 0;
919
920 tsk->mm = mm; 912 tsk->mm = mm;
921 tsk->active_mm = mm; 913 tsk->active_mm = mm;
922 return 0; 914 return 0;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 079f1d39a8b8..2169feeba529 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
343 343
344/* Look up a kernel symbol and return it in a text buffer. */ 344/* Look up a kernel symbol and return it in a text buffer. */
345static int __sprint_symbol(char *buffer, unsigned long address, 345static int __sprint_symbol(char *buffer, unsigned long address,
346 int symbol_offset) 346 int symbol_offset, int add_offset)
347{ 347{
348 char *modname; 348 char *modname;
349 const char *name; 349 const char *name;
@@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address,
358 if (name != buffer) 358 if (name != buffer)
359 strcpy(buffer, name); 359 strcpy(buffer, name);
360 len = strlen(buffer); 360 len = strlen(buffer);
361 buffer += len;
362 offset -= symbol_offset; 361 offset -= symbol_offset;
363 362
363 if (add_offset)
364 len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
365
364 if (modname) 366 if (modname)
365 len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); 367 len += sprintf(buffer + len, " [%s]", modname);
366 else
367 len += sprintf(buffer, "+%#lx/%#lx", offset, size);
368 368
369 return len; 369 return len;
370} 370}
@@ -382,12 +382,28 @@ static int __sprint_symbol(char *buffer, unsigned long address,
382 */ 382 */
383int sprint_symbol(char *buffer, unsigned long address) 383int sprint_symbol(char *buffer, unsigned long address)
384{ 384{
385 return __sprint_symbol(buffer, address, 0); 385 return __sprint_symbol(buffer, address, 0, 1);
386} 386}
387
388EXPORT_SYMBOL_GPL(sprint_symbol); 387EXPORT_SYMBOL_GPL(sprint_symbol);
389 388
390/** 389/**
390 * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer
391 * @buffer: buffer to be stored
392 * @address: address to lookup
393 *
394 * This function looks up a kernel symbol with @address and stores its name
395 * and module name to @buffer if possible. If no symbol was found, just saves
396 * its @address as is.
397 *
398 * This function returns the number of bytes stored in @buffer.
399 */
400int sprint_symbol_no_offset(char *buffer, unsigned long address)
401{
402 return __sprint_symbol(buffer, address, 0, 0);
403}
404EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
405
406/**
391 * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer 407 * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
392 * @buffer: buffer to be stored 408 * @buffer: buffer to be stored
393 * @address: address to lookup 409 * @address: address to lookup
@@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol);
403 */ 419 */
404int sprint_backtrace(char *buffer, unsigned long address) 420int sprint_backtrace(char *buffer, unsigned long address)
405{ 421{
406 return __sprint_symbol(buffer, address, -1); 422 return __sprint_symbol(buffer, address, -1, 1);
407} 423}
408 424
409/* Look up a kernel symbol and print it to the kernel messages. */ 425/* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bebe2b170d49..ad581aa2369a 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
94 counter->usage -= val; 94 counter->usage -= val;
95} 95}
96 96
97void res_counter_uncharge(struct res_counter *counter, unsigned long val) 97void res_counter_uncharge_until(struct res_counter *counter,
98 struct res_counter *top,
99 unsigned long val)
98{ 100{
99 unsigned long flags; 101 unsigned long flags;
100 struct res_counter *c; 102 struct res_counter *c;
101 103
102 local_irq_save(flags); 104 local_irq_save(flags);
103 for (c = counter; c != NULL; c = c->parent) { 105 for (c = counter; c != top; c = c->parent) {
104 spin_lock(&c->lock); 106 spin_lock(&c->lock);
105 res_counter_uncharge_locked(c, val); 107 res_counter_uncharge_locked(c, val);
106 spin_unlock(&c->lock); 108 spin_unlock(&c->lock);
@@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
108 local_irq_restore(flags); 110 local_irq_restore(flags);
109} 111}
110 112
113void res_counter_uncharge(struct res_counter *counter, unsigned long val)
114{
115 res_counter_uncharge_until(counter, NULL, val);
116}
111 117
112static inline unsigned long long * 118static inline unsigned long long *
113res_counter_member(struct res_counter *counter, int member) 119res_counter_member(struct res_counter *counter, int member)
diff --git a/lib/bitmap.c b/lib/bitmap.c
index b5a8b6ad2454..06fdfa1aeba7 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -369,7 +369,8 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area);
369 * @nmaskbits: size of bitmap, in bits 369 * @nmaskbits: size of bitmap, in bits
370 * 370 *
371 * Exactly @nmaskbits bits are displayed. Hex digits are grouped into 371 * Exactly @nmaskbits bits are displayed. Hex digits are grouped into
372 * comma-separated sets of eight digits per set. 372 * comma-separated sets of eight digits per set. Returns the number of
373 * characters which were written to *buf, excluding the trailing \0.
373 */ 374 */
374int bitmap_scnprintf(char *buf, unsigned int buflen, 375int bitmap_scnprintf(char *buf, unsigned int buflen,
375 const unsigned long *maskp, int nmaskbits) 376 const unsigned long *maskp, int nmaskbits)
@@ -517,8 +518,8 @@ EXPORT_SYMBOL(bitmap_parse_user);
517 * 518 *
518 * Helper routine for bitmap_scnlistprintf(). Write decimal number 519 * Helper routine for bitmap_scnlistprintf(). Write decimal number
519 * or range to buf, suppressing output past buf+buflen, with optional 520 * or range to buf, suppressing output past buf+buflen, with optional
520 * comma-prefix. Return len of what would be written to buf, if it 521 * comma-prefix. Return len of what was written to *buf, excluding the
521 * all fit. 522 * trailing \0.
522 */ 523 */
523static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) 524static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
524{ 525{
@@ -544,9 +545,8 @@ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
544 * the range. Output format is compatible with the format 545 * the range. Output format is compatible with the format
545 * accepted as input by bitmap_parselist(). 546 * accepted as input by bitmap_parselist().
546 * 547 *
547 * The return value is the number of characters which would be 548 * The return value is the number of characters which were written to *buf
548 * generated for the given input, excluding the trailing '\0', as 549 * excluding the trailing '\0', as per ISO C99's scnprintf.
549 * per ISO C99.
550 */ 550 */
551int bitmap_scnlistprintf(char *buf, unsigned int buflen, 551int bitmap_scnlistprintf(char *buf, unsigned int buflen,
552 const unsigned long *maskp, int nmaskbits) 552 const unsigned long *maskp, int nmaskbits)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3810b481f940..23a5e031cd8b 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -31,6 +31,9 @@ void __list_add(struct list_head *new,
31 "list_add corruption. prev->next should be " 31 "list_add corruption. prev->next should be "
32 "next (%p), but was %p. (prev=%p).\n", 32 "next (%p), but was %p. (prev=%p).\n",
33 next, prev->next, prev); 33 next, prev->next, prev);
34 WARN(new == prev || new == next,
35 "list_add double add: new=%p, prev=%p, next=%p.\n",
36 new, prev, next);
34 next->prev = new; 37 next->prev = new;
35 new->next = next; 38 new->next = next;
36 new->prev = prev; 39 new->prev = prev;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 86516f5588e3..d7c878cc006c 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -73,11 +73,24 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
73static struct kmem_cache *radix_tree_node_cachep; 73static struct kmem_cache *radix_tree_node_cachep;
74 74
75/* 75/*
76 * The radix tree is variable-height, so an insert operation not only has
77 * to build the branch to its corresponding item, it also has to build the
78 * branch to existing items if the size has to be increased (by
79 * radix_tree_extend).
80 *
81 * The worst case is a zero height tree with just a single item at index 0,
82 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
83 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
84 * Hence:
85 */
86#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
87
88/*
76 * Per-cpu pool of preloaded nodes 89 * Per-cpu pool of preloaded nodes
77 */ 90 */
78struct radix_tree_preload { 91struct radix_tree_preload {
79 int nr; 92 int nr;
80 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 93 struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
81}; 94};
82static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 95static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
83 96
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 525d160d44f0..d0ec4f3d1593 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -58,7 +58,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg)
58 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", 58 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
59 msg, raw_smp_processor_id(), 59 msg, raw_smp_processor_id(),
60 current->comm, task_pid_nr(current)); 60 current->comm, task_pid_nr(current));
61 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " 61 printk(KERN_EMERG " lock: %ps, .magic: %08x, .owner: %s/%d, "
62 ".owner_cpu: %d\n", 62 ".owner_cpu: %d\n",
63 lock, lock->magic, 63 lock, lock->magic,
64 owner ? owner->comm : "<none>", 64 owner ? owner->comm : "<none>",
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index dd4ece372699..1cffc223bff5 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -23,15 +23,15 @@
23int string_get_size(u64 size, const enum string_size_units units, 23int string_get_size(u64 size, const enum string_size_units units,
24 char *buf, int len) 24 char *buf, int len)
25{ 25{
26 const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", 26 static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
27 "EB", "ZB", "YB", NULL}; 27 "EB", "ZB", "YB", NULL};
28 const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", 28 static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
29 "EiB", "ZiB", "YiB", NULL }; 29 "EiB", "ZiB", "YiB", NULL };
30 const char **units_str[] = { 30 static const char **units_str[] = {
31 [STRING_UNITS_10] = units_10, 31 [STRING_UNITS_10] = units_10,
32 [STRING_UNITS_2] = units_2, 32 [STRING_UNITS_2] = units_2,
33 }; 33 };
34 const unsigned int divisor[] = { 34 static const unsigned int divisor[] = {
35 [STRING_UNITS_10] = 1000, 35 [STRING_UNITS_10] = 1000,
36 [STRING_UNITS_2] = 1024, 36 [STRING_UNITS_2] = 1024,
37 }; 37 };
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 414f46ed1dcd..45bc1f83a5ad 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -130,11 +130,9 @@ void swiotlb_print_info(void)
130 pstart = virt_to_phys(io_tlb_start); 130 pstart = virt_to_phys(io_tlb_start);
131 pend = virt_to_phys(io_tlb_end); 131 pend = virt_to_phys(io_tlb_end);
132 132
133 printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", 133 printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
134 bytes >> 20, io_tlb_start, io_tlb_end); 134 (unsigned long long)pstart, (unsigned long long)pend - 1,
135 printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", 135 bytes >> 20, io_tlb_start, io_tlb_end - 1);
136 (unsigned long long)pstart,
137 (unsigned long long)pend);
138} 136}
139 137
140void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) 138void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index d55769d63cb8..bea3f3fa3f02 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -11,7 +11,7 @@ struct test_fail {
11}; 11};
12 12
13#define DEFINE_TEST_FAIL(test) \ 13#define DEFINE_TEST_FAIL(test) \
14 const struct test_fail test[] __initdata 14 const struct test_fail test[] __initconst
15 15
16#define DECLARE_TEST_OK(type, test_type) \ 16#define DECLARE_TEST_OK(type, test_type) \
17 test_type { \ 17 test_type { \
@@ -21,7 +21,7 @@ struct test_fail {
21 } 21 }
22 22
23#define DEFINE_TEST_OK(type, test) \ 23#define DEFINE_TEST_OK(type, test) \
24 const type test[] __initdata 24 const type test[] __initconst
25 25
26#define TEST_FAIL(fn, type, fmt, test) \ 26#define TEST_FAIL(fn, type, fmt, test) \
27{ \ 27{ \
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index abbabec9720a..5391299c1e78 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -284,6 +284,7 @@ char *number(char *buf, char *end, unsigned long long num,
284 char locase; 284 char locase;
285 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); 285 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
286 int i; 286 int i;
287 bool is_zero = num == 0LL;
287 288
288 /* locase = 0 or 0x20. ORing digits or letters with 'locase' 289 /* locase = 0 or 0x20. ORing digits or letters with 'locase'
289 * produces same digits or (maybe lowercased) letters */ 290 * produces same digits or (maybe lowercased) letters */
@@ -305,8 +306,9 @@ char *number(char *buf, char *end, unsigned long long num,
305 } 306 }
306 } 307 }
307 if (need_pfx) { 308 if (need_pfx) {
308 spec.field_width--;
309 if (spec.base == 16) 309 if (spec.base == 16)
310 spec.field_width -= 2;
311 else if (!is_zero)
310 spec.field_width--; 312 spec.field_width--;
311 } 313 }
312 314
@@ -353,9 +355,11 @@ char *number(char *buf, char *end, unsigned long long num,
353 } 355 }
354 /* "0x" / "0" prefix */ 356 /* "0x" / "0" prefix */
355 if (need_pfx) { 357 if (need_pfx) {
356 if (buf < end) 358 if (spec.base == 16 || !is_zero) {
357 *buf = '0'; 359 if (buf < end)
358 ++buf; 360 *buf = '0';
361 ++buf;
362 }
359 if (spec.base == 16) { 363 if (spec.base == 16) {
360 if (buf < end) 364 if (buf < end)
361 *buf = ('X' | locase); 365 *buf = ('X' | locase);
@@ -436,7 +440,7 @@ char *symbol_string(char *buf, char *end, void *ptr,
436 else if (ext != 'f' && ext != 's') 440 else if (ext != 'f' && ext != 's')
437 sprint_symbol(sym, value); 441 sprint_symbol(sym, value);
438 else 442 else
439 kallsyms_lookup(value, NULL, NULL, NULL, sym); 443 sprint_symbol_no_offset(sym, value);
440 444
441 return string(buf, end, sym, spec); 445 return string(buf, end, sym, spec);
442#else 446#else
diff --git a/mm/Kconfig b/mm/Kconfig
index 39220026c797..b2176374b98e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -349,6 +349,16 @@ choice
349 benefit. 349 benefit.
350endchoice 350endchoice
351 351
352config CROSS_MEMORY_ATTACH
353 bool "Cross Memory Support"
354 depends on MMU
355 default y
356 help
357 Enabling this option adds the system calls process_vm_readv and
358 process_vm_writev which allow a process with the correct privileges
359 to directly read from or write to to another process's address space.
360 See the man page for more details.
361
352# 362#
353# UP and nommu archs use km based percpu allocator 363# UP and nommu archs use km based percpu allocator
354# 364#
diff --git a/mm/Makefile b/mm/Makefile
index 8aada89efbbb..a156285ce88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,8 +5,11 @@
5mmu-y := nommu.o 5mmu-y := nommu.o
6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ 6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o pagewalk.o pgtable-generic.o \ 8 vmalloc.o pagewalk.o pgtable-generic.o
9 process_vm_access.o 9
10ifdef CONFIG_CROSS_MEMORY_ATTACH
11mmu-$(CONFIG_MMU) += process_vm_access.o
12endif
10 13
11obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ 14obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
12 maccess.o page_alloc.o page-writeback.o \ 15 maccess.o page_alloc.o page-writeback.o \
@@ -25,7 +28,7 @@ endif
25obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 28obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
26 29
27obj-$(CONFIG_BOUNCE) += bounce.o 30obj-$(CONFIG_BOUNCE) += bounce.o
28obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
29obj-$(CONFIG_HAS_DMA) += dmapool.o 32obj-$(CONFIG_HAS_DMA) += dmapool.o
30obj-$(CONFIG_HUGETLBFS) += hugetlb.o 33obj-$(CONFIG_HUGETLBFS) += hugetlb.o
31obj-$(CONFIG_NUMA) += mempolicy.o 34obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 0131170c9d54..ec4fcb7a56c8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -77,16 +77,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages)
77 */ 77 */
78static void __init link_bootmem(bootmem_data_t *bdata) 78static void __init link_bootmem(bootmem_data_t *bdata)
79{ 79{
80 struct list_head *iter; 80 bootmem_data_t *ent;
81 81
82 list_for_each(iter, &bdata_list) { 82 list_for_each_entry(ent, &bdata_list, list) {
83 bootmem_data_t *ent; 83 if (bdata->node_min_pfn < ent->node_min_pfn) {
84 84 list_add_tail(&bdata->list, &ent->list);
85 ent = list_entry(iter, bootmem_data_t, list); 85 return;
86 if (bdata->node_min_pfn < ent->node_min_pfn) 86 }
87 break;
88 } 87 }
89 list_add_tail(&bdata->list, iter); 88
89 list_add_tail(&bdata->list, &bdata_list);
90} 90}
91 91
92/* 92/*
@@ -203,7 +203,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
203 } else { 203 } else {
204 unsigned long off = 0; 204 unsigned long off = 0;
205 205
206 while (vec && off < BITS_PER_LONG) { 206 vec >>= start & (BITS_PER_LONG - 1);
207 while (vec) {
207 if (vec & 1) { 208 if (vec & 1) {
208 page = pfn_to_page(start + off); 209 page = pfn_to_page(start + off);
209 __free_pages_bootmem(page, 0); 210 __free_pages_bootmem(page, 0);
@@ -467,7 +468,7 @@ static unsigned long __init align_off(struct bootmem_data *bdata,
467 return ALIGN(base + off, align) - base; 468 return ALIGN(base + off, align) - base;
468} 469}
469 470
470static void * __init alloc_bootmem_core(struct bootmem_data *bdata, 471static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
471 unsigned long size, unsigned long align, 472 unsigned long size, unsigned long align,
472 unsigned long goal, unsigned long limit) 473 unsigned long goal, unsigned long limit)
473{ 474{
@@ -588,14 +589,14 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
588 p_bdata = bootmem_arch_preferred_node(bdata, size, align, 589 p_bdata = bootmem_arch_preferred_node(bdata, size, align,
589 goal, limit); 590 goal, limit);
590 if (p_bdata) 591 if (p_bdata)
591 return alloc_bootmem_core(p_bdata, size, align, 592 return alloc_bootmem_bdata(p_bdata, size, align,
592 goal, limit); 593 goal, limit);
593 } 594 }
594#endif 595#endif
595 return NULL; 596 return NULL;
596} 597}
597 598
598static void * __init ___alloc_bootmem_nopanic(unsigned long size, 599static void * __init alloc_bootmem_core(unsigned long size,
599 unsigned long align, 600 unsigned long align,
600 unsigned long goal, 601 unsigned long goal,
601 unsigned long limit) 602 unsigned long limit)
@@ -603,7 +604,6 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long size,
603 bootmem_data_t *bdata; 604 bootmem_data_t *bdata;
604 void *region; 605 void *region;
605 606
606restart:
607 region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); 607 region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
608 if (region) 608 if (region)
609 return region; 609 return region;
@@ -614,11 +614,25 @@ restart:
614 if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) 614 if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
615 break; 615 break;
616 616
617 region = alloc_bootmem_core(bdata, size, align, goal, limit); 617 region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
618 if (region) 618 if (region)
619 return region; 619 return region;
620 } 620 }
621 621
622 return NULL;
623}
624
625static void * __init ___alloc_bootmem_nopanic(unsigned long size,
626 unsigned long align,
627 unsigned long goal,
628 unsigned long limit)
629{
630 void *ptr;
631
632restart:
633 ptr = alloc_bootmem_core(size, align, goal, limit);
634 if (ptr)
635 return ptr;
622 if (goal) { 636 if (goal) {
623 goal = 0; 637 goal = 0;
624 goto restart; 638 goto restart;
@@ -684,21 +698,56 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
684 return ___alloc_bootmem(size, align, goal, limit); 698 return ___alloc_bootmem(size, align, goal, limit);
685} 699}
686 700
687static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 701static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
688 unsigned long size, unsigned long align, 702 unsigned long size, unsigned long align,
689 unsigned long goal, unsigned long limit) 703 unsigned long goal, unsigned long limit)
690{ 704{
691 void *ptr; 705 void *ptr;
692 706
693 ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); 707again:
708 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size,
709 align, goal, limit);
694 if (ptr) 710 if (ptr)
695 return ptr; 711 return ptr;
696 712
697 ptr = alloc_bootmem_core(bdata, size, align, goal, limit); 713 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
698 if (ptr) 714 if (ptr)
699 return ptr; 715 return ptr;
700 716
701 return ___alloc_bootmem(size, align, goal, limit); 717 ptr = alloc_bootmem_core(size, align, goal, limit);
718 if (ptr)
719 return ptr;
720
721 if (goal) {
722 goal = 0;
723 goto again;
724 }
725
726 return NULL;
727}
728
729void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
730 unsigned long align, unsigned long goal)
731{
732 if (WARN_ON_ONCE(slab_is_available()))
733 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
734
735 return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
736}
737
738void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
739 unsigned long align, unsigned long goal,
740 unsigned long limit)
741{
742 void *ptr;
743
744 ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
745 if (ptr)
746 return ptr;
747
748 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
749 panic("Out of memory");
750 return NULL;
702} 751}
703 752
704/** 753/**
@@ -722,7 +771,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
722 if (WARN_ON_ONCE(slab_is_available())) 771 if (WARN_ON_ONCE(slab_is_available()))
723 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 772 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
724 773
725 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 774 return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
726} 775}
727 776
728void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 777void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -743,7 +792,7 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
743 unsigned long new_goal; 792 unsigned long new_goal;
744 793
745 new_goal = MAX_DMA32_PFN << PAGE_SHIFT; 794 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
746 ptr = alloc_bootmem_core(pgdat->bdata, size, align, 795 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align,
747 new_goal, 0); 796 new_goal, 0);
748 if (ptr) 797 if (ptr)
749 return ptr; 798 return ptr;
@@ -754,47 +803,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
754 803
755} 804}
756 805
757#ifdef CONFIG_SPARSEMEM
758/**
759 * alloc_bootmem_section - allocate boot memory from a specific section
760 * @size: size of the request in bytes
761 * @section_nr: sparse map section to allocate from
762 *
763 * Return NULL on failure.
764 */
765void * __init alloc_bootmem_section(unsigned long size,
766 unsigned long section_nr)
767{
768 bootmem_data_t *bdata;
769 unsigned long pfn, goal;
770
771 pfn = section_nr_to_pfn(section_nr);
772 goal = pfn << PAGE_SHIFT;
773 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
774
775 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0);
776}
777#endif
778
779void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
780 unsigned long align, unsigned long goal)
781{
782 void *ptr;
783
784 if (WARN_ON_ONCE(slab_is_available()))
785 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
786
787 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
788 if (ptr)
789 return ptr;
790
791 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
792 if (ptr)
793 return ptr;
794
795 return __alloc_bootmem_nopanic(size, align, goal);
796}
797
798#ifndef ARCH_LOW_ADDRESS_LIMIT 806#ifndef ARCH_LOW_ADDRESS_LIMIT
799#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL 807#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
800#endif 808#endif
@@ -839,6 +847,6 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
839 if (WARN_ON_ONCE(slab_is_available())) 847 if (WARN_ON_ONCE(slab_is_available()))
840 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 848 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
841 849
842 return ___alloc_bootmem_node(pgdat->bdata, size, align, 850 return ___alloc_bootmem_node(pgdat, size, align,
843 goal, ARCH_LOW_ADDRESS_LIMIT); 851 goal, ARCH_LOW_ADDRESS_LIMIT);
844} 852}
diff --git a/mm/compaction.c b/mm/compaction.c
index da7d35ea5103..4ac338af5120 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -226,7 +226,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
226 unsigned long last_pageblock_nr = 0, pageblock_nr; 226 unsigned long last_pageblock_nr = 0, pageblock_nr;
227 unsigned long nr_scanned = 0, nr_isolated = 0; 227 unsigned long nr_scanned = 0, nr_isolated = 0;
228 struct list_head *migratelist = &cc->migratepages; 228 struct list_head *migratelist = &cc->migratepages;
229 isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; 229 isolate_mode_t mode = 0;
230 struct lruvec *lruvec;
230 231
231 /* 232 /*
232 * Ensure that there are not too many pages isolated from the LRU 233 * Ensure that there are not too many pages isolated from the LRU
@@ -235,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
235 */ 236 */
236 while (unlikely(too_many_isolated(zone))) { 237 while (unlikely(too_many_isolated(zone))) {
237 /* async migration should just abort */ 238 /* async migration should just abort */
238 if (!cc->sync) 239 if (cc->mode != COMPACT_SYNC)
239 return 0; 240 return 0;
240 241
241 congestion_wait(BLK_RW_ASYNC, HZ/10); 242 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -303,7 +304,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
303 * satisfies the allocation 304 * satisfies the allocation
304 */ 305 */
305 pageblock_nr = low_pfn >> pageblock_order; 306 pageblock_nr = low_pfn >> pageblock_order;
306 if (!cc->sync && last_pageblock_nr != pageblock_nr && 307 if (cc->mode != COMPACT_SYNC &&
308 last_pageblock_nr != pageblock_nr &&
307 !migrate_async_suitable(get_pageblock_migratetype(page))) { 309 !migrate_async_suitable(get_pageblock_migratetype(page))) {
308 low_pfn += pageblock_nr_pages; 310 low_pfn += pageblock_nr_pages;
309 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 311 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -324,17 +326,19 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
324 continue; 326 continue;
325 } 327 }
326 328
327 if (!cc->sync) 329 if (cc->mode != COMPACT_SYNC)
328 mode |= ISOLATE_ASYNC_MIGRATE; 330 mode |= ISOLATE_ASYNC_MIGRATE;
329 331
332 lruvec = mem_cgroup_page_lruvec(page, zone);
333
330 /* Try isolate the page */ 334 /* Try isolate the page */
331 if (__isolate_lru_page(page, mode, 0) != 0) 335 if (__isolate_lru_page(page, mode) != 0)
332 continue; 336 continue;
333 337
334 VM_BUG_ON(PageTransCompound(page)); 338 VM_BUG_ON(PageTransCompound(page));
335 339
336 /* Successfully isolated */ 340 /* Successfully isolated */
337 del_page_from_lru_list(zone, page, page_lru(page)); 341 del_page_from_lru_list(page, lruvec, page_lru(page));
338 list_add(&page->lru, migratelist); 342 list_add(&page->lru, migratelist);
339 cc->nr_migratepages++; 343 cc->nr_migratepages++;
340 nr_isolated++; 344 nr_isolated++;
@@ -357,27 +361,90 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
357 361
358#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 362#endif /* CONFIG_COMPACTION || CONFIG_CMA */
359#ifdef CONFIG_COMPACTION 363#ifdef CONFIG_COMPACTION
364/*
365 * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
366 * converted to MIGRATE_MOVABLE type, false otherwise.
367 */
368static bool rescue_unmovable_pageblock(struct page *page)
369{
370 unsigned long pfn, start_pfn, end_pfn;
371 struct page *start_page, *end_page;
372
373 pfn = page_to_pfn(page);
374 start_pfn = pfn & ~(pageblock_nr_pages - 1);
375 end_pfn = start_pfn + pageblock_nr_pages;
376
377 start_page = pfn_to_page(start_pfn);
378 end_page = pfn_to_page(end_pfn);
379
380 /* Do not deal with pageblocks that overlap zones */
381 if (page_zone(start_page) != page_zone(end_page))
382 return false;
383
384 for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
385 page++) {
386 if (!pfn_valid_within(pfn))
387 continue;
388
389 if (PageBuddy(page)) {
390 int order = page_order(page);
391
392 pfn += (1 << order) - 1;
393 page += (1 << order) - 1;
394
395 continue;
396 } else if (page_count(page) == 0 || PageLRU(page))
397 continue;
398
399 return false;
400 }
401
402 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
403 move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
404 return true;
405}
406
407enum smt_result {
408 GOOD_AS_MIGRATION_TARGET,
409 FAIL_UNMOVABLE_TARGET,
410 FAIL_BAD_TARGET,
411};
360 412
361/* Returns true if the page is within a block suitable for migration to */ 413/*
362static bool suitable_migration_target(struct page *page) 414 * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
415 * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
416 * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
417 */
418static enum smt_result suitable_migration_target(struct page *page,
419 struct compact_control *cc)
363{ 420{
364 421
365 int migratetype = get_pageblock_migratetype(page); 422 int migratetype = get_pageblock_migratetype(page);
366 423
367 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ 424 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
368 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) 425 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
369 return false; 426 return FAIL_BAD_TARGET;
370 427
371 /* If the page is a large free page, then allow migration */ 428 /* If the page is a large free page, then allow migration */
372 if (PageBuddy(page) && page_order(page) >= pageblock_order) 429 if (PageBuddy(page) && page_order(page) >= pageblock_order)
373 return true; 430 return GOOD_AS_MIGRATION_TARGET;
374 431
375 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 432 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
376 if (migrate_async_suitable(migratetype)) 433 if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
377 return true; 434 migrate_async_suitable(migratetype))
435 return GOOD_AS_MIGRATION_TARGET;
436
437 if (cc->mode == COMPACT_ASYNC_MOVABLE &&
438 migratetype == MIGRATE_UNMOVABLE)
439 return FAIL_UNMOVABLE_TARGET;
440
441 if (cc->mode != COMPACT_ASYNC_MOVABLE &&
442 migratetype == MIGRATE_UNMOVABLE &&
443 rescue_unmovable_pageblock(page))
444 return GOOD_AS_MIGRATION_TARGET;
378 445
379 /* Otherwise skip the block */ 446 /* Otherwise skip the block */
380 return false; 447 return FAIL_BAD_TARGET;
381} 448}
382 449
383/* 450/*
@@ -411,6 +478,13 @@ static void isolate_freepages(struct zone *zone,
411 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; 478 zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
412 479
413 /* 480 /*
481 * isolate_freepages() may be called more than once during
482 * compact_zone_order() run and we want only the most recent
483 * count.
484 */
485 cc->nr_pageblocks_skipped = 0;
486
487 /*
414 * Isolate free pages until enough are available to migrate the 488 * Isolate free pages until enough are available to migrate the
415 * pages on cc->migratepages. We stop searching if the migrate 489 * pages on cc->migratepages. We stop searching if the migrate
416 * and free page scanners meet or enough free pages are isolated. 490 * and free page scanners meet or enough free pages are isolated.
@@ -418,6 +492,7 @@ static void isolate_freepages(struct zone *zone,
418 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; 492 for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
419 pfn -= pageblock_nr_pages) { 493 pfn -= pageblock_nr_pages) {
420 unsigned long isolated; 494 unsigned long isolated;
495 enum smt_result ret;
421 496
422 if (!pfn_valid(pfn)) 497 if (!pfn_valid(pfn))
423 continue; 498 continue;
@@ -434,9 +509,12 @@ static void isolate_freepages(struct zone *zone,
434 continue; 509 continue;
435 510
436 /* Check the block is suitable for migration */ 511 /* Check the block is suitable for migration */
437 if (!suitable_migration_target(page)) 512 ret = suitable_migration_target(page, cc);
513 if (ret != GOOD_AS_MIGRATION_TARGET) {
514 if (ret == FAIL_UNMOVABLE_TARGET)
515 cc->nr_pageblocks_skipped++;
438 continue; 516 continue;
439 517 }
440 /* 518 /*
441 * Found a block suitable for isolating free pages from. Now 519 * Found a block suitable for isolating free pages from. Now
442 * we disabled interrupts, double check things are ok and 520 * we disabled interrupts, double check things are ok and
@@ -445,12 +523,14 @@ static void isolate_freepages(struct zone *zone,
445 */ 523 */
446 isolated = 0; 524 isolated = 0;
447 spin_lock_irqsave(&zone->lock, flags); 525 spin_lock_irqsave(&zone->lock, flags);
448 if (suitable_migration_target(page)) { 526 ret = suitable_migration_target(page, cc);
527 if (ret == GOOD_AS_MIGRATION_TARGET) {
449 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 528 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
450 isolated = isolate_freepages_block(pfn, end_pfn, 529 isolated = isolate_freepages_block(pfn, end_pfn,
451 freelist, false); 530 freelist, false);
452 nr_freepages += isolated; 531 nr_freepages += isolated;
453 } 532 } else if (ret == FAIL_UNMOVABLE_TARGET)
533 cc->nr_pageblocks_skipped++;
454 spin_unlock_irqrestore(&zone->lock, flags); 534 spin_unlock_irqrestore(&zone->lock, flags);
455 535
456 /* 536 /*
@@ -682,8 +762,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
682 762
683 nr_migrate = cc->nr_migratepages; 763 nr_migrate = cc->nr_migratepages;
684 err = migrate_pages(&cc->migratepages, compaction_alloc, 764 err = migrate_pages(&cc->migratepages, compaction_alloc,
685 (unsigned long)cc, false, 765 (unsigned long)&cc->freepages, false,
686 cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); 766 (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
767 : MIGRATE_ASYNC);
687 update_nr_listpages(cc); 768 update_nr_listpages(cc);
688 nr_remaining = cc->nr_migratepages; 769 nr_remaining = cc->nr_migratepages;
689 770
@@ -712,7 +793,8 @@ out:
712 793
713static unsigned long compact_zone_order(struct zone *zone, 794static unsigned long compact_zone_order(struct zone *zone,
714 int order, gfp_t gfp_mask, 795 int order, gfp_t gfp_mask,
715 bool sync) 796 enum compact_mode mode,
797 unsigned long *nr_pageblocks_skipped)
716{ 798{
717 struct compact_control cc = { 799 struct compact_control cc = {
718 .nr_freepages = 0, 800 .nr_freepages = 0,
@@ -720,12 +802,17 @@ static unsigned long compact_zone_order(struct zone *zone,
720 .order = order, 802 .order = order,
721 .migratetype = allocflags_to_migratetype(gfp_mask), 803 .migratetype = allocflags_to_migratetype(gfp_mask),
722 .zone = zone, 804 .zone = zone,
723 .sync = sync, 805 .mode = mode,
724 }; 806 };
807 unsigned long rc;
808
725 INIT_LIST_HEAD(&cc.freepages); 809 INIT_LIST_HEAD(&cc.freepages);
726 INIT_LIST_HEAD(&cc.migratepages); 810 INIT_LIST_HEAD(&cc.migratepages);
727 811
728 return compact_zone(zone, &cc); 812 rc = compact_zone(zone, &cc);
813 *nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
814
815 return rc;
729} 816}
730 817
731int sysctl_extfrag_threshold = 500; 818int sysctl_extfrag_threshold = 500;
@@ -750,6 +837,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
750 struct zoneref *z; 837 struct zoneref *z;
751 struct zone *zone; 838 struct zone *zone;
752 int rc = COMPACT_SKIPPED; 839 int rc = COMPACT_SKIPPED;
840 unsigned long nr_pageblocks_skipped;
841 enum compact_mode mode;
753 842
754 /* 843 /*
755 * Check whether it is worth even starting compaction. The order check is 844 * Check whether it is worth even starting compaction. The order check is
@@ -766,12 +855,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
766 nodemask) { 855 nodemask) {
767 int status; 856 int status;
768 857
769 status = compact_zone_order(zone, order, gfp_mask, sync); 858 mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
859retry:
860 status = compact_zone_order(zone, order, gfp_mask, mode,
861 &nr_pageblocks_skipped);
770 rc = max(status, rc); 862 rc = max(status, rc);
771 863
772 /* If a normal allocation would succeed, stop compacting */ 864 /* If a normal allocation would succeed, stop compacting */
773 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) 865 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
774 break; 866 break;
867
868 if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
869 if (nr_pageblocks_skipped) {
870 mode = COMPACT_ASYNC_UNMOVABLE;
871 goto retry;
872 }
873 }
775 } 874 }
776 875
777 return rc; 876 return rc;
@@ -805,7 +904,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
805 if (ok && cc->order > zone->compact_order_failed) 904 if (ok && cc->order > zone->compact_order_failed)
806 zone->compact_order_failed = cc->order + 1; 905 zone->compact_order_failed = cc->order + 1;
807 /* Currently async compaction is never deferred. */ 906 /* Currently async compaction is never deferred. */
808 else if (!ok && cc->sync) 907 else if (!ok && cc->mode == COMPACT_SYNC)
809 defer_compaction(zone, cc->order); 908 defer_compaction(zone, cc->order);
810 } 909 }
811 910
@@ -820,7 +919,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
820{ 919{
821 struct compact_control cc = { 920 struct compact_control cc = {
822 .order = order, 921 .order = order,
823 .sync = false, 922 .mode = COMPACT_ASYNC_MOVABLE,
824 }; 923 };
825 924
826 return __compact_pgdat(pgdat, &cc); 925 return __compact_pgdat(pgdat, &cc);
@@ -830,7 +929,7 @@ static int compact_node(int nid)
830{ 929{
831 struct compact_control cc = { 930 struct compact_control cc = {
832 .order = -1, 931 .order = -1,
833 .sync = true, 932 .mode = COMPACT_SYNC,
834 }; 933 };
835 934
836 return __compact_pgdat(NODE_DATA(nid), &cc); 935 return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/filemap.c b/mm/filemap.c
index 79c4b2b0b14e..64b48f934b89 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -29,7 +29,6 @@
29#include <linux/pagevec.h> 29#include <linux/pagevec.h>
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/syscalls.h>
33#include <linux/cpuset.h> 32#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 34#include <linux/memcontrol.h>
@@ -1478,44 +1477,6 @@ out:
1478} 1477}
1479EXPORT_SYMBOL(generic_file_aio_read); 1478EXPORT_SYMBOL(generic_file_aio_read);
1480 1479
1481static ssize_t
1482do_readahead(struct address_space *mapping, struct file *filp,
1483 pgoff_t index, unsigned long nr)
1484{
1485 if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
1486 return -EINVAL;
1487
1488 force_page_cache_readahead(mapping, filp, index, nr);
1489 return 0;
1490}
1491
1492SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
1493{
1494 ssize_t ret;
1495 struct file *file;
1496
1497 ret = -EBADF;
1498 file = fget(fd);
1499 if (file) {
1500 if (file->f_mode & FMODE_READ) {
1501 struct address_space *mapping = file->f_mapping;
1502 pgoff_t start = offset >> PAGE_CACHE_SHIFT;
1503 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
1504 unsigned long len = end - start + 1;
1505 ret = do_readahead(mapping, file, start, len);
1506 }
1507 fput(file);
1508 }
1509 return ret;
1510}
1511#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
1512asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
1513{
1514 return SYSC_readahead((int) fd, offset, (size_t) count);
1515}
1516SYSCALL_ALIAS(sys_readahead, SyS_readahead);
1517#endif
1518
1519#ifdef CONFIG_MMU 1480#ifdef CONFIG_MMU
1520/** 1481/**
1521 * page_cache_read - adds requested page to the page cache if not already there 1482 * page_cache_read - adds requested page to the page cache if not already there
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f0e5306eeb55..57c4b9309015 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -636,16 +636,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
636 unsigned long haddr, pmd_t *pmd, 636 unsigned long haddr, pmd_t *pmd,
637 struct page *page) 637 struct page *page)
638{ 638{
639 int ret = 0;
640 pgtable_t pgtable; 639 pgtable_t pgtable;
641 640
642 VM_BUG_ON(!PageCompound(page)); 641 VM_BUG_ON(!PageCompound(page));
643 pgtable = pte_alloc_one(mm, haddr); 642 pgtable = pte_alloc_one(mm, haddr);
644 if (unlikely(!pgtable)) { 643 if (unlikely(!pgtable))
645 mem_cgroup_uncharge_page(page);
646 put_page(page);
647 return VM_FAULT_OOM; 644 return VM_FAULT_OOM;
648 }
649 645
650 clear_huge_page(page, haddr, HPAGE_PMD_NR); 646 clear_huge_page(page, haddr, HPAGE_PMD_NR);
651 __SetPageUptodate(page); 647 __SetPageUptodate(page);
@@ -675,7 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
675 spin_unlock(&mm->page_table_lock); 671 spin_unlock(&mm->page_table_lock);
676 } 672 }
677 673
678 return ret; 674 return 0;
679} 675}
680 676
681static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) 677static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
@@ -724,8 +720,14 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
724 put_page(page); 720 put_page(page);
725 goto out; 721 goto out;
726 } 722 }
723 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
724 page))) {
725 mem_cgroup_uncharge_page(page);
726 put_page(page);
727 goto out;
728 }
727 729
728 return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page); 730 return 0;
729 } 731 }
730out: 732out:
731 /* 733 /*
@@ -950,6 +952,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
950 count_vm_event(THP_FAULT_FALLBACK); 952 count_vm_event(THP_FAULT_FALLBACK);
951 ret = do_huge_pmd_wp_page_fallback(mm, vma, address, 953 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
952 pmd, orig_pmd, page, haddr); 954 pmd, orig_pmd, page, haddr);
955 if (ret & VM_FAULT_OOM)
956 split_huge_page(page);
953 put_page(page); 957 put_page(page);
954 goto out; 958 goto out;
955 } 959 }
@@ -957,6 +961,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
957 961
958 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 962 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
959 put_page(new_page); 963 put_page(new_page);
964 split_huge_page(page);
960 put_page(page); 965 put_page(page);
961 ret |= VM_FAULT_OOM; 966 ret |= VM_FAULT_OOM;
962 goto out; 967 goto out;
@@ -968,8 +973,10 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
968 spin_lock(&mm->page_table_lock); 973 spin_lock(&mm->page_table_lock);
969 put_page(page); 974 put_page(page);
970 if (unlikely(!pmd_same(*pmd, orig_pmd))) { 975 if (unlikely(!pmd_same(*pmd, orig_pmd))) {
976 spin_unlock(&mm->page_table_lock);
971 mem_cgroup_uncharge_page(new_page); 977 mem_cgroup_uncharge_page(new_page);
972 put_page(new_page); 978 put_page(new_page);
979 goto out;
973 } else { 980 } else {
974 pmd_t entry; 981 pmd_t entry;
975 VM_BUG_ON(!PageHead(page)); 982 VM_BUG_ON(!PageHead(page));
@@ -1224,10 +1231,13 @@ static void __split_huge_page_refcount(struct page *page)
1224{ 1231{
1225 int i; 1232 int i;
1226 struct zone *zone = page_zone(page); 1233 struct zone *zone = page_zone(page);
1234 struct lruvec *lruvec;
1227 int tail_count = 0; 1235 int tail_count = 0;
1228 1236
1229 /* prevent PageLRU to go away from under us, and freeze lru stats */ 1237 /* prevent PageLRU to go away from under us, and freeze lru stats */
1230 spin_lock_irq(&zone->lru_lock); 1238 spin_lock_irq(&zone->lru_lock);
1239 lruvec = mem_cgroup_page_lruvec(page, zone);
1240
1231 compound_lock(page); 1241 compound_lock(page);
1232 /* complete memcg works before add pages to LRU */ 1242 /* complete memcg works before add pages to LRU */
1233 mem_cgroup_split_huge_fixup(page); 1243 mem_cgroup_split_huge_fixup(page);
@@ -1302,13 +1312,12 @@ static void __split_huge_page_refcount(struct page *page)
1302 BUG_ON(!PageDirty(page_tail)); 1312 BUG_ON(!PageDirty(page_tail));
1303 BUG_ON(!PageSwapBacked(page_tail)); 1313 BUG_ON(!PageSwapBacked(page_tail));
1304 1314
1305 1315 lru_add_page_tail(page, page_tail, lruvec);
1306 lru_add_page_tail(zone, page, page_tail);
1307 } 1316 }
1308 atomic_sub(tail_count, &page->_count); 1317 atomic_sub(tail_count, &page->_count);
1309 BUG_ON(atomic_read(&page->_count) <= 0); 1318 BUG_ON(atomic_read(&page->_count) <= 0);
1310 1319
1311 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); 1320 __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
1312 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); 1321 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
1313 1322
1314 ClearPageCompound(page); 1323 ClearPageCompound(page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4e28416c47fb..285a81e87ec8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -273,8 +273,8 @@ static long region_count(struct list_head *head, long f, long t)
273 273
274 /* Locate each segment we overlap with, and count that overlap. */ 274 /* Locate each segment we overlap with, and count that overlap. */
275 list_for_each_entry(rg, head, link) { 275 list_for_each_entry(rg, head, link) {
276 int seg_from; 276 long seg_from;
277 int seg_to; 277 long seg_to;
278 278
279 if (rg->to <= f) 279 if (rg->to <= f)
280 continue; 280 continue;
@@ -2157,6 +2157,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
2157 kref_get(&reservations->refs); 2157 kref_get(&reservations->refs);
2158} 2158}
2159 2159
2160static void resv_map_put(struct vm_area_struct *vma)
2161{
2162 struct resv_map *reservations = vma_resv_map(vma);
2163
2164 if (!reservations)
2165 return;
2166 kref_put(&reservations->refs, resv_map_release);
2167}
2168
2160static void hugetlb_vm_op_close(struct vm_area_struct *vma) 2169static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2161{ 2170{
2162 struct hstate *h = hstate_vma(vma); 2171 struct hstate *h = hstate_vma(vma);
@@ -2173,7 +2182,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
2173 reserve = (end - start) - 2182 reserve = (end - start) -
2174 region_count(&reservations->regions, start, end); 2183 region_count(&reservations->regions, start, end);
2175 2184
2176 kref_put(&reservations->refs, resv_map_release); 2185 resv_map_put(vma);
2177 2186
2178 if (reserve) { 2187 if (reserve) {
2179 hugetlb_acct_memory(h, -reserve); 2188 hugetlb_acct_memory(h, -reserve);
@@ -2991,12 +3000,16 @@ int hugetlb_reserve_pages(struct inode *inode,
2991 set_vma_resv_flags(vma, HPAGE_RESV_OWNER); 3000 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
2992 } 3001 }
2993 3002
2994 if (chg < 0) 3003 if (chg < 0) {
2995 return chg; 3004 ret = chg;
3005 goto out_err;
3006 }
2996 3007
2997 /* There must be enough pages in the subpool for the mapping */ 3008 /* There must be enough pages in the subpool for the mapping */
2998 if (hugepage_subpool_get_pages(spool, chg)) 3009 if (hugepage_subpool_get_pages(spool, chg)) {
2999 return -ENOSPC; 3010 ret = -ENOSPC;
3011 goto out_err;
3012 }
3000 3013
3001 /* 3014 /*
3002 * Check enough hugepages are available for the reservation. 3015 * Check enough hugepages are available for the reservation.
@@ -3005,7 +3018,7 @@ int hugetlb_reserve_pages(struct inode *inode,
3005 ret = hugetlb_acct_memory(h, chg); 3018 ret = hugetlb_acct_memory(h, chg);
3006 if (ret < 0) { 3019 if (ret < 0) {
3007 hugepage_subpool_put_pages(spool, chg); 3020 hugepage_subpool_put_pages(spool, chg);
3008 return ret; 3021 goto out_err;
3009 } 3022 }
3010 3023
3011 /* 3024 /*
@@ -3022,6 +3035,9 @@ int hugetlb_reserve_pages(struct inode *inode,
3022 if (!vma || vma->vm_flags & VM_MAYSHARE) 3035 if (!vma || vma->vm_flags & VM_MAYSHARE)
3023 region_add(&inode->i_mapping->private_list, from, to); 3036 region_add(&inode->i_mapping->private_list, from, to);
3024 return 0; 3037 return 0;
3038out_err:
3039 resv_map_put(vma);
3040 return ret;
3025} 3041}
3026 3042
3027void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) 3043void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
diff --git a/mm/internal.h b/mm/internal.h
index aee4761cf9a9..4194ab9dc19b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page);
94/* 94/*
95 * in mm/page_alloc.c 95 * in mm/page_alloc.c
96 */ 96 */
97extern void set_pageblock_migratetype(struct page *page, int migratetype);
98extern int move_freepages_block(struct zone *zone, struct page *page,
99 int migratetype);
97extern void __free_pages_bootmem(struct page *page, unsigned int order); 100extern void __free_pages_bootmem(struct page *page, unsigned int order);
98extern void prep_compound_page(struct page *page, unsigned long order); 101extern void prep_compound_page(struct page *page, unsigned long order);
99#ifdef CONFIG_MEMORY_FAILURE 102#ifdef CONFIG_MEMORY_FAILURE
@@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page);
101#endif 104#endif
102 105
103#if defined CONFIG_COMPACTION || defined CONFIG_CMA 106#if defined CONFIG_COMPACTION || defined CONFIG_CMA
107#include <linux/compaction.h>
104 108
105/* 109/*
106 * in mm/compaction.c 110 * in mm/compaction.c
@@ -119,11 +123,14 @@ struct compact_control {
119 unsigned long nr_migratepages; /* Number of pages to migrate */ 123 unsigned long nr_migratepages; /* Number of pages to migrate */
120 unsigned long free_pfn; /* isolate_freepages search base */ 124 unsigned long free_pfn; /* isolate_freepages search base */
121 unsigned long migrate_pfn; /* isolate_migratepages search base */ 125 unsigned long migrate_pfn; /* isolate_migratepages search base */
122 bool sync; /* Synchronous migration */ 126 enum compact_mode mode; /* Compaction mode */
123 127
124 int order; /* order a direct compactor needs */ 128 int order; /* order a direct compactor needs */
125 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 129 int migratetype; /* MOVABLE, RECLAIMABLE etc */
126 struct zone *zone; 130 struct zone *zone;
131
132 /* Number of UNMOVABLE destination pageblocks skipped during scan */
133 unsigned long nr_pageblocks_skipped;
127}; 134};
128 135
129unsigned long 136unsigned long
@@ -164,7 +171,8 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
164 * to determine if it's being mapped into a LOCKED vma. 171 * to determine if it's being mapped into a LOCKED vma.
165 * If so, mark page as mlocked. 172 * If so, mark page as mlocked.
166 */ 173 */
167static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) 174static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
175 struct page *page)
168{ 176{
169 VM_BUG_ON(PageLRU(page)); 177 VM_BUG_ON(PageLRU(page));
170 178
@@ -222,7 +230,7 @@ extern unsigned long vma_address(struct page *page,
222 struct vm_area_struct *vma); 230 struct vm_area_struct *vma);
223#endif 231#endif
224#else /* !CONFIG_MMU */ 232#else /* !CONFIG_MMU */
225static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) 233static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
226{ 234{
227 return 0; 235 return 0;
228} 236}
diff --git a/mm/madvise.c b/mm/madvise.c
index 1ccbba5b6674..deff1b64a08c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,8 +11,10 @@
11#include <linux/mempolicy.h> 11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h> 12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h> 13#include <linux/hugetlb.h>
14#include <linux/falloc.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/ksm.h> 16#include <linux/ksm.h>
17#include <linux/fs.h>
16 18
17/* 19/*
18 * Any behaviour which results in changes to the vma->vm_flags needs to 20 * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -200,8 +202,7 @@ static long madvise_remove(struct vm_area_struct *vma,
200 struct vm_area_struct **prev, 202 struct vm_area_struct **prev,
201 unsigned long start, unsigned long end) 203 unsigned long start, unsigned long end)
202{ 204{
203 struct address_space *mapping; 205 loff_t offset;
204 loff_t offset, endoff;
205 int error; 206 int error;
206 207
207 *prev = NULL; /* tell sys_madvise we drop mmap_sem */ 208 *prev = NULL; /* tell sys_madvise we drop mmap_sem */
@@ -217,16 +218,14 @@ static long madvise_remove(struct vm_area_struct *vma,
217 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) 218 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
218 return -EACCES; 219 return -EACCES;
219 220
220 mapping = vma->vm_file->f_mapping;
221
222 offset = (loff_t)(start - vma->vm_start) 221 offset = (loff_t)(start - vma->vm_start)
223 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 222 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
224 endoff = (loff_t)(end - vma->vm_start - 1)
225 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
226 223
227 /* vmtruncate_range needs to take i_mutex */ 224 /* filesystem's fallocate may need to take i_mutex */
228 up_read(&current->mm->mmap_sem); 225 up_read(&current->mm->mmap_sem);
229 error = vmtruncate_range(mapping->host, offset, endoff); 226 error = do_fallocate(vma->vm_file,
227 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
228 offset, end - start);
230 down_read(&current->mm->mmap_sem); 229 down_read(&current->mm->mmap_sem);
231 return error; 230 return error;
232} 231}
diff --git a/mm/memblock.c b/mm/memblock.c
index a44eab3157f8..952123eba433 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -37,6 +37,8 @@ struct memblock memblock __initdata_memblock = {
37 37
38int memblock_debug __initdata_memblock; 38int memblock_debug __initdata_memblock;
39static int memblock_can_resize __initdata_memblock; 39static int memblock_can_resize __initdata_memblock;
40static int memblock_memory_in_slab __initdata_memblock = 0;
41static int memblock_reserved_in_slab __initdata_memblock = 0;
40 42
41/* inline so we don't get a warning when pr_debug is compiled out */ 43/* inline so we don't get a warning when pr_debug is compiled out */
42static inline const char *memblock_type_name(struct memblock_type *type) 44static inline const char *memblock_type_name(struct memblock_type *type)
@@ -187,6 +189,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
187 struct memblock_region *new_array, *old_array; 189 struct memblock_region *new_array, *old_array;
188 phys_addr_t old_size, new_size, addr; 190 phys_addr_t old_size, new_size, addr;
189 int use_slab = slab_is_available(); 191 int use_slab = slab_is_available();
192 int *in_slab;
190 193
191 /* We don't allow resizing until we know about the reserved regions 194 /* We don't allow resizing until we know about the reserved regions
192 * of memory that aren't suitable for allocation 195 * of memory that aren't suitable for allocation
@@ -198,6 +201,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
198 old_size = type->max * sizeof(struct memblock_region); 201 old_size = type->max * sizeof(struct memblock_region);
199 new_size = old_size << 1; 202 new_size = old_size << 1;
200 203
204 /* Retrieve the slab flag */
205 if (type == &memblock.memory)
206 in_slab = &memblock_memory_in_slab;
207 else
208 in_slab = &memblock_reserved_in_slab;
209
201 /* Try to find some space for it. 210 /* Try to find some space for it.
202 * 211 *
203 * WARNING: We assume that either slab_is_available() and we use it or 212 * WARNING: We assume that either slab_is_available() and we use it or
@@ -212,14 +221,15 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
212 if (use_slab) { 221 if (use_slab) {
213 new_array = kmalloc(new_size, GFP_KERNEL); 222 new_array = kmalloc(new_size, GFP_KERNEL);
214 addr = new_array ? __pa(new_array) : 0; 223 addr = new_array ? __pa(new_array) : 0;
215 } else 224 } else {
216 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); 225 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
226 new_array = addr ? __va(addr) : 0;
227 }
217 if (!addr) { 228 if (!addr) {
218 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 229 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
219 memblock_type_name(type), type->max, type->max * 2); 230 memblock_type_name(type), type->max, type->max * 2);
220 return -1; 231 return -1;
221 } 232 }
222 new_array = __va(addr);
223 233
224 memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]", 234 memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]",
225 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1); 235 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1);
@@ -234,22 +244,24 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
234 type->regions = new_array; 244 type->regions = new_array;
235 type->max <<= 1; 245 type->max <<= 1;
236 246
237 /* If we use SLAB that's it, we are done */ 247 /* Free old array. We needn't free it if the array is the
238 if (use_slab) 248 * static one
239 return 0;
240
241 /* Add the new reserved region now. Should not fail ! */
242 BUG_ON(memblock_reserve(addr, new_size));
243
244 /* If the array wasn't our static init one, then free it. We only do
245 * that before SLAB is available as later on, we don't know whether
246 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal
247 * anyways
248 */ 249 */
249 if (old_array != memblock_memory_init_regions && 250 if (*in_slab)
250 old_array != memblock_reserved_init_regions) 251 kfree(old_array);
252 else if (old_array != memblock_memory_init_regions &&
253 old_array != memblock_reserved_init_regions)
251 memblock_free(__pa(old_array), old_size); 254 memblock_free(__pa(old_array), old_size);
252 255
256 /* Reserve the new array if that comes from the memblock.
257 * Otherwise, we needn't do it
258 */
259 if (!use_slab)
260 BUG_ON(memblock_reserve(addr, new_size));
261
262 /* Update slab flag */
263 *in_slab = use_slab;
264
253 return 0; 265 return 0;
254} 266}
255 267
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f342778a0c0a..ac35bccadb7b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,7 +59,7 @@
59 59
60struct cgroup_subsys mem_cgroup_subsys __read_mostly; 60struct cgroup_subsys mem_cgroup_subsys __read_mostly;
61#define MEM_CGROUP_RECLAIM_RETRIES 5 61#define MEM_CGROUP_RECLAIM_RETRIES 5
62struct mem_cgroup *root_mem_cgroup __read_mostly; 62static struct mem_cgroup *root_mem_cgroup __read_mostly;
63 63
64#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 64#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
65/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ 65/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -73,7 +73,7 @@ static int really_do_swap_account __initdata = 0;
73#endif 73#endif
74 74
75#else 75#else
76#define do_swap_account (0) 76#define do_swap_account 0
77#endif 77#endif
78 78
79 79
@@ -88,18 +88,31 @@ enum mem_cgroup_stat_index {
88 MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ 88 MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
89 MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ 89 MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
90 MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ 90 MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
91 MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
92 MEM_CGROUP_STAT_NSTATS, 91 MEM_CGROUP_STAT_NSTATS,
93}; 92};
94 93
94static const char * const mem_cgroup_stat_names[] = {
95 "cache",
96 "rss",
97 "mapped_file",
98 "swap",
99};
100
95enum mem_cgroup_events_index { 101enum mem_cgroup_events_index {
96 MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ 102 MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
97 MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ 103 MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
98 MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */
99 MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ 104 MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
100 MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ 105 MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
101 MEM_CGROUP_EVENTS_NSTATS, 106 MEM_CGROUP_EVENTS_NSTATS,
102}; 107};
108
109static const char * const mem_cgroup_events_names[] = {
110 "pgpgin",
111 "pgpgout",
112 "pgfault",
113 "pgmajfault",
114};
115
103/* 116/*
104 * Per memcg event counter is incremented at every pagein/pageout. With THP, 117 * Per memcg event counter is incremented at every pagein/pageout. With THP,
105 * it will be incremated by the number of pages. This counter is used for 118 * it will be incremated by the number of pages. This counter is used for
@@ -112,13 +125,14 @@ enum mem_cgroup_events_target {
112 MEM_CGROUP_TARGET_NUMAINFO, 125 MEM_CGROUP_TARGET_NUMAINFO,
113 MEM_CGROUP_NTARGETS, 126 MEM_CGROUP_NTARGETS,
114}; 127};
115#define THRESHOLDS_EVENTS_TARGET (128) 128#define THRESHOLDS_EVENTS_TARGET 128
116#define SOFTLIMIT_EVENTS_TARGET (1024) 129#define SOFTLIMIT_EVENTS_TARGET 1024
117#define NUMAINFO_EVENTS_TARGET (1024) 130#define NUMAINFO_EVENTS_TARGET 1024
118 131
119struct mem_cgroup_stat_cpu { 132struct mem_cgroup_stat_cpu {
120 long count[MEM_CGROUP_STAT_NSTATS]; 133 long count[MEM_CGROUP_STAT_NSTATS];
121 unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; 134 unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
135 unsigned long nr_page_events;
122 unsigned long targets[MEM_CGROUP_NTARGETS]; 136 unsigned long targets[MEM_CGROUP_NTARGETS];
123}; 137};
124 138
@@ -138,7 +152,6 @@ struct mem_cgroup_per_zone {
138 152
139 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; 153 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
140 154
141 struct zone_reclaim_stat reclaim_stat;
142 struct rb_node tree_node; /* RB tree node */ 155 struct rb_node tree_node; /* RB tree node */
143 unsigned long long usage_in_excess;/* Set to the value by which */ 156 unsigned long long usage_in_excess;/* Set to the value by which */
144 /* the soft limit is exceeded*/ 157 /* the soft limit is exceeded*/
@@ -182,7 +195,7 @@ struct mem_cgroup_threshold {
182 195
183/* For threshold */ 196/* For threshold */
184struct mem_cgroup_threshold_ary { 197struct mem_cgroup_threshold_ary {
185 /* An array index points to threshold just below usage. */ 198 /* An array index points to threshold just below or equal to usage. */
186 int current_threshold; 199 int current_threshold;
187 /* Size of entries[] */ 200 /* Size of entries[] */
188 unsigned int size; 201 unsigned int size;
@@ -245,8 +258,8 @@ struct mem_cgroup {
245 */ 258 */
246 struct rcu_head rcu_freeing; 259 struct rcu_head rcu_freeing;
247 /* 260 /*
248 * But when using vfree(), that cannot be done at 261 * We also need some space for a worker in deferred freeing.
249 * interrupt time, so we must then queue the work. 262 * By the time we call it, rcu_freeing is no longer in use.
250 */ 263 */
251 struct work_struct work_freeing; 264 struct work_struct work_freeing;
252 }; 265 };
@@ -305,7 +318,7 @@ struct mem_cgroup {
305 /* 318 /*
306 * percpu counter. 319 * percpu counter.
307 */ 320 */
308 struct mem_cgroup_stat_cpu *stat; 321 struct mem_cgroup_stat_cpu __percpu *stat;
309 /* 322 /*
310 * used when a cpu is offlined or other synchronizations 323 * used when a cpu is offlined or other synchronizations
311 * See mem_cgroup_read_stat(). 324 * See mem_cgroup_read_stat().
@@ -360,8 +373,8 @@ static bool move_file(void)
360 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft 373 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
361 * limit reclaim to prevent infinite loops, if they ever occur. 374 * limit reclaim to prevent infinite loops, if they ever occur.
362 */ 375 */
363#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) 376#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
364#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) 377#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
365 378
366enum charge_type { 379enum charge_type {
367 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 380 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -377,8 +390,8 @@ enum charge_type {
377#define _MEM (0) 390#define _MEM (0)
378#define _MEMSWAP (1) 391#define _MEMSWAP (1)
379#define _OOM_TYPE (2) 392#define _OOM_TYPE (2)
380#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 393#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
381#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) 394#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff)
382#define MEMFILE_ATTR(val) ((val) & 0xffff) 395#define MEMFILE_ATTR(val) ((val) & 0xffff)
383/* Used for OOM nofiier */ 396/* Used for OOM nofiier */
384#define OOM_CONTROL (0) 397#define OOM_CONTROL (0)
@@ -404,6 +417,7 @@ void sock_update_memcg(struct sock *sk)
404{ 417{
405 if (mem_cgroup_sockets_enabled) { 418 if (mem_cgroup_sockets_enabled) {
406 struct mem_cgroup *memcg; 419 struct mem_cgroup *memcg;
420 struct cg_proto *cg_proto;
407 421
408 BUG_ON(!sk->sk_prot->proto_cgroup); 422 BUG_ON(!sk->sk_prot->proto_cgroup);
409 423
@@ -423,9 +437,10 @@ void sock_update_memcg(struct sock *sk)
423 437
424 rcu_read_lock(); 438 rcu_read_lock();
425 memcg = mem_cgroup_from_task(current); 439 memcg = mem_cgroup_from_task(current);
426 if (!mem_cgroup_is_root(memcg)) { 440 cg_proto = sk->sk_prot->proto_cgroup(memcg);
441 if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) {
427 mem_cgroup_get(memcg); 442 mem_cgroup_get(memcg);
428 sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg); 443 sk->sk_cgrp = cg_proto;
429 } 444 }
430 rcu_read_unlock(); 445 rcu_read_unlock();
431 } 446 }
@@ -454,6 +469,19 @@ EXPORT_SYMBOL(tcp_proto_cgroup);
454#endif /* CONFIG_INET */ 469#endif /* CONFIG_INET */
455#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ 470#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
456 471
472#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
473static void disarm_sock_keys(struct mem_cgroup *memcg)
474{
475 if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
476 return;
477 static_key_slow_dec(&memcg_socket_limit_enabled);
478}
479#else
480static void disarm_sock_keys(struct mem_cgroup *memcg)
481{
482}
483#endif
484
457static void drain_all_stock_async(struct mem_cgroup *memcg); 485static void drain_all_stock_async(struct mem_cgroup *memcg);
458 486
459static struct mem_cgroup_per_zone * 487static struct mem_cgroup_per_zone *
@@ -718,12 +746,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
718 nr_pages = -nr_pages; /* for event */ 746 nr_pages = -nr_pages; /* for event */
719 } 747 }
720 748
721 __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); 749 __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
722 750
723 preempt_enable(); 751 preempt_enable();
724} 752}
725 753
726unsigned long 754unsigned long
755mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
756{
757 struct mem_cgroup_per_zone *mz;
758
759 mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
760 return mz->lru_size[lru];
761}
762
763static unsigned long
727mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, 764mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
728 unsigned int lru_mask) 765 unsigned int lru_mask)
729{ 766{
@@ -770,7 +807,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
770{ 807{
771 unsigned long val, next; 808 unsigned long val, next;
772 809
773 val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); 810 val = __this_cpu_read(memcg->stat->nr_page_events);
774 next = __this_cpu_read(memcg->stat->targets[target]); 811 next = __this_cpu_read(memcg->stat->targets[target]);
775 /* from time_after() in jiffies.h */ 812 /* from time_after() in jiffies.h */
776 if ((long)next - (long)val < 0) { 813 if ((long)next - (long)val < 0) {
@@ -1013,7 +1050,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
1013/** 1050/**
1014 * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg 1051 * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
1015 * @zone: zone of the wanted lruvec 1052 * @zone: zone of the wanted lruvec
1016 * @mem: memcg of the wanted lruvec 1053 * @memcg: memcg of the wanted lruvec
1017 * 1054 *
1018 * Returns the lru list vector holding pages for the given @zone and 1055 * Returns the lru list vector holding pages for the given @zone and
1019 * @mem. This can be the global zone lruvec, if the memory controller 1056 * @mem. This can be the global zone lruvec, if the memory controller
@@ -1046,19 +1083,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
1046 */ 1083 */
1047 1084
1048/** 1085/**
1049 * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec 1086 * mem_cgroup_page_lruvec - return lruvec for adding an lru page
1050 * @zone: zone of the page
1051 * @page: the page 1087 * @page: the page
1052 * @lru: current lru 1088 * @zone: zone of the page
1053 *
1054 * This function accounts for @page being added to @lru, and returns
1055 * the lruvec for the given @zone and the memcg @page is charged to.
1056 *
1057 * The callsite is then responsible for physically linking the page to
1058 * the returned lruvec->lists[@lru].
1059 */ 1089 */
1060struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, 1090struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
1061 enum lru_list lru)
1062{ 1091{
1063 struct mem_cgroup_per_zone *mz; 1092 struct mem_cgroup_per_zone *mz;
1064 struct mem_cgroup *memcg; 1093 struct mem_cgroup *memcg;
@@ -1071,7 +1100,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
1071 memcg = pc->mem_cgroup; 1100 memcg = pc->mem_cgroup;
1072 1101
1073 /* 1102 /*
1074 * Surreptitiously switch any uncharged page to root: 1103 * Surreptitiously switch any uncharged offlist page to root:
1075 * an uncharged page off lru does nothing to secure 1104 * an uncharged page off lru does nothing to secure
1076 * its former mem_cgroup from sudden removal. 1105 * its former mem_cgroup from sudden removal.
1077 * 1106 *
@@ -1079,85 +1108,60 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
1079 * under page_cgroup lock: between them, they make all uses 1108 * under page_cgroup lock: between them, they make all uses
1080 * of pc->mem_cgroup safe. 1109 * of pc->mem_cgroup safe.
1081 */ 1110 */
1082 if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup) 1111 if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
1083 pc->mem_cgroup = memcg = root_mem_cgroup; 1112 pc->mem_cgroup = memcg = root_mem_cgroup;
1084 1113
1085 mz = page_cgroup_zoneinfo(memcg, page); 1114 mz = page_cgroup_zoneinfo(memcg, page);
1086 /* compound_order() is stabilized through lru_lock */
1087 mz->lru_size[lru] += 1 << compound_order(page);
1088 return &mz->lruvec; 1115 return &mz->lruvec;
1089} 1116}
1090 1117
1091/** 1118/**
1092 * mem_cgroup_lru_del_list - account for removing an lru page 1119 * mem_cgroup_update_lru_size - account for adding or removing an lru page
1093 * @page: the page 1120 * @lruvec: mem_cgroup per zone lru vector
1094 * @lru: target lru 1121 * @lru: index of lru list the page is sitting on
1095 * 1122 * @nr_pages: positive when adding or negative when removing
1096 * This function accounts for @page being removed from @lru.
1097 * 1123 *
1098 * The callsite is then responsible for physically unlinking 1124 * This function must be called when a page is added to or removed from an
1099 * @page->lru. 1125 * lru list.
1100 */ 1126 */
1101void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) 1127void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
1128 int nr_pages)
1102{ 1129{
1103 struct mem_cgroup_per_zone *mz; 1130 struct mem_cgroup_per_zone *mz;
1104 struct mem_cgroup *memcg; 1131 unsigned long *lru_size;
1105 struct page_cgroup *pc;
1106 1132
1107 if (mem_cgroup_disabled()) 1133 if (mem_cgroup_disabled())
1108 return; 1134 return;
1109 1135
1110 pc = lookup_page_cgroup(page); 1136 mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
1111 memcg = pc->mem_cgroup; 1137 lru_size = mz->lru_size + lru;
1112 VM_BUG_ON(!memcg); 1138 *lru_size += nr_pages;
1113 mz = page_cgroup_zoneinfo(memcg, page); 1139 VM_BUG_ON((long)(*lru_size) < 0);
1114 /* huge page split is done under lru_lock. so, we have no races. */
1115 VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page)));
1116 mz->lru_size[lru] -= 1 << compound_order(page);
1117}
1118
1119void mem_cgroup_lru_del(struct page *page)
1120{
1121 mem_cgroup_lru_del_list(page, page_lru(page));
1122}
1123
1124/**
1125 * mem_cgroup_lru_move_lists - account for moving a page between lrus
1126 * @zone: zone of the page
1127 * @page: the page
1128 * @from: current lru
1129 * @to: target lru
1130 *
1131 * This function accounts for @page being moved between the lrus @from
1132 * and @to, and returns the lruvec for the given @zone and the memcg
1133 * @page is charged to.
1134 *
1135 * The callsite is then responsible for physically relinking
1136 * @page->lru to the returned lruvec->lists[@to].
1137 */
1138struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
1139 struct page *page,
1140 enum lru_list from,
1141 enum lru_list to)
1142{
1143 /* XXX: Optimize this, especially for @from == @to */
1144 mem_cgroup_lru_del_list(page, from);
1145 return mem_cgroup_lru_add_list(zone, page, to);
1146} 1140}
1147 1141
1148/* 1142/*
1149 * Checks whether given mem is same or in the root_mem_cgroup's 1143 * Checks whether given mem is same or in the root_mem_cgroup's
1150 * hierarchy subtree 1144 * hierarchy subtree
1151 */ 1145 */
1146bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1147 struct mem_cgroup *memcg)
1148{
1149 if (root_memcg == memcg)
1150 return true;
1151 if (!root_memcg->use_hierarchy)
1152 return false;
1153 return css_is_ancestor(&memcg->css, &root_memcg->css);
1154}
1155
1152static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, 1156static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1153 struct mem_cgroup *memcg) 1157 struct mem_cgroup *memcg)
1154{ 1158{
1155 if (root_memcg != memcg) { 1159 bool ret;
1156 return (root_memcg->use_hierarchy &&
1157 css_is_ancestor(&memcg->css, &root_memcg->css));
1158 }
1159 1160
1160 return true; 1161 rcu_read_lock();
1162 ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
1163 rcu_read_unlock();
1164 return ret;
1161} 1165}
1162 1166
1163int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) 1167int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
@@ -1195,19 +1199,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
1195 return ret; 1199 return ret;
1196} 1200}
1197 1201
1198int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) 1202int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
1199{ 1203{
1200 unsigned long inactive_ratio; 1204 unsigned long inactive_ratio;
1201 int nid = zone_to_nid(zone);
1202 int zid = zone_idx(zone);
1203 unsigned long inactive; 1205 unsigned long inactive;
1204 unsigned long active; 1206 unsigned long active;
1205 unsigned long gb; 1207 unsigned long gb;
1206 1208
1207 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, 1209 inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
1208 BIT(LRU_INACTIVE_ANON)); 1210 active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
1209 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1210 BIT(LRU_ACTIVE_ANON));
1211 1211
1212 gb = (inactive + active) >> (30 - PAGE_SHIFT); 1212 gb = (inactive + active) >> (30 - PAGE_SHIFT);
1213 if (gb) 1213 if (gb)
@@ -1218,49 +1218,17 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
1218 return inactive * inactive_ratio < active; 1218 return inactive * inactive_ratio < active;
1219} 1219}
1220 1220
1221int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) 1221int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
1222{ 1222{
1223 unsigned long active; 1223 unsigned long active;
1224 unsigned long inactive; 1224 unsigned long inactive;
1225 int zid = zone_idx(zone);
1226 int nid = zone_to_nid(zone);
1227 1225
1228 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, 1226 inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
1229 BIT(LRU_INACTIVE_FILE)); 1227 active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE);
1230 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1231 BIT(LRU_ACTIVE_FILE));
1232 1228
1233 return (active > inactive); 1229 return (active > inactive);
1234} 1230}
1235 1231
1236struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
1237 struct zone *zone)
1238{
1239 int nid = zone_to_nid(zone);
1240 int zid = zone_idx(zone);
1241 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
1242
1243 return &mz->reclaim_stat;
1244}
1245
1246struct zone_reclaim_stat *
1247mem_cgroup_get_reclaim_stat_from_page(struct page *page)
1248{
1249 struct page_cgroup *pc;
1250 struct mem_cgroup_per_zone *mz;
1251
1252 if (mem_cgroup_disabled())
1253 return NULL;
1254
1255 pc = lookup_page_cgroup(page);
1256 if (!PageCgroupUsed(pc))
1257 return NULL;
1258 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
1259 smp_rmb();
1260 mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
1261 return &mz->reclaim_stat;
1262}
1263
1264#define mem_cgroup_from_res_counter(counter, member) \ 1232#define mem_cgroup_from_res_counter(counter, member) \
1265 container_of(counter, struct mem_cgroup, member) 1233 container_of(counter, struct mem_cgroup, member)
1266 1234
@@ -1634,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1634 * unused nodes. But scan_nodes is lazily updated and may not cotain 1602 * unused nodes. But scan_nodes is lazily updated and may not cotain
1635 * enough new information. We need to do double check. 1603 * enough new information. We need to do double check.
1636 */ 1604 */
1637bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) 1605static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1638{ 1606{
1639 int nid; 1607 int nid;
1640 1608
@@ -1669,7 +1637,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1669 return 0; 1637 return 0;
1670} 1638}
1671 1639
1672bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) 1640static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1673{ 1641{
1674 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); 1642 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
1675} 1643}
@@ -1843,7 +1811,8 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
1843/* 1811/*
1844 * try to call OOM killer. returns false if we should exit memory-reclaim loop. 1812 * try to call OOM killer. returns false if we should exit memory-reclaim loop.
1845 */ 1813 */
1846bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order) 1814static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
1815 int order)
1847{ 1816{
1848 struct oom_wait_info owait; 1817 struct oom_wait_info owait;
1849 bool locked, need_to_kill; 1818 bool locked, need_to_kill;
@@ -1992,7 +1961,7 @@ struct memcg_stock_pcp {
1992 unsigned int nr_pages; 1961 unsigned int nr_pages;
1993 struct work_struct work; 1962 struct work_struct work;
1994 unsigned long flags; 1963 unsigned long flags;
1995#define FLUSHING_CACHED_CHARGE (0) 1964#define FLUSHING_CACHED_CHARGE 0
1996}; 1965};
1997static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); 1966static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
1998static DEFINE_MUTEX(percpu_charge_mutex); 1967static DEFINE_MUTEX(percpu_charge_mutex);
@@ -2139,7 +2108,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
2139 int i; 2108 int i;
2140 2109
2141 spin_lock(&memcg->pcp_counter_lock); 2110 spin_lock(&memcg->pcp_counter_lock);
2142 for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { 2111 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
2143 long x = per_cpu(memcg->stat->count[i], cpu); 2112 long x = per_cpu(memcg->stat->count[i], cpu);
2144 2113
2145 per_cpu(memcg->stat->count[i], cpu) = 0; 2114 per_cpu(memcg->stat->count[i], cpu) = 0;
@@ -2427,6 +2396,24 @@ static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2427} 2396}
2428 2397
2429/* 2398/*
2399 * Cancel chrages in this cgroup....doesn't propagate to parent cgroup.
2400 * This is useful when moving usage to parent cgroup.
2401 */
2402static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2403 unsigned int nr_pages)
2404{
2405 unsigned long bytes = nr_pages * PAGE_SIZE;
2406
2407 if (mem_cgroup_is_root(memcg))
2408 return;
2409
2410 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
2411 if (do_swap_account)
2412 res_counter_uncharge_until(&memcg->memsw,
2413 memcg->memsw.parent, bytes);
2414}
2415
2416/*
2430 * A helper function to get mem_cgroup from ID. must be called under 2417 * A helper function to get mem_cgroup from ID. must be called under
2431 * rcu_read_lock(). The caller must check css_is_removed() or some if 2418 * rcu_read_lock(). The caller must check css_is_removed() or some if
2432 * it's concern. (dropping refcnt from swap can be called against removed 2419 * it's concern. (dropping refcnt from swap can be called against removed
@@ -2481,6 +2468,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2481{ 2468{
2482 struct page_cgroup *pc = lookup_page_cgroup(page); 2469 struct page_cgroup *pc = lookup_page_cgroup(page);
2483 struct zone *uninitialized_var(zone); 2470 struct zone *uninitialized_var(zone);
2471 struct lruvec *lruvec;
2484 bool was_on_lru = false; 2472 bool was_on_lru = false;
2485 bool anon; 2473 bool anon;
2486 2474
@@ -2503,8 +2491,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2503 zone = page_zone(page); 2491 zone = page_zone(page);
2504 spin_lock_irq(&zone->lru_lock); 2492 spin_lock_irq(&zone->lru_lock);
2505 if (PageLRU(page)) { 2493 if (PageLRU(page)) {
2494 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2506 ClearPageLRU(page); 2495 ClearPageLRU(page);
2507 del_page_from_lru_list(zone, page, page_lru(page)); 2496 del_page_from_lru_list(page, lruvec, page_lru(page));
2508 was_on_lru = true; 2497 was_on_lru = true;
2509 } 2498 }
2510 } 2499 }
@@ -2522,9 +2511,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2522 2511
2523 if (lrucare) { 2512 if (lrucare) {
2524 if (was_on_lru) { 2513 if (was_on_lru) {
2514 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2525 VM_BUG_ON(PageLRU(page)); 2515 VM_BUG_ON(PageLRU(page));
2526 SetPageLRU(page); 2516 SetPageLRU(page);
2527 add_page_to_lru_list(zone, page, page_lru(page)); 2517 add_page_to_lru_list(page, lruvec, page_lru(page));
2528 } 2518 }
2529 spin_unlock_irq(&zone->lru_lock); 2519 spin_unlock_irq(&zone->lru_lock);
2530 } 2520 }
@@ -2547,7 +2537,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2547 2537
2548#ifdef CONFIG_TRANSPARENT_HUGEPAGE 2538#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2549 2539
2550#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) 2540#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
2551/* 2541/*
2552 * Because tail pages are not marked as "used", set it. We're under 2542 * Because tail pages are not marked as "used", set it. We're under
2553 * zone->lru_lock, 'splitting on pmd' and compound_lock. 2543 * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2578,23 +2568,19 @@ void mem_cgroup_split_huge_fixup(struct page *head)
2578 * @pc: page_cgroup of the page. 2568 * @pc: page_cgroup of the page.
2579 * @from: mem_cgroup which the page is moved from. 2569 * @from: mem_cgroup which the page is moved from.
2580 * @to: mem_cgroup which the page is moved to. @from != @to. 2570 * @to: mem_cgroup which the page is moved to. @from != @to.
2581 * @uncharge: whether we should call uncharge and css_put against @from.
2582 * 2571 *
2583 * The caller must confirm following. 2572 * The caller must confirm following.
2584 * - page is not on LRU (isolate_page() is useful.) 2573 * - page is not on LRU (isolate_page() is useful.)
2585 * - compound_lock is held when nr_pages > 1 2574 * - compound_lock is held when nr_pages > 1
2586 * 2575 *
2587 * This function doesn't do "charge" nor css_get to new cgroup. It should be 2576 * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
2588 * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is 2577 * from old cgroup.
2589 * true, this function does "uncharge" from old cgroup, but it doesn't if
2590 * @uncharge is false, so a caller should do "uncharge".
2591 */ 2578 */
2592static int mem_cgroup_move_account(struct page *page, 2579static int mem_cgroup_move_account(struct page *page,
2593 unsigned int nr_pages, 2580 unsigned int nr_pages,
2594 struct page_cgroup *pc, 2581 struct page_cgroup *pc,
2595 struct mem_cgroup *from, 2582 struct mem_cgroup *from,
2596 struct mem_cgroup *to, 2583 struct mem_cgroup *to)
2597 bool uncharge)
2598{ 2584{
2599 unsigned long flags; 2585 unsigned long flags;
2600 int ret; 2586 int ret;
@@ -2628,9 +2614,6 @@ static int mem_cgroup_move_account(struct page *page,
2628 preempt_enable(); 2614 preempt_enable();
2629 } 2615 }
2630 mem_cgroup_charge_statistics(from, anon, -nr_pages); 2616 mem_cgroup_charge_statistics(from, anon, -nr_pages);
2631 if (uncharge)
2632 /* This is not "cancel", but cancel_charge does all we need. */
2633 __mem_cgroup_cancel_charge(from, nr_pages);
2634 2617
2635 /* caller should have done css_get */ 2618 /* caller should have done css_get */
2636 pc->mem_cgroup = to; 2619 pc->mem_cgroup = to;
@@ -2664,15 +2647,13 @@ static int mem_cgroup_move_parent(struct page *page,
2664 struct mem_cgroup *child, 2647 struct mem_cgroup *child,
2665 gfp_t gfp_mask) 2648 gfp_t gfp_mask)
2666{ 2649{
2667 struct cgroup *cg = child->css.cgroup;
2668 struct cgroup *pcg = cg->parent;
2669 struct mem_cgroup *parent; 2650 struct mem_cgroup *parent;
2670 unsigned int nr_pages; 2651 unsigned int nr_pages;
2671 unsigned long uninitialized_var(flags); 2652 unsigned long uninitialized_var(flags);
2672 int ret; 2653 int ret;
2673 2654
2674 /* Is ROOT ? */ 2655 /* Is ROOT ? */
2675 if (!pcg) 2656 if (mem_cgroup_is_root(child))
2676 return -EINVAL; 2657 return -EINVAL;
2677 2658
2678 ret = -EBUSY; 2659 ret = -EBUSY;
@@ -2683,21 +2664,23 @@ static int mem_cgroup_move_parent(struct page *page,
2683 2664
2684 nr_pages = hpage_nr_pages(page); 2665 nr_pages = hpage_nr_pages(page);
2685 2666
2686 parent = mem_cgroup_from_cont(pcg); 2667 parent = parent_mem_cgroup(child);
2687 ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); 2668 /*
2688 if (ret) 2669 * If no parent, move charges to root cgroup.
2689 goto put_back; 2670 */
2671 if (!parent)
2672 parent = root_mem_cgroup;
2690 2673
2691 if (nr_pages > 1) 2674 if (nr_pages > 1)
2692 flags = compound_lock_irqsave(page); 2675 flags = compound_lock_irqsave(page);
2693 2676
2694 ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true); 2677 ret = mem_cgroup_move_account(page, nr_pages,
2695 if (ret) 2678 pc, child, parent);
2696 __mem_cgroup_cancel_charge(parent, nr_pages); 2679 if (!ret)
2680 __mem_cgroup_cancel_local_charge(child, nr_pages);
2697 2681
2698 if (nr_pages > 1) 2682 if (nr_pages > 1)
2699 compound_unlock_irqrestore(page, flags); 2683 compound_unlock_irqrestore(page, flags);
2700put_back:
2701 putback_lru_page(page); 2684 putback_lru_page(page);
2702put: 2685put:
2703 put_page(page); 2686 put_page(page);
@@ -2845,24 +2828,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
2845 */ 2828 */
2846 if (do_swap_account && PageSwapCache(page)) { 2829 if (do_swap_account && PageSwapCache(page)) {
2847 swp_entry_t ent = {.val = page_private(page)}; 2830 swp_entry_t ent = {.val = page_private(page)};
2848 struct mem_cgroup *swap_memcg; 2831 mem_cgroup_uncharge_swap(ent);
2849 unsigned short id;
2850
2851 id = swap_cgroup_record(ent, 0);
2852 rcu_read_lock();
2853 swap_memcg = mem_cgroup_lookup(id);
2854 if (swap_memcg) {
2855 /*
2856 * This recorded memcg can be obsolete one. So, avoid
2857 * calling css_tryget
2858 */
2859 if (!mem_cgroup_is_root(swap_memcg))
2860 res_counter_uncharge(&swap_memcg->memsw,
2861 PAGE_SIZE);
2862 mem_cgroup_swap_statistics(swap_memcg, false);
2863 mem_cgroup_put(swap_memcg);
2864 }
2865 rcu_read_unlock();
2866 } 2832 }
2867 /* 2833 /*
2868 * At swapin, we may charge account against cgroup which has no tasks. 2834 * At swapin, we may charge account against cgroup which has no tasks.
@@ -3155,7 +3121,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
3155 * @entry: swap entry to be moved 3121 * @entry: swap entry to be moved
3156 * @from: mem_cgroup which the entry is moved from 3122 * @from: mem_cgroup which the entry is moved from
3157 * @to: mem_cgroup which the entry is moved to 3123 * @to: mem_cgroup which the entry is moved to
3158 * @need_fixup: whether we should fixup res_counters and refcounts.
3159 * 3124 *
3160 * It succeeds only when the swap_cgroup's record for this entry is the same 3125 * It succeeds only when the swap_cgroup's record for this entry is the same
3161 * as the mem_cgroup's id of @from. 3126 * as the mem_cgroup's id of @from.
@@ -3166,7 +3131,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
3166 * both res and memsw, and called css_get(). 3131 * both res and memsw, and called css_get().
3167 */ 3132 */
3168static int mem_cgroup_move_swap_account(swp_entry_t entry, 3133static int mem_cgroup_move_swap_account(swp_entry_t entry,
3169 struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) 3134 struct mem_cgroup *from, struct mem_cgroup *to)
3170{ 3135{
3171 unsigned short old_id, new_id; 3136 unsigned short old_id, new_id;
3172 3137
@@ -3185,24 +3150,13 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
3185 * swap-in, the refcount of @to might be decreased to 0. 3150 * swap-in, the refcount of @to might be decreased to 0.
3186 */ 3151 */
3187 mem_cgroup_get(to); 3152 mem_cgroup_get(to);
3188 if (need_fixup) {
3189 if (!mem_cgroup_is_root(from))
3190 res_counter_uncharge(&from->memsw, PAGE_SIZE);
3191 mem_cgroup_put(from);
3192 /*
3193 * we charged both to->res and to->memsw, so we should
3194 * uncharge to->res.
3195 */
3196 if (!mem_cgroup_is_root(to))
3197 res_counter_uncharge(&to->res, PAGE_SIZE);
3198 }
3199 return 0; 3153 return 0;
3200 } 3154 }
3201 return -EINVAL; 3155 return -EINVAL;
3202} 3156}
3203#else 3157#else
3204static inline int mem_cgroup_move_swap_account(swp_entry_t entry, 3158static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3205 struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) 3159 struct mem_cgroup *from, struct mem_cgroup *to)
3206{ 3160{
3207 return -EINVAL; 3161 return -EINVAL;
3208} 3162}
@@ -3363,7 +3317,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3363void mem_cgroup_replace_page_cache(struct page *oldpage, 3317void mem_cgroup_replace_page_cache(struct page *oldpage,
3364 struct page *newpage) 3318 struct page *newpage)
3365{ 3319{
3366 struct mem_cgroup *memcg; 3320 struct mem_cgroup *memcg = NULL;
3367 struct page_cgroup *pc; 3321 struct page_cgroup *pc;
3368 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; 3322 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3369 3323
@@ -3373,11 +3327,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
3373 pc = lookup_page_cgroup(oldpage); 3327 pc = lookup_page_cgroup(oldpage);
3374 /* fix accounting on old pages */ 3328 /* fix accounting on old pages */
3375 lock_page_cgroup(pc); 3329 lock_page_cgroup(pc);
3376 memcg = pc->mem_cgroup; 3330 if (PageCgroupUsed(pc)) {
3377 mem_cgroup_charge_statistics(memcg, false, -1); 3331 memcg = pc->mem_cgroup;
3378 ClearPageCgroupUsed(pc); 3332 mem_cgroup_charge_statistics(memcg, false, -1);
3333 ClearPageCgroupUsed(pc);
3334 }
3379 unlock_page_cgroup(pc); 3335 unlock_page_cgroup(pc);
3380 3336
3337 /*
3338 * When called from shmem_replace_page(), in some cases the
3339 * oldpage has already been charged, and in some cases not.
3340 */
3341 if (!memcg)
3342 return;
3343
3381 if (PageSwapBacked(oldpage)) 3344 if (PageSwapBacked(oldpage))
3382 type = MEM_CGROUP_CHARGE_TYPE_SHMEM; 3345 type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3383 3346
@@ -3793,7 +3756,7 @@ try_to_free:
3793 goto move_account; 3756 goto move_account;
3794} 3757}
3795 3758
3796int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) 3759static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
3797{ 3760{
3798 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); 3761 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
3799} 3762}
@@ -4051,103 +4014,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4051} 4014}
4052#endif 4015#endif
4053 4016
4054
4055/* For read statistics */
4056enum {
4057 MCS_CACHE,
4058 MCS_RSS,
4059 MCS_FILE_MAPPED,
4060 MCS_PGPGIN,
4061 MCS_PGPGOUT,
4062 MCS_SWAP,
4063 MCS_PGFAULT,
4064 MCS_PGMAJFAULT,
4065 MCS_INACTIVE_ANON,
4066 MCS_ACTIVE_ANON,
4067 MCS_INACTIVE_FILE,
4068 MCS_ACTIVE_FILE,
4069 MCS_UNEVICTABLE,
4070 NR_MCS_STAT,
4071};
4072
4073struct mcs_total_stat {
4074 s64 stat[NR_MCS_STAT];
4075};
4076
4077struct {
4078 char *local_name;
4079 char *total_name;
4080} memcg_stat_strings[NR_MCS_STAT] = {
4081 {"cache", "total_cache"},
4082 {"rss", "total_rss"},
4083 {"mapped_file", "total_mapped_file"},
4084 {"pgpgin", "total_pgpgin"},
4085 {"pgpgout", "total_pgpgout"},
4086 {"swap", "total_swap"},
4087 {"pgfault", "total_pgfault"},
4088 {"pgmajfault", "total_pgmajfault"},
4089 {"inactive_anon", "total_inactive_anon"},
4090 {"active_anon", "total_active_anon"},
4091 {"inactive_file", "total_inactive_file"},
4092 {"active_file", "total_active_file"},
4093 {"unevictable", "total_unevictable"}
4094};
4095
4096
4097static void
4098mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4099{
4100 s64 val;
4101
4102 /* per cpu stat */
4103 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
4104 s->stat[MCS_CACHE] += val * PAGE_SIZE;
4105 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
4106 s->stat[MCS_RSS] += val * PAGE_SIZE;
4107 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
4108 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
4109 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
4110 s->stat[MCS_PGPGIN] += val;
4111 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
4112 s->stat[MCS_PGPGOUT] += val;
4113 if (do_swap_account) {
4114 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
4115 s->stat[MCS_SWAP] += val * PAGE_SIZE;
4116 }
4117 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT);
4118 s->stat[MCS_PGFAULT] += val;
4119 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT);
4120 s->stat[MCS_PGMAJFAULT] += val;
4121
4122 /* per zone stat */
4123 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
4124 s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
4125 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
4126 s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
4127 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
4128 s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
4129 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
4130 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
4131 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
4132 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
4133}
4134
4135static void
4136mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4137{
4138 struct mem_cgroup *iter;
4139
4140 for_each_mem_cgroup_tree(iter, memcg)
4141 mem_cgroup_get_local_stat(iter, s);
4142}
4143
4144#ifdef CONFIG_NUMA 4017#ifdef CONFIG_NUMA
4145static int mem_control_numa_stat_show(struct seq_file *m, void *arg) 4018static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft,
4019 struct seq_file *m)
4146{ 4020{
4147 int nid; 4021 int nid;
4148 unsigned long total_nr, file_nr, anon_nr, unevictable_nr; 4022 unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
4149 unsigned long node_nr; 4023 unsigned long node_nr;
4150 struct cgroup *cont = m->private;
4151 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 4024 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
4152 4025
4153 total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); 4026 total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
@@ -4188,64 +4061,100 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
4188} 4061}
4189#endif /* CONFIG_NUMA */ 4062#endif /* CONFIG_NUMA */
4190 4063
4064static const char * const mem_cgroup_lru_names[] = {
4065 "inactive_anon",
4066 "active_anon",
4067 "inactive_file",
4068 "active_file",
4069 "unevictable",
4070};
4071
4072static inline void mem_cgroup_lru_names_not_uptodate(void)
4073{
4074 BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
4075}
4076
4191static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, 4077static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
4192 struct cgroup_map_cb *cb) 4078 struct seq_file *m)
4193{ 4079{
4194 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 4080 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
4195 struct mcs_total_stat mystat; 4081 struct mem_cgroup *mi;
4196 int i; 4082 unsigned int i;
4197
4198 memset(&mystat, 0, sizeof(mystat));
4199 mem_cgroup_get_local_stat(memcg, &mystat);
4200 4083
4201 4084 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
4202 for (i = 0; i < NR_MCS_STAT; i++) { 4085 if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
4203 if (i == MCS_SWAP && !do_swap_account)
4204 continue; 4086 continue;
4205 cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); 4087 seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
4088 mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
4206 } 4089 }
4207 4090
4091 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++)
4092 seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i],
4093 mem_cgroup_read_events(memcg, i));
4094
4095 for (i = 0; i < NR_LRU_LISTS; i++)
4096 seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
4097 mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
4098
4208 /* Hierarchical information */ 4099 /* Hierarchical information */
4209 { 4100 {
4210 unsigned long long limit, memsw_limit; 4101 unsigned long long limit, memsw_limit;
4211 memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit); 4102 memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
4212 cb->fill(cb, "hierarchical_memory_limit", limit); 4103 seq_printf(m, "hierarchical_memory_limit %llu\n", limit);
4213 if (do_swap_account) 4104 if (do_swap_account)
4214 cb->fill(cb, "hierarchical_memsw_limit", memsw_limit); 4105 seq_printf(m, "hierarchical_memsw_limit %llu\n",
4106 memsw_limit);
4215 } 4107 }
4216 4108
4217 memset(&mystat, 0, sizeof(mystat)); 4109 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
4218 mem_cgroup_get_total_stat(memcg, &mystat); 4110 long long val = 0;
4219 for (i = 0; i < NR_MCS_STAT; i++) { 4111
4220 if (i == MCS_SWAP && !do_swap_account) 4112 if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
4221 continue; 4113 continue;
4222 cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); 4114 for_each_mem_cgroup_tree(mi, memcg)
4115 val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
4116 seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
4117 }
4118
4119 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
4120 unsigned long long val = 0;
4121
4122 for_each_mem_cgroup_tree(mi, memcg)
4123 val += mem_cgroup_read_events(mi, i);
4124 seq_printf(m, "total_%s %llu\n",
4125 mem_cgroup_events_names[i], val);
4126 }
4127
4128 for (i = 0; i < NR_LRU_LISTS; i++) {
4129 unsigned long long val = 0;
4130
4131 for_each_mem_cgroup_tree(mi, memcg)
4132 val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
4133 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val);
4223 } 4134 }
4224 4135
4225#ifdef CONFIG_DEBUG_VM 4136#ifdef CONFIG_DEBUG_VM
4226 { 4137 {
4227 int nid, zid; 4138 int nid, zid;
4228 struct mem_cgroup_per_zone *mz; 4139 struct mem_cgroup_per_zone *mz;
4140 struct zone_reclaim_stat *rstat;
4229 unsigned long recent_rotated[2] = {0, 0}; 4141 unsigned long recent_rotated[2] = {0, 0};
4230 unsigned long recent_scanned[2] = {0, 0}; 4142 unsigned long recent_scanned[2] = {0, 0};
4231 4143
4232 for_each_online_node(nid) 4144 for_each_online_node(nid)
4233 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 4145 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
4234 mz = mem_cgroup_zoneinfo(memcg, nid, zid); 4146 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
4147 rstat = &mz->lruvec.reclaim_stat;
4235 4148
4236 recent_rotated[0] += 4149 recent_rotated[0] += rstat->recent_rotated[0];
4237 mz->reclaim_stat.recent_rotated[0]; 4150 recent_rotated[1] += rstat->recent_rotated[1];
4238 recent_rotated[1] += 4151 recent_scanned[0] += rstat->recent_scanned[0];
4239 mz->reclaim_stat.recent_rotated[1]; 4152 recent_scanned[1] += rstat->recent_scanned[1];
4240 recent_scanned[0] +=
4241 mz->reclaim_stat.recent_scanned[0];
4242 recent_scanned[1] +=
4243 mz->reclaim_stat.recent_scanned[1];
4244 } 4153 }
4245 cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); 4154 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
4246 cb->fill(cb, "recent_rotated_file", recent_rotated[1]); 4155 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
4247 cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); 4156 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
4248 cb->fill(cb, "recent_scanned_file", recent_scanned[1]); 4157 seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
4249 } 4158 }
4250#endif 4159#endif
4251 4160
@@ -4307,7 +4216,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
4307 usage = mem_cgroup_usage(memcg, swap); 4216 usage = mem_cgroup_usage(memcg, swap);
4308 4217
4309 /* 4218 /*
4310 * current_threshold points to threshold just below usage. 4219 * current_threshold points to threshold just below or equal to usage.
4311 * If it's not true, a threshold was crossed after last 4220 * If it's not true, a threshold was crossed after last
4312 * call of __mem_cgroup_threshold(). 4221 * call of __mem_cgroup_threshold().
4313 */ 4222 */
@@ -4433,14 +4342,15 @@ static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
4433 /* Find current threshold */ 4342 /* Find current threshold */
4434 new->current_threshold = -1; 4343 new->current_threshold = -1;
4435 for (i = 0; i < size; i++) { 4344 for (i = 0; i < size; i++) {
4436 if (new->entries[i].threshold < usage) { 4345 if (new->entries[i].threshold <= usage) {
4437 /* 4346 /*
4438 * new->current_threshold will not be used until 4347 * new->current_threshold will not be used until
4439 * rcu_assign_pointer(), so it's safe to increment 4348 * rcu_assign_pointer(), so it's safe to increment
4440 * it here. 4349 * it here.
4441 */ 4350 */
4442 ++new->current_threshold; 4351 ++new->current_threshold;
4443 } 4352 } else
4353 break;
4444 } 4354 }
4445 4355
4446 /* Free old spare buffer and save old primary buffer as spare */ 4356 /* Free old spare buffer and save old primary buffer as spare */
@@ -4509,7 +4419,7 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
4509 continue; 4419 continue;
4510 4420
4511 new->entries[j] = thresholds->primary->entries[i]; 4421 new->entries[j] = thresholds->primary->entries[i];
4512 if (new->entries[j].threshold < usage) { 4422 if (new->entries[j].threshold <= usage) {
4513 /* 4423 /*
4514 * new->current_threshold will not be used 4424 * new->current_threshold will not be used
4515 * until rcu_assign_pointer(), so it's safe to increment 4425 * until rcu_assign_pointer(), so it's safe to increment
@@ -4623,22 +4533,6 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4623 return 0; 4533 return 0;
4624} 4534}
4625 4535
4626#ifdef CONFIG_NUMA
4627static const struct file_operations mem_control_numa_stat_file_operations = {
4628 .read = seq_read,
4629 .llseek = seq_lseek,
4630 .release = single_release,
4631};
4632
4633static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4634{
4635 struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
4636
4637 file->f_op = &mem_control_numa_stat_file_operations;
4638 return single_open(file, mem_control_numa_stat_show, cont);
4639}
4640#endif /* CONFIG_NUMA */
4641
4642#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 4536#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
4643static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 4537static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4644{ 4538{
@@ -4694,7 +4588,7 @@ static struct cftype mem_cgroup_files[] = {
4694 }, 4588 },
4695 { 4589 {
4696 .name = "stat", 4590 .name = "stat",
4697 .read_map = mem_control_stat_show, 4591 .read_seq_string = mem_control_stat_show,
4698 }, 4592 },
4699 { 4593 {
4700 .name = "force_empty", 4594 .name = "force_empty",
@@ -4726,8 +4620,7 @@ static struct cftype mem_cgroup_files[] = {
4726#ifdef CONFIG_NUMA 4620#ifdef CONFIG_NUMA
4727 { 4621 {
4728 .name = "numa_stat", 4622 .name = "numa_stat",
4729 .open = mem_control_numa_stat_open, 4623 .read_seq_string = mem_control_numa_stat_show,
4730 .mode = S_IRUGO,
4731 }, 4624 },
4732#endif 4625#endif
4733#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4626#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4764,7 +4657,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4764{ 4657{
4765 struct mem_cgroup_per_node *pn; 4658 struct mem_cgroup_per_node *pn;
4766 struct mem_cgroup_per_zone *mz; 4659 struct mem_cgroup_per_zone *mz;
4767 enum lru_list lru;
4768 int zone, tmp = node; 4660 int zone, tmp = node;
4769 /* 4661 /*
4770 * This routine is called against possible nodes. 4662 * This routine is called against possible nodes.
@@ -4782,8 +4674,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4782 4674
4783 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 4675 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
4784 mz = &pn->zoneinfo[zone]; 4676 mz = &pn->zoneinfo[zone];
4785 for_each_lru(lru) 4677 lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
4786 INIT_LIST_HEAD(&mz->lruvec.lists[lru]);
4787 mz->usage_in_excess = 0; 4678 mz->usage_in_excess = 0;
4788 mz->on_tree = false; 4679 mz->on_tree = false;
4789 mz->memcg = memcg; 4680 mz->memcg = memcg;
@@ -4826,23 +4717,40 @@ out_free:
4826} 4717}
4827 4718
4828/* 4719/*
4829 * Helpers for freeing a vzalloc()ed mem_cgroup by RCU, 4720 * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU,
4830 * but in process context. The work_freeing structure is overlaid 4721 * but in process context. The work_freeing structure is overlaid
4831 * on the rcu_freeing structure, which itself is overlaid on memsw. 4722 * on the rcu_freeing structure, which itself is overlaid on memsw.
4832 */ 4723 */
4833static void vfree_work(struct work_struct *work) 4724static void free_work(struct work_struct *work)
4834{ 4725{
4835 struct mem_cgroup *memcg; 4726 struct mem_cgroup *memcg;
4727 int size = sizeof(struct mem_cgroup);
4836 4728
4837 memcg = container_of(work, struct mem_cgroup, work_freeing); 4729 memcg = container_of(work, struct mem_cgroup, work_freeing);
4838 vfree(memcg); 4730 /*
4731 * We need to make sure that (at least for now), the jump label
4732 * destruction code runs outside of the cgroup lock. This is because
4733 * get_online_cpus(), which is called from the static_branch update,
4734 * can't be called inside the cgroup_lock. cpusets are the ones
4735 * enforcing this dependency, so if they ever change, we might as well.
4736 *
4737 * schedule_work() will guarantee this happens. Be careful if you need
4738 * to move this code around, and make sure it is outside
4739 * the cgroup_lock.
4740 */
4741 disarm_sock_keys(memcg);
4742 if (size < PAGE_SIZE)
4743 kfree(memcg);
4744 else
4745 vfree(memcg);
4839} 4746}
4840static void vfree_rcu(struct rcu_head *rcu_head) 4747
4748static void free_rcu(struct rcu_head *rcu_head)
4841{ 4749{
4842 struct mem_cgroup *memcg; 4750 struct mem_cgroup *memcg;
4843 4751
4844 memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing); 4752 memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
4845 INIT_WORK(&memcg->work_freeing, vfree_work); 4753 INIT_WORK(&memcg->work_freeing, free_work);
4846 schedule_work(&memcg->work_freeing); 4754 schedule_work(&memcg->work_freeing);
4847} 4755}
4848 4756
@@ -4868,10 +4776,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
4868 free_mem_cgroup_per_zone_info(memcg, node); 4776 free_mem_cgroup_per_zone_info(memcg, node);
4869 4777
4870 free_percpu(memcg->stat); 4778 free_percpu(memcg->stat);
4871 if (sizeof(struct mem_cgroup) < PAGE_SIZE) 4779 call_rcu(&memcg->rcu_freeing, free_rcu);
4872 kfree_rcu(memcg, rcu_freeing);
4873 else
4874 call_rcu(&memcg->rcu_freeing, vfree_rcu);
4875} 4780}
4876 4781
4877static void mem_cgroup_get(struct mem_cgroup *memcg) 4782static void mem_cgroup_get(struct mem_cgroup *memcg)
@@ -5135,7 +5040,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5135 return NULL; 5040 return NULL;
5136 if (PageAnon(page)) { 5041 if (PageAnon(page)) {
5137 /* we don't move shared anon */ 5042 /* we don't move shared anon */
5138 if (!move_anon() || page_mapcount(page) > 2) 5043 if (!move_anon())
5139 return NULL; 5044 return NULL;
5140 } else if (!move_file()) 5045 } else if (!move_file())
5141 /* we ignore mapcount for file pages */ 5046 /* we ignore mapcount for file pages */
@@ -5146,32 +5051,37 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5146 return page; 5051 return page;
5147} 5052}
5148 5053
5054#ifdef CONFIG_SWAP
5149static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, 5055static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5150 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5056 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5151{ 5057{
5152 int usage_count;
5153 struct page *page = NULL; 5058 struct page *page = NULL;
5154 swp_entry_t ent = pte_to_swp_entry(ptent); 5059 swp_entry_t ent = pte_to_swp_entry(ptent);
5155 5060
5156 if (!move_anon() || non_swap_entry(ent)) 5061 if (!move_anon() || non_swap_entry(ent))
5157 return NULL; 5062 return NULL;
5158 usage_count = mem_cgroup_count_swap_user(ent, &page); 5063 /*
5159 if (usage_count > 1) { /* we don't move shared anon */ 5064 * Because lookup_swap_cache() updates some statistics counter,
5160 if (page) 5065 * we call find_get_page() with swapper_space directly.
5161 put_page(page); 5066 */
5162 return NULL; 5067 page = find_get_page(&swapper_space, ent.val);
5163 }
5164 if (do_swap_account) 5068 if (do_swap_account)
5165 entry->val = ent.val; 5069 entry->val = ent.val;
5166 5070
5167 return page; 5071 return page;
5168} 5072}
5073#else
5074static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5075 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5076{
5077 return NULL;
5078}
5079#endif
5169 5080
5170static struct page *mc_handle_file_pte(struct vm_area_struct *vma, 5081static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5171 unsigned long addr, pte_t ptent, swp_entry_t *entry) 5082 unsigned long addr, pte_t ptent, swp_entry_t *entry)
5172{ 5083{
5173 struct page *page = NULL; 5084 struct page *page = NULL;
5174 struct inode *inode;
5175 struct address_space *mapping; 5085 struct address_space *mapping;
5176 pgoff_t pgoff; 5086 pgoff_t pgoff;
5177 5087
@@ -5180,7 +5090,6 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5180 if (!move_file()) 5090 if (!move_file())
5181 return NULL; 5091 return NULL;
5182 5092
5183 inode = vma->vm_file->f_path.dentry->d_inode;
5184 mapping = vma->vm_file->f_mapping; 5093 mapping = vma->vm_file->f_mapping;
5185 if (pte_none(ptent)) 5094 if (pte_none(ptent))
5186 pgoff = linear_page_index(vma, addr); 5095 pgoff = linear_page_index(vma, addr);
@@ -5479,8 +5388,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
5479 if (!isolate_lru_page(page)) { 5388 if (!isolate_lru_page(page)) {
5480 pc = lookup_page_cgroup(page); 5389 pc = lookup_page_cgroup(page);
5481 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR, 5390 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
5482 pc, mc.from, mc.to, 5391 pc, mc.from, mc.to)) {
5483 false)) {
5484 mc.precharge -= HPAGE_PMD_NR; 5392 mc.precharge -= HPAGE_PMD_NR;
5485 mc.moved_charge += HPAGE_PMD_NR; 5393 mc.moved_charge += HPAGE_PMD_NR;
5486 } 5394 }
@@ -5510,7 +5418,7 @@ retry:
5510 goto put; 5418 goto put;
5511 pc = lookup_page_cgroup(page); 5419 pc = lookup_page_cgroup(page);
5512 if (!mem_cgroup_move_account(page, 1, pc, 5420 if (!mem_cgroup_move_account(page, 1, pc,
5513 mc.from, mc.to, false)) { 5421 mc.from, mc.to)) {
5514 mc.precharge--; 5422 mc.precharge--;
5515 /* we uncharge from mc.from later. */ 5423 /* we uncharge from mc.from later. */
5516 mc.moved_charge++; 5424 mc.moved_charge++;
@@ -5521,8 +5429,7 @@ put: /* get_mctgt_type() gets the page */
5521 break; 5429 break;
5522 case MC_TARGET_SWAP: 5430 case MC_TARGET_SWAP:
5523 ent = target.ent; 5431 ent = target.ent;
5524 if (!mem_cgroup_move_swap_account(ent, 5432 if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
5525 mc.from, mc.to, false)) {
5526 mc.precharge--; 5433 mc.precharge--;
5527 /* we fixup refcnts and charges later. */ 5434 /* we fixup refcnts and charges later. */
5528 mc.moved_swap++; 5435 mc.moved_swap++;
@@ -5598,7 +5505,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
5598 if (mm) { 5505 if (mm) {
5599 if (mc.to) 5506 if (mc.to)
5600 mem_cgroup_move_charge(mm); 5507 mem_cgroup_move_charge(mm);
5601 put_swap_token(mm);
5602 mmput(mm); 5508 mmput(mm);
5603 } 5509 }
5604 if (mc.to) 5510 if (mc.to)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c99ad4e6b88c..ab1e7145e290 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1388,16 +1388,16 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1388 */ 1388 */
1389 if (!get_page_unless_zero(compound_head(p))) { 1389 if (!get_page_unless_zero(compound_head(p))) {
1390 if (PageHuge(p)) { 1390 if (PageHuge(p)) {
1391 pr_info("get_any_page: %#lx free huge page\n", pfn); 1391 pr_info("%s: %#lx free huge page\n", __func__, pfn);
1392 ret = dequeue_hwpoisoned_huge_page(compound_head(p)); 1392 ret = dequeue_hwpoisoned_huge_page(compound_head(p));
1393 } else if (is_free_buddy_page(p)) { 1393 } else if (is_free_buddy_page(p)) {
1394 pr_info("get_any_page: %#lx free buddy page\n", pfn); 1394 pr_info("%s: %#lx free buddy page\n", __func__, pfn);
1395 /* Set hwpoison bit while page is still isolated */ 1395 /* Set hwpoison bit while page is still isolated */
1396 SetPageHWPoison(p); 1396 SetPageHWPoison(p);
1397 ret = 0; 1397 ret = 0;
1398 } else { 1398 } else {
1399 pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", 1399 pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
1400 pfn, p->flags); 1400 __func__, pfn, p->flags);
1401 ret = -EIO; 1401 ret = -EIO;
1402 } 1402 }
1403 } else { 1403 } else {
diff --git a/mm/memory.c b/mm/memory.c
index e40f6759ba98..1b7dc662bf9f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2909 page = lookup_swap_cache(entry); 2909 page = lookup_swap_cache(entry);
2910 if (!page) { 2910 if (!page) {
2911 grab_swap_token(mm); /* Contend for token _before_ read-in */
2912 page = swapin_readahead(entry, 2911 page = swapin_readahead(entry,
2913 GFP_HIGHUSER_MOVABLE, vma, address); 2912 GFP_HIGHUSER_MOVABLE, vma, address);
2914 if (!page) { 2913 if (!page) {
@@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2938 } 2937 }
2939 2938
2940 locked = lock_page_or_retry(page, mm, flags); 2939 locked = lock_page_or_retry(page, mm, flags);
2940
2941 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2941 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2942 if (!locked) { 2942 if (!locked) {
2943 ret |= VM_FAULT_RETRY; 2943 ret |= VM_FAULT_RETRY;
@@ -3486,6 +3486,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3486 if (unlikely(is_vm_hugetlb_page(vma))) 3486 if (unlikely(is_vm_hugetlb_page(vma)))
3487 return hugetlb_fault(mm, vma, address, flags); 3487 return hugetlb_fault(mm, vma, address, flags);
3488 3488
3489retry:
3489 pgd = pgd_offset(mm, address); 3490 pgd = pgd_offset(mm, address);
3490 pud = pud_alloc(mm, pgd, address); 3491 pud = pud_alloc(mm, pgd, address);
3491 if (!pud) 3492 if (!pud)
@@ -3499,13 +3500,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3499 pmd, flags); 3500 pmd, flags);
3500 } else { 3501 } else {
3501 pmd_t orig_pmd = *pmd; 3502 pmd_t orig_pmd = *pmd;
3503 int ret;
3504
3502 barrier(); 3505 barrier();
3503 if (pmd_trans_huge(orig_pmd)) { 3506 if (pmd_trans_huge(orig_pmd)) {
3504 if (flags & FAULT_FLAG_WRITE && 3507 if (flags & FAULT_FLAG_WRITE &&
3505 !pmd_write(orig_pmd) && 3508 !pmd_write(orig_pmd) &&
3506 !pmd_trans_splitting(orig_pmd)) 3509 !pmd_trans_splitting(orig_pmd)) {
3507 return do_huge_pmd_wp_page(mm, vma, address, 3510 ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
3508 pmd, orig_pmd); 3511 orig_pmd);
3512 /*
3513 * If COW results in an oom, the huge pmd will
3514 * have been split, so retry the fault on the
3515 * pte for a smaller charge.
3516 */
3517 if (unlikely(ret & VM_FAULT_OOM))
3518 goto retry;
3519 return ret;
3520 }
3509 return 0; 3521 return 0;
3510 } 3522 }
3511 } 3523 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fc898cb4fe8f..0d7e3ec8e0f3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -74,8 +74,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
74 res->end = start + size - 1; 74 res->end = start + size - 1;
75 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 75 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
76 if (request_resource(&iomem_resource, res) < 0) { 76 if (request_resource(&iomem_resource, res) < 0) {
77 printk("System RAM resource %llx - %llx cannot be added\n", 77 printk("System RAM resource %pR cannot be added\n", res);
78 (unsigned long long)res->start, (unsigned long long)res->end);
79 kfree(res); 78 kfree(res);
80 res = NULL; 79 res = NULL;
81 } 80 }
@@ -502,8 +501,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
502 online_pages_range); 501 online_pages_range);
503 if (ret) { 502 if (ret) {
504 mutex_unlock(&zonelists_mutex); 503 mutex_unlock(&zonelists_mutex);
505 printk(KERN_DEBUG "online_pages %lx at %lx failed\n", 504 printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n",
506 nr_pages, pfn); 505 (unsigned long long) pfn << PAGE_SHIFT,
506 (((unsigned long long) pfn + nr_pages)
507 << PAGE_SHIFT) - 1);
507 memory_notify(MEM_CANCEL_ONLINE, &arg); 508 memory_notify(MEM_CANCEL_ONLINE, &arg);
508 unlock_memory_hotplug(); 509 unlock_memory_hotplug();
509 return ret; 510 return ret;
@@ -977,8 +978,9 @@ repeat:
977 return 0; 978 return 0;
978 979
979failed_removal: 980failed_removal:
980 printk(KERN_INFO "memory offlining %lx to %lx failed\n", 981 printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
981 start_pfn, end_pfn); 982 (unsigned long long) start_pfn << PAGE_SHIFT,
983 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
982 memory_notify(MEM_CANCEL_OFFLINE, &arg); 984 memory_notify(MEM_CANCEL_OFFLINE, &arg);
983 /* pushback to free area */ 985 /* pushback to free area */
984 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); 986 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 88f9422b92e7..f15c1b24ca18 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -390,7 +390,7 @@ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
390{ 390{
391 if (!pol) 391 if (!pol)
392 return; 392 return;
393 if (!mpol_store_user_nodemask(pol) && step == 0 && 393 if (!mpol_store_user_nodemask(pol) && step == MPOL_REBIND_ONCE &&
394 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) 394 nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
395 return; 395 return;
396 396
@@ -950,8 +950,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
950 * 950 *
951 * Returns the number of page that could not be moved. 951 * Returns the number of page that could not be moved.
952 */ 952 */
953int do_migrate_pages(struct mm_struct *mm, 953int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
954 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 954 const nodemask_t *to, int flags)
955{ 955{
956 int busy = 0; 956 int busy = 0;
957 int err; 957 int err;
@@ -963,7 +963,7 @@ int do_migrate_pages(struct mm_struct *mm,
963 963
964 down_read(&mm->mmap_sem); 964 down_read(&mm->mmap_sem);
965 965
966 err = migrate_vmas(mm, from_nodes, to_nodes, flags); 966 err = migrate_vmas(mm, from, to, flags);
967 if (err) 967 if (err)
968 goto out; 968 goto out;
969 969
@@ -998,14 +998,34 @@ int do_migrate_pages(struct mm_struct *mm,
998 * moved to an empty node, then there is nothing left worth migrating. 998 * moved to an empty node, then there is nothing left worth migrating.
999 */ 999 */
1000 1000
1001 tmp = *from_nodes; 1001 tmp = *from;
1002 while (!nodes_empty(tmp)) { 1002 while (!nodes_empty(tmp)) {
1003 int s,d; 1003 int s,d;
1004 int source = -1; 1004 int source = -1;
1005 int dest = 0; 1005 int dest = 0;
1006 1006
1007 for_each_node_mask(s, tmp) { 1007 for_each_node_mask(s, tmp) {
1008 d = node_remap(s, *from_nodes, *to_nodes); 1008
1009 /*
1010 * do_migrate_pages() tries to maintain the relative
1011 * node relationship of the pages established between
1012 * threads and memory areas.
1013 *
1014 * However if the number of source nodes is not equal to
1015 * the number of destination nodes we can not preserve
1016 * this node relative relationship. In that case, skip
1017 * copying memory from a node that is in the destination
1018 * mask.
1019 *
1020 * Example: [2,3,4] -> [3,4,5] moves everything.
1021 * [0-7] - > [3,4,5] moves only 0,1,2,6,7.
1022 */
1023
1024 if ((nodes_weight(*from) != nodes_weight(*to)) &&
1025 (node_isset(s, *to)))
1026 continue;
1027
1028 d = node_remap(s, *from, *to);
1009 if (s == d) 1029 if (s == d)
1010 continue; 1030 continue;
1011 1031
@@ -1065,8 +1085,8 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
1065{ 1085{
1066} 1086}
1067 1087
1068int do_migrate_pages(struct mm_struct *mm, 1088int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
1069 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 1089 const nodemask_t *to, int flags)
1070{ 1090{
1071 return -ENOSYS; 1091 return -ENOSYS;
1072} 1092}
diff --git a/mm/mmap.c b/mm/mmap.c
index e8dcfc7de866..4a9c2a391e28 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1639,33 +1639,34 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1639{ 1639{
1640 struct vm_area_struct *vma = NULL; 1640 struct vm_area_struct *vma = NULL;
1641 1641
1642 if (mm) { 1642 if (WARN_ON_ONCE(!mm)) /* Remove this in linux-3.6 */
1643 /* Check the cache first. */ 1643 return NULL;
1644 /* (Cache hit rate is typically around 35%.) */ 1644
1645 vma = mm->mmap_cache; 1645 /* Check the cache first. */
1646 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { 1646 /* (Cache hit rate is typically around 35%.) */
1647 struct rb_node * rb_node; 1647 vma = mm->mmap_cache;
1648 1648 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1649 rb_node = mm->mm_rb.rb_node; 1649 struct rb_node *rb_node;
1650 vma = NULL; 1650
1651 1651 rb_node = mm->mm_rb.rb_node;
1652 while (rb_node) { 1652 vma = NULL;
1653 struct vm_area_struct * vma_tmp; 1653
1654 1654 while (rb_node) {
1655 vma_tmp = rb_entry(rb_node, 1655 struct vm_area_struct *vma_tmp;
1656 struct vm_area_struct, vm_rb); 1656
1657 1657 vma_tmp = rb_entry(rb_node,
1658 if (vma_tmp->vm_end > addr) { 1658 struct vm_area_struct, vm_rb);
1659 vma = vma_tmp; 1659
1660 if (vma_tmp->vm_start <= addr) 1660 if (vma_tmp->vm_end > addr) {
1661 break; 1661 vma = vma_tmp;
1662 rb_node = rb_node->rb_left; 1662 if (vma_tmp->vm_start <= addr)
1663 } else 1663 break;
1664 rb_node = rb_node->rb_right; 1664 rb_node = rb_node->rb_left;
1665 } 1665 } else
1666 if (vma) 1666 rb_node = rb_node->rb_right;
1667 mm->mmap_cache = vma;
1668 } 1667 }
1668 if (vma)
1669 mm->mmap_cache = vma;
1669 } 1670 }
1670 return vma; 1671 return vma;
1671} 1672}
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 7cf7b7ddc7c5..6830eab5bf09 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -86,3 +86,17 @@ int memmap_valid_within(unsigned long pfn,
86 return 1; 86 return 1;
87} 87}
88#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ 88#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
89
90void lruvec_init(struct lruvec *lruvec, struct zone *zone)
91{
92 enum lru_list lru;
93
94 memset(lruvec, 0, sizeof(struct lruvec));
95
96 for_each_lru(lru)
97 INIT_LIST_HEAD(&lruvec->lists[lru]);
98
99#ifdef CONFIG_CGROUP_MEM_RES_CTLR
100 lruvec->zone = zone;
101#endif
102}
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 1983fb1c7026..d23415c001bc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -274,86 +274,85 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
274 return ___alloc_bootmem(size, align, goal, limit); 274 return ___alloc_bootmem(size, align, goal, limit);
275} 275}
276 276
277/** 277static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
278 * __alloc_bootmem_node - allocate boot memory from a specific node 278 unsigned long size,
279 * @pgdat: node to allocate from 279 unsigned long align,
280 * @size: size of the request in bytes 280 unsigned long goal,
281 * @align: alignment of the region 281 unsigned long limit)
282 * @goal: preferred starting address of the region
283 *
284 * The goal is dropped if it can not be satisfied and the allocation will
285 * fall back to memory below @goal.
286 *
287 * Allocation may fall back to any node in the system if the specified node
288 * can not hold the requested memory.
289 *
290 * The function panics if the request can not be satisfied.
291 */
292void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
293 unsigned long align, unsigned long goal)
294{ 282{
295 void *ptr; 283 void *ptr;
296 284
297 if (WARN_ON_ONCE(slab_is_available()))
298 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
299
300again: 285again:
301 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 286 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
302 goal, -1ULL); 287 goal, limit);
303 if (ptr) 288 if (ptr)
304 return ptr; 289 return ptr;
305 290
306 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, 291 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
307 goal, -1ULL); 292 goal, limit);
308 if (!ptr && goal) { 293 if (ptr)
294 return ptr;
295
296 if (goal) {
309 goal = 0; 297 goal = 0;
310 goto again; 298 goto again;
311 } 299 }
312 return ptr; 300
301 return NULL;
313} 302}
314 303
315void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 304void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
316 unsigned long align, unsigned long goal) 305 unsigned long align, unsigned long goal)
317{ 306{
318 return __alloc_bootmem_node(pgdat, size, align, goal); 307 if (WARN_ON_ONCE(slab_is_available()))
308 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
309
310 return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
319} 311}
320 312
321#ifdef CONFIG_SPARSEMEM 313void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
322/** 314 unsigned long align, unsigned long goal,
323 * alloc_bootmem_section - allocate boot memory from a specific section 315 unsigned long limit)
324 * @size: size of the request in bytes
325 * @section_nr: sparse map section to allocate from
326 *
327 * Return NULL on failure.
328 */
329void * __init alloc_bootmem_section(unsigned long size,
330 unsigned long section_nr)
331{ 316{
332 unsigned long pfn, goal, limit; 317 void *ptr;
333 318
334 pfn = section_nr_to_pfn(section_nr); 319 ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, limit);
335 goal = pfn << PAGE_SHIFT; 320 if (ptr)
336 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; 321 return ptr;
337 322
338 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, 323 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
339 SMP_CACHE_BYTES, goal, limit); 324 panic("Out of memory");
325 return NULL;
340} 326}
341#endif
342 327
343void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, 328/**
329 * __alloc_bootmem_node - allocate boot memory from a specific node
330 * @pgdat: node to allocate from
331 * @size: size of the request in bytes
332 * @align: alignment of the region
333 * @goal: preferred starting address of the region
334 *
335 * The goal is dropped if it can not be satisfied and the allocation will
336 * fall back to memory below @goal.
337 *
338 * Allocation may fall back to any node in the system if the specified node
339 * can not hold the requested memory.
340 *
341 * The function panics if the request can not be satisfied.
342 */
343void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
344 unsigned long align, unsigned long goal) 344 unsigned long align, unsigned long goal)
345{ 345{
346 void *ptr;
347
348 if (WARN_ON_ONCE(slab_is_available())) 346 if (WARN_ON_ONCE(slab_is_available()))
349 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 347 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
350 348
351 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 349 return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
352 goal, -1ULL); 350}
353 if (ptr)
354 return ptr;
355 351
356 return __alloc_bootmem_nopanic(size, align, goal); 352void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
353 unsigned long align, unsigned long goal)
354{
355 return __alloc_bootmem_node(pgdat, size, align, goal);
357} 356}
358 357
359#ifndef ARCH_LOW_ADDRESS_LIMIT 358#ifndef ARCH_LOW_ADDRESS_LIMIT
@@ -397,16 +396,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
397void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 396void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
398 unsigned long align, unsigned long goal) 397 unsigned long align, unsigned long goal)
399{ 398{
400 void *ptr;
401
402 if (WARN_ON_ONCE(slab_is_available())) 399 if (WARN_ON_ONCE(slab_is_available()))
403 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 400 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
404 401
405 ptr = __alloc_memory_core_early(pgdat->node_id, size, align, 402 return ___alloc_bootmem_node(pgdat, size, align, goal,
406 goal, ARCH_LOW_ADDRESS_LIMIT); 403 ARCH_LOW_ADDRESS_LIMIT);
407 if (ptr)
408 return ptr;
409
410 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
411 goal, ARCH_LOW_ADDRESS_LIMIT);
412} 404}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9f09a1fde9f9..ed0e19677360 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -180,10 +180,10 @@ static bool oom_unkillable_task(struct task_struct *p,
180 * predictable as possible. The goal is to return the highest value for the 180 * predictable as possible. The goal is to return the highest value for the
181 * task consuming the most memory to avoid subsequent oom failures. 181 * task consuming the most memory to avoid subsequent oom failures.
182 */ 182 */
183unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, 183unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
184 const nodemask_t *nodemask, unsigned long totalpages) 184 const nodemask_t *nodemask, unsigned long totalpages)
185{ 185{
186 long points; 186 unsigned long points;
187 187
188 if (oom_unkillable_task(p, memcg, nodemask)) 188 if (oom_unkillable_task(p, memcg, nodemask))
189 return 0; 189 return 0;
@@ -198,21 +198,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
198 } 198 }
199 199
200 /* 200 /*
201 * The memory controller may have a limit of 0 bytes, so avoid a divide
202 * by zero, if necessary.
203 */
204 if (!totalpages)
205 totalpages = 1;
206
207 /*
208 * The baseline for the badness score is the proportion of RAM that each 201 * The baseline for the badness score is the proportion of RAM that each
209 * task's rss, pagetable and swap space use. 202 * task's rss, pagetable and swap space use.
210 */ 203 */
211 points = get_mm_rss(p->mm) + p->mm->nr_ptes; 204 points = get_mm_rss(p->mm) + p->mm->nr_ptes +
212 points += get_mm_counter(p->mm, MM_SWAPENTS); 205 get_mm_counter(p->mm, MM_SWAPENTS);
213
214 points *= 1000;
215 points /= totalpages;
216 task_unlock(p); 206 task_unlock(p);
217 207
218 /* 208 /*
@@ -220,23 +210,20 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
220 * implementation used by LSMs. 210 * implementation used by LSMs.
221 */ 211 */
222 if (has_capability_noaudit(p, CAP_SYS_ADMIN)) 212 if (has_capability_noaudit(p, CAP_SYS_ADMIN))
223 points -= 30; 213 points -= 30 * totalpages / 1000;
224 214
225 /* 215 /*
226 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may 216 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
227 * either completely disable oom killing or always prefer a certain 217 * either completely disable oom killing or always prefer a certain
228 * task. 218 * task.
229 */ 219 */
230 points += p->signal->oom_score_adj; 220 points += p->signal->oom_score_adj * totalpages / 1000;
231 221
232 /* 222 /*
233 * Never return 0 for an eligible task that may be killed since it's 223 * Never return 0 for an eligible task regardless of the root bonus and
234 * possible that no single user task uses more than 0.1% of memory and 224 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
235 * no single admin tasks uses more than 3.0%.
236 */ 225 */
237 if (points <= 0) 226 return points ? points : 1;
238 return 1;
239 return (points < 1000) ? points : 1000;
240} 227}
241 228
242/* 229/*
@@ -314,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
314{ 301{
315 struct task_struct *g, *p; 302 struct task_struct *g, *p;
316 struct task_struct *chosen = NULL; 303 struct task_struct *chosen = NULL;
317 *ppoints = 0; 304 unsigned long chosen_points = 0;
318 305
319 do_each_thread(g, p) { 306 do_each_thread(g, p) {
320 unsigned int points; 307 unsigned int points;
@@ -354,7 +341,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
354 */ 341 */
355 if (p == current) { 342 if (p == current) {
356 chosen = p; 343 chosen = p;
357 *ppoints = 1000; 344 chosen_points = ULONG_MAX;
358 } else if (!force_kill) { 345 } else if (!force_kill) {
359 /* 346 /*
360 * If this task is not being ptraced on exit, 347 * If this task is not being ptraced on exit,
@@ -367,12 +354,13 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
367 } 354 }
368 355
369 points = oom_badness(p, memcg, nodemask, totalpages); 356 points = oom_badness(p, memcg, nodemask, totalpages);
370 if (points > *ppoints) { 357 if (points > chosen_points) {
371 chosen = p; 358 chosen = p;
372 *ppoints = points; 359 chosen_points = points;
373 } 360 }
374 } while_each_thread(g, p); 361 } while_each_thread(g, p);
375 362
363 *ppoints = chosen_points * 1000 / totalpages;
376 return chosen; 364 return chosen;
377} 365}
378 366
@@ -572,7 +560,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
572 } 560 }
573 561
574 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); 562 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
575 limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT; 563 limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
576 read_lock(&tasklist_lock); 564 read_lock(&tasklist_lock);
577 p = select_bad_process(&points, limit, memcg, NULL, false); 565 p = select_bad_process(&points, limit, memcg, NULL, false);
578 if (p && PTR_ERR(p) != -1UL) 566 if (p && PTR_ERR(p) != -1UL)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bab8e3bc4202..6092f331b32e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
219 219
220int page_group_by_mobility_disabled __read_mostly; 220int page_group_by_mobility_disabled __read_mostly;
221 221
222static void set_pageblock_migratetype(struct page *page, int migratetype) 222void set_pageblock_migratetype(struct page *page, int migratetype)
223{ 223{
224 224
225 if (unlikely(page_group_by_mobility_disabled)) 225 if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
954 return pages_moved; 954 return pages_moved;
955} 955}
956 956
957static int move_freepages_block(struct zone *zone, struct page *page, 957int move_freepages_block(struct zone *zone, struct page *page,
958 int migratetype) 958 int migratetype)
959{ 959{
960 unsigned long start_pfn, end_pfn; 960 unsigned long start_pfn, end_pfn;
961 struct page *start_page, *end_page; 961 struct page *start_page, *end_page;
@@ -4300,25 +4300,24 @@ static inline void setup_usemap(struct pglist_data *pgdat,
4300 4300
4301#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 4301#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
4302 4302
4303/* Return a sensible default order for the pageblock size. */
4304static inline int pageblock_default_order(void)
4305{
4306 if (HPAGE_SHIFT > PAGE_SHIFT)
4307 return HUGETLB_PAGE_ORDER;
4308
4309 return MAX_ORDER-1;
4310}
4311
4312/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ 4303/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
4313static inline void __init set_pageblock_order(unsigned int order) 4304static inline void __init set_pageblock_order(void)
4314{ 4305{
4306 unsigned int order;
4307
4315 /* Check that pageblock_nr_pages has not already been setup */ 4308 /* Check that pageblock_nr_pages has not already been setup */
4316 if (pageblock_order) 4309 if (pageblock_order)
4317 return; 4310 return;
4318 4311
4312 if (HPAGE_SHIFT > PAGE_SHIFT)
4313 order = HUGETLB_PAGE_ORDER;
4314 else
4315 order = MAX_ORDER - 1;
4316
4319 /* 4317 /*
4320 * Assume the largest contiguous order of interest is a huge page. 4318 * Assume the largest contiguous order of interest is a huge page.
4321 * This value may be variable depending on boot parameters on IA64 4319 * This value may be variable depending on boot parameters on IA64 and
4320 * powerpc.
4322 */ 4321 */
4323 pageblock_order = order; 4322 pageblock_order = order;
4324} 4323}
@@ -4326,15 +4325,13 @@ static inline void __init set_pageblock_order(unsigned int order)
4326 4325
4327/* 4326/*
4328 * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() 4327 * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order()
4329 * and pageblock_default_order() are unused as pageblock_order is set 4328 * is unused as pageblock_order is set at compile-time. See
4330 * at compile-time. See include/linux/pageblock-flags.h for the values of 4329 * include/linux/pageblock-flags.h for the values of pageblock_order based on
4331 * pageblock_order based on the kernel config 4330 * the kernel config
4332 */ 4331 */
4333static inline int pageblock_default_order(unsigned int order) 4332static inline void set_pageblock_order(void)
4334{ 4333{
4335 return MAX_ORDER-1;
4336} 4334}
4337#define set_pageblock_order(x) do {} while (0)
4338 4335
4339#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 4336#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
4340 4337
@@ -4361,7 +4358,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4361 for (j = 0; j < MAX_NR_ZONES; j++) { 4358 for (j = 0; j < MAX_NR_ZONES; j++) {
4362 struct zone *zone = pgdat->node_zones + j; 4359 struct zone *zone = pgdat->node_zones + j;
4363 unsigned long size, realsize, memmap_pages; 4360 unsigned long size, realsize, memmap_pages;
4364 enum lru_list lru;
4365 4361
4366 size = zone_spanned_pages_in_node(nid, j, zones_size); 4362 size = zone_spanned_pages_in_node(nid, j, zones_size);
4367 realsize = size - zone_absent_pages_in_node(nid, j, 4363 realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4411,18 +4407,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4411 zone->zone_pgdat = pgdat; 4407 zone->zone_pgdat = pgdat;
4412 4408
4413 zone_pcp_init(zone); 4409 zone_pcp_init(zone);
4414 for_each_lru(lru) 4410 lruvec_init(&zone->lruvec, zone);
4415 INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
4416 zone->reclaim_stat.recent_rotated[0] = 0;
4417 zone->reclaim_stat.recent_rotated[1] = 0;
4418 zone->reclaim_stat.recent_scanned[0] = 0;
4419 zone->reclaim_stat.recent_scanned[1] = 0;
4420 zap_zone_vm_stats(zone); 4411 zap_zone_vm_stats(zone);
4421 zone->flags = 0; 4412 zone->flags = 0;
4422 if (!size) 4413 if (!size)
4423 continue; 4414 continue;
4424 4415
4425 set_pageblock_order(pageblock_default_order()); 4416 set_pageblock_order();
4426 setup_usemap(pgdat, zone, size); 4417 setup_usemap(pgdat, zone, size);
4427 ret = init_currently_empty_zone(zone, zone_start_pfn, 4418 ret = init_currently_empty_zone(zone, zone_start_pfn,
4428 size, MEMMAP_EARLY); 4419 size, MEMMAP_EARLY);
@@ -4815,7 +4806,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4815 find_zone_movable_pfns_for_nodes(); 4806 find_zone_movable_pfns_for_nodes();
4816 4807
4817 /* Print out the zone ranges */ 4808 /* Print out the zone ranges */
4818 printk("Zone PFN ranges:\n"); 4809 printk("Zone ranges:\n");
4819 for (i = 0; i < MAX_NR_ZONES; i++) { 4810 for (i = 0; i < MAX_NR_ZONES; i++) {
4820 if (i == ZONE_MOVABLE) 4811 if (i == ZONE_MOVABLE)
4821 continue; 4812 continue;
@@ -4824,22 +4815,25 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4824 arch_zone_highest_possible_pfn[i]) 4815 arch_zone_highest_possible_pfn[i])
4825 printk(KERN_CONT "empty\n"); 4816 printk(KERN_CONT "empty\n");
4826 else 4817 else
4827 printk(KERN_CONT "%0#10lx -> %0#10lx\n", 4818 printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n",
4828 arch_zone_lowest_possible_pfn[i], 4819 arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
4829 arch_zone_highest_possible_pfn[i]); 4820 (arch_zone_highest_possible_pfn[i]
4821 << PAGE_SHIFT) - 1);
4830 } 4822 }
4831 4823
4832 /* Print out the PFNs ZONE_MOVABLE begins at in each node */ 4824 /* Print out the PFNs ZONE_MOVABLE begins at in each node */
4833 printk("Movable zone start PFN for each node\n"); 4825 printk("Movable zone start for each node\n");
4834 for (i = 0; i < MAX_NUMNODES; i++) { 4826 for (i = 0; i < MAX_NUMNODES; i++) {
4835 if (zone_movable_pfn[i]) 4827 if (zone_movable_pfn[i])
4836 printk(" Node %d: %lu\n", i, zone_movable_pfn[i]); 4828 printk(" Node %d: %#010lx\n", i,
4829 zone_movable_pfn[i] << PAGE_SHIFT);
4837 } 4830 }
4838 4831
4839 /* Print out the early_node_map[] */ 4832 /* Print out the early_node_map[] */
4840 printk("Early memory PFN ranges\n"); 4833 printk("Early memory node ranges\n");
4841 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) 4834 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4842 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn); 4835 printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
4836 start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
4843 4837
4844 /* Initialise every node */ 4838 /* Initialise every node */
4845 mminit_verify_pageflags_layout(); 4839 mminit_verify_pageflags_layout();
@@ -5657,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5657 .nr_migratepages = 0, 5651 .nr_migratepages = 0,
5658 .order = -1, 5652 .order = -1,
5659 .zone = page_zone(pfn_to_page(start)), 5653 .zone = page_zone(pfn_to_page(start)),
5660 .sync = true, 5654 .mode = COMPACT_SYNC,
5661 }; 5655 };
5662 INIT_LIST_HEAD(&cc.migratepages); 5656 INIT_LIST_HEAD(&cc.migratepages);
5663 5657
@@ -5938,7 +5932,7 @@ bool is_free_buddy_page(struct page *page)
5938} 5932}
5939#endif 5933#endif
5940 5934
5941static struct trace_print_flags pageflag_names[] = { 5935static const struct trace_print_flags pageflag_names[] = {
5942 {1UL << PG_locked, "locked" }, 5936 {1UL << PG_locked, "locked" },
5943 {1UL << PG_error, "error" }, 5937 {1UL << PG_error, "error" },
5944 {1UL << PG_referenced, "referenced" }, 5938 {1UL << PG_referenced, "referenced" },
@@ -5973,7 +5967,9 @@ static struct trace_print_flags pageflag_names[] = {
5973#ifdef CONFIG_MEMORY_FAILURE 5967#ifdef CONFIG_MEMORY_FAILURE
5974 {1UL << PG_hwpoison, "hwpoison" }, 5968 {1UL << PG_hwpoison, "hwpoison" },
5975#endif 5969#endif
5976 {-1UL, NULL }, 5970#ifdef CONFIG_TRANSPARENT_HUGEPAGE
5971 {1UL << PG_compound_lock, "compound_lock" },
5972#endif
5977}; 5973};
5978 5974
5979static void dump_page_flags(unsigned long flags) 5975static void dump_page_flags(unsigned long flags)
@@ -5982,12 +5978,14 @@ static void dump_page_flags(unsigned long flags)
5982 unsigned long mask; 5978 unsigned long mask;
5983 int i; 5979 int i;
5984 5980
5981 BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
5982
5985 printk(KERN_ALERT "page flags: %#lx(", flags); 5983 printk(KERN_ALERT "page flags: %#lx(", flags);
5986 5984
5987 /* remove zone id */ 5985 /* remove zone id */
5988 flags &= (1UL << NR_PAGEFLAGS) - 1; 5986 flags &= (1UL << NR_PAGEFLAGS) - 1;
5989 5987
5990 for (i = 0; pageflag_names[i].name && flags; i++) { 5988 for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) {
5991 5989
5992 mask = pageflag_names[i].mask; 5990 mask = pageflag_names[i].mask;
5993 if ((flags & mask) != mask) 5991 if ((flags & mask) != mask)
diff --git a/mm/readahead.c b/mm/readahead.c
index cbcbb02f3e28..ea8f8fa21649 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
17#include <linux/task_io_accounting_ops.h> 17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h> 18#include <linux/pagevec.h>
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
20 22
21/* 23/*
22 * Initialise a struct file's readahead state. Assumes that the caller has 24 * Initialise a struct file's readahead state. Assumes that the caller has
@@ -562,3 +564,41 @@ page_cache_async_readahead(struct address_space *mapping,
562 ondemand_readahead(mapping, ra, filp, true, offset, req_size); 564 ondemand_readahead(mapping, ra, filp, true, offset, req_size);
563} 565}
564EXPORT_SYMBOL_GPL(page_cache_async_readahead); 566EXPORT_SYMBOL_GPL(page_cache_async_readahead);
567
568static ssize_t
569do_readahead(struct address_space *mapping, struct file *filp,
570 pgoff_t index, unsigned long nr)
571{
572 if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
573 return -EINVAL;
574
575 force_page_cache_readahead(mapping, filp, index, nr);
576 return 0;
577}
578
579SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
580{
581 ssize_t ret;
582 struct file *file;
583
584 ret = -EBADF;
585 file = fget(fd);
586 if (file) {
587 if (file->f_mode & FMODE_READ) {
588 struct address_space *mapping = file->f_mapping;
589 pgoff_t start = offset >> PAGE_CACHE_SHIFT;
590 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
591 unsigned long len = end - start + 1;
592 ret = do_readahead(mapping, file, start, len);
593 }
594 fput(file);
595 }
596 return ret;
597}
598#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
599asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
600{
601 return SYSC_readahead((int) fd, offset, (size_t) count);
602}
603SYSCALL_ALIAS(sys_readahead, SyS_readahead);
604#endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 5b5ad584ffb7..0f3b7cda2a24 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
755 pte_unmap_unlock(pte, ptl); 755 pte_unmap_unlock(pte, ptl);
756 } 756 }
757 757
758 /* Pretend the page is referenced if the task has the
759 swap token and is in the middle of a page fault. */
760 if (mm != current->mm && has_swap_token(mm) &&
761 rwsem_is_locked(&mm->mmap_sem))
762 referenced++;
763
764 (*mapcount)--; 758 (*mapcount)--;
765 759
766 if (referenced) 760 if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index be5af34a070d..d576b84d913c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt;
53#include <linux/blkdev.h> 53#include <linux/blkdev.h>
54#include <linux/pagevec.h> 54#include <linux/pagevec.h>
55#include <linux/percpu_counter.h> 55#include <linux/percpu_counter.h>
56#include <linux/falloc.h>
56#include <linux/splice.h> 57#include <linux/splice.h>
57#include <linux/security.h> 58#include <linux/security.h>
58#include <linux/swapops.h> 59#include <linux/swapops.h>
@@ -83,12 +84,25 @@ struct shmem_xattr {
83 char value[0]; 84 char value[0];
84}; 85};
85 86
87/*
88 * shmem_fallocate and shmem_writepage communicate via inode->i_private
89 * (with i_mutex making sure that it has only one user at a time):
90 * we would prefer not to enlarge the shmem inode just for that.
91 */
92struct shmem_falloc {
93 pgoff_t start; /* start of range currently being fallocated */
94 pgoff_t next; /* the next page offset to be fallocated */
95 pgoff_t nr_falloced; /* how many new pages have been fallocated */
96 pgoff_t nr_unswapped; /* how often writepage refused to swap out */
97};
98
86/* Flag allocation requirements to shmem_getpage */ 99/* Flag allocation requirements to shmem_getpage */
87enum sgp_type { 100enum sgp_type {
88 SGP_READ, /* don't exceed i_size, don't allocate page */ 101 SGP_READ, /* don't exceed i_size, don't allocate page */
89 SGP_CACHE, /* don't exceed i_size, may allocate page */ 102 SGP_CACHE, /* don't exceed i_size, may allocate page */
90 SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ 103 SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
91 SGP_WRITE, /* may exceed i_size, may allocate page */ 104 SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
105 SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
92}; 106};
93 107
94#ifdef CONFIG_TMPFS 108#ifdef CONFIG_TMPFS
@@ -103,6 +117,9 @@ static unsigned long shmem_default_max_inodes(void)
103} 117}
104#endif 118#endif
105 119
120static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
121static int shmem_replace_page(struct page **pagep, gfp_t gfp,
122 struct shmem_inode_info *info, pgoff_t index);
106static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 123static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
107 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); 124 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
108 125
@@ -423,27 +440,31 @@ void shmem_unlock_mapping(struct address_space *mapping)
423 440
424/* 441/*
425 * Remove range of pages and swap entries from radix tree, and free them. 442 * Remove range of pages and swap entries from radix tree, and free them.
443 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
426 */ 444 */
427void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 445static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
446 bool unfalloc)
428{ 447{
429 struct address_space *mapping = inode->i_mapping; 448 struct address_space *mapping = inode->i_mapping;
430 struct shmem_inode_info *info = SHMEM_I(inode); 449 struct shmem_inode_info *info = SHMEM_I(inode);
431 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 450 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
432 unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 451 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
433 pgoff_t end = (lend >> PAGE_CACHE_SHIFT); 452 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
453 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
434 struct pagevec pvec; 454 struct pagevec pvec;
435 pgoff_t indices[PAGEVEC_SIZE]; 455 pgoff_t indices[PAGEVEC_SIZE];
436 long nr_swaps_freed = 0; 456 long nr_swaps_freed = 0;
437 pgoff_t index; 457 pgoff_t index;
438 int i; 458 int i;
439 459
440 BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); 460 if (lend == -1)
461 end = -1; /* unsigned, so actually very big */
441 462
442 pagevec_init(&pvec, 0); 463 pagevec_init(&pvec, 0);
443 index = start; 464 index = start;
444 while (index <= end) { 465 while (index < end) {
445 pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 466 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
446 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 467 min(end - index, (pgoff_t)PAGEVEC_SIZE),
447 pvec.pages, indices); 468 pvec.pages, indices);
448 if (!pvec.nr) 469 if (!pvec.nr)
449 break; 470 break;
@@ -452,10 +473,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
452 struct page *page = pvec.pages[i]; 473 struct page *page = pvec.pages[i];
453 474
454 index = indices[i]; 475 index = indices[i];
455 if (index > end) 476 if (index >= end)
456 break; 477 break;
457 478
458 if (radix_tree_exceptional_entry(page)) { 479 if (radix_tree_exceptional_entry(page)) {
480 if (unfalloc)
481 continue;
459 nr_swaps_freed += !shmem_free_swap(mapping, 482 nr_swaps_freed += !shmem_free_swap(mapping,
460 index, page); 483 index, page);
461 continue; 484 continue;
@@ -463,9 +486,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
463 486
464 if (!trylock_page(page)) 487 if (!trylock_page(page))
465 continue; 488 continue;
466 if (page->mapping == mapping) { 489 if (!unfalloc || !PageUptodate(page)) {
467 VM_BUG_ON(PageWriteback(page)); 490 if (page->mapping == mapping) {
468 truncate_inode_page(mapping, page); 491 VM_BUG_ON(PageWriteback(page));
492 truncate_inode_page(mapping, page);
493 }
469 } 494 }
470 unlock_page(page); 495 unlock_page(page);
471 } 496 }
@@ -476,30 +501,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
476 index++; 501 index++;
477 } 502 }
478 503
479 if (partial) { 504 if (partial_start) {
480 struct page *page = NULL; 505 struct page *page = NULL;
481 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); 506 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
482 if (page) { 507 if (page) {
483 zero_user_segment(page, partial, PAGE_CACHE_SIZE); 508 unsigned int top = PAGE_CACHE_SIZE;
509 if (start > end) {
510 top = partial_end;
511 partial_end = 0;
512 }
513 zero_user_segment(page, partial_start, top);
514 set_page_dirty(page);
515 unlock_page(page);
516 page_cache_release(page);
517 }
518 }
519 if (partial_end) {
520 struct page *page = NULL;
521 shmem_getpage(inode, end, &page, SGP_READ, NULL);
522 if (page) {
523 zero_user_segment(page, 0, partial_end);
484 set_page_dirty(page); 524 set_page_dirty(page);
485 unlock_page(page); 525 unlock_page(page);
486 page_cache_release(page); 526 page_cache_release(page);
487 } 527 }
488 } 528 }
529 if (start >= end)
530 return;
489 531
490 index = start; 532 index = start;
491 for ( ; ; ) { 533 for ( ; ; ) {
492 cond_resched(); 534 cond_resched();
493 pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 535 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
494 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 536 min(end - index, (pgoff_t)PAGEVEC_SIZE),
495 pvec.pages, indices); 537 pvec.pages, indices);
496 if (!pvec.nr) { 538 if (!pvec.nr) {
497 if (index == start) 539 if (index == start || unfalloc)
498 break; 540 break;
499 index = start; 541 index = start;
500 continue; 542 continue;
501 } 543 }
502 if (index == start && indices[0] > end) { 544 if ((index == start || unfalloc) && indices[0] >= end) {
503 shmem_deswap_pagevec(&pvec); 545 shmem_deswap_pagevec(&pvec);
504 pagevec_release(&pvec); 546 pagevec_release(&pvec);
505 break; 547 break;
@@ -509,19 +551,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
509 struct page *page = pvec.pages[i]; 551 struct page *page = pvec.pages[i];
510 552
511 index = indices[i]; 553 index = indices[i];
512 if (index > end) 554 if (index >= end)
513 break; 555 break;
514 556
515 if (radix_tree_exceptional_entry(page)) { 557 if (radix_tree_exceptional_entry(page)) {
558 if (unfalloc)
559 continue;
516 nr_swaps_freed += !shmem_free_swap(mapping, 560 nr_swaps_freed += !shmem_free_swap(mapping,
517 index, page); 561 index, page);
518 continue; 562 continue;
519 } 563 }
520 564
521 lock_page(page); 565 lock_page(page);
522 if (page->mapping == mapping) { 566 if (!unfalloc || !PageUptodate(page)) {
523 VM_BUG_ON(PageWriteback(page)); 567 if (page->mapping == mapping) {
524 truncate_inode_page(mapping, page); 568 VM_BUG_ON(PageWriteback(page));
569 truncate_inode_page(mapping, page);
570 }
525 } 571 }
526 unlock_page(page); 572 unlock_page(page);
527 } 573 }
@@ -535,7 +581,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
535 info->swapped -= nr_swaps_freed; 581 info->swapped -= nr_swaps_freed;
536 shmem_recalc_inode(inode); 582 shmem_recalc_inode(inode);
537 spin_unlock(&info->lock); 583 spin_unlock(&info->lock);
584}
538 585
586void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
587{
588 shmem_undo_range(inode, lstart, lend, false);
539 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 589 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
540} 590}
541EXPORT_SYMBOL_GPL(shmem_truncate_range); 591EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -604,12 +654,13 @@ static void shmem_evict_inode(struct inode *inode)
604 * If swap found in inode, free it and move page from swapcache to filecache. 654 * If swap found in inode, free it and move page from swapcache to filecache.
605 */ 655 */
606static int shmem_unuse_inode(struct shmem_inode_info *info, 656static int shmem_unuse_inode(struct shmem_inode_info *info,
607 swp_entry_t swap, struct page *page) 657 swp_entry_t swap, struct page **pagep)
608{ 658{
609 struct address_space *mapping = info->vfs_inode.i_mapping; 659 struct address_space *mapping = info->vfs_inode.i_mapping;
610 void *radswap; 660 void *radswap;
611 pgoff_t index; 661 pgoff_t index;
612 int error; 662 gfp_t gfp;
663 int error = 0;
613 664
614 radswap = swp_to_radix_entry(swap); 665 radswap = swp_to_radix_entry(swap);
615 index = radix_tree_locate_item(&mapping->page_tree, radswap); 666 index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +676,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
625 if (shmem_swaplist.next != &info->swaplist) 676 if (shmem_swaplist.next != &info->swaplist)
626 list_move_tail(&shmem_swaplist, &info->swaplist); 677 list_move_tail(&shmem_swaplist, &info->swaplist);
627 678
679 gfp = mapping_gfp_mask(mapping);
680 if (shmem_should_replace_page(*pagep, gfp)) {
681 mutex_unlock(&shmem_swaplist_mutex);
682 error = shmem_replace_page(pagep, gfp, info, index);
683 mutex_lock(&shmem_swaplist_mutex);
684 /*
685 * We needed to drop mutex to make that restrictive page
686 * allocation; but the inode might already be freed by now,
687 * and we cannot refer to inode or mapping or info to check.
688 * However, we do hold page lock on the PageSwapCache page,
689 * so can check if that still has our reference remaining.
690 */
691 if (!page_swapcount(*pagep))
692 error = -ENOENT;
693 }
694
628 /* 695 /*
629 * We rely on shmem_swaplist_mutex, not only to protect the swaplist, 696 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
630 * but also to hold up shmem_evict_inode(): so inode cannot be freed 697 * but also to hold up shmem_evict_inode(): so inode cannot be freed
631 * beneath us (pagelock doesn't help until the page is in pagecache). 698 * beneath us (pagelock doesn't help until the page is in pagecache).
632 */ 699 */
633 error = shmem_add_to_page_cache(page, mapping, index, 700 if (!error)
701 error = shmem_add_to_page_cache(*pagep, mapping, index,
634 GFP_NOWAIT, radswap); 702 GFP_NOWAIT, radswap);
635 /* which does mem_cgroup_uncharge_cache_page on error */
636
637 if (error != -ENOMEM) { 703 if (error != -ENOMEM) {
638 /* 704 /*
639 * Truncation and eviction use free_swap_and_cache(), which 705 * Truncation and eviction use free_swap_and_cache(), which
640 * only does trylock page: if we raced, best clean up here. 706 * only does trylock page: if we raced, best clean up here.
641 */ 707 */
642 delete_from_swap_cache(page); 708 delete_from_swap_cache(*pagep);
643 set_page_dirty(page); 709 set_page_dirty(*pagep);
644 if (!error) { 710 if (!error) {
645 spin_lock(&info->lock); 711 spin_lock(&info->lock);
646 info->swapped--; 712 info->swapped--;
@@ -660,7 +726,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
660 struct list_head *this, *next; 726 struct list_head *this, *next;
661 struct shmem_inode_info *info; 727 struct shmem_inode_info *info;
662 int found = 0; 728 int found = 0;
663 int error; 729 int error = 0;
730
731 /*
732 * There's a faint possibility that swap page was replaced before
733 * caller locked it: it will come back later with the right page.
734 */
735 if (unlikely(!PageSwapCache(page)))
736 goto out;
664 737
665 /* 738 /*
666 * Charge page using GFP_KERNEL while we can wait, before taking 739 * Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +749,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
676 list_for_each_safe(this, next, &shmem_swaplist) { 749 list_for_each_safe(this, next, &shmem_swaplist) {
677 info = list_entry(this, struct shmem_inode_info, swaplist); 750 info = list_entry(this, struct shmem_inode_info, swaplist);
678 if (info->swapped) 751 if (info->swapped)
679 found = shmem_unuse_inode(info, swap, page); 752 found = shmem_unuse_inode(info, swap, &page);
680 else 753 else
681 list_del_init(&info->swaplist); 754 list_del_init(&info->swaplist);
682 cond_resched(); 755 cond_resched();
@@ -685,8 +758,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
685 } 758 }
686 mutex_unlock(&shmem_swaplist_mutex); 759 mutex_unlock(&shmem_swaplist_mutex);
687 760
688 if (!found)
689 mem_cgroup_uncharge_cache_page(page);
690 if (found < 0) 761 if (found < 0)
691 error = found; 762 error = found;
692out: 763out:
@@ -727,6 +798,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
727 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ 798 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
728 goto redirty; 799 goto redirty;
729 } 800 }
801
802 /*
803 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
804 * value into swapfile.c, the only way we can correctly account for a
805 * fallocated page arriving here is now to initialize it and write it.
806 *
807 * That's okay for a page already fallocated earlier, but if we have
808 * not yet completed the fallocation, then (a) we want to keep track
809 * of this page in case we have to undo it, and (b) it may not be a
810 * good idea to continue anyway, once we're pushing into swap. So
811 * reactivate the page, and let shmem_fallocate() quit when too many.
812 */
813 if (!PageUptodate(page)) {
814 if (inode->i_private) {
815 struct shmem_falloc *shmem_falloc;
816 spin_lock(&inode->i_lock);
817 shmem_falloc = inode->i_private;
818 if (shmem_falloc &&
819 index >= shmem_falloc->start &&
820 index < shmem_falloc->next)
821 shmem_falloc->nr_unswapped++;
822 else
823 shmem_falloc = NULL;
824 spin_unlock(&inode->i_lock);
825 if (shmem_falloc)
826 goto redirty;
827 }
828 clear_highpage(page);
829 flush_dcache_page(page);
830 SetPageUptodate(page);
831 }
832
730 swap = get_swap_page(); 833 swap = get_swap_page();
731 if (!swap.val) 834 if (!swap.val)
732 goto redirty; 835 goto redirty;
@@ -856,6 +959,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
856#endif 959#endif
857 960
858/* 961/*
962 * When a page is moved from swapcache to shmem filecache (either by the
963 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
964 * shmem_unuse_inode()), it may have been read in earlier from swap, in
965 * ignorance of the mapping it belongs to. If that mapping has special
966 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
967 * we may need to copy to a suitable page before moving to filecache.
968 *
969 * In a future release, this may well be extended to respect cpuset and
970 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
971 * but for now it is a simple matter of zone.
972 */
973static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
974{
975 return page_zonenum(page) > gfp_zone(gfp);
976}
977
978static int shmem_replace_page(struct page **pagep, gfp_t gfp,
979 struct shmem_inode_info *info, pgoff_t index)
980{
981 struct page *oldpage, *newpage;
982 struct address_space *swap_mapping;
983 pgoff_t swap_index;
984 int error;
985
986 oldpage = *pagep;
987 swap_index = page_private(oldpage);
988 swap_mapping = page_mapping(oldpage);
989
990 /*
991 * We have arrived here because our zones are constrained, so don't
992 * limit chance of success by further cpuset and node constraints.
993 */
994 gfp &= ~GFP_CONSTRAINT_MASK;
995 newpage = shmem_alloc_page(gfp, info, index);
996 if (!newpage)
997 return -ENOMEM;
998 VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
999
1000 *pagep = newpage;
1001 page_cache_get(newpage);
1002 copy_highpage(newpage, oldpage);
1003
1004 VM_BUG_ON(!PageLocked(oldpage));
1005 __set_page_locked(newpage);
1006 VM_BUG_ON(!PageUptodate(oldpage));
1007 SetPageUptodate(newpage);
1008 VM_BUG_ON(!PageSwapBacked(oldpage));
1009 SetPageSwapBacked(newpage);
1010 VM_BUG_ON(!swap_index);
1011 set_page_private(newpage, swap_index);
1012 VM_BUG_ON(!PageSwapCache(oldpage));
1013 SetPageSwapCache(newpage);
1014
1015 /*
1016 * Our caller will very soon move newpage out of swapcache, but it's
1017 * a nice clean interface for us to replace oldpage by newpage there.
1018 */
1019 spin_lock_irq(&swap_mapping->tree_lock);
1020 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1021 newpage);
1022 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1023 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1024 spin_unlock_irq(&swap_mapping->tree_lock);
1025 BUG_ON(error);
1026
1027 mem_cgroup_replace_page_cache(oldpage, newpage);
1028 lru_cache_add_anon(newpage);
1029
1030 ClearPageSwapCache(oldpage);
1031 set_page_private(oldpage, 0);
1032
1033 unlock_page(oldpage);
1034 page_cache_release(oldpage);
1035 page_cache_release(oldpage);
1036 return 0;
1037}
1038
1039/*
859 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate 1040 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
860 * 1041 *
861 * If we allocate a new one we do not mark it dirty. That's up to the 1042 * If we allocate a new one we do not mark it dirty. That's up to the
@@ -872,6 +1053,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
872 swp_entry_t swap; 1053 swp_entry_t swap;
873 int error; 1054 int error;
874 int once = 0; 1055 int once = 0;
1056 int alloced = 0;
875 1057
876 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) 1058 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
877 return -EFBIG; 1059 return -EFBIG;
@@ -883,19 +1065,21 @@ repeat:
883 page = NULL; 1065 page = NULL;
884 } 1066 }
885 1067
886 if (sgp != SGP_WRITE && 1068 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
887 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1069 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
888 error = -EINVAL; 1070 error = -EINVAL;
889 goto failed; 1071 goto failed;
890 } 1072 }
891 1073
1074 /* fallocated page? */
1075 if (page && !PageUptodate(page)) {
1076 if (sgp != SGP_READ)
1077 goto clear;
1078 unlock_page(page);
1079 page_cache_release(page);
1080 page = NULL;
1081 }
892 if (page || (sgp == SGP_READ && !swap.val)) { 1082 if (page || (sgp == SGP_READ && !swap.val)) {
893 /*
894 * Once we can get the page lock, it must be uptodate:
895 * if there were an error in reading back from swap,
896 * the page would not be inserted into the filecache.
897 */
898 BUG_ON(page && !PageUptodate(page));
899 *pagep = page; 1083 *pagep = page;
900 return 0; 1084 return 0;
901 } 1085 }
@@ -923,19 +1107,20 @@ repeat:
923 1107
924 /* We have to do this with page locked to prevent races */ 1108 /* We have to do this with page locked to prevent races */
925 lock_page(page); 1109 lock_page(page);
1110 if (!PageSwapCache(page) || page->mapping) {
1111 error = -EEXIST; /* try again */
1112 goto failed;
1113 }
926 if (!PageUptodate(page)) { 1114 if (!PageUptodate(page)) {
927 error = -EIO; 1115 error = -EIO;
928 goto failed; 1116 goto failed;
929 } 1117 }
930 wait_on_page_writeback(page); 1118 wait_on_page_writeback(page);
931 1119
932 /* Someone may have already done it for us */ 1120 if (shmem_should_replace_page(page, gfp)) {
933 if (page->mapping) { 1121 error = shmem_replace_page(&page, gfp, info, index);
934 if (page->mapping == mapping && 1122 if (error)
935 page->index == index) 1123 goto failed;
936 goto done;
937 error = -EEXIST;
938 goto failed;
939 } 1124 }
940 1125
941 error = mem_cgroup_cache_charge(page, current->mm, 1126 error = mem_cgroup_cache_charge(page, current->mm,
@@ -991,19 +1176,36 @@ repeat:
991 inode->i_blocks += BLOCKS_PER_PAGE; 1176 inode->i_blocks += BLOCKS_PER_PAGE;
992 shmem_recalc_inode(inode); 1177 shmem_recalc_inode(inode);
993 spin_unlock(&info->lock); 1178 spin_unlock(&info->lock);
1179 alloced = true;
994 1180
995 clear_highpage(page); 1181 /*
996 flush_dcache_page(page); 1182 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
997 SetPageUptodate(page); 1183 */
1184 if (sgp == SGP_FALLOC)
1185 sgp = SGP_WRITE;
1186clear:
1187 /*
1188 * Let SGP_WRITE caller clear ends if write does not fill page;
1189 * but SGP_FALLOC on a page fallocated earlier must initialize
1190 * it now, lest undo on failure cancel our earlier guarantee.
1191 */
1192 if (sgp != SGP_WRITE) {
1193 clear_highpage(page);
1194 flush_dcache_page(page);
1195 SetPageUptodate(page);
1196 }
998 if (sgp == SGP_DIRTY) 1197 if (sgp == SGP_DIRTY)
999 set_page_dirty(page); 1198 set_page_dirty(page);
1000 } 1199 }
1001done: 1200
1002 /* Perhaps the file has been truncated since we checked */ 1201 /* Perhaps the file has been truncated since we checked */
1003 if (sgp != SGP_WRITE && 1202 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1004 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1203 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1005 error = -EINVAL; 1204 error = -EINVAL;
1006 goto trunc; 1205 if (alloced)
1206 goto trunc;
1207 else
1208 goto failed;
1007 } 1209 }
1008 *pagep = page; 1210 *pagep = page;
1009 return 0; 1211 return 0;
@@ -1012,6 +1214,7 @@ done:
1012 * Error recovery. 1214 * Error recovery.
1013 */ 1215 */
1014trunc: 1216trunc:
1217 info = SHMEM_I(inode);
1015 ClearPageDirty(page); 1218 ClearPageDirty(page);
1016 delete_from_page_cache(page); 1219 delete_from_page_cache(page);
1017 spin_lock(&info->lock); 1220 spin_lock(&info->lock);
@@ -1019,6 +1222,7 @@ trunc:
1019 inode->i_blocks -= BLOCKS_PER_PAGE; 1222 inode->i_blocks -= BLOCKS_PER_PAGE;
1020 spin_unlock(&info->lock); 1223 spin_unlock(&info->lock);
1021decused: 1224decused:
1225 sbinfo = SHMEM_SB(inode->i_sb);
1022 if (sbinfo->max_blocks) 1226 if (sbinfo->max_blocks)
1023 percpu_counter_add(&sbinfo->used_blocks, -1); 1227 percpu_counter_add(&sbinfo->used_blocks, -1);
1024unacct: 1228unacct:
@@ -1204,6 +1408,14 @@ shmem_write_end(struct file *file, struct address_space *mapping,
1204 if (pos + copied > inode->i_size) 1408 if (pos + copied > inode->i_size)
1205 i_size_write(inode, pos + copied); 1409 i_size_write(inode, pos + copied);
1206 1410
1411 if (!PageUptodate(page)) {
1412 if (copied < PAGE_CACHE_SIZE) {
1413 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1414 zero_user_segments(page, 0, from,
1415 from + copied, PAGE_CACHE_SIZE);
1416 }
1417 SetPageUptodate(page);
1418 }
1207 set_page_dirty(page); 1419 set_page_dirty(page);
1208 unlock_page(page); 1420 unlock_page(page);
1209 page_cache_release(page); 1421 page_cache_release(page);
@@ -1462,6 +1674,199 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1462 return error; 1674 return error;
1463} 1675}
1464 1676
1677/*
1678 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
1679 */
1680static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
1681 pgoff_t index, pgoff_t end, int origin)
1682{
1683 struct page *page;
1684 struct pagevec pvec;
1685 pgoff_t indices[PAGEVEC_SIZE];
1686 bool done = false;
1687 int i;
1688
1689 pagevec_init(&pvec, 0);
1690 pvec.nr = 1; /* start small: we may be there already */
1691 while (!done) {
1692 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
1693 pvec.nr, pvec.pages, indices);
1694 if (!pvec.nr) {
1695 if (origin == SEEK_DATA)
1696 index = end;
1697 break;
1698 }
1699 for (i = 0; i < pvec.nr; i++, index++) {
1700 if (index < indices[i]) {
1701 if (origin == SEEK_HOLE) {
1702 done = true;
1703 break;
1704 }
1705 index = indices[i];
1706 }
1707 page = pvec.pages[i];
1708 if (page && !radix_tree_exceptional_entry(page)) {
1709 if (!PageUptodate(page))
1710 page = NULL;
1711 }
1712 if (index >= end ||
1713 (page && origin == SEEK_DATA) ||
1714 (!page && origin == SEEK_HOLE)) {
1715 done = true;
1716 break;
1717 }
1718 }
1719 shmem_deswap_pagevec(&pvec);
1720 pagevec_release(&pvec);
1721 pvec.nr = PAGEVEC_SIZE;
1722 cond_resched();
1723 }
1724 return index;
1725}
1726
1727static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
1728{
1729 struct address_space *mapping;
1730 struct inode *inode;
1731 pgoff_t start, end;
1732 loff_t new_offset;
1733
1734 if (origin != SEEK_DATA && origin != SEEK_HOLE)
1735 return generic_file_llseek_size(file, offset, origin,
1736 MAX_LFS_FILESIZE);
1737 mapping = file->f_mapping;
1738 inode = mapping->host;
1739 mutex_lock(&inode->i_mutex);
1740 /* We're holding i_mutex so we can access i_size directly */
1741
1742 if (offset < 0)
1743 offset = -EINVAL;
1744 else if (offset >= inode->i_size)
1745 offset = -ENXIO;
1746 else {
1747 start = offset >> PAGE_CACHE_SHIFT;
1748 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1749 new_offset = shmem_seek_hole_data(mapping, start, end, origin);
1750 new_offset <<= PAGE_CACHE_SHIFT;
1751 if (new_offset > offset) {
1752 if (new_offset < inode->i_size)
1753 offset = new_offset;
1754 else if (origin == SEEK_DATA)
1755 offset = -ENXIO;
1756 else
1757 offset = inode->i_size;
1758 }
1759 }
1760
1761 if (offset >= 0 && offset != file->f_pos) {
1762 file->f_pos = offset;
1763 file->f_version = 0;
1764 }
1765 mutex_unlock(&inode->i_mutex);
1766 return offset;
1767}
1768
1769static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1770 loff_t len)
1771{
1772 struct inode *inode = file->f_path.dentry->d_inode;
1773 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1774 struct shmem_falloc shmem_falloc;
1775 pgoff_t start, index, end;
1776 int error;
1777
1778 mutex_lock(&inode->i_mutex);
1779
1780 if (mode & FALLOC_FL_PUNCH_HOLE) {
1781 struct address_space *mapping = file->f_mapping;
1782 loff_t unmap_start = round_up(offset, PAGE_SIZE);
1783 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
1784
1785 if ((u64)unmap_end > (u64)unmap_start)
1786 unmap_mapping_range(mapping, unmap_start,
1787 1 + unmap_end - unmap_start, 0);
1788 shmem_truncate_range(inode, offset, offset + len - 1);
1789 /* No need to unmap again: hole-punching leaves COWed pages */
1790 error = 0;
1791 goto out;
1792 }
1793
1794 /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
1795 error = inode_newsize_ok(inode, offset + len);
1796 if (error)
1797 goto out;
1798
1799 start = offset >> PAGE_CACHE_SHIFT;
1800 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1801 /* Try to avoid a swapstorm if len is impossible to satisfy */
1802 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
1803 error = -ENOSPC;
1804 goto out;
1805 }
1806
1807 shmem_falloc.start = start;
1808 shmem_falloc.next = start;
1809 shmem_falloc.nr_falloced = 0;
1810 shmem_falloc.nr_unswapped = 0;
1811 spin_lock(&inode->i_lock);
1812 inode->i_private = &shmem_falloc;
1813 spin_unlock(&inode->i_lock);
1814
1815 for (index = start; index < end; index++) {
1816 struct page *page;
1817
1818 /*
1819 * Good, the fallocate(2) manpage permits EINTR: we may have
1820 * been interrupted because we are using up too much memory.
1821 */
1822 if (signal_pending(current))
1823 error = -EINTR;
1824 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
1825 error = -ENOMEM;
1826 else
1827 error = shmem_getpage(inode, index, &page, SGP_FALLOC,
1828 NULL);
1829 if (error) {
1830 /* Remove the !PageUptodate pages we added */
1831 shmem_undo_range(inode,
1832 (loff_t)start << PAGE_CACHE_SHIFT,
1833 (loff_t)index << PAGE_CACHE_SHIFT, true);
1834 goto undone;
1835 }
1836
1837 /*
1838 * Inform shmem_writepage() how far we have reached.
1839 * No need for lock or barrier: we have the page lock.
1840 */
1841 shmem_falloc.next++;
1842 if (!PageUptodate(page))
1843 shmem_falloc.nr_falloced++;
1844
1845 /*
1846 * If !PageUptodate, leave it that way so that freeable pages
1847 * can be recognized if we need to rollback on error later.
1848 * But set_page_dirty so that memory pressure will swap rather
1849 * than free the pages we are allocating (and SGP_CACHE pages
1850 * might still be clean: we now need to mark those dirty too).
1851 */
1852 set_page_dirty(page);
1853 unlock_page(page);
1854 page_cache_release(page);
1855 cond_resched();
1856 }
1857
1858 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
1859 i_size_write(inode, offset + len);
1860 inode->i_ctime = CURRENT_TIME;
1861undone:
1862 spin_lock(&inode->i_lock);
1863 inode->i_private = NULL;
1864 spin_unlock(&inode->i_lock);
1865out:
1866 mutex_unlock(&inode->i_mutex);
1867 return error;
1868}
1869
1465static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 1870static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1466{ 1871{
1467 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 1872 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -1665,6 +2070,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1665 kaddr = kmap_atomic(page); 2070 kaddr = kmap_atomic(page);
1666 memcpy(kaddr, symname, len); 2071 memcpy(kaddr, symname, len);
1667 kunmap_atomic(kaddr); 2072 kunmap_atomic(kaddr);
2073 SetPageUptodate(page);
1668 set_page_dirty(page); 2074 set_page_dirty(page);
1669 unlock_page(page); 2075 unlock_page(page);
1670 page_cache_release(page); 2076 page_cache_release(page);
@@ -2270,6 +2676,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2270 } 2676 }
2271 } 2677 }
2272 sb->s_export_op = &shmem_export_ops; 2678 sb->s_export_op = &shmem_export_ops;
2679 sb->s_flags |= MS_NOSEC;
2273#else 2680#else
2274 sb->s_flags |= MS_NOUSER; 2681 sb->s_flags |= MS_NOUSER;
2275#endif 2682#endif
@@ -2364,7 +2771,7 @@ static const struct address_space_operations shmem_aops = {
2364static const struct file_operations shmem_file_operations = { 2771static const struct file_operations shmem_file_operations = {
2365 .mmap = shmem_mmap, 2772 .mmap = shmem_mmap,
2366#ifdef CONFIG_TMPFS 2773#ifdef CONFIG_TMPFS
2367 .llseek = generic_file_llseek, 2774 .llseek = shmem_file_llseek,
2368 .read = do_sync_read, 2775 .read = do_sync_read,
2369 .write = do_sync_write, 2776 .write = do_sync_write,
2370 .aio_read = shmem_file_aio_read, 2777 .aio_read = shmem_file_aio_read,
@@ -2372,12 +2779,12 @@ static const struct file_operations shmem_file_operations = {
2372 .fsync = noop_fsync, 2779 .fsync = noop_fsync,
2373 .splice_read = shmem_file_splice_read, 2780 .splice_read = shmem_file_splice_read,
2374 .splice_write = generic_file_splice_write, 2781 .splice_write = generic_file_splice_write,
2782 .fallocate = shmem_fallocate,
2375#endif 2783#endif
2376}; 2784};
2377 2785
2378static const struct inode_operations shmem_inode_operations = { 2786static const struct inode_operations shmem_inode_operations = {
2379 .setattr = shmem_setattr, 2787 .setattr = shmem_setattr,
2380 .truncate_range = shmem_truncate_range,
2381#ifdef CONFIG_TMPFS_XATTR 2788#ifdef CONFIG_TMPFS_XATTR
2382 .setxattr = shmem_setxattr, 2789 .setxattr = shmem_setxattr,
2383 .getxattr = shmem_getxattr, 2790 .getxattr = shmem_getxattr,
diff --git a/mm/sparse.c b/mm/sparse.c
index a8bc7d364deb..6a4bf9160e85 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -273,10 +273,10 @@ static unsigned long *__kmalloc_section_usemap(void)
273#ifdef CONFIG_MEMORY_HOTREMOVE 273#ifdef CONFIG_MEMORY_HOTREMOVE
274static unsigned long * __init 274static unsigned long * __init
275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
276 unsigned long count) 276 unsigned long size)
277{ 277{
278 unsigned long section_nr; 278 pg_data_t *host_pgdat;
279 279 unsigned long goal;
280 /* 280 /*
281 * A page may contain usemaps for other sections preventing the 281 * A page may contain usemaps for other sections preventing the
282 * page being freed and making a section unremovable while 282 * page being freed and making a section unremovable while
@@ -287,8 +287,10 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
287 * from the same section as the pgdat where possible to avoid 287 * from the same section as the pgdat where possible to avoid
288 * this problem. 288 * this problem.
289 */ 289 */
290 section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 290 goal = __pa(pgdat) & PAGE_SECTION_MASK;
291 return alloc_bootmem_section(usemap_size() * count, section_nr); 291 host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
292 return __alloc_bootmem_node_nopanic(host_pgdat, size,
293 SMP_CACHE_BYTES, goal);
292} 294}
293 295
294static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 296static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -332,9 +334,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
332#else 334#else
333static unsigned long * __init 335static unsigned long * __init
334sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 336sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
335 unsigned long count) 337 unsigned long size)
336{ 338{
337 return NULL; 339 return alloc_bootmem_node_nopanic(pgdat, size);
338} 340}
339 341
340static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 342static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -352,13 +354,10 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
352 int size = usemap_size(); 354 int size = usemap_size();
353 355
354 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), 356 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
355 usemap_count); 357 size * usemap_count);
356 if (!usemap) { 358 if (!usemap) {
357 usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); 359 printk(KERN_WARNING "%s: allocation failed\n", __func__);
358 if (!usemap) { 360 return;
359 printk(KERN_WARNING "%s: allocation failed\n", __func__);
360 return;
361 }
362 } 361 }
363 362
364 for (pnum = pnum_begin; pnum < pnum_end; pnum++) { 363 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
diff --git a/mm/swap.c b/mm/swap.c
index 5c13f1338972..4e7e2ec67078 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -47,13 +47,15 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
47static void __page_cache_release(struct page *page) 47static void __page_cache_release(struct page *page)
48{ 48{
49 if (PageLRU(page)) { 49 if (PageLRU(page)) {
50 unsigned long flags;
51 struct zone *zone = page_zone(page); 50 struct zone *zone = page_zone(page);
51 struct lruvec *lruvec;
52 unsigned long flags;
52 53
53 spin_lock_irqsave(&zone->lru_lock, flags); 54 spin_lock_irqsave(&zone->lru_lock, flags);
55 lruvec = mem_cgroup_page_lruvec(page, zone);
54 VM_BUG_ON(!PageLRU(page)); 56 VM_BUG_ON(!PageLRU(page));
55 __ClearPageLRU(page); 57 __ClearPageLRU(page);
56 del_page_from_lru_list(zone, page, page_off_lru(page)); 58 del_page_from_lru_list(page, lruvec, page_off_lru(page));
57 spin_unlock_irqrestore(&zone->lru_lock, flags); 59 spin_unlock_irqrestore(&zone->lru_lock, flags);
58 } 60 }
59} 61}
@@ -82,6 +84,25 @@ static void put_compound_page(struct page *page)
82 if (likely(page != page_head && 84 if (likely(page != page_head &&
83 get_page_unless_zero(page_head))) { 85 get_page_unless_zero(page_head))) {
84 unsigned long flags; 86 unsigned long flags;
87
88 /*
89 * THP can not break up slab pages so avoid taking
90 * compound_lock(). Slab performs non-atomic bit ops
91 * on page->flags for better performance. In particular
92 * slab_unlock() in slub used to be a hot path. It is
93 * still hot on arches that do not support
94 * this_cpu_cmpxchg_double().
95 */
96 if (PageSlab(page_head)) {
97 if (PageTail(page)) {
98 if (put_page_testzero(page_head))
99 VM_BUG_ON(1);
100
101 atomic_dec(&page->_mapcount);
102 goto skip_lock_tail;
103 } else
104 goto skip_lock;
105 }
85 /* 106 /*
86 * page_head wasn't a dangling pointer but it 107 * page_head wasn't a dangling pointer but it
87 * may not be a head page anymore by the time 108 * may not be a head page anymore by the time
@@ -92,10 +113,10 @@ static void put_compound_page(struct page *page)
92 if (unlikely(!PageTail(page))) { 113 if (unlikely(!PageTail(page))) {
93 /* __split_huge_page_refcount run before us */ 114 /* __split_huge_page_refcount run before us */
94 compound_unlock_irqrestore(page_head, flags); 115 compound_unlock_irqrestore(page_head, flags);
95 VM_BUG_ON(PageHead(page_head)); 116skip_lock:
96 if (put_page_testzero(page_head)) 117 if (put_page_testzero(page_head))
97 __put_single_page(page_head); 118 __put_single_page(page_head);
98 out_put_single: 119out_put_single:
99 if (put_page_testzero(page)) 120 if (put_page_testzero(page))
100 __put_single_page(page); 121 __put_single_page(page);
101 return; 122 return;
@@ -115,6 +136,8 @@ static void put_compound_page(struct page *page)
115 VM_BUG_ON(atomic_read(&page_head->_count) <= 0); 136 VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
116 VM_BUG_ON(atomic_read(&page->_count) != 0); 137 VM_BUG_ON(atomic_read(&page->_count) != 0);
117 compound_unlock_irqrestore(page_head, flags); 138 compound_unlock_irqrestore(page_head, flags);
139
140skip_lock_tail:
118 if (put_page_testzero(page_head)) { 141 if (put_page_testzero(page_head)) {
119 if (PageHead(page_head)) 142 if (PageHead(page_head))
120 __put_compound_page(page_head); 143 __put_compound_page(page_head);
@@ -162,6 +185,18 @@ bool __get_page_tail(struct page *page)
162 struct page *page_head = compound_trans_head(page); 185 struct page *page_head = compound_trans_head(page);
163 186
164 if (likely(page != page_head && get_page_unless_zero(page_head))) { 187 if (likely(page != page_head && get_page_unless_zero(page_head))) {
188
189 /* Ref to put_compound_page() comment. */
190 if (PageSlab(page_head)) {
191 if (likely(PageTail(page))) {
192 __get_page_tail_foll(page, false);
193 return true;
194 } else {
195 put_page(page_head);
196 return false;
197 }
198 }
199
165 /* 200 /*
166 * page_head wasn't a dangling pointer but it 201 * page_head wasn't a dangling pointer but it
167 * may not be a head page anymore by the time 202 * may not be a head page anymore by the time
@@ -202,11 +237,12 @@ void put_pages_list(struct list_head *pages)
202EXPORT_SYMBOL(put_pages_list); 237EXPORT_SYMBOL(put_pages_list);
203 238
204static void pagevec_lru_move_fn(struct pagevec *pvec, 239static void pagevec_lru_move_fn(struct pagevec *pvec,
205 void (*move_fn)(struct page *page, void *arg), 240 void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
206 void *arg) 241 void *arg)
207{ 242{
208 int i; 243 int i;
209 struct zone *zone = NULL; 244 struct zone *zone = NULL;
245 struct lruvec *lruvec;
210 unsigned long flags = 0; 246 unsigned long flags = 0;
211 247
212 for (i = 0; i < pagevec_count(pvec); i++) { 248 for (i = 0; i < pagevec_count(pvec); i++) {
@@ -220,7 +256,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
220 spin_lock_irqsave(&zone->lru_lock, flags); 256 spin_lock_irqsave(&zone->lru_lock, flags);
221 } 257 }
222 258
223 (*move_fn)(page, arg); 259 lruvec = mem_cgroup_page_lruvec(page, zone);
260 (*move_fn)(page, lruvec, arg);
224 } 261 }
225 if (zone) 262 if (zone)
226 spin_unlock_irqrestore(&zone->lru_lock, flags); 263 spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -228,16 +265,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
228 pagevec_reinit(pvec); 265 pagevec_reinit(pvec);
229} 266}
230 267
231static void pagevec_move_tail_fn(struct page *page, void *arg) 268static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
269 void *arg)
232{ 270{
233 int *pgmoved = arg; 271 int *pgmoved = arg;
234 272
235 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 273 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
236 enum lru_list lru = page_lru_base_type(page); 274 enum lru_list lru = page_lru_base_type(page);
237 struct lruvec *lruvec;
238
239 lruvec = mem_cgroup_lru_move_lists(page_zone(page),
240 page, lru, lru);
241 list_move_tail(&page->lru, &lruvec->lists[lru]); 275 list_move_tail(&page->lru, &lruvec->lists[lru]);
242 (*pgmoved)++; 276 (*pgmoved)++;
243 } 277 }
@@ -276,41 +310,30 @@ void rotate_reclaimable_page(struct page *page)
276 } 310 }
277} 311}
278 312
279static void update_page_reclaim_stat(struct zone *zone, struct page *page, 313static void update_page_reclaim_stat(struct lruvec *lruvec,
280 int file, int rotated) 314 int file, int rotated)
281{ 315{
282 struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; 316 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
283 struct zone_reclaim_stat *memcg_reclaim_stat;
284
285 memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
286 317
287 reclaim_stat->recent_scanned[file]++; 318 reclaim_stat->recent_scanned[file]++;
288 if (rotated) 319 if (rotated)
289 reclaim_stat->recent_rotated[file]++; 320 reclaim_stat->recent_rotated[file]++;
290
291 if (!memcg_reclaim_stat)
292 return;
293
294 memcg_reclaim_stat->recent_scanned[file]++;
295 if (rotated)
296 memcg_reclaim_stat->recent_rotated[file]++;
297} 321}
298 322
299static void __activate_page(struct page *page, void *arg) 323static void __activate_page(struct page *page, struct lruvec *lruvec,
324 void *arg)
300{ 325{
301 struct zone *zone = page_zone(page);
302
303 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 326 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
304 int file = page_is_file_cache(page); 327 int file = page_is_file_cache(page);
305 int lru = page_lru_base_type(page); 328 int lru = page_lru_base_type(page);
306 del_page_from_lru_list(zone, page, lru);
307 329
330 del_page_from_lru_list(page, lruvec, lru);
308 SetPageActive(page); 331 SetPageActive(page);
309 lru += LRU_ACTIVE; 332 lru += LRU_ACTIVE;
310 add_page_to_lru_list(zone, page, lru); 333 add_page_to_lru_list(page, lruvec, lru);
311 __count_vm_event(PGACTIVATE);
312 334
313 update_page_reclaim_stat(zone, page, file, 1); 335 __count_vm_event(PGACTIVATE);
336 update_page_reclaim_stat(lruvec, file, 1);
314 } 337 }
315} 338}
316 339
@@ -347,7 +370,7 @@ void activate_page(struct page *page)
347 struct zone *zone = page_zone(page); 370 struct zone *zone = page_zone(page);
348 371
349 spin_lock_irq(&zone->lru_lock); 372 spin_lock_irq(&zone->lru_lock);
350 __activate_page(page, NULL); 373 __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
351 spin_unlock_irq(&zone->lru_lock); 374 spin_unlock_irq(&zone->lru_lock);
352} 375}
353#endif 376#endif
@@ -414,11 +437,13 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru)
414void add_page_to_unevictable_list(struct page *page) 437void add_page_to_unevictable_list(struct page *page)
415{ 438{
416 struct zone *zone = page_zone(page); 439 struct zone *zone = page_zone(page);
440 struct lruvec *lruvec;
417 441
418 spin_lock_irq(&zone->lru_lock); 442 spin_lock_irq(&zone->lru_lock);
443 lruvec = mem_cgroup_page_lruvec(page, zone);
419 SetPageUnevictable(page); 444 SetPageUnevictable(page);
420 SetPageLRU(page); 445 SetPageLRU(page);
421 add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); 446 add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
422 spin_unlock_irq(&zone->lru_lock); 447 spin_unlock_irq(&zone->lru_lock);
423} 448}
424 449
@@ -443,11 +468,11 @@ void add_page_to_unevictable_list(struct page *page)
443 * be write it out by flusher threads as this is much more effective 468 * be write it out by flusher threads as this is much more effective
444 * than the single-page writeout from reclaim. 469 * than the single-page writeout from reclaim.
445 */ 470 */
446static void lru_deactivate_fn(struct page *page, void *arg) 471static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
472 void *arg)
447{ 473{
448 int lru, file; 474 int lru, file;
449 bool active; 475 bool active;
450 struct zone *zone = page_zone(page);
451 476
452 if (!PageLRU(page)) 477 if (!PageLRU(page))
453 return; 478 return;
@@ -460,13 +485,13 @@ static void lru_deactivate_fn(struct page *page, void *arg)
460 return; 485 return;
461 486
462 active = PageActive(page); 487 active = PageActive(page);
463
464 file = page_is_file_cache(page); 488 file = page_is_file_cache(page);
465 lru = page_lru_base_type(page); 489 lru = page_lru_base_type(page);
466 del_page_from_lru_list(zone, page, lru + active); 490
491 del_page_from_lru_list(page, lruvec, lru + active);
467 ClearPageActive(page); 492 ClearPageActive(page);
468 ClearPageReferenced(page); 493 ClearPageReferenced(page);
469 add_page_to_lru_list(zone, page, lru); 494 add_page_to_lru_list(page, lruvec, lru);
470 495
471 if (PageWriteback(page) || PageDirty(page)) { 496 if (PageWriteback(page) || PageDirty(page)) {
472 /* 497 /*
@@ -476,19 +501,17 @@ static void lru_deactivate_fn(struct page *page, void *arg)
476 */ 501 */
477 SetPageReclaim(page); 502 SetPageReclaim(page);
478 } else { 503 } else {
479 struct lruvec *lruvec;
480 /* 504 /*
481 * The page's writeback ends up during pagevec 505 * The page's writeback ends up during pagevec
482 * We moves tha page into tail of inactive. 506 * We moves tha page into tail of inactive.
483 */ 507 */
484 lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
485 list_move_tail(&page->lru, &lruvec->lists[lru]); 508 list_move_tail(&page->lru, &lruvec->lists[lru]);
486 __count_vm_event(PGROTATED); 509 __count_vm_event(PGROTATED);
487 } 510 }
488 511
489 if (active) 512 if (active)
490 __count_vm_event(PGDEACTIVATE); 513 __count_vm_event(PGDEACTIVATE);
491 update_page_reclaim_stat(zone, page, file, 0); 514 update_page_reclaim_stat(lruvec, file, 0);
492} 515}
493 516
494/* 517/*
@@ -588,6 +611,7 @@ void release_pages(struct page **pages, int nr, int cold)
588 int i; 611 int i;
589 LIST_HEAD(pages_to_free); 612 LIST_HEAD(pages_to_free);
590 struct zone *zone = NULL; 613 struct zone *zone = NULL;
614 struct lruvec *lruvec;
591 unsigned long uninitialized_var(flags); 615 unsigned long uninitialized_var(flags);
592 616
593 for (i = 0; i < nr; i++) { 617 for (i = 0; i < nr; i++) {
@@ -615,9 +639,11 @@ void release_pages(struct page **pages, int nr, int cold)
615 zone = pagezone; 639 zone = pagezone;
616 spin_lock_irqsave(&zone->lru_lock, flags); 640 spin_lock_irqsave(&zone->lru_lock, flags);
617 } 641 }
642
643 lruvec = mem_cgroup_page_lruvec(page, zone);
618 VM_BUG_ON(!PageLRU(page)); 644 VM_BUG_ON(!PageLRU(page));
619 __ClearPageLRU(page); 645 __ClearPageLRU(page);
620 del_page_from_lru_list(zone, page, page_off_lru(page)); 646 del_page_from_lru_list(page, lruvec, page_off_lru(page));
621 } 647 }
622 648
623 list_add(&page->lru, &pages_to_free); 649 list_add(&page->lru, &pages_to_free);
@@ -649,8 +675,8 @@ EXPORT_SYMBOL(__pagevec_release);
649 675
650#ifdef CONFIG_TRANSPARENT_HUGEPAGE 676#ifdef CONFIG_TRANSPARENT_HUGEPAGE
651/* used by __split_huge_page_refcount() */ 677/* used by __split_huge_page_refcount() */
652void lru_add_page_tail(struct zone* zone, 678void lru_add_page_tail(struct page *page, struct page *page_tail,
653 struct page *page, struct page *page_tail) 679 struct lruvec *lruvec)
654{ 680{
655 int uninitialized_var(active); 681 int uninitialized_var(active);
656 enum lru_list lru; 682 enum lru_list lru;
@@ -659,7 +685,8 @@ void lru_add_page_tail(struct zone* zone,
659 VM_BUG_ON(!PageHead(page)); 685 VM_BUG_ON(!PageHead(page));
660 VM_BUG_ON(PageCompound(page_tail)); 686 VM_BUG_ON(PageCompound(page_tail));
661 VM_BUG_ON(PageLRU(page_tail)); 687 VM_BUG_ON(PageLRU(page_tail));
662 VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock)); 688 VM_BUG_ON(NR_CPUS != 1 &&
689 !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
663 690
664 SetPageLRU(page_tail); 691 SetPageLRU(page_tail);
665 692
@@ -688,20 +715,20 @@ void lru_add_page_tail(struct zone* zone,
688 * Use the standard add function to put page_tail on the list, 715 * Use the standard add function to put page_tail on the list,
689 * but then correct its position so they all end up in order. 716 * but then correct its position so they all end up in order.
690 */ 717 */
691 add_page_to_lru_list(zone, page_tail, lru); 718 add_page_to_lru_list(page_tail, lruvec, lru);
692 list_head = page_tail->lru.prev; 719 list_head = page_tail->lru.prev;
693 list_move_tail(&page_tail->lru, list_head); 720 list_move_tail(&page_tail->lru, list_head);
694 } 721 }
695 722
696 if (!PageUnevictable(page)) 723 if (!PageUnevictable(page))
697 update_page_reclaim_stat(zone, page_tail, file, active); 724 update_page_reclaim_stat(lruvec, file, active);
698} 725}
699#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 726#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
700 727
701static void __pagevec_lru_add_fn(struct page *page, void *arg) 728static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
729 void *arg)
702{ 730{
703 enum lru_list lru = (enum lru_list)arg; 731 enum lru_list lru = (enum lru_list)arg;
704 struct zone *zone = page_zone(page);
705 int file = is_file_lru(lru); 732 int file = is_file_lru(lru);
706 int active = is_active_lru(lru); 733 int active = is_active_lru(lru);
707 734
@@ -712,8 +739,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg)
712 SetPageLRU(page); 739 SetPageLRU(page);
713 if (active) 740 if (active)
714 SetPageActive(page); 741 SetPageActive(page);
715 add_page_to_lru_list(zone, page, lru); 742 add_page_to_lru_list(page, lruvec, lru);
716 update_page_reclaim_stat(zone, page, file, active); 743 update_page_reclaim_stat(lruvec, file, active);
717} 744}
718 745
719/* 746/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..457b10baef59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
601 * This does not give an exact answer when swap count is continued, 601 * This does not give an exact answer when swap count is continued,
602 * but does include the high COUNT_CONTINUED flag to allow for that. 602 * but does include the high COUNT_CONTINUED flag to allow for that.
603 */ 603 */
604static inline int page_swapcount(struct page *page) 604int page_swapcount(struct page *page)
605{ 605{
606 int count = 0; 606 int count = 0;
607 struct swap_info_struct *p; 607 struct swap_info_struct *p;
@@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry)
717 return p != NULL; 717 return p != NULL;
718} 718}
719 719
720#ifdef CONFIG_CGROUP_MEM_RES_CTLR
721/**
722 * mem_cgroup_count_swap_user - count the user of a swap entry
723 * @ent: the swap entry to be checked
724 * @pagep: the pointer for the swap cache page of the entry to be stored
725 *
726 * Returns the number of the user of the swap entry. The number is valid only
727 * for swaps of anonymous pages.
728 * If the entry is found on swap cache, the page is stored to pagep with
729 * refcount of it being incremented.
730 */
731int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
732{
733 struct page *page;
734 struct swap_info_struct *p;
735 int count = 0;
736
737 page = find_get_page(&swapper_space, ent.val);
738 if (page)
739 count += page_mapcount(page);
740 p = swap_info_get(ent);
741 if (p) {
742 count += swap_count(p->swap_map[swp_offset(ent)]);
743 spin_unlock(&swap_lock);
744 }
745
746 *pagep = page;
747 return count;
748}
749#endif
750
751#ifdef CONFIG_HIBERNATION 720#ifdef CONFIG_HIBERNATION
752/* 721/*
753 * Find the swap type that corresponds to given device (if any). 722 * Find the swap type that corresponds to given device (if any).
diff --git a/mm/thrash.c b/mm/thrash.c
deleted file mode 100644
index 57ad495dbd54..000000000000
--- a/mm/thrash.c
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * mm/thrash.c
3 *
4 * Copyright (C) 2004, Red Hat, Inc.
5 * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
6 * Released under the GPL, see the file COPYING for details.
7 *
8 * Simple token based thrashing protection, using the algorithm
9 * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
10 *
11 * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
12 * Improved algorithm to pass token:
13 * Each task has a priority which is incremented if it contended
14 * for the token in an interval less than its previous attempt.
15 * If the token is acquired, that task's priority is boosted to prevent
16 * the token from bouncing around too often and to let the task make
17 * some progress in its execution.
18 */
19
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/sched.h>
23#include <linux/swap.h>
24#include <linux/memcontrol.h>
25
26#include <trace/events/vmscan.h>
27
28#define TOKEN_AGING_INTERVAL (0xFF)
29
30static DEFINE_SPINLOCK(swap_token_lock);
31struct mm_struct *swap_token_mm;
32static struct mem_cgroup *swap_token_memcg;
33
34#ifdef CONFIG_CGROUP_MEM_RES_CTLR
35static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
36{
37 struct mem_cgroup *memcg;
38
39 memcg = try_get_mem_cgroup_from_mm(mm);
40 if (memcg)
41 css_put(mem_cgroup_css(memcg));
42
43 return memcg;
44}
45#else
46static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
47{
48 return NULL;
49}
50#endif
51
52void grab_swap_token(struct mm_struct *mm)
53{
54 int current_interval;
55 unsigned int old_prio = mm->token_priority;
56 static unsigned int global_faults;
57 static unsigned int last_aging;
58
59 global_faults++;
60
61 current_interval = global_faults - mm->faultstamp;
62
63 if (!spin_trylock(&swap_token_lock))
64 return;
65
66 /* First come first served */
67 if (!swap_token_mm)
68 goto replace_token;
69
70 /*
71 * Usually, we don't need priority aging because long interval faults
72 * makes priority decrease quickly. But there is one exception. If the
73 * token owner task is sleeping, it never make long interval faults.
74 * Thus, we need a priority aging mechanism instead. The requirements
75 * of priority aging are
76 * 1) An aging interval is reasonable enough long. Too short aging
77 * interval makes quick swap token lost and decrease performance.
78 * 2) The swap token owner task have to get priority aging even if
79 * it's under sleep.
80 */
81 if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
82 swap_token_mm->token_priority /= 2;
83 last_aging = global_faults;
84 }
85
86 if (mm == swap_token_mm) {
87 mm->token_priority += 2;
88 goto update_priority;
89 }
90
91 if (current_interval < mm->last_interval)
92 mm->token_priority++;
93 else {
94 if (likely(mm->token_priority > 0))
95 mm->token_priority--;
96 }
97
98 /* Check if we deserve the token */
99 if (mm->token_priority > swap_token_mm->token_priority)
100 goto replace_token;
101
102update_priority:
103 trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
104
105out:
106 mm->faultstamp = global_faults;
107 mm->last_interval = current_interval;
108 spin_unlock(&swap_token_lock);
109 return;
110
111replace_token:
112 mm->token_priority += 2;
113 trace_replace_swap_token(swap_token_mm, mm);
114 swap_token_mm = mm;
115 swap_token_memcg = swap_token_memcg_from_mm(mm);
116 last_aging = global_faults;
117 goto out;
118}
119
120/* Called on process exit. */
121void __put_swap_token(struct mm_struct *mm)
122{
123 spin_lock(&swap_token_lock);
124 if (likely(mm == swap_token_mm)) {
125 trace_put_swap_token(swap_token_mm);
126 swap_token_mm = NULL;
127 swap_token_memcg = NULL;
128 }
129 spin_unlock(&swap_token_lock);
130}
131
132static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
133{
134 if (!a)
135 return true;
136 if (!b)
137 return true;
138 if (a == b)
139 return true;
140 return false;
141}
142
143void disable_swap_token(struct mem_cgroup *memcg)
144{
145 /* memcg reclaim don't disable unrelated mm token. */
146 if (match_memcg(memcg, swap_token_memcg)) {
147 spin_lock(&swap_token_lock);
148 if (match_memcg(memcg, swap_token_memcg)) {
149 trace_disable_swap_token(swap_token_mm);
150 swap_token_mm = NULL;
151 swap_token_memcg = NULL;
152 }
153 spin_unlock(&swap_token_lock);
154 }
155}
diff --git a/mm/truncate.c b/mm/truncate.c
index 61a183b89df6..75801acdaac7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -602,31 +602,6 @@ int vmtruncate(struct inode *inode, loff_t newsize)
602} 602}
603EXPORT_SYMBOL(vmtruncate); 603EXPORT_SYMBOL(vmtruncate);
604 604
605int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
606{
607 struct address_space *mapping = inode->i_mapping;
608 loff_t holebegin = round_up(lstart, PAGE_SIZE);
609 loff_t holelen = 1 + lend - holebegin;
610
611 /*
612 * If the underlying filesystem is not going to provide
613 * a way to truncate a range of blocks (punch a hole) -
614 * we should return failure right now.
615 */
616 if (!inode->i_op->truncate_range)
617 return -ENOSYS;
618
619 mutex_lock(&inode->i_mutex);
620 inode_dio_wait(inode);
621 unmap_mapping_range(mapping, holebegin, holelen, 1);
622 inode->i_op->truncate_range(inode, lstart, lend);
623 /* unmap again to remove racily COWed private pages */
624 unmap_mapping_range(mapping, holebegin, holelen, 1);
625 mutex_unlock(&inode->i_mutex);
626
627 return 0;
628}
629
630/** 605/**
631 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched 606 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
632 * @inode: inode 607 * @inode: inode
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 94dff883b449..2aad49981b57 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1185,9 +1185,10 @@ void __init vmalloc_init(void)
1185 /* Import existing vmlist entries. */ 1185 /* Import existing vmlist entries. */
1186 for (tmp = vmlist; tmp; tmp = tmp->next) { 1186 for (tmp = vmlist; tmp; tmp = tmp->next) {
1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); 1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1188 va->flags = tmp->flags | VM_VM_AREA; 1188 va->flags = VM_VM_AREA;
1189 va->va_start = (unsigned long)tmp->addr; 1189 va->va_start = (unsigned long)tmp->addr;
1190 va->va_end = va->va_start + tmp->size; 1190 va->va_end = va->va_start + tmp->size;
1191 va->vm = tmp;
1191 __insert_vmap_area(va); 1192 __insert_vmap_area(va);
1192 } 1193 }
1193 1194
@@ -2375,8 +2376,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2375 return NULL; 2376 return NULL;
2376 } 2377 }
2377 2378
2378 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL); 2379 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2379 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL); 2380 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2380 if (!vas || !vms) 2381 if (!vas || !vms)
2381 goto err_free2; 2382 goto err_free2;
2382 2383
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256033b5..eeb3bc9d1d36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
53#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
54#include <trace/events/vmscan.h> 54#include <trace/events/vmscan.h>
55 55
56/*
57 * reclaim_mode determines how the inactive list is shrunk
58 * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
59 * RECLAIM_MODE_ASYNC: Do not block
60 * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback
61 * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
62 * page from the LRU and reclaim all pages within a
63 * naturally aligned range
64 * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
65 * order-0 pages and then compact the zone
66 */
67typedef unsigned __bitwise__ reclaim_mode_t;
68#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u)
69#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u)
70#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u)
71#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u)
72#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u)
73
74struct scan_control { 56struct scan_control {
75 /* Incremented by the number of inactive pages that were scanned */ 57 /* Incremented by the number of inactive pages that were scanned */
76 unsigned long nr_scanned; 58 unsigned long nr_scanned;
@@ -96,11 +78,8 @@ struct scan_control {
96 78
97 int order; 79 int order;
98 80
99 /* 81 /* Scan (total_size >> priority) pages at once */
100 * Intend to reclaim enough continuous memory rather than reclaim 82 int priority;
101 * enough amount of memory. i.e, mode for high order allocation.
102 */
103 reclaim_mode_t reclaim_mode;
104 83
105 /* 84 /*
106 * The memory cgroup that hit its limit and as a result is the 85 * The memory cgroup that hit its limit and as a result is the
@@ -115,11 +94,6 @@ struct scan_control {
115 nodemask_t *nodemask; 94 nodemask_t *nodemask;
116}; 95};
117 96
118struct mem_cgroup_zone {
119 struct mem_cgroup *mem_cgroup;
120 struct zone *zone;
121};
122
123#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 97#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
124 98
125#ifdef ARCH_HAS_PREFETCH 99#ifdef ARCH_HAS_PREFETCH
@@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc)
164{ 138{
165 return !sc->target_mem_cgroup; 139 return !sc->target_mem_cgroup;
166} 140}
167
168static bool scanning_global_lru(struct mem_cgroup_zone *mz)
169{
170 return !mz->mem_cgroup;
171}
172#else 141#else
173static bool global_reclaim(struct scan_control *sc) 142static bool global_reclaim(struct scan_control *sc)
174{ 143{
175 return true; 144 return true;
176} 145}
177
178static bool scanning_global_lru(struct mem_cgroup_zone *mz)
179{
180 return true;
181}
182#endif 146#endif
183 147
184static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) 148static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
185{
186 if (!scanning_global_lru(mz))
187 return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
188
189 return &mz->zone->reclaim_stat;
190}
191
192static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
193 enum lru_list lru)
194{ 149{
195 if (!scanning_global_lru(mz)) 150 if (!mem_cgroup_disabled())
196 return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, 151 return mem_cgroup_get_lru_size(lruvec, lru);
197 zone_to_nid(mz->zone),
198 zone_idx(mz->zone),
199 BIT(lru));
200 152
201 return zone_page_state(mz->zone, NR_LRU_BASE + lru); 153 return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
202} 154}
203 155
204
205/* 156/*
206 * Add a shrinker callback to be called from the vm 157 * Add a shrinker callback to be called from the vm
207 */ 158 */
@@ -364,39 +315,6 @@ out:
364 return ret; 315 return ret;
365} 316}
366 317
367static void set_reclaim_mode(int priority, struct scan_control *sc,
368 bool sync)
369{
370 reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
371
372 /*
373 * Initially assume we are entering either lumpy reclaim or
374 * reclaim/compaction.Depending on the order, we will either set the
375 * sync mode or just reclaim order-0 pages later.
376 */
377 if (COMPACTION_BUILD)
378 sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
379 else
380 sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
381
382 /*
383 * Avoid using lumpy reclaim or reclaim/compaction if possible by
384 * restricting when its set to either costly allocations or when
385 * under memory pressure
386 */
387 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
388 sc->reclaim_mode |= syncmode;
389 else if (sc->order && priority < DEF_PRIORITY - 2)
390 sc->reclaim_mode |= syncmode;
391 else
392 sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
393}
394
395static void reset_reclaim_mode(struct scan_control *sc)
396{
397 sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
398}
399
400static inline int is_page_cache_freeable(struct page *page) 318static inline int is_page_cache_freeable(struct page *page)
401{ 319{
402 /* 320 /*
@@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
416 return 1; 334 return 1;
417 if (bdi == current->backing_dev_info) 335 if (bdi == current->backing_dev_info)
418 return 1; 336 return 1;
419
420 /* lumpy reclaim for hugepage often need a lot of write */
421 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
422 return 1;
423 return 0; 337 return 0;
424} 338}
425 339
@@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
523 /* synchronous write or broken a_ops? */ 437 /* synchronous write or broken a_ops? */
524 ClearPageReclaim(page); 438 ClearPageReclaim(page);
525 } 439 }
526 trace_mm_vmscan_writepage(page, 440 trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
527 trace_reclaim_flags(page, sc->reclaim_mode));
528 inc_zone_page_state(page, NR_VMSCAN_WRITE); 441 inc_zone_page_state(page, NR_VMSCAN_WRITE);
529 return PAGE_SUCCESS; 442 return PAGE_SUCCESS;
530 } 443 }
@@ -701,19 +614,15 @@ enum page_references {
701}; 614};
702 615
703static enum page_references page_check_references(struct page *page, 616static enum page_references page_check_references(struct page *page,
704 struct mem_cgroup_zone *mz,
705 struct scan_control *sc) 617 struct scan_control *sc)
706{ 618{
707 int referenced_ptes, referenced_page; 619 int referenced_ptes, referenced_page;
708 unsigned long vm_flags; 620 unsigned long vm_flags;
709 621
710 referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); 622 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
623 &vm_flags);
711 referenced_page = TestClearPageReferenced(page); 624 referenced_page = TestClearPageReferenced(page);
712 625
713 /* Lumpy reclaim - ignore references */
714 if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
715 return PAGEREF_RECLAIM;
716
717 /* 626 /*
718 * Mlock lost the isolation race with us. Let try_to_unmap() 627 * Mlock lost the isolation race with us. Let try_to_unmap()
719 * move the page to the unevictable list. 628 * move the page to the unevictable list.
@@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page,
722 return PAGEREF_RECLAIM; 631 return PAGEREF_RECLAIM;
723 632
724 if (referenced_ptes) { 633 if (referenced_ptes) {
725 if (PageAnon(page)) 634 if (PageSwapBacked(page))
726 return PAGEREF_ACTIVATE; 635 return PAGEREF_ACTIVATE;
727 /* 636 /*
728 * All mapped pages start out with page table 637 * All mapped pages start out with page table
@@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page,
763 * shrink_page_list() returns the number of reclaimed pages 672 * shrink_page_list() returns the number of reclaimed pages
764 */ 673 */
765static unsigned long shrink_page_list(struct list_head *page_list, 674static unsigned long shrink_page_list(struct list_head *page_list,
766 struct mem_cgroup_zone *mz, 675 struct zone *zone,
767 struct scan_control *sc, 676 struct scan_control *sc,
768 int priority,
769 unsigned long *ret_nr_dirty, 677 unsigned long *ret_nr_dirty,
770 unsigned long *ret_nr_writeback) 678 unsigned long *ret_nr_writeback)
771{ 679{
@@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
794 goto keep; 702 goto keep;
795 703
796 VM_BUG_ON(PageActive(page)); 704 VM_BUG_ON(PageActive(page));
797 VM_BUG_ON(page_zone(page) != mz->zone); 705 VM_BUG_ON(page_zone(page) != zone);
798 706
799 sc->nr_scanned++; 707 sc->nr_scanned++;
800 708
@@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
813 721
814 if (PageWriteback(page)) { 722 if (PageWriteback(page)) {
815 nr_writeback++; 723 nr_writeback++;
816 /* 724 unlock_page(page);
817 * Synchronous reclaim cannot queue pages for 725 goto keep;
818 * writeback due to the possibility of stack overflow
819 * but if it encounters a page under writeback, wait
820 * for the IO to complete.
821 */
822 if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
823 may_enter_fs)
824 wait_on_page_writeback(page);
825 else {
826 unlock_page(page);
827 goto keep_lumpy;
828 }
829 } 726 }
830 727
831 references = page_check_references(page, mz, sc); 728 references = page_check_references(page, sc);
832 switch (references) { 729 switch (references) {
833 case PAGEREF_ACTIVATE: 730 case PAGEREF_ACTIVATE:
834 goto activate_locked; 731 goto activate_locked;
@@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
879 * unless under significant pressure. 776 * unless under significant pressure.
880 */ 777 */
881 if (page_is_file_cache(page) && 778 if (page_is_file_cache(page) &&
882 (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { 779 (!current_is_kswapd() ||
780 sc->priority >= DEF_PRIORITY - 2)) {
883 /* 781 /*
884 * Immediately reclaim when written back. 782 * Immediately reclaim when written back.
885 * Similar in principal to deactivate_page() 783 * Similar in principal to deactivate_page()
@@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
908 goto activate_locked; 806 goto activate_locked;
909 case PAGE_SUCCESS: 807 case PAGE_SUCCESS:
910 if (PageWriteback(page)) 808 if (PageWriteback(page))
911 goto keep_lumpy; 809 goto keep;
912 if (PageDirty(page)) 810 if (PageDirty(page))
913 goto keep; 811 goto keep;
914 812
@@ -994,7 +892,6 @@ cull_mlocked:
994 try_to_free_swap(page); 892 try_to_free_swap(page);
995 unlock_page(page); 893 unlock_page(page);
996 putback_lru_page(page); 894 putback_lru_page(page);
997 reset_reclaim_mode(sc);
998 continue; 895 continue;
999 896
1000activate_locked: 897activate_locked:
@@ -1007,8 +904,6 @@ activate_locked:
1007keep_locked: 904keep_locked:
1008 unlock_page(page); 905 unlock_page(page);
1009keep: 906keep:
1010 reset_reclaim_mode(sc);
1011keep_lumpy:
1012 list_add(&page->lru, &ret_pages); 907 list_add(&page->lru, &ret_pages);
1013 VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); 908 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
1014 } 909 }
@@ -1020,7 +915,7 @@ keep_lumpy:
1020 * will encounter the same problem 915 * will encounter the same problem
1021 */ 916 */
1022 if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) 917 if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
1023 zone_set_flag(mz->zone, ZONE_CONGESTED); 918 zone_set_flag(zone, ZONE_CONGESTED);
1024 919
1025 free_hot_cold_page_list(&free_pages, 1); 920 free_hot_cold_page_list(&free_pages, 1);
1026 921
@@ -1041,34 +936,15 @@ keep_lumpy:
1041 * 936 *
1042 * returns 0 on success, -ve errno on failure. 937 * returns 0 on success, -ve errno on failure.
1043 */ 938 */
1044int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) 939int __isolate_lru_page(struct page *page, isolate_mode_t mode)
1045{ 940{
1046 bool all_lru_mode;
1047 int ret = -EINVAL; 941 int ret = -EINVAL;
1048 942
1049 /* Only take pages on the LRU. */ 943 /* Only take pages on the LRU. */
1050 if (!PageLRU(page)) 944 if (!PageLRU(page))
1051 return ret; 945 return ret;
1052 946
1053 all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == 947 /* Do not give back unevictable pages for compaction */
1054 (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
1055
1056 /*
1057 * When checking the active state, we need to be sure we are
1058 * dealing with comparible boolean values. Take the logical not
1059 * of each.
1060 */
1061 if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
1062 return ret;
1063
1064 if (!all_lru_mode && !!page_is_file_cache(page) != file)
1065 return ret;
1066
1067 /*
1068 * When this function is being called for lumpy reclaim, we
1069 * initially look into all LRU pages, active, inactive and
1070 * unevictable; only give shrink_page_list evictable pages.
1071 */
1072 if (PageUnevictable(page)) 948 if (PageUnevictable(page))
1073 return ret; 949 return ret;
1074 950
@@ -1135,54 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
1135 * Appropriate locks must be held before calling this function. 1011 * Appropriate locks must be held before calling this function.
1136 * 1012 *
1137 * @nr_to_scan: The number of pages to look through on the list. 1013 * @nr_to_scan: The number of pages to look through on the list.
1138 * @mz: The mem_cgroup_zone to pull pages from. 1014 * @lruvec: The LRU vector to pull pages from.
1139 * @dst: The temp list to put pages on to. 1015 * @dst: The temp list to put pages on to.
1140 * @nr_scanned: The number of pages that were scanned. 1016 * @nr_scanned: The number of pages that were scanned.
1141 * @sc: The scan_control struct for this reclaim session 1017 * @sc: The scan_control struct for this reclaim session
1142 * @mode: One of the LRU isolation modes 1018 * @mode: One of the LRU isolation modes
1143 * @active: True [1] if isolating active pages 1019 * @lru: LRU list id for isolating
1144 * @file: True [1] if isolating file [!anon] pages
1145 * 1020 *
1146 * returns how many pages were moved onto *@dst. 1021 * returns how many pages were moved onto *@dst.
1147 */ 1022 */
1148static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 1023static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1149 struct mem_cgroup_zone *mz, struct list_head *dst, 1024 struct lruvec *lruvec, struct list_head *dst,
1150 unsigned long *nr_scanned, struct scan_control *sc, 1025 unsigned long *nr_scanned, struct scan_control *sc,
1151 isolate_mode_t mode, int active, int file) 1026 isolate_mode_t mode, enum lru_list lru)
1152{ 1027{
1153 struct lruvec *lruvec; 1028 struct list_head *src = &lruvec->lists[lru];
1154 struct list_head *src;
1155 unsigned long nr_taken = 0; 1029 unsigned long nr_taken = 0;
1156 unsigned long nr_lumpy_taken = 0;
1157 unsigned long nr_lumpy_dirty = 0;
1158 unsigned long nr_lumpy_failed = 0;
1159 unsigned long scan; 1030 unsigned long scan;
1160 int lru = LRU_BASE;
1161
1162 lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
1163 if (active)
1164 lru += LRU_ACTIVE;
1165 if (file)
1166 lru += LRU_FILE;
1167 src = &lruvec->lists[lru];
1168 1031
1169 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { 1032 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
1170 struct page *page; 1033 struct page *page;
1171 unsigned long pfn; 1034 int nr_pages;
1172 unsigned long end_pfn;
1173 unsigned long page_pfn;
1174 int zone_id;
1175 1035
1176 page = lru_to_page(src); 1036 page = lru_to_page(src);
1177 prefetchw_prev_lru_page(page, src, flags); 1037 prefetchw_prev_lru_page(page, src, flags);
1178 1038
1179 VM_BUG_ON(!PageLRU(page)); 1039 VM_BUG_ON(!PageLRU(page));
1180 1040
1181 switch (__isolate_lru_page(page, mode, file)) { 1041 switch (__isolate_lru_page(page, mode)) {
1182 case 0: 1042 case 0:
1183 mem_cgroup_lru_del(page); 1043 nr_pages = hpage_nr_pages(page);
1044 mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
1184 list_move(&page->lru, dst); 1045 list_move(&page->lru, dst);
1185 nr_taken += hpage_nr_pages(page); 1046 nr_taken += nr_pages;
1186 break; 1047 break;
1187 1048
1188 case -EBUSY: 1049 case -EBUSY:
@@ -1193,93 +1054,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1193 default: 1054 default:
1194 BUG(); 1055 BUG();
1195 } 1056 }
1196
1197 if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
1198 continue;
1199
1200 /*
1201 * Attempt to take all pages in the order aligned region
1202 * surrounding the tag page. Only take those pages of
1203 * the same active state as that tag page. We may safely
1204 * round the target page pfn down to the requested order
1205 * as the mem_map is guaranteed valid out to MAX_ORDER,
1206 * where that page is in a different zone we will detect
1207 * it from its zone id and abort this block scan.
1208 */
1209 zone_id = page_zone_id(page);
1210 page_pfn = page_to_pfn(page);
1211 pfn = page_pfn & ~((1 << sc->order) - 1);
1212 end_pfn = pfn + (1 << sc->order);
1213 for (; pfn < end_pfn; pfn++) {
1214 struct page *cursor_page;
1215
1216 /* The target page is in the block, ignore it. */
1217 if (unlikely(pfn == page_pfn))
1218 continue;
1219
1220 /* Avoid holes within the zone. */
1221 if (unlikely(!pfn_valid_within(pfn)))
1222 break;
1223
1224 cursor_page = pfn_to_page(pfn);
1225
1226 /* Check that we have not crossed a zone boundary. */
1227 if (unlikely(page_zone_id(cursor_page) != zone_id))
1228 break;
1229
1230 /*
1231 * If we don't have enough swap space, reclaiming of
1232 * anon page which don't already have a swap slot is
1233 * pointless.
1234 */
1235 if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
1236 !PageSwapCache(cursor_page))
1237 break;
1238
1239 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
1240 unsigned int isolated_pages;
1241
1242 mem_cgroup_lru_del(cursor_page);
1243 list_move(&cursor_page->lru, dst);
1244 isolated_pages = hpage_nr_pages(cursor_page);
1245 nr_taken += isolated_pages;
1246 nr_lumpy_taken += isolated_pages;
1247 if (PageDirty(cursor_page))
1248 nr_lumpy_dirty += isolated_pages;
1249 scan++;
1250 pfn += isolated_pages - 1;
1251 } else {
1252 /*
1253 * Check if the page is freed already.
1254 *
1255 * We can't use page_count() as that
1256 * requires compound_head and we don't
1257 * have a pin on the page here. If a
1258 * page is tail, we may or may not
1259 * have isolated the head, so assume
1260 * it's not free, it'd be tricky to
1261 * track the head status without a
1262 * page pin.
1263 */
1264 if (!PageTail(cursor_page) &&
1265 !atomic_read(&cursor_page->_count))
1266 continue;
1267 break;
1268 }
1269 }
1270
1271 /* If we break out of the loop above, lumpy reclaim failed */
1272 if (pfn < end_pfn)
1273 nr_lumpy_failed++;
1274 } 1057 }
1275 1058
1276 *nr_scanned = scan; 1059 *nr_scanned = scan;
1277 1060 trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
1278 trace_mm_vmscan_lru_isolate(sc->order, 1061 nr_taken, mode, is_file_lru(lru));
1279 nr_to_scan, scan,
1280 nr_taken,
1281 nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
1282 mode, file);
1283 return nr_taken; 1062 return nr_taken;
1284} 1063}
1285 1064
@@ -1316,15 +1095,16 @@ int isolate_lru_page(struct page *page)
1316 1095
1317 if (PageLRU(page)) { 1096 if (PageLRU(page)) {
1318 struct zone *zone = page_zone(page); 1097 struct zone *zone = page_zone(page);
1098 struct lruvec *lruvec;
1319 1099
1320 spin_lock_irq(&zone->lru_lock); 1100 spin_lock_irq(&zone->lru_lock);
1101 lruvec = mem_cgroup_page_lruvec(page, zone);
1321 if (PageLRU(page)) { 1102 if (PageLRU(page)) {
1322 int lru = page_lru(page); 1103 int lru = page_lru(page);
1323 ret = 0;
1324 get_page(page); 1104 get_page(page);
1325 ClearPageLRU(page); 1105 ClearPageLRU(page);
1326 1106 del_page_from_lru_list(page, lruvec, lru);
1327 del_page_from_lru_list(zone, page, lru); 1107 ret = 0;
1328 } 1108 }
1329 spin_unlock_irq(&zone->lru_lock); 1109 spin_unlock_irq(&zone->lru_lock);
1330 } 1110 }
@@ -1357,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file,
1357} 1137}
1358 1138
1359static noinline_for_stack void 1139static noinline_for_stack void
1360putback_inactive_pages(struct mem_cgroup_zone *mz, 1140putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1361 struct list_head *page_list)
1362{ 1141{
1363 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); 1142 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1364 struct zone *zone = mz->zone; 1143 struct zone *zone = lruvec_zone(lruvec);
1365 LIST_HEAD(pages_to_free); 1144 LIST_HEAD(pages_to_free);
1366 1145
1367 /* 1146 /*
@@ -1379,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
1379 spin_lock_irq(&zone->lru_lock); 1158 spin_lock_irq(&zone->lru_lock);
1380 continue; 1159 continue;
1381 } 1160 }
1161
1162 lruvec = mem_cgroup_page_lruvec(page, zone);
1163
1382 SetPageLRU(page); 1164 SetPageLRU(page);
1383 lru = page_lru(page); 1165 lru = page_lru(page);
1384 add_page_to_lru_list(zone, page, lru); 1166 add_page_to_lru_list(page, lruvec, lru);
1167
1385 if (is_active_lru(lru)) { 1168 if (is_active_lru(lru)) {
1386 int file = is_file_lru(lru); 1169 int file = is_file_lru(lru);
1387 int numpages = hpage_nr_pages(page); 1170 int numpages = hpage_nr_pages(page);
@@ -1390,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
1390 if (put_page_testzero(page)) { 1173 if (put_page_testzero(page)) {
1391 __ClearPageLRU(page); 1174 __ClearPageLRU(page);
1392 __ClearPageActive(page); 1175 __ClearPageActive(page);
1393 del_page_from_lru_list(zone, page, lru); 1176 del_page_from_lru_list(page, lruvec, lru);
1394 1177
1395 if (unlikely(PageCompound(page))) { 1178 if (unlikely(PageCompound(page))) {
1396 spin_unlock_irq(&zone->lru_lock); 1179 spin_unlock_irq(&zone->lru_lock);
@@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
1407 list_splice(&pages_to_free, page_list); 1190 list_splice(&pages_to_free, page_list);
1408} 1191}
1409 1192
1410static noinline_for_stack void
1411update_isolated_counts(struct mem_cgroup_zone *mz,
1412 struct list_head *page_list,
1413 unsigned long *nr_anon,
1414 unsigned long *nr_file)
1415{
1416 struct zone *zone = mz->zone;
1417 unsigned int count[NR_LRU_LISTS] = { 0, };
1418 unsigned long nr_active = 0;
1419 struct page *page;
1420 int lru;
1421
1422 /*
1423 * Count pages and clear active flags
1424 */
1425 list_for_each_entry(page, page_list, lru) {
1426 int numpages = hpage_nr_pages(page);
1427 lru = page_lru_base_type(page);
1428 if (PageActive(page)) {
1429 lru += LRU_ACTIVE;
1430 ClearPageActive(page);
1431 nr_active += numpages;
1432 }
1433 count[lru] += numpages;
1434 }
1435
1436 preempt_disable();
1437 __count_vm_events(PGDEACTIVATE, nr_active);
1438
1439 __mod_zone_page_state(zone, NR_ACTIVE_FILE,
1440 -count[LRU_ACTIVE_FILE]);
1441 __mod_zone_page_state(zone, NR_INACTIVE_FILE,
1442 -count[LRU_INACTIVE_FILE]);
1443 __mod_zone_page_state(zone, NR_ACTIVE_ANON,
1444 -count[LRU_ACTIVE_ANON]);
1445 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
1446 -count[LRU_INACTIVE_ANON]);
1447
1448 *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
1449 *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
1450
1451 __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
1452 __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
1453 preempt_enable();
1454}
1455
1456/*
1457 * Returns true if a direct reclaim should wait on pages under writeback.
1458 *
1459 * If we are direct reclaiming for contiguous pages and we do not reclaim
1460 * everything in the list, try again and wait for writeback IO to complete.
1461 * This will stall high-order allocations noticeably. Only do that when really
1462 * need to free the pages under high memory pressure.
1463 */
1464static inline bool should_reclaim_stall(unsigned long nr_taken,
1465 unsigned long nr_freed,
1466 int priority,
1467 struct scan_control *sc)
1468{
1469 int lumpy_stall_priority;
1470
1471 /* kswapd should not stall on sync IO */
1472 if (current_is_kswapd())
1473 return false;
1474
1475 /* Only stall on lumpy reclaim */
1476 if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
1477 return false;
1478
1479 /* If we have reclaimed everything on the isolated list, no stall */
1480 if (nr_freed == nr_taken)
1481 return false;
1482
1483 /*
1484 * For high-order allocations, there are two stall thresholds.
1485 * High-cost allocations stall immediately where as lower
1486 * order allocations such as stacks require the scanning
1487 * priority to be much higher before stalling.
1488 */
1489 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1490 lumpy_stall_priority = DEF_PRIORITY;
1491 else
1492 lumpy_stall_priority = DEF_PRIORITY / 3;
1493
1494 return priority <= lumpy_stall_priority;
1495}
1496
1497/* 1193/*
1498 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number 1194 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
1499 * of reclaimed pages 1195 * of reclaimed pages
1500 */ 1196 */
1501static noinline_for_stack unsigned long 1197static noinline_for_stack unsigned long
1502shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, 1198shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1503 struct scan_control *sc, int priority, int file) 1199 struct scan_control *sc, enum lru_list lru)
1504{ 1200{
1505 LIST_HEAD(page_list); 1201 LIST_HEAD(page_list);
1506 unsigned long nr_scanned; 1202 unsigned long nr_scanned;
1507 unsigned long nr_reclaimed = 0; 1203 unsigned long nr_reclaimed = 0;
1508 unsigned long nr_taken; 1204 unsigned long nr_taken;
1509 unsigned long nr_anon;
1510 unsigned long nr_file;
1511 unsigned long nr_dirty = 0; 1205 unsigned long nr_dirty = 0;
1512 unsigned long nr_writeback = 0; 1206 unsigned long nr_writeback = 0;
1513 isolate_mode_t isolate_mode = ISOLATE_INACTIVE; 1207 isolate_mode_t isolate_mode = 0;
1514 struct zone *zone = mz->zone; 1208 int file = is_file_lru(lru);
1515 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); 1209 struct zone *zone = lruvec_zone(lruvec);
1210 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1516 1211
1517 while (unlikely(too_many_isolated(zone, file, sc))) { 1212 while (unlikely(too_many_isolated(zone, file, sc))) {
1518 congestion_wait(BLK_RW_ASYNC, HZ/10); 1213 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1522 return SWAP_CLUSTER_MAX; 1217 return SWAP_CLUSTER_MAX;
1523 } 1218 }
1524 1219
1525 set_reclaim_mode(priority, sc, false);
1526 if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
1527 isolate_mode |= ISOLATE_ACTIVE;
1528
1529 lru_add_drain(); 1220 lru_add_drain();
1530 1221
1531 if (!sc->may_unmap) 1222 if (!sc->may_unmap)
@@ -1535,38 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1535 1226
1536 spin_lock_irq(&zone->lru_lock); 1227 spin_lock_irq(&zone->lru_lock);
1537 1228
1538 nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, 1229 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
1539 sc, isolate_mode, 0, file); 1230 &nr_scanned, sc, isolate_mode, lru);
1231
1232 __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
1233 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1234
1540 if (global_reclaim(sc)) { 1235 if (global_reclaim(sc)) {
1541 zone->pages_scanned += nr_scanned; 1236 zone->pages_scanned += nr_scanned;
1542 if (current_is_kswapd()) 1237 if (current_is_kswapd())
1543 __count_zone_vm_events(PGSCAN_KSWAPD, zone, 1238 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
1544 nr_scanned);
1545 else 1239 else
1546 __count_zone_vm_events(PGSCAN_DIRECT, zone, 1240 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned);
1547 nr_scanned);
1548 } 1241 }
1549 spin_unlock_irq(&zone->lru_lock); 1242 spin_unlock_irq(&zone->lru_lock);
1550 1243
1551 if (nr_taken == 0) 1244 if (nr_taken == 0)
1552 return 0; 1245 return 0;
1553 1246
1554 update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); 1247 nr_reclaimed = shrink_page_list(&page_list, zone, sc,
1555
1556 nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
1557 &nr_dirty, &nr_writeback); 1248 &nr_dirty, &nr_writeback);
1558 1249
1559 /* Check if we should syncronously wait for writeback */
1560 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
1561 set_reclaim_mode(priority, sc, true);
1562 nr_reclaimed += shrink_page_list(&page_list, mz, sc,
1563 priority, &nr_dirty, &nr_writeback);
1564 }
1565
1566 spin_lock_irq(&zone->lru_lock); 1250 spin_lock_irq(&zone->lru_lock);
1567 1251
1568 reclaim_stat->recent_scanned[0] += nr_anon; 1252 reclaim_stat->recent_scanned[file] += nr_taken;
1569 reclaim_stat->recent_scanned[1] += nr_file;
1570 1253
1571 if (global_reclaim(sc)) { 1254 if (global_reclaim(sc)) {
1572 if (current_is_kswapd()) 1255 if (current_is_kswapd())
@@ -1577,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1577 nr_reclaimed); 1260 nr_reclaimed);
1578 } 1261 }
1579 1262
1580 putback_inactive_pages(mz, &page_list); 1263 putback_inactive_pages(lruvec, &page_list);
1581 1264
1582 __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); 1265 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1583 __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
1584 1266
1585 spin_unlock_irq(&zone->lru_lock); 1267 spin_unlock_irq(&zone->lru_lock);
1586 1268
@@ -1609,14 +1291,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1609 * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any 1291 * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
1610 * isolated page is PageWriteback 1292 * isolated page is PageWriteback
1611 */ 1293 */
1612 if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) 1294 if (nr_writeback && nr_writeback >=
1295 (nr_taken >> (DEF_PRIORITY - sc->priority)))
1613 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); 1296 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
1614 1297
1615 trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, 1298 trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
1616 zone_idx(zone), 1299 zone_idx(zone),
1617 nr_scanned, nr_reclaimed, 1300 nr_scanned, nr_reclaimed,
1618 priority, 1301 sc->priority,
1619 trace_shrink_flags(file, sc->reclaim_mode)); 1302 trace_shrink_flags(file));
1620 return nr_reclaimed; 1303 return nr_reclaimed;
1621} 1304}
1622 1305
@@ -1638,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1638 * But we had to alter page->flags anyway. 1321 * But we had to alter page->flags anyway.
1639 */ 1322 */
1640 1323
1641static void move_active_pages_to_lru(struct zone *zone, 1324static void move_active_pages_to_lru(struct lruvec *lruvec,
1642 struct list_head *list, 1325 struct list_head *list,
1643 struct list_head *pages_to_free, 1326 struct list_head *pages_to_free,
1644 enum lru_list lru) 1327 enum lru_list lru)
1645{ 1328{
1329 struct zone *zone = lruvec_zone(lruvec);
1646 unsigned long pgmoved = 0; 1330 unsigned long pgmoved = 0;
1647 struct page *page; 1331 struct page *page;
1332 int nr_pages;
1648 1333
1649 while (!list_empty(list)) { 1334 while (!list_empty(list)) {
1650 struct lruvec *lruvec;
1651
1652 page = lru_to_page(list); 1335 page = lru_to_page(list);
1336 lruvec = mem_cgroup_page_lruvec(page, zone);
1653 1337
1654 VM_BUG_ON(PageLRU(page)); 1338 VM_BUG_ON(PageLRU(page));
1655 SetPageLRU(page); 1339 SetPageLRU(page);
1656 1340
1657 lruvec = mem_cgroup_lru_add_list(zone, page, lru); 1341 nr_pages = hpage_nr_pages(page);
1342 mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
1658 list_move(&page->lru, &lruvec->lists[lru]); 1343 list_move(&page->lru, &lruvec->lists[lru]);
1659 pgmoved += hpage_nr_pages(page); 1344 pgmoved += nr_pages;
1660 1345
1661 if (put_page_testzero(page)) { 1346 if (put_page_testzero(page)) {
1662 __ClearPageLRU(page); 1347 __ClearPageLRU(page);
1663 __ClearPageActive(page); 1348 __ClearPageActive(page);
1664 del_page_from_lru_list(zone, page, lru); 1349 del_page_from_lru_list(page, lruvec, lru);
1665 1350
1666 if (unlikely(PageCompound(page))) { 1351 if (unlikely(PageCompound(page))) {
1667 spin_unlock_irq(&zone->lru_lock); 1352 spin_unlock_irq(&zone->lru_lock);
@@ -1677,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone,
1677} 1362}
1678 1363
1679static void shrink_active_list(unsigned long nr_to_scan, 1364static void shrink_active_list(unsigned long nr_to_scan,
1680 struct mem_cgroup_zone *mz, 1365 struct lruvec *lruvec,
1681 struct scan_control *sc, 1366 struct scan_control *sc,
1682 int priority, int file) 1367 enum lru_list lru)
1683{ 1368{
1684 unsigned long nr_taken; 1369 unsigned long nr_taken;
1685 unsigned long nr_scanned; 1370 unsigned long nr_scanned;
@@ -1688,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan,
1688 LIST_HEAD(l_active); 1373 LIST_HEAD(l_active);
1689 LIST_HEAD(l_inactive); 1374 LIST_HEAD(l_inactive);
1690 struct page *page; 1375 struct page *page;
1691 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); 1376 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1692 unsigned long nr_rotated = 0; 1377 unsigned long nr_rotated = 0;
1693 isolate_mode_t isolate_mode = ISOLATE_ACTIVE; 1378 isolate_mode_t isolate_mode = 0;
1694 struct zone *zone = mz->zone; 1379 int file = is_file_lru(lru);
1380 struct zone *zone = lruvec_zone(lruvec);
1695 1381
1696 lru_add_drain(); 1382 lru_add_drain();
1697 1383
1698 reset_reclaim_mode(sc);
1699
1700 if (!sc->may_unmap) 1384 if (!sc->may_unmap)
1701 isolate_mode |= ISOLATE_UNMAPPED; 1385 isolate_mode |= ISOLATE_UNMAPPED;
1702 if (!sc->may_writepage) 1386 if (!sc->may_writepage)
@@ -1704,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
1704 1388
1705 spin_lock_irq(&zone->lru_lock); 1389 spin_lock_irq(&zone->lru_lock);
1706 1390
1707 nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, 1391 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
1708 isolate_mode, 1, file); 1392 &nr_scanned, sc, isolate_mode, lru);
1709 if (global_reclaim(sc)) 1393 if (global_reclaim(sc))
1710 zone->pages_scanned += nr_scanned; 1394 zone->pages_scanned += nr_scanned;
1711 1395
1712 reclaim_stat->recent_scanned[file] += nr_taken; 1396 reclaim_stat->recent_scanned[file] += nr_taken;
1713 1397
1714 __count_zone_vm_events(PGREFILL, zone, nr_scanned); 1398 __count_zone_vm_events(PGREFILL, zone, nr_scanned);
1715 if (file) 1399 __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
1716 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
1717 else
1718 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
1719 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); 1400 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1720 spin_unlock_irq(&zone->lru_lock); 1401 spin_unlock_irq(&zone->lru_lock);
1721 1402
@@ -1737,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
1737 } 1418 }
1738 } 1419 }
1739 1420
1740 if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { 1421 if (page_referenced(page, 0, sc->target_mem_cgroup,
1422 &vm_flags)) {
1741 nr_rotated += hpage_nr_pages(page); 1423 nr_rotated += hpage_nr_pages(page);
1742 /* 1424 /*
1743 * Identify referenced, file-backed active pages and 1425 * Identify referenced, file-backed active pages and
@@ -1770,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
1770 */ 1452 */
1771 reclaim_stat->recent_rotated[file] += nr_rotated; 1453 reclaim_stat->recent_rotated[file] += nr_rotated;
1772 1454
1773 move_active_pages_to_lru(zone, &l_active, &l_hold, 1455 move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
1774 LRU_ACTIVE + file * LRU_FILE); 1456 move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
1775 move_active_pages_to_lru(zone, &l_inactive, &l_hold,
1776 LRU_BASE + file * LRU_FILE);
1777 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); 1457 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1778 spin_unlock_irq(&zone->lru_lock); 1458 spin_unlock_irq(&zone->lru_lock);
1779 1459
@@ -1796,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
1796 1476
1797/** 1477/**
1798 * inactive_anon_is_low - check if anonymous pages need to be deactivated 1478 * inactive_anon_is_low - check if anonymous pages need to be deactivated
1799 * @zone: zone to check 1479 * @lruvec: LRU vector to check
1800 * @sc: scan control of this context
1801 * 1480 *
1802 * Returns true if the zone does not have enough inactive anon pages, 1481 * Returns true if the zone does not have enough inactive anon pages,
1803 * meaning some active anon pages need to be deactivated. 1482 * meaning some active anon pages need to be deactivated.
1804 */ 1483 */
1805static int inactive_anon_is_low(struct mem_cgroup_zone *mz) 1484static int inactive_anon_is_low(struct lruvec *lruvec)
1806{ 1485{
1807 /* 1486 /*
1808 * If we don't have swap space, anonymous page deactivation 1487 * If we don't have swap space, anonymous page deactivation
@@ -1811,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
1811 if (!total_swap_pages) 1490 if (!total_swap_pages)
1812 return 0; 1491 return 0;
1813 1492
1814 if (!scanning_global_lru(mz)) 1493 if (!mem_cgroup_disabled())
1815 return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, 1494 return mem_cgroup_inactive_anon_is_low(lruvec);
1816 mz->zone);
1817 1495
1818 return inactive_anon_is_low_global(mz->zone); 1496 return inactive_anon_is_low_global(lruvec_zone(lruvec));
1819} 1497}
1820#else 1498#else
1821static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) 1499static inline int inactive_anon_is_low(struct lruvec *lruvec)
1822{ 1500{
1823 return 0; 1501 return 0;
1824} 1502}
@@ -1836,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone)
1836 1514
1837/** 1515/**
1838 * inactive_file_is_low - check if file pages need to be deactivated 1516 * inactive_file_is_low - check if file pages need to be deactivated
1839 * @mz: memory cgroup and zone to check 1517 * @lruvec: LRU vector to check
1840 * 1518 *
1841 * When the system is doing streaming IO, memory pressure here 1519 * When the system is doing streaming IO, memory pressure here
1842 * ensures that active file pages get deactivated, until more 1520 * ensures that active file pages get deactivated, until more
@@ -1848,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone)
1848 * This uses a different ratio than the anonymous pages, because 1526 * This uses a different ratio than the anonymous pages, because
1849 * the page cache uses a use-once replacement algorithm. 1527 * the page cache uses a use-once replacement algorithm.
1850 */ 1528 */
1851static int inactive_file_is_low(struct mem_cgroup_zone *mz) 1529static int inactive_file_is_low(struct lruvec *lruvec)
1852{ 1530{
1853 if (!scanning_global_lru(mz)) 1531 if (!mem_cgroup_disabled())
1854 return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, 1532 return mem_cgroup_inactive_file_is_low(lruvec);
1855 mz->zone);
1856 1533
1857 return inactive_file_is_low_global(mz->zone); 1534 return inactive_file_is_low_global(lruvec_zone(lruvec));
1858} 1535}
1859 1536
1860static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) 1537static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
1861{ 1538{
1862 if (file) 1539 if (is_file_lru(lru))
1863 return inactive_file_is_low(mz); 1540 return inactive_file_is_low(lruvec);
1864 else 1541 else
1865 return inactive_anon_is_low(mz); 1542 return inactive_anon_is_low(lruvec);
1866} 1543}
1867 1544
1868static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, 1545static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1869 struct mem_cgroup_zone *mz, 1546 struct lruvec *lruvec, struct scan_control *sc)
1870 struct scan_control *sc, int priority)
1871{ 1547{
1872 int file = is_file_lru(lru);
1873
1874 if (is_active_lru(lru)) { 1548 if (is_active_lru(lru)) {
1875 if (inactive_list_is_low(mz, file)) 1549 if (inactive_list_is_low(lruvec, lru))
1876 shrink_active_list(nr_to_scan, mz, sc, priority, file); 1550 shrink_active_list(nr_to_scan, lruvec, sc, lru);
1877 return 0; 1551 return 0;
1878 } 1552 }
1879 1553
1880 return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); 1554 return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
1881} 1555}
1882 1556
1883static int vmscan_swappiness(struct mem_cgroup_zone *mz, 1557static int vmscan_swappiness(struct scan_control *sc)
1884 struct scan_control *sc)
1885{ 1558{
1886 if (global_reclaim(sc)) 1559 if (global_reclaim(sc))
1887 return vm_swappiness; 1560 return vm_swappiness;
1888 return mem_cgroup_swappiness(mz->mem_cgroup); 1561 return mem_cgroup_swappiness(sc->target_mem_cgroup);
1889} 1562}
1890 1563
1891/* 1564/*
@@ -1896,17 +1569,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz,
1896 * 1569 *
1897 * nr[0] = anon pages to scan; nr[1] = file pages to scan 1570 * nr[0] = anon pages to scan; nr[1] = file pages to scan
1898 */ 1571 */
1899static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, 1572static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1900 unsigned long *nr, int priority) 1573 unsigned long *nr)
1901{ 1574{
1902 unsigned long anon, file, free; 1575 unsigned long anon, file, free;
1903 unsigned long anon_prio, file_prio; 1576 unsigned long anon_prio, file_prio;
1904 unsigned long ap, fp; 1577 unsigned long ap, fp;
1905 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); 1578 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1906 u64 fraction[2], denominator; 1579 u64 fraction[2], denominator;
1907 enum lru_list lru; 1580 enum lru_list lru;
1908 int noswap = 0; 1581 int noswap = 0;
1909 bool force_scan = false; 1582 bool force_scan = false;
1583 struct zone *zone = lruvec_zone(lruvec);
1910 1584
1911 /* 1585 /*
1912 * If the zone or memcg is small, nr[l] can be 0. This 1586 * If the zone or memcg is small, nr[l] can be 0. This
@@ -1918,7 +1592,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1918 * latencies, so it's better to scan a minimum amount there as 1592 * latencies, so it's better to scan a minimum amount there as
1919 * well. 1593 * well.
1920 */ 1594 */
1921 if (current_is_kswapd() && mz->zone->all_unreclaimable) 1595 if (current_is_kswapd() && zone->all_unreclaimable)
1922 force_scan = true; 1596 force_scan = true;
1923 if (!global_reclaim(sc)) 1597 if (!global_reclaim(sc))
1924 force_scan = true; 1598 force_scan = true;
@@ -1932,16 +1606,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1932 goto out; 1606 goto out;
1933 } 1607 }
1934 1608
1935 anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + 1609 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1936 zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); 1610 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1937 file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + 1611 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1938 zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); 1612 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1939 1613
1940 if (global_reclaim(sc)) { 1614 if (global_reclaim(sc)) {
1941 free = zone_page_state(mz->zone, NR_FREE_PAGES); 1615 free = zone_page_state(zone, NR_FREE_PAGES);
1942 /* If we have very few page cache pages, 1616 /* If we have very few page cache pages,
1943 force-scan anon pages. */ 1617 force-scan anon pages. */
1944 if (unlikely(file + free <= high_wmark_pages(mz->zone))) { 1618 if (unlikely(file + free <= high_wmark_pages(zone))) {
1945 fraction[0] = 1; 1619 fraction[0] = 1;
1946 fraction[1] = 0; 1620 fraction[1] = 0;
1947 denominator = 1; 1621 denominator = 1;
@@ -1953,8 +1627,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1953 * With swappiness at 100, anonymous and file have the same priority. 1627 * With swappiness at 100, anonymous and file have the same priority.
1954 * This scanning priority is essentially the inverse of IO cost. 1628 * This scanning priority is essentially the inverse of IO cost.
1955 */ 1629 */
1956 anon_prio = vmscan_swappiness(mz, sc); 1630 anon_prio = vmscan_swappiness(sc);
1957 file_prio = 200 - vmscan_swappiness(mz, sc); 1631 file_prio = 200 - anon_prio;
1958 1632
1959 /* 1633 /*
1960 * OK, so we have swap space and a fair amount of page cache 1634 * OK, so we have swap space and a fair amount of page cache
@@ -1967,7 +1641,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1967 * 1641 *
1968 * anon in [0], file in [1] 1642 * anon in [0], file in [1]
1969 */ 1643 */
1970 spin_lock_irq(&mz->zone->lru_lock); 1644 spin_lock_irq(&zone->lru_lock);
1971 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 1645 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1972 reclaim_stat->recent_scanned[0] /= 2; 1646 reclaim_stat->recent_scanned[0] /= 2;
1973 reclaim_stat->recent_rotated[0] /= 2; 1647 reclaim_stat->recent_rotated[0] /= 2;
@@ -1983,12 +1657,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1983 * proportional to the fraction of recently scanned pages on 1657 * proportional to the fraction of recently scanned pages on
1984 * each list that were recently referenced and in active use. 1658 * each list that were recently referenced and in active use.
1985 */ 1659 */
1986 ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); 1660 ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
1987 ap /= reclaim_stat->recent_rotated[0] + 1; 1661 ap /= reclaim_stat->recent_rotated[0] + 1;
1988 1662
1989 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); 1663 fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
1990 fp /= reclaim_stat->recent_rotated[1] + 1; 1664 fp /= reclaim_stat->recent_rotated[1] + 1;
1991 spin_unlock_irq(&mz->zone->lru_lock); 1665 spin_unlock_irq(&zone->lru_lock);
1992 1666
1993 fraction[0] = ap; 1667 fraction[0] = ap;
1994 fraction[1] = fp; 1668 fraction[1] = fp;
@@ -1998,9 +1672,9 @@ out:
1998 int file = is_file_lru(lru); 1672 int file = is_file_lru(lru);
1999 unsigned long scan; 1673 unsigned long scan;
2000 1674
2001 scan = zone_nr_lru_pages(mz, lru); 1675 scan = get_lru_size(lruvec, lru);
2002 if (priority || noswap) { 1676 if (sc->priority || noswap || !vmscan_swappiness(sc)) {
2003 scan >>= priority; 1677 scan >>= sc->priority;
2004 if (!scan && force_scan) 1678 if (!scan && force_scan)
2005 scan = SWAP_CLUSTER_MAX; 1679 scan = SWAP_CLUSTER_MAX;
2006 scan = div64_u64(scan * fraction[file], denominator); 1680 scan = div64_u64(scan * fraction[file], denominator);
@@ -2009,14 +1683,25 @@ out:
2009 } 1683 }
2010} 1684}
2011 1685
1686/* Use reclaim/compaction for costly allocs or under memory pressure */
1687static bool in_reclaim_compaction(struct scan_control *sc)
1688{
1689 if (COMPACTION_BUILD && sc->order &&
1690 (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
1691 sc->priority < DEF_PRIORITY - 2))
1692 return true;
1693
1694 return false;
1695}
1696
2012/* 1697/*
2013 * Reclaim/compaction depends on a number of pages being freed. To avoid 1698 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2014 * disruption to the system, a small number of order-0 pages continue to be 1699 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2015 * rotated and reclaimed in the normal fashion. However, by the time we get 1700 * true if more pages should be reclaimed such that when the page allocator
2016 * back to the allocator and call try_to_compact_zone(), we ensure that 1701 * calls try_to_compact_zone() that it will have enough free pages to succeed.
2017 * there are enough free pages for it to be likely successful 1702 * It will give up earlier than that if there is difficulty reclaiming pages.
2018 */ 1703 */
2019static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, 1704static inline bool should_continue_reclaim(struct lruvec *lruvec,
2020 unsigned long nr_reclaimed, 1705 unsigned long nr_reclaimed,
2021 unsigned long nr_scanned, 1706 unsigned long nr_scanned,
2022 struct scan_control *sc) 1707 struct scan_control *sc)
@@ -2025,7 +1710,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
2025 unsigned long inactive_lru_pages; 1710 unsigned long inactive_lru_pages;
2026 1711
2027 /* If not in reclaim/compaction mode, stop */ 1712 /* If not in reclaim/compaction mode, stop */
2028 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) 1713 if (!in_reclaim_compaction(sc))
2029 return false; 1714 return false;
2030 1715
2031 /* Consider stopping depending on scan and reclaim activity */ 1716 /* Consider stopping depending on scan and reclaim activity */
@@ -2056,15 +1741,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
2056 * inactive lists are large enough, continue reclaiming 1741 * inactive lists are large enough, continue reclaiming
2057 */ 1742 */
2058 pages_for_compaction = (2UL << sc->order); 1743 pages_for_compaction = (2UL << sc->order);
2059 inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); 1744 inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
2060 if (nr_swap_pages > 0) 1745 if (nr_swap_pages > 0)
2061 inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); 1746 inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
2062 if (sc->nr_reclaimed < pages_for_compaction && 1747 if (sc->nr_reclaimed < pages_for_compaction &&
2063 inactive_lru_pages > pages_for_compaction) 1748 inactive_lru_pages > pages_for_compaction)
2064 return true; 1749 return true;
2065 1750
2066 /* If compaction would go ahead or the allocation would succeed, stop */ 1751 /* If compaction would go ahead or the allocation would succeed, stop */
2067 switch (compaction_suitable(mz->zone, sc->order)) { 1752 switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
2068 case COMPACT_PARTIAL: 1753 case COMPACT_PARTIAL:
2069 case COMPACT_CONTINUE: 1754 case COMPACT_CONTINUE:
2070 return false; 1755 return false;
@@ -2076,8 +1761,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
2076/* 1761/*
2077 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 1762 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
2078 */ 1763 */
2079static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, 1764static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
2080 struct scan_control *sc)
2081{ 1765{
2082 unsigned long nr[NR_LRU_LISTS]; 1766 unsigned long nr[NR_LRU_LISTS];
2083 unsigned long nr_to_scan; 1767 unsigned long nr_to_scan;
@@ -2089,7 +1773,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
2089restart: 1773restart:
2090 nr_reclaimed = 0; 1774 nr_reclaimed = 0;
2091 nr_scanned = sc->nr_scanned; 1775 nr_scanned = sc->nr_scanned;
2092 get_scan_count(mz, sc, nr, priority); 1776 get_scan_count(lruvec, sc, nr);
2093 1777
2094 blk_start_plug(&plug); 1778 blk_start_plug(&plug);
2095 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 1779 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2101,7 +1785,7 @@ restart:
2101 nr[lru] -= nr_to_scan; 1785 nr[lru] -= nr_to_scan;
2102 1786
2103 nr_reclaimed += shrink_list(lru, nr_to_scan, 1787 nr_reclaimed += shrink_list(lru, nr_to_scan,
2104 mz, sc, priority); 1788 lruvec, sc);
2105 } 1789 }
2106 } 1790 }
2107 /* 1791 /*
@@ -2112,7 +1796,8 @@ restart:
2112 * with multiple processes reclaiming pages, the total 1796 * with multiple processes reclaiming pages, the total
2113 * freeing target can get unreasonably large. 1797 * freeing target can get unreasonably large.
2114 */ 1798 */
2115 if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) 1799 if (nr_reclaimed >= nr_to_reclaim &&
1800 sc->priority < DEF_PRIORITY)
2116 break; 1801 break;
2117 } 1802 }
2118 blk_finish_plug(&plug); 1803 blk_finish_plug(&plug);
@@ -2122,35 +1807,33 @@ restart:
2122 * Even if we did not try to evict anon pages at all, we want to 1807 * Even if we did not try to evict anon pages at all, we want to
2123 * rebalance the anon lru active/inactive ratio. 1808 * rebalance the anon lru active/inactive ratio.
2124 */ 1809 */
2125 if (inactive_anon_is_low(mz)) 1810 if (inactive_anon_is_low(lruvec))
2126 shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); 1811 shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
1812 sc, LRU_ACTIVE_ANON);
2127 1813
2128 /* reclaim/compaction might need reclaim to continue */ 1814 /* reclaim/compaction might need reclaim to continue */
2129 if (should_continue_reclaim(mz, nr_reclaimed, 1815 if (should_continue_reclaim(lruvec, nr_reclaimed,
2130 sc->nr_scanned - nr_scanned, sc)) 1816 sc->nr_scanned - nr_scanned, sc))
2131 goto restart; 1817 goto restart;
2132 1818
2133 throttle_vm_writeout(sc->gfp_mask); 1819 throttle_vm_writeout(sc->gfp_mask);
2134} 1820}
2135 1821
2136static void shrink_zone(int priority, struct zone *zone, 1822static void shrink_zone(struct zone *zone, struct scan_control *sc)
2137 struct scan_control *sc)
2138{ 1823{
2139 struct mem_cgroup *root = sc->target_mem_cgroup; 1824 struct mem_cgroup *root = sc->target_mem_cgroup;
2140 struct mem_cgroup_reclaim_cookie reclaim = { 1825 struct mem_cgroup_reclaim_cookie reclaim = {
2141 .zone = zone, 1826 .zone = zone,
2142 .priority = priority, 1827 .priority = sc->priority,
2143 }; 1828 };
2144 struct mem_cgroup *memcg; 1829 struct mem_cgroup *memcg;
2145 1830
2146 memcg = mem_cgroup_iter(root, NULL, &reclaim); 1831 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2147 do { 1832 do {
2148 struct mem_cgroup_zone mz = { 1833 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2149 .mem_cgroup = memcg, 1834
2150 .zone = zone, 1835 shrink_lruvec(lruvec, sc);
2151 };
2152 1836
2153 shrink_mem_cgroup_zone(priority, &mz, sc);
2154 /* 1837 /*
2155 * Limit reclaim has historically picked one memcg and 1838 * Limit reclaim has historically picked one memcg and
2156 * scanned it with decreasing priority levels until 1839 * scanned it with decreasing priority levels until
@@ -2226,8 +1909,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2226 * the caller that it should consider retrying the allocation instead of 1909 * the caller that it should consider retrying the allocation instead of
2227 * further reclaim. 1910 * further reclaim.
2228 */ 1911 */
2229static bool shrink_zones(int priority, struct zonelist *zonelist, 1912static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2230 struct scan_control *sc)
2231{ 1913{
2232 struct zoneref *z; 1914 struct zoneref *z;
2233 struct zone *zone; 1915 struct zone *zone;
@@ -2254,7 +1936,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2254 if (global_reclaim(sc)) { 1936 if (global_reclaim(sc)) {
2255 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1937 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2256 continue; 1938 continue;
2257 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 1939 if (zone->all_unreclaimable &&
1940 sc->priority != DEF_PRIORITY)
2258 continue; /* Let kswapd poll it */ 1941 continue; /* Let kswapd poll it */
2259 if (COMPACTION_BUILD) { 1942 if (COMPACTION_BUILD) {
2260 /* 1943 /*
@@ -2286,7 +1969,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2286 /* need some check for avoid more shrink_zone() */ 1969 /* need some check for avoid more shrink_zone() */
2287 } 1970 }
2288 1971
2289 shrink_zone(priority, zone, sc); 1972 shrink_zone(zone, sc);
2290 } 1973 }
2291 1974
2292 return aborted_reclaim; 1975 return aborted_reclaim;
@@ -2337,7 +2020,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2337 struct scan_control *sc, 2020 struct scan_control *sc,
2338 struct shrink_control *shrink) 2021 struct shrink_control *shrink)
2339{ 2022{
2340 int priority;
2341 unsigned long total_scanned = 0; 2023 unsigned long total_scanned = 0;
2342 struct reclaim_state *reclaim_state = current->reclaim_state; 2024 struct reclaim_state *reclaim_state = current->reclaim_state;
2343 struct zoneref *z; 2025 struct zoneref *z;
@@ -2350,11 +2032,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2350 if (global_reclaim(sc)) 2032 if (global_reclaim(sc))
2351 count_vm_event(ALLOCSTALL); 2033 count_vm_event(ALLOCSTALL);
2352 2034
2353 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2035 do {
2354 sc->nr_scanned = 0; 2036 sc->nr_scanned = 0;
2355 if (!priority) 2037 aborted_reclaim = shrink_zones(zonelist, sc);
2356 disable_swap_token(sc->target_mem_cgroup);
2357 aborted_reclaim = shrink_zones(priority, zonelist, sc);
2358 2038
2359 /* 2039 /*
2360 * Don't shrink slabs when reclaiming memory from 2040 * Don't shrink slabs when reclaiming memory from
@@ -2396,7 +2076,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2396 2076
2397 /* Take a nap, wait for some writeback to complete */ 2077 /* Take a nap, wait for some writeback to complete */
2398 if (!sc->hibernation_mode && sc->nr_scanned && 2078 if (!sc->hibernation_mode && sc->nr_scanned &&
2399 priority < DEF_PRIORITY - 2) { 2079 sc->priority < DEF_PRIORITY - 2) {
2400 struct zone *preferred_zone; 2080 struct zone *preferred_zone;
2401 2081
2402 first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), 2082 first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
@@ -2404,7 +2084,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2404 &preferred_zone); 2084 &preferred_zone);
2405 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); 2085 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
2406 } 2086 }
2407 } 2087 } while (--sc->priority >= 0);
2408 2088
2409out: 2089out:
2410 delayacct_freepages_end(); 2090 delayacct_freepages_end();
@@ -2442,6 +2122,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2442 .may_unmap = 1, 2122 .may_unmap = 1,
2443 .may_swap = 1, 2123 .may_swap = 1,
2444 .order = order, 2124 .order = order,
2125 .priority = DEF_PRIORITY,
2445 .target_mem_cgroup = NULL, 2126 .target_mem_cgroup = NULL,
2446 .nodemask = nodemask, 2127 .nodemask = nodemask,
2447 }; 2128 };
@@ -2474,17 +2155,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2474 .may_unmap = 1, 2155 .may_unmap = 1,
2475 .may_swap = !noswap, 2156 .may_swap = !noswap,
2476 .order = 0, 2157 .order = 0,
2158 .priority = 0,
2477 .target_mem_cgroup = memcg, 2159 .target_mem_cgroup = memcg,
2478 }; 2160 };
2479 struct mem_cgroup_zone mz = { 2161 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2480 .mem_cgroup = memcg,
2481 .zone = zone,
2482 };
2483 2162
2484 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2163 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2485 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2164 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
2486 2165
2487 trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, 2166 trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
2488 sc.may_writepage, 2167 sc.may_writepage,
2489 sc.gfp_mask); 2168 sc.gfp_mask);
2490 2169
@@ -2495,7 +2174,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2495 * will pick up pages from other mem cgroup's as well. We hack 2174 * will pick up pages from other mem cgroup's as well. We hack
2496 * the priority and make it zero. 2175 * the priority and make it zero.
2497 */ 2176 */
2498 shrink_mem_cgroup_zone(0, &mz, &sc); 2177 shrink_lruvec(lruvec, &sc);
2499 2178
2500 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2179 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2501 2180
@@ -2516,6 +2195,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2516 .may_swap = !noswap, 2195 .may_swap = !noswap,
2517 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2196 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2518 .order = 0, 2197 .order = 0,
2198 .priority = DEF_PRIORITY,
2519 .target_mem_cgroup = memcg, 2199 .target_mem_cgroup = memcg,
2520 .nodemask = NULL, /* we don't care the placement */ 2200 .nodemask = NULL, /* we don't care the placement */
2521 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2201 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2546,8 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2546} 2226}
2547#endif 2227#endif
2548 2228
2549static void age_active_anon(struct zone *zone, struct scan_control *sc, 2229static void age_active_anon(struct zone *zone, struct scan_control *sc)
2550 int priority)
2551{ 2230{
2552 struct mem_cgroup *memcg; 2231 struct mem_cgroup *memcg;
2553 2232
@@ -2556,14 +2235,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc,
2556 2235
2557 memcg = mem_cgroup_iter(NULL, NULL, NULL); 2236 memcg = mem_cgroup_iter(NULL, NULL, NULL);
2558 do { 2237 do {
2559 struct mem_cgroup_zone mz = { 2238 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2560 .mem_cgroup = memcg,
2561 .zone = zone,
2562 };
2563 2239
2564 if (inactive_anon_is_low(&mz)) 2240 if (inactive_anon_is_low(lruvec))
2565 shrink_active_list(SWAP_CLUSTER_MAX, &mz, 2241 shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
2566 sc, priority, 0); 2242 sc, LRU_ACTIVE_ANON);
2567 2243
2568 memcg = mem_cgroup_iter(NULL, memcg, NULL); 2244 memcg = mem_cgroup_iter(NULL, memcg, NULL);
2569 } while (memcg); 2245 } while (memcg);
@@ -2672,7 +2348,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2672{ 2348{
2673 int all_zones_ok; 2349 int all_zones_ok;
2674 unsigned long balanced; 2350 unsigned long balanced;
2675 int priority;
2676 int i; 2351 int i;
2677 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 2352 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
2678 unsigned long total_scanned; 2353 unsigned long total_scanned;
@@ -2696,18 +2371,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2696 }; 2371 };
2697loop_again: 2372loop_again:
2698 total_scanned = 0; 2373 total_scanned = 0;
2374 sc.priority = DEF_PRIORITY;
2699 sc.nr_reclaimed = 0; 2375 sc.nr_reclaimed = 0;
2700 sc.may_writepage = !laptop_mode; 2376 sc.may_writepage = !laptop_mode;
2701 count_vm_event(PAGEOUTRUN); 2377 count_vm_event(PAGEOUTRUN);
2702 2378
2703 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2379 do {
2704 unsigned long lru_pages = 0; 2380 unsigned long lru_pages = 0;
2705 int has_under_min_watermark_zone = 0; 2381 int has_under_min_watermark_zone = 0;
2706 2382
2707 /* The swap token gets in the way of swapout... */
2708 if (!priority)
2709 disable_swap_token(NULL);
2710
2711 all_zones_ok = 1; 2383 all_zones_ok = 1;
2712 balanced = 0; 2384 balanced = 0;
2713 2385
@@ -2721,14 +2393,15 @@ loop_again:
2721 if (!populated_zone(zone)) 2393 if (!populated_zone(zone))
2722 continue; 2394 continue;
2723 2395
2724 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 2396 if (zone->all_unreclaimable &&
2397 sc.priority != DEF_PRIORITY)
2725 continue; 2398 continue;
2726 2399
2727 /* 2400 /*
2728 * Do some background aging of the anon list, to give 2401 * Do some background aging of the anon list, to give
2729 * pages a chance to be referenced before reclaiming. 2402 * pages a chance to be referenced before reclaiming.
2730 */ 2403 */
2731 age_active_anon(zone, &sc, priority); 2404 age_active_anon(zone, &sc);
2732 2405
2733 /* 2406 /*
2734 * If the number of buffer_heads in the machine 2407 * If the number of buffer_heads in the machine
@@ -2776,7 +2449,8 @@ loop_again:
2776 if (!populated_zone(zone)) 2449 if (!populated_zone(zone))
2777 continue; 2450 continue;
2778 2451
2779 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 2452 if (zone->all_unreclaimable &&
2453 sc.priority != DEF_PRIORITY)
2780 continue; 2454 continue;
2781 2455
2782 sc.nr_scanned = 0; 2456 sc.nr_scanned = 0;
@@ -2820,7 +2494,7 @@ loop_again:
2820 !zone_watermark_ok_safe(zone, testorder, 2494 !zone_watermark_ok_safe(zone, testorder,
2821 high_wmark_pages(zone) + balance_gap, 2495 high_wmark_pages(zone) + balance_gap,
2822 end_zone, 0)) { 2496 end_zone, 0)) {
2823 shrink_zone(priority, zone, &sc); 2497 shrink_zone(zone, &sc);
2824 2498
2825 reclaim_state->reclaimed_slab = 0; 2499 reclaim_state->reclaimed_slab = 0;
2826 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); 2500 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
@@ -2877,7 +2551,7 @@ loop_again:
2877 * OK, kswapd is getting into trouble. Take a nap, then take 2551 * OK, kswapd is getting into trouble. Take a nap, then take
2878 * another pass across the zones. 2552 * another pass across the zones.
2879 */ 2553 */
2880 if (total_scanned && (priority < DEF_PRIORITY - 2)) { 2554 if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
2881 if (has_under_min_watermark_zone) 2555 if (has_under_min_watermark_zone)
2882 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); 2556 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
2883 else 2557 else
@@ -2892,7 +2566,7 @@ loop_again:
2892 */ 2566 */
2893 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) 2567 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
2894 break; 2568 break;
2895 } 2569 } while (--sc.priority >= 0);
2896out: 2570out:
2897 2571
2898 /* 2572 /*
@@ -2942,7 +2616,8 @@ out:
2942 if (!populated_zone(zone)) 2616 if (!populated_zone(zone))
2943 continue; 2617 continue;
2944 2618
2945 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 2619 if (zone->all_unreclaimable &&
2620 sc.priority != DEF_PRIORITY)
2946 continue; 2621 continue;
2947 2622
2948 /* Would compaction fail due to lack of free memory? */ 2623 /* Would compaction fail due to lack of free memory? */
@@ -3209,6 +2884,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
3209 .nr_to_reclaim = nr_to_reclaim, 2884 .nr_to_reclaim = nr_to_reclaim,
3210 .hibernation_mode = 1, 2885 .hibernation_mode = 1,
3211 .order = 0, 2886 .order = 0,
2887 .priority = DEF_PRIORITY,
3212 }; 2888 };
3213 struct shrink_control shrink = { 2889 struct shrink_control shrink = {
3214 .gfp_mask = sc.gfp_mask, 2890 .gfp_mask = sc.gfp_mask,
@@ -3386,7 +3062,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3386 const unsigned long nr_pages = 1 << order; 3062 const unsigned long nr_pages = 1 << order;
3387 struct task_struct *p = current; 3063 struct task_struct *p = current;
3388 struct reclaim_state reclaim_state; 3064 struct reclaim_state reclaim_state;
3389 int priority;
3390 struct scan_control sc = { 3065 struct scan_control sc = {
3391 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), 3066 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
3392 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), 3067 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3395,6 +3070,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3395 SWAP_CLUSTER_MAX), 3070 SWAP_CLUSTER_MAX),
3396 .gfp_mask = gfp_mask, 3071 .gfp_mask = gfp_mask,
3397 .order = order, 3072 .order = order,
3073 .priority = ZONE_RECLAIM_PRIORITY,
3398 }; 3074 };
3399 struct shrink_control shrink = { 3075 struct shrink_control shrink = {
3400 .gfp_mask = sc.gfp_mask, 3076 .gfp_mask = sc.gfp_mask,
@@ -3417,11 +3093,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3417 * Free memory by calling shrink zone with increasing 3093 * Free memory by calling shrink zone with increasing
3418 * priorities until we have enough memory freed. 3094 * priorities until we have enough memory freed.
3419 */ 3095 */
3420 priority = ZONE_RECLAIM_PRIORITY;
3421 do { 3096 do {
3422 shrink_zone(priority, zone, &sc); 3097 shrink_zone(zone, &sc);
3423 priority--; 3098 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
3424 } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
3425 } 3099 }
3426 3100
3427 nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); 3101 nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -3536,7 +3210,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
3536 if (mapping_unevictable(page_mapping(page))) 3210 if (mapping_unevictable(page_mapping(page)))
3537 return 0; 3211 return 0;
3538 3212
3539 if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) 3213 if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
3540 return 0; 3214 return 0;
3541 3215
3542 return 1; 3216 return 1;
@@ -3572,6 +3246,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
3572 zone = pagezone; 3246 zone = pagezone;
3573 spin_lock_irq(&zone->lru_lock); 3247 spin_lock_irq(&zone->lru_lock);
3574 } 3248 }
3249 lruvec = mem_cgroup_page_lruvec(page, zone);
3575 3250
3576 if (!PageLRU(page) || !PageUnevictable(page)) 3251 if (!PageLRU(page) || !PageUnevictable(page))
3577 continue; 3252 continue;
@@ -3581,11 +3256,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
3581 3256
3582 VM_BUG_ON(PageActive(page)); 3257 VM_BUG_ON(PageActive(page));
3583 ClearPageUnevictable(page); 3258 ClearPageUnevictable(page);
3584 __dec_zone_state(zone, NR_UNEVICTABLE); 3259 del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
3585 lruvec = mem_cgroup_lru_move_lists(zone, page, 3260 add_page_to_lru_list(page, lruvec, lru);
3586 LRU_UNEVICTABLE, lru);
3587 list_move(&page->lru, &lruvec->lists[lru]);
3588 __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
3589 pgrescued++; 3261 pgrescued++;
3590 } 3262 }
3591 } 3263 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 0dad31dc1618..1bbbbd9776ad 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1223,7 +1223,6 @@ module_init(setup_vmstat)
1223#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 1223#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1224#include <linux/debugfs.h> 1224#include <linux/debugfs.h>
1225 1225
1226static struct dentry *extfrag_debug_root;
1227 1226
1228/* 1227/*
1229 * Return an index indicating how much of the available free memory is 1228 * Return an index indicating how much of the available free memory is
@@ -1361,19 +1360,24 @@ static const struct file_operations extfrag_file_ops = {
1361 1360
1362static int __init extfrag_debug_init(void) 1361static int __init extfrag_debug_init(void)
1363{ 1362{
1363 struct dentry *extfrag_debug_root;
1364
1364 extfrag_debug_root = debugfs_create_dir("extfrag", NULL); 1365 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1365 if (!extfrag_debug_root) 1366 if (!extfrag_debug_root)
1366 return -ENOMEM; 1367 return -ENOMEM;
1367 1368
1368 if (!debugfs_create_file("unusable_index", 0444, 1369 if (!debugfs_create_file("unusable_index", 0444,
1369 extfrag_debug_root, NULL, &unusable_file_ops)) 1370 extfrag_debug_root, NULL, &unusable_file_ops))
1370 return -ENOMEM; 1371 goto fail;
1371 1372
1372 if (!debugfs_create_file("extfrag_index", 0444, 1373 if (!debugfs_create_file("extfrag_index", 0444,
1373 extfrag_debug_root, NULL, &extfrag_file_ops)) 1374 extfrag_debug_root, NULL, &extfrag_file_ops))
1374 return -ENOMEM; 1375 goto fail;
1375 1376
1376 return 0; 1377 return 0;
1378fail:
1379 debugfs_remove_recursive(extfrag_debug_root);
1380 return -ENOMEM;
1377} 1381}
1378 1382
1379module_init(extfrag_debug_init); 1383module_init(extfrag_debug_init);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 151703791bb0..b6f3583ddfe8 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -74,9 +74,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
74 percpu_counter_destroy(&tcp->tcp_sockets_allocated); 74 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
75 75
76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); 76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
77
78 if (val != RESOURCE_MAX)
79 static_key_slow_dec(&memcg_socket_limit_enabled);
80} 77}
81EXPORT_SYMBOL(tcp_destroy_cgroup); 78EXPORT_SYMBOL(tcp_destroy_cgroup);
82 79
@@ -107,10 +104,33 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
107 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, 104 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
108 net->ipv4.sysctl_tcp_mem[i]); 105 net->ipv4.sysctl_tcp_mem[i]);
109 106
110 if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) 107 if (val == RESOURCE_MAX)
111 static_key_slow_dec(&memcg_socket_limit_enabled); 108 clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
112 else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) 109 else if (val != RESOURCE_MAX) {
113 static_key_slow_inc(&memcg_socket_limit_enabled); 110 /*
111 * The active bit needs to be written after the static_key
112 * update. This is what guarantees that the socket activation
113 * function is the last one to run. See sock_update_memcg() for
114 * details, and note that we don't mark any socket as belonging
115 * to this memcg until that flag is up.
116 *
117 * We need to do this, because static_keys will span multiple
118 * sites, but we can't control their order. If we mark a socket
119 * as accounted, but the accounting functions are not patched in
120 * yet, we'll lose accounting.
121 *
122 * We never race with the readers in sock_update_memcg(),
123 * because when this value change, the code to process it is not
124 * patched in yet.
125 *
126 * The activated bit is used to guarantee that no two writers
127 * will do the update in the same memcg. Without that, we can't
128 * properly shutdown the static key.
129 */
130 if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
131 static_key_slow_inc(&memcg_socket_limit_enabled);
132 set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
133 }
114 134
115 return 0; 135 return 0;
116} 136}
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 7dab7b25b5c6..f576971f6556 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -35,6 +35,7 @@
35#include <sys/mount.h> 35#include <sys/mount.h>
36#include <sys/statfs.h> 36#include <sys/statfs.h>
37#include "../../include/linux/magic.h" 37#include "../../include/linux/magic.h"
38#include "../../include/linux/kernel-page-flags.h"
38 39
39 40
40#ifndef MAX_PATH 41#ifndef MAX_PATH
@@ -73,33 +74,6 @@
73#define KPF_BYTES 8 74#define KPF_BYTES 8
74#define PROC_KPAGEFLAGS "/proc/kpageflags" 75#define PROC_KPAGEFLAGS "/proc/kpageflags"
75 76
76/* copied from kpageflags_read() */
77#define KPF_LOCKED 0
78#define KPF_ERROR 1
79#define KPF_REFERENCED 2
80#define KPF_UPTODATE 3
81#define KPF_DIRTY 4
82#define KPF_LRU 5
83#define KPF_ACTIVE 6
84#define KPF_SLAB 7
85#define KPF_WRITEBACK 8
86#define KPF_RECLAIM 9
87#define KPF_BUDDY 10
88
89/* [11-20] new additions in 2.6.31 */
90#define KPF_MMAP 11
91#define KPF_ANON 12
92#define KPF_SWAPCACHE 13
93#define KPF_SWAPBACKED 14
94#define KPF_COMPOUND_HEAD 15
95#define KPF_COMPOUND_TAIL 16
96#define KPF_HUGE 17
97#define KPF_UNEVICTABLE 18
98#define KPF_HWPOISON 19
99#define KPF_NOPAGE 20
100#define KPF_KSM 21
101#define KPF_THP 22
102
103/* [32-] kernel hacking assistances */ 77/* [32-] kernel hacking assistances */
104#define KPF_RESERVED 32 78#define KPF_RESERVED 32
105#define KPF_MLOCKED 33 79#define KPF_MLOCKED 33
@@ -326,7 +300,7 @@ static char *page_flag_name(uint64_t flags)
326{ 300{
327 static char buf[65]; 301 static char buf[65];
328 int present; 302 int present;
329 int i, j; 303 size_t i, j;
330 304
331 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { 305 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
332 present = (flags >> i) & 1; 306 present = (flags >> i) & 1;
@@ -344,7 +318,7 @@ static char *page_flag_name(uint64_t flags)
344static char *page_flag_longname(uint64_t flags) 318static char *page_flag_longname(uint64_t flags)
345{ 319{
346 static char buf[1024]; 320 static char buf[1024];
347 int i, n; 321 size_t i, n;
348 322
349 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { 323 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
350 if (!page_flag_names[i]) 324 if (!page_flag_names[i])
@@ -402,7 +376,7 @@ static void show_page(unsigned long voffset,
402 376
403static void show_summary(void) 377static void show_summary(void)
404{ 378{
405 int i; 379 size_t i;
406 380
407 printf(" flags\tpage-count MB" 381 printf(" flags\tpage-count MB"
408 " symbolic-flags\t\t\tlong-symbolic-flags\n"); 382 " symbolic-flags\t\t\tlong-symbolic-flags\n");
@@ -500,7 +474,7 @@ static int debugfs_valid_mountpoint(const char *debugfs)
500/* find the path to the mounted debugfs */ 474/* find the path to the mounted debugfs */
501static const char *debugfs_find_mountpoint(void) 475static const char *debugfs_find_mountpoint(void)
502{ 476{
503 const char **ptr; 477 const char *const *ptr;
504 char type[100]; 478 char type[100];
505 FILE *fp; 479 FILE *fp;
506 480
@@ -537,7 +511,7 @@ static const char *debugfs_find_mountpoint(void)
537 511
538static void debugfs_mount(void) 512static void debugfs_mount(void)
539{ 513{
540 const char **ptr; 514 const char *const *ptr;
541 515
542 /* see if it's already mounted */ 516 /* see if it's already mounted */
543 if (debugfs_find_mountpoint()) 517 if (debugfs_find_mountpoint())
@@ -614,10 +588,10 @@ static int unpoison_page(unsigned long offset)
614 * page frame walker 588 * page frame walker
615 */ 589 */
616 590
617static int hash_slot(uint64_t flags) 591static size_t hash_slot(uint64_t flags)
618{ 592{
619 int k = HASH_KEY(flags); 593 size_t k = HASH_KEY(flags);
620 int i; 594 size_t i;
621 595
622 /* Explicitly reserve slot 0 for flags 0: the following logic 596 /* Explicitly reserve slot 0 for flags 0: the following logic
623 * cannot distinguish an unoccupied slot from slot (flags==0). 597 * cannot distinguish an unoccupied slot from slot (flags==0).
@@ -670,7 +644,7 @@ static void walk_pfn(unsigned long voffset,
670{ 644{
671 uint64_t buf[KPAGEFLAGS_BATCH]; 645 uint64_t buf[KPAGEFLAGS_BATCH];
672 unsigned long batch; 646 unsigned long batch;
673 long pages; 647 unsigned long pages;
674 unsigned long i; 648 unsigned long i;
675 649
676 while (count) { 650 while (count) {
@@ -779,7 +753,7 @@ static const char *page_flag_type(uint64_t flag)
779 753
780static void usage(void) 754static void usage(void)
781{ 755{
782 int i, j; 756 size_t i, j;
783 757
784 printf( 758 printf(
785"page-types [options]\n" 759"page-types [options]\n"
@@ -938,7 +912,7 @@ static void add_bits_filter(uint64_t mask, uint64_t bits)
938 912
939static uint64_t parse_flag_name(const char *str, int len) 913static uint64_t parse_flag_name(const char *str, int len)
940{ 914{
941 int i; 915 size_t i;
942 916
943 if (!*str || !len) 917 if (!*str || !len)
944 return 0; 918 return 0;