-rw-r--r--  .mailmap  1
-rw-r--r--  Documentation/cgroups/cgroups.txt (renamed from Documentation/cgroups.txt)  0
-rw-r--r--  Documentation/cgroups/freezer-subsystem.txt  99
-rw-r--r--  Documentation/controllers/memory.txt  24
-rw-r--r--  Documentation/cpusets.txt  2
-rw-r--r--  Documentation/filesystems/ext3.txt  5
-rw-r--r--  Documentation/filesystems/proc.txt  28
-rw-r--r--  Documentation/kernel-parameters.txt  2
-rw-r--r--  Documentation/mtd/nand_ecc.txt  714
-rw-r--r--  Documentation/sysrq.txt  3
-rw-r--r--  Documentation/vm/unevictable-lru.txt  615
-rw-r--r--  arch/alpha/Kconfig  1
-rw-r--r--  arch/alpha/include/asm/thread_info.h  2
-rw-r--r--  arch/alpha/kernel/core_marvel.c  4
-rw-r--r--  arch/alpha/kernel/time.c  18
-rw-r--r--  arch/arm/Kconfig  2
-rw-r--r--  arch/arm/mach-pxa/include/mach/pxa3xx_nand.h  44
-rw-r--r--  arch/arm/plat-mxc/include/mach/mxc_nand.h  27
-rw-r--r--  arch/arm/plat-omap/include/mach/onenand.h  6
-rw-r--r--  arch/avr32/Kconfig  2
-rw-r--r--  arch/avr32/include/asm/thread_info.h  1
-rw-r--r--  arch/blackfin/Kconfig  3
-rw-r--r--  arch/cris/Kconfig  2
-rw-r--r--  arch/cris/arch-v10/drivers/ds1302.c  24
-rw-r--r--  arch/cris/arch-v10/drivers/pcf8563.c  24
-rw-r--r--  arch/cris/arch-v32/drivers/pcf8563.c  24
-rw-r--r--  arch/cris/kernel/time.c  18
-rw-r--r--  arch/frv/Kconfig  2
-rw-r--r--  arch/h8300/Kconfig  2
-rw-r--r--  arch/h8300/include/asm/thread_info.h  2
-rw-r--r--  arch/ia64/Kconfig  2
-rw-r--r--  arch/ia64/hp/common/sba_iommu.c  5
-rw-r--r--  arch/ia64/kernel/crash_dump.c  4
-rw-r--r--  arch/ia64/kernel/efi.c  2
-rw-r--r--  arch/ia64/kernel/setup.c  13
-rw-r--r--  arch/ia64/mm/init.c  17
-rw-r--r--  arch/m32r/Kconfig  2
-rw-r--r--  arch/m68k/Kconfig  2
-rw-r--r--  arch/m68k/bvme6000/rtc.c  1
-rw-r--r--  arch/m68knommu/Kconfig  2
-rw-r--r--  arch/m68knommu/include/asm/thread_info.h  2
-rw-r--r--  arch/mips/Kconfig  2
-rw-r--r--  arch/mips/dec/time.c  18
-rw-r--r--  arch/mips/include/asm/mc146818-time.h  18
-rw-r--r--  arch/mips/pmc-sierra/yosemite/setup.c  30
-rw-r--r--  arch/mips/sibyte/swarm/rtc_m41t81.c  26
-rw-r--r--  arch/mips/sibyte/swarm/rtc_xicor1241.c  26
-rw-r--r--  arch/mn10300/Kconfig  2
-rw-r--r--  arch/mn10300/kernel/rtc.c  6
-rw-r--r--  arch/parisc/Kconfig  2
-rw-r--r--  arch/powerpc/Kconfig  2
-rw-r--r--  arch/powerpc/include/asm/ps3av.h  3
-rw-r--r--  arch/powerpc/kernel/crash_dump.c  10
-rw-r--r--  arch/powerpc/mm/mem.c  17
-rw-r--r--  arch/s390/Kconfig  2
-rw-r--r--  arch/s390/include/asm/thread_info.h  2
-rw-r--r--  arch/s390/mm/init.c  11
-rw-r--r--  arch/sh/Kconfig  2
-rw-r--r--  arch/sh/kernel/crash_dump.c  3
-rw-r--r--  arch/sparc/Kconfig  2
-rw-r--r--  arch/sparc/include/asm/thread_info_32.h  2
-rw-r--r--  arch/sparc/include/asm/thread_info_64.h  2
-rw-r--r--  arch/sparc64/Kconfig  1
-rw-r--r--  arch/um/Kconfig  2
-rw-r--r--  arch/um/sys-i386/signal.c  3
-rw-r--r--  arch/x86/Kconfig  1
-rw-r--r--  arch/x86/kernel/crash_dump_32.c  3
-rw-r--r--  arch/x86/kernel/crash_dump_64.c  3
-rw-r--r--  arch/x86/kernel/rtc.c  20
-rw-r--r--  arch/x86/kernel/setup.c  8
-rw-r--r--  arch/x86/mm/pageattr.c  2
-rw-r--r--  arch/x86/xen/enlighten.c  1
-rw-r--r--  arch/x86/xen/mmu.c  1
-rw-r--r--  arch/xtensa/Kconfig  1
-rw-r--r--  drivers/acpi/battery.c  2
-rw-r--r--  drivers/acpi/sbs.c  2
-rw-r--r--  drivers/acpi/sleep/proc.c  18
-rw-r--r--  drivers/acpi/system.c  1
-rw-r--r--  drivers/base/memory.c  4
-rw-r--r--  drivers/base/node.c  69
-rw-r--r--  drivers/block/aoe/aoeblk.c  2
-rw-r--r--  drivers/block/nbd.c  2
-rw-r--r--  drivers/char/ds1286.c  34
-rw-r--r--  drivers/char/ds1302.c  24
-rw-r--r--  drivers/char/ip27-rtc.c  24
-rw-r--r--  drivers/char/pc8736x_gpio.c  11
-rw-r--r--  drivers/char/rtc.c  40
-rw-r--r--  drivers/char/sx.c  6
-rw-r--r--  drivers/char/sysrq.c  2
-rw-r--r--  drivers/char/tpm/tpm.c  2
-rw-r--r--  drivers/edac/cell_edac.c  2
-rw-r--r--  drivers/firmware/iscsi_ibft.c  1
-rw-r--r--  drivers/gpio/gpiolib.c  4
-rw-r--r--  drivers/hwmon/applesmc.c  109
-rw-r--r--  drivers/hwmon/pc87360.c  248
-rw-r--r--  drivers/i2c/chips/at24.c  1
-rw-r--r--  drivers/i2c/chips/ds1682.c  1
-rw-r--r--  drivers/i2c/chips/menelaus.c  34
-rw-r--r--  drivers/infiniband/core/cm.c  2
-rw-r--r--  drivers/media/dvb/ttpci/av7110.c  2
-rw-r--r--  drivers/media/video/cx18/cx18-driver.h  1
-rw-r--r--  drivers/media/video/ivtv/ivtv-driver.h  1
-rw-r--r--  drivers/memstick/core/mspro_block.c  1
-rw-r--r--  drivers/misc/hp-wmi.c  1
-rw-r--r--  drivers/mtd/Kconfig  5
-rw-r--r--  drivers/mtd/chips/Kconfig  4
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0001.c  71
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0002.c  52
-rw-r--r--  drivers/mtd/chips/cfi_probe.c  58
-rw-r--r--  drivers/mtd/chips/cfi_util.c  66
-rw-r--r--  drivers/mtd/chips/gen_probe.c  2
-rw-r--r--  drivers/mtd/cmdlinepart.c  1
-rw-r--r--  drivers/mtd/devices/Kconfig  21
-rw-r--r--  drivers/mtd/devices/m25p80.c  138
-rw-r--r--  drivers/mtd/devices/mtd_dataflash.c  214
-rw-r--r--  drivers/mtd/inftlcore.c  5
-rw-r--r--  drivers/mtd/maps/Kconfig  33
-rw-r--r--  drivers/mtd/maps/Makefile  4
-rw-r--r--  drivers/mtd/maps/ebony.c  163
-rw-r--r--  drivers/mtd/maps/ocotea.c  154
-rw-r--r--  drivers/mtd/maps/omap-toto-flash.c  133
-rw-r--r--  drivers/mtd/maps/pci.c  18
-rw-r--r--  drivers/mtd/maps/physmap_of.c  3
-rw-r--r--  drivers/mtd/maps/walnut.c  122
-rw-r--r--  drivers/mtd/mtdchar.c  4
-rw-r--r--  drivers/mtd/mtdconcat.c  4
-rw-r--r--  drivers/mtd/mtdoops.c  42
-rw-r--r--  drivers/mtd/mtdpart.c  4
-rw-r--r--  drivers/mtd/nand/Kconfig  42
-rw-r--r--  drivers/mtd/nand/Makefile  4
-rw-r--r--  drivers/mtd/nand/atmel_nand.c  58
-rw-r--r--  drivers/mtd/nand/cs553x_nand.c  2
-rw-r--r--  drivers/mtd/nand/fsl_elbc_nand.c  3
-rw-r--r--  drivers/mtd/nand/fsl_upm.c  68
-rw-r--r--  drivers/mtd/nand/gpio.c  375
-rw-r--r--  drivers/mtd/nand/mxc_nand.c  1077
-rw-r--r--  drivers/mtd/nand/nand_base.c  16
-rw-r--r--  drivers/mtd/nand/nand_ecc.c  554
-rw-r--r--  drivers/mtd/nand/nandsim.c  1
-rw-r--r--  drivers/mtd/nand/pxa3xx_nand.c  147
-rw-r--r--  drivers/mtd/nand/sh_flctl.c  878
-rw-r--r--  drivers/mtd/nand/toto.c  206
-rw-r--r--  drivers/mtd/ofpart.c  1
-rw-r--r--  drivers/mtd/onenand/Kconfig  8
-rw-r--r--  drivers/mtd/onenand/Makefile  1
-rw-r--r--  drivers/mtd/onenand/omap2.c  802
-rw-r--r--  drivers/mtd/onenand/onenand_base.c  2
-rw-r--r--  drivers/mtd/ssfdc.c  3
-rw-r--r--  drivers/mtd/ubi/cdev.c  6
-rw-r--r--  drivers/mtd/ubi/scan.c  2
-rw-r--r--  drivers/mtd/ubi/vtbl.c  4
-rw-r--r--  drivers/pci/intel-iommu.c  4
-rw-r--r--  drivers/pci/pci.c  5
-rw-r--r--  drivers/pci/probe.c  28
-rw-r--r--  drivers/pci/rom.c  6
-rw-r--r--  drivers/pci/setup-bus.c  13
-rw-r--r--  drivers/pci/setup-res.c  40
-rw-r--r--  drivers/power/power_supply_sysfs.c  2
-rw-r--r--  drivers/ps3/ps3av.c  16
-rw-r--r--  drivers/ps3/ps3av_cmd.c  19
-rw-r--r--  drivers/rtc/rtc-at91rm9200.c  42
-rw-r--r--  drivers/rtc/rtc-bq4802.c  30
-rw-r--r--  drivers/rtc/rtc-cmos.c  91
-rw-r--r--  drivers/rtc/rtc-ds1216.c  26
-rw-r--r--  drivers/rtc/rtc-ds1302.c  28
-rw-r--r--  drivers/rtc/rtc-ds1305.c  39
-rw-r--r--  drivers/rtc/rtc-ds1307.c  41
-rw-r--r--  drivers/rtc/rtc-ds1511.c  43
-rw-r--r--  drivers/rtc/rtc-ds1553.c  38
-rw-r--r--  drivers/rtc/rtc-ds1742.c  30
-rw-r--r--  drivers/rtc/rtc-fm3130.c  56
-rw-r--r--  drivers/rtc/rtc-isl1208.c  42
-rw-r--r--  drivers/rtc/rtc-m41t80.c  44
-rw-r--r--  drivers/rtc/rtc-m41t94.c  28
-rw-r--r--  drivers/rtc/rtc-m48t59.c  49
-rw-r--r--  drivers/rtc/rtc-m48t86.c  28
-rw-r--r--  drivers/rtc/rtc-max6900.c  32
-rw-r--r--  drivers/rtc/rtc-max6902.c  32
-rw-r--r--  drivers/rtc/rtc-omap.c  24
-rw-r--r--  drivers/rtc/rtc-pcf8563.c  24
-rw-r--r--  drivers/rtc/rtc-pcf8583.c  20
-rw-r--r--  drivers/rtc/rtc-r9701.c  24
-rw-r--r--  drivers/rtc/rtc-rs5c313.c  28
-rw-r--r--  drivers/rtc/rtc-rs5c348.c  30
-rw-r--r--  drivers/rtc/rtc-rs5c372.c  42
-rw-r--r--  drivers/rtc/rtc-s35390a.c  34
-rw-r--r--  drivers/rtc/rtc-s3c.c  42
-rw-r--r--  drivers/rtc/rtc-sh.c  40
-rw-r--r--  drivers/rtc/rtc-stk17ta8.c  39
-rw-r--r--  drivers/rtc/rtc-v3020.c  28
-rw-r--r--  drivers/rtc/rtc-x1205.c  30
-rw-r--r--  drivers/scsi/arcmsr/arcmsr_attr.c  3
-rw-r--r--  drivers/scsi/sr_vendor.c  12
-rw-r--r--  drivers/serial/8250_gsc.c  2
-rw-r--r--  drivers/serial/serial_txx9.c  2
-rw-r--r--  drivers/serial/sn_console.c  2
-rw-r--r--  drivers/staging/go7007/Kconfig  2
-rw-r--r--  drivers/staging/sxg/Kconfig  1
-rw-r--r--  drivers/telephony/phonedev.c  2
-rw-r--r--  drivers/video/fbmem.c  174
-rw-r--r--  drivers/w1/slaves/w1_ds2760.c  1
-rw-r--r--  fs/Kconfig  333
-rw-r--r--  fs/Kconfig.binfmt  22
-rw-r--r--  fs/binfmt_elf.c  12
-rw-r--r--  fs/buffer.c  3
-rw-r--r--  fs/cifs/Kconfig  142
-rw-r--r--  fs/cifs/file.c  4
-rw-r--r--  fs/exec.c  7
-rw-r--r--  fs/ext3/balloc.c  3
-rw-r--r--  fs/ext3/dir.c  30
-rw-r--r--  fs/ext3/inode.c  7
-rw-r--r--  fs/ext3/resize.c  3
-rw-r--r--  fs/ext3/super.c  16
-rw-r--r--  fs/hfsplus/extents.c  3
-rw-r--r--  fs/hfsplus/inode.c  2
-rw-r--r--  fs/jbd/commit.c  10
-rw-r--r--  fs/jbd/transaction.c  16
-rw-r--r--  fs/jffs2/Kconfig  188
-rw-r--r--  fs/jffs2/compr.c  4
-rw-r--r--  fs/jffs2/dir.c  2
-rw-r--r--  fs/jffs2/erase.c  4
-rw-r--r--  fs/jffs2/fs.c  6
-rw-r--r--  fs/jffs2/nodemgmt.c  4
-rw-r--r--  fs/jffs2/wbuf.c  5
-rw-r--r--  fs/nfs/dir.c  2
-rw-r--r--  fs/ntfs/file.c  4
-rw-r--r--  fs/proc/proc_misc.c  85
-rw-r--r--  fs/proc/vmcore.c  5
-rw-r--r--  fs/ramfs/file-nommu.c  4
-rw-r--r--  fs/ramfs/inode.c  1
-rw-r--r--  fs/seq_file.c  29
-rw-r--r--  include/asm-cris/thread_info.h  2
-rw-r--r--  include/asm-generic/rtc.h  24
-rw-r--r--  include/asm-m68k/thread_info.h  1
-rw-r--r--  include/asm-parisc/thread_info.h  2
-rw-r--r--  include/asm-um/thread_info.h  2
-rw-r--r--  include/asm-xtensa/thread_info.h  2
-rw-r--r--  include/linux/Kbuild  2
-rw-r--r--  include/linux/backing-dev.h  13
-rw-r--r--  include/linux/bcd.h  16
-rw-r--r--  include/linux/bitmap.h  1
-rw-r--r--  include/linux/buffer_head.h  2
-rw-r--r--  include/linux/byteorder/Kbuild  1
-rw-r--r--  include/linux/byteorder/big_endian.h  1
-rw-r--r--  include/linux/byteorder/little_endian.h  1
-rw-r--r--  include/linux/cgroup.h  28
-rw-r--r--  include/linux/cgroup_subsys.h  6
-rw-r--r--  include/linux/crash_dump.h  38
-rw-r--r--  include/linux/ext3_fs.h  2
-rw-r--r--  include/linux/fb.h  1
-rw-r--r--  include/linux/freezer.h  43
-rw-r--r--  include/linux/jbd.h  3
-rw-r--r--  include/linux/memcontrol.h  34
-rw-r--r--  include/linux/migrate.h  3
-rw-r--r--  include/linux/mm.h  9
-rw-r--r--  include/linux/mm_inline.h  98
-rw-r--r--  include/linux/mm_types.h  3
-rw-r--r--  include/linux/mmzone.h  105
-rw-r--r--  include/linux/mtd/cfi.h  9
-rw-r--r--  include/linux/mtd/flashchip.h  4
-rw-r--r--  include/linux/mtd/mtd.h  4
-rw-r--r--  include/linux/mtd/nand-gpio.h  19
-rw-r--r--  include/linux/mtd/nand.h  1
-rw-r--r--  include/linux/mtd/onenand_regs.h  2
-rw-r--r--  include/linux/mtd/partitions.h  1
-rw-r--r--  include/linux/mtd/sh_flctl.h  125
-rw-r--r--  include/linux/page-flags.h  55
-rw-r--r--  include/linux/page_cgroup.h  103
-rw-r--r--  include/linux/pagemap.h  44
-rw-r--r--  include/linux/pagevec.h  34
-rw-r--r--  include/linux/pci.h  2
-rw-r--r--  include/linux/ptrace.h  1
-rw-r--r--  include/linux/rmap.h  29
-rw-r--r--  include/linux/sched.h  13
-rw-r--r--  include/linux/seq_file.h  13
-rw-r--r--  include/linux/swab.h  10
-rw-r--r--  include/linux/swap.h  69
-rw-r--r--  include/linux/sysfs.h  5
-rw-r--r--  include/linux/vmalloc.h  15
-rw-r--r--  include/linux/vmstat.h  20
-rw-r--r--  include/net/netns/x_tables.h  4
-rw-r--r--  init/Kconfig  7
-rw-r--r--  init/main.c  2
-rw-r--r--  ipc/mqueue.c  20
-rw-r--r--  ipc/shm.c  4
-rw-r--r--  kernel/Kconfig.freezer  2
-rw-r--r--  kernel/Makefile  2
-rw-r--r--  kernel/cgroup.c  265
-rw-r--r--  kernel/cgroup_debug.c  4
-rw-r--r--  kernel/cgroup_freezer.c  379
-rw-r--r--  kernel/configs.c  9
-rw-r--r--  kernel/cpuset.c  17
-rw-r--r--  kernel/freezer.c  154
-rw-r--r--  kernel/kexec.c  2
-rw-r--r--  kernel/kthread.c  5
-rw-r--r--  kernel/power/process.c  119
-rw-r--r--  kernel/ptrace.c  2
-rw-r--r--  kernel/rcupreempt.c  2
-rw-r--r--  kernel/sysctl.c  10
-rw-r--r--  lib/bitmap.c  11
-rw-r--r--  lib/vsprintf.c  49
-rw-r--r--  mm/Kconfig  11
-rw-r--r--  mm/Makefile  3
-rw-r--r--  mm/filemap.c  37
-rw-r--r--  mm/fremap.c  27
-rw-r--r--  mm/hugetlb.c  44
-rw-r--r--  mm/internal.h  131
-rw-r--r--  mm/memcontrol.c  466
-rw-r--r--  mm/memory.c  127
-rw-r--r--  mm/memory_hotplug.c  19
-rw-r--r--  mm/mempolicy.c  11
-rw-r--r--  mm/migrate.c  274
-rw-r--r--  mm/mlock.c  443
-rw-r--r--  mm/mmap.c  81
-rw-r--r--  mm/mremap.c  8
-rw-r--r--  mm/nommu.c  44
-rw-r--r--  mm/page-writeback.c  8
-rw-r--r--  mm/page_alloc.c  121
-rw-r--r--  mm/page_cgroup.c  237
-rw-r--r--  mm/readahead.c  2
-rw-r--r--  mm/rmap.c  319
-rw-r--r--  mm/shmem.c  7
-rw-r--r--  mm/swap.c  172
-rw-r--r--  mm/swap_state.c  11
-rw-r--r--  mm/swapfile.c  27
-rw-r--r--  mm/truncate.c  4
-rw-r--r--  mm/vmalloc.c  975
-rw-r--r--  mm/vmscan.c  1026
-rw-r--r--  mm/vmstat.c  33
-rw-r--r--  net/bridge/br_netfilter.c  2
-rw-r--r--  net/core/dev.c  6
-rw-r--r--  net/dccp/ipv6.c  4
-rw-r--r--  net/dccp/minisocks.c  1
-rw-r--r--  net/dccp/output.c  2
-rw-r--r--  net/ipv4/arp.c  4
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  1
-rw-r--r--  net/ipv6/syncookies.c  1
-rw-r--r--  net/ipv6/tcp_ipv6.c  6
-rw-r--r--  net/netfilter/Kconfig  1
-rw-r--r--  net/netfilter/ipvs/Kconfig  4
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  2
-rw-r--r--  net/netfilter/xt_NFQUEUE.c  2
-rw-r--r--  net/netfilter/xt_iprange.c  8
-rw-r--r--  net/netfilter/xt_recent.c  10
-rw-r--r--  net/sched/sch_generic.c  2
-rw-r--r--  security/device_cgroup.c  46
-rw-r--r--  sound/core/pcm_misc.c  1
-rw-r--r--  sound/drivers/dummy.c  2
-rw-r--r--  sound/pci/ca0106/ca0106_main.c  1
-rw-r--r--  sound/ppc/snd_ps3.c  96
-rw-r--r--  sound/ppc/snd_ps3.h  1
-rw-r--r--  sound/soc/omap/omap-mcbsp.c  24
352 files changed, 13209 insertions, 4409 deletions
diff --git a/.mailmap b/.mailmap
index dfab12f809ed..eba9bf953ef5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
 Koushik <raghavendra.koushik@neterion.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
+Mark Brown <broonie@sirena.org.uk>
 Matthieu CASTET <castet.matthieu@free.fr>
 Michael Buesch <mb@bu3sch.de>
 Michael Buesch <mbuesch@freenet.de>
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d9014aa0eb68..d9014aa0eb68 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644
index 000000000000..c50ab58b72eb
--- /dev/null
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -0,0 +1,99 @@
The cgroup freezer is useful for batch job management systems, which start
and stop sets of tasks in order to schedule the resources of a machine
according to the desires of a system administrator. This sort of program
is often used on HPC clusters to schedule access to the cluster as a
whole. The cgroup freezer uses cgroups to describe the set of tasks to
be started/stopped by the batch job management system. It also provides
a means to start and stop the tasks composing the job.

The cgroup freezer will also be useful for checkpointing running groups
of tasks. The freezer allows the checkpoint code to obtain a consistent
image of the tasks by attempting to force the tasks in a cgroup into a
quiescent state. Once the tasks are quiescent another task can
walk /proc or invoke a kernel interface to gather information about the
quiesced tasks. Checkpointed tasks can be restarted later should a
recoverable error occur. This also allows the checkpointed tasks to be
migrated between nodes in a cluster by copying the gathered information
to another node and restarting the tasks there.

Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
and resuming tasks in userspace. Both of these signals are observable
from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
blocked, or ignored, it can be seen by waiting or ptracing parent tasks.
SIGCONT is especially unsuitable since it can be caught by the task. Any
programs designed to watch for SIGSTOP and SIGCONT could be broken by
attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
demonstrate this problem using nested bash shells:

	$ echo $$
	16644
	$ bash
	$ echo $$
	16690

	From a second, unrelated bash shell:
	$ kill -SIGSTOP 16690
	$ kill -SIGCONT 16690

	<at this point 16690 exits and causes 16644 to exit too>

This happens because bash can observe both signals and choose how it
responds to them.

Another example of a program which catches and responds to these
signals is gdb. In fact any program designed to use ptrace is likely to
have a problem with this method of stopping and resuming tasks.

In contrast, the cgroup freezer uses the kernel freezer code to
prevent the freeze/unfreeze cycle from becoming visible to the tasks
being frozen. This allows the bash example above and gdb to run as
expected.

The freezer subsystem in the container filesystem defines a file named
freezer.state. Writing "FROZEN" to the state file will freeze all tasks in
the cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the
cgroup. Reading will return the current state.

* Examples of usage:

	# mkdir /containers/freezer
	# mount -t cgroup -ofreezer freezer /containers
	# mkdir /containers/0
	# echo $some_pid > /containers/0/tasks

to get the status of the freezer subsystem:

	# cat /containers/0/freezer.state
	THAWED

to freeze all tasks in the container:

	# echo FROZEN > /containers/0/freezer.state
	# cat /containers/0/freezer.state
	FREEZING
	# cat /containers/0/freezer.state
	FROZEN

to unfreeze all tasks in the container:

	# echo THAWED > /containers/0/freezer.state
	# cat /containers/0/freezer.state
	THAWED

This is the basic mechanism which should do the right thing for user space
tasks in a simple scenario.

It's important to note that freezing can be incomplete. In that case we
return EBUSY. This means that some tasks in the cgroup are busy doing
something that prevents us from completely freezing the cgroup at this time.
After EBUSY, the cgroup will remain partially frozen -- reflected by
freezer.state reporting "FREEZING" when read. The state will remain
"FREEZING" until one of these things happens (a small code sketch of the
retry loop follows the list):

	1) Userspace cancels the freezing operation by writing "THAWED" to
	   the freezer.state file
	2) Userspace retries the freezing operation by writing "FROZEN" to
	   the freezer.state file (writing "FREEZING" is not legal
	   and returns EIO)
	3) The tasks that blocked the cgroup from entering the "FROZEN"
	   state disappear from the cgroup's set of tasks.
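
To make the retry logic concrete, here is a small added C sketch (not part
of the patch) of how a batch manager might drive the interface; the path
/containers/0/freezer.state is the hypothetical one from the examples above:

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* keep writing "FROZEN" until freezer.state actually reads back "FROZEN";
   a partial freeze leaves the state at "FREEZING" (the write fails with
   EBUSY), so we simply retry as described above */
static int freeze_container(const char *state_file)
{
	char state[16];
	FILE *f;

	for (;;) {
		f = fopen(state_file, "w");
		if (!f)
			return -1;
		fputs("FROZEN", f);
		fclose(f);

		f = fopen(state_file, "r");
		if (!f)
			return -1;
		if (!fgets(state, sizeof(state), f))
			state[0] = '\0';
		fclose(f);

		if (strncmp(state, "FROZEN", 6) == 0)
			return 0;
		sleep(1);
	}
}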
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 9b53d5827361..1c07547d3f81 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
 
 2.2.1 Accounting details
 
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(Some pages which can never be reclaimed, and which will not be on the global
+ LRU, are not accounted; we only account pages under usual VM management.)
+
+RSS pages are accounted at page fault time unless they've already been
+accounted for earlier. A file page will be accounted for as Page Cache when
+it's inserted into the inode (radix-tree). While it's mapped into the page
+tables of processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A Page Cache page is
+unaccounted when it's removed from the radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we only account pages on the LRU, because our purpose is to control
+the amount of used pages; pages not on the LRU tend to be out of the VM's
+control.
 
 2.3 Shared Page Accounting
 
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 47e568a9370a..5c86c258c791 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
 Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup.txt.
+Documentation/cgroups/cgroups.txt.
 
 Requests by a task, using the sched_setaffinity(2) system call to
 include CPUs in its CPU affinity mask, and using the mbind(2) and
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 295f26cd895a..9dd2a3bb2acc 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
 errors=continue		Keep going on a filesystem error.
 errors=panic		Panic and halt the machine if an error occurs.
 
+data_err=ignore(*)	Just print an error message if an error occurs
+			in a file data buffer in ordered mode.
+data_err=abort		Abort the journal if an error occurs in a file
+			data buffer in ordered mode.
+
 grpid			Give objects the same group ID as their creator.
 bsdgroups
 
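
As an added usage illustration (the device and mount point below are made-up
examples, not from the patch), the new option is passed like any other ext3
mount option, here via mount(2):

#include <sys/mount.h>

/* equivalent to: mount -o data_err=abort /dev/sdb1 /mnt */
static int mount_abort_on_data_err(void)
{
	return mount("/dev/sdb1", "/mnt", "ext3", 0, "data_err=abort");
}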
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c032bf39e8b9..bcceb99b81dd 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
 dirty_background_ratio
 ----------------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-the pdflush background writeback daemon will start writing out dirty data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which the pdflush background writeback daemon will start writing out
+dirty data.
 
 dirty_ratio
 -----------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-a process which is generating disk writes will itself start writing out dirty
-data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which a process which is generating disk writes will itself start
+writing out dirty data.
 
 dirty_writeback_centisecs
 -------------------------
@@ -2412,24 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
 of memory types. If a bit of the bitmask is set, memory segments of the
 corresponding memory type are dumped, otherwise they are not dumped.
 
-The following 4 memory types are supported:
+The following 7 memory types are supported:
   - (bit 0) anonymous private memory
   - (bit 1) anonymous shared memory
   - (bit 2) file-backed private memory
   - (bit 3) file-backed shared memory
   - (bit 4) ELF header pages in file-backed private memory areas (it is
             effective only if the bit 2 is cleared)
+  - (bit 5) hugetlb private memory
+  - (bit 6) hugetlb shared memory
 
   Note that MMIO pages such as frame buffer are never dumped and vDSO pages
   are always dumped regardless of the bitmask status.
 
-Default value of coredump_filter is 0x3; this means all anonymous memory
-segments are dumped.
+  Note that bits 0-4 don't affect hugetlb memory; hugetlb memory is only
+  affected by bits 5-6.
+
+The default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
 
 If you don't want to dump all shared memory segments attached to pid 1234,
-write 1 to the process's proc file.
+write 0x21 to the process's proc file.
 
-  $ echo 0x1 > /proc/1234/coredump_filter
+  $ echo 0x21 > /proc/1234/coredump_filter
 
 When a new process is created, the process inherits the bitmask status from its
 parent. It is useful to set up coredump_filter before the program runs.
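
As an added illustration (not part of the patch), a parent can also set the
mask programmatically before running a child, relying on the inheritance
behaviour just described; a minimal C sketch:

#include <stdio.h>

/* write the mask to our own coredump_filter; children forked afterwards
   inherit it. 0x23 is the default described above (anonymous memory plus
   hugetlb private memory). */
static int set_coredump_filter(unsigned long mask)
{
	FILE *f = fopen("/proc/self/coredump_filter", "w");

	if (!f)
		return -1;
	fprintf(f, "0x%lx", mask);
	return fclose(f);
}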
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bcecfaa1e770..0f1544f67400 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -690,7 +690,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See Documentation/block/as-iosched.txt and
 			Documentation/block/deadline-iosched.txt for details.
 
-	elfcorehdr=	[X86-32, X86_64]
+	elfcorehdr=	[IA64,PPC,SH,X86-32,X86_64]
 			Specifies physical address of start of kernel core
 			image elf header. Generally kexec loader will
 			pass this option to capture kernel.
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644
index 000000000000..bdf93b7f0f24
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.txt
@@ -0,0 +1,714 @@
Introduction
============

Having looked at the linux mtd/nand driver and more specifically at nand_ecc.c
I felt there was room for optimisation. I bashed the code for a few hours,
performing tricks like table lookup, removing superfluous code, etc.
After that the speed was increased by 35-40%.
Still I was not too happy as I felt there was additional room for improvement.

Bad! I was hooked.
I decided to annotate my steps in this file. Perhaps it is useful to someone,
or someone learns something from it.


The problem
===========

NAND flash (at least SLC NAND) typically has sectors of 256 bytes.
However NAND flash is not extremely reliable, so some error detection
(and sometimes correction) is needed.

This is done by means of a Hamming code. I'll try to explain it in
layman's terms (and apologies to all the pros in the field in case I do
not use the right terminology; my coding theory class was almost 30
years ago, and I must admit it was not one of my favourites).

As I said before, the ecc calculation is performed on sectors of 256
bytes. This is done by calculating several parity bits over the rows and
columns. The parity used is even parity, which means that the parity bit = 1
if the number of 1 bits in the data over which the parity is calculated is
odd, and the parity bit = 0 if that number is even. So the total number of
1 bits in the data plus the parity bit is even. (See Wikipedia if you can't
follow this.)
Parity is often calculated by means of an exclusive or operation,
sometimes also referred to as xor. In C the operator for xor is ^.
34Parity is often calculated by means of an exclusive or operation,
35sometimes also referred to as xor. In C the operator for xor is ^
36
37Back to ecc.
38Let's give a small figure:
39
40byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14
41byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14
42byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14
43byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14
44byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14
45....
46byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15
47byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
48 cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0
49 cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2
50 cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
51
52This figure represents a sector of 256 bytes.
53cp is my abbreviaton for column parity, rp for row parity.
54
55Let's start to explain column parity.
56cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
57so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
58Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
59cp2 is the parity over bit0, bit1, bit4 and bit5
60cp3 is the parity over bit2, bit3, bit6 and bit7.
61cp4 is the parity over bit0, bit1, bit2 and bit3.
62cp5 is the parity over bit4, bit5, bit6 and bit7.
63Note that each of cp0 .. cp5 is exactly one bit.
64
65Row parity actually works almost the same.
66rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
67rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
68rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
69(so handle two bytes, then skip 2 bytes).
70rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
71for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
72so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
73and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
74The story now becomes quite boring. I guess you get the idea.
75rp6 covers 8 bytes then skips 8 etc
76rp7 skips 8 bytes then covers 8 etc
77rp8 covers 16 bytes then skips 16 etc
78rp9 skips 16 bytes then covers 16 etc
79rp10 covers 32 bytes then skips 32 etc
80rp11 skips 32 bytes then covers 32 etc
81rp12 covers 64 bytes then skips 64 etc
82rp13 skips 64 bytes then covers 64 etc
83rp14 covers 128 bytes then skips 128
84rp15 skips 128 bytes then covers 128
85
86In the end the parity bits are grouped together in three bytes as
87follows:
88ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
89ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00
90ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08
91ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1
92
93I detected after writing this that ST application note AN1823
94(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
95nicer picture.(but they use line parity as term where I use row parity)
96Oh well, I'm graphically challenged, so suffer with me for a moment :-)
97And I could not reuse the ST picture anyway for copyright reasons.
98
99
100Attempt 0
101=========
102
103Implementing the parity calculation is pretty simple.
104In C pseudocode:
105for (i = 0; i < 256; i++)
106{
107 if (i & 0x01)
108 rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
109 else
110 rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
111 if (i & 0x02)
112 rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
113 else
114 rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
115 if (i & 0x04)
116 rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
117 else
118 rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
119 if (i & 0x08)
120 rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
121 else
122 rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
123 if (i & 0x10)
124 rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
125 else
126 rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
127 if (i & 0x20)
128 rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
129 else
130 rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
131 if (i & 0x40)
132 rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
133 else
134 rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
135 if (i & 0x80)
136 rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
137 else
138 rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
139 cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
140 cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
141 cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
142 cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
143 cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
144 cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
145}
146
147
148Analysis 0
149==========
150
151C does have bitwise operators but not really operators to do the above
152efficiently (and most hardware has no such instructions either).
153Therefore without implementing this it was clear that the code above was
154not going to bring me a Nobel prize :-)
155
156Fortunately the exclusive or operation is commutative, so we can combine
157the values in any order. So instead of calculating all the bits
158individually, let us try to rearrange things.
159For the column parity this is easy. We can just xor the bytes and in the
160end filter out the relevant bits. This is pretty nice as it will bring
161all cp calculation out of the if loop.
162
163Similarly we can first xor the bytes for the various rows.
164This leads to:
165
166
167Attempt 1
168=========
169
170const char parity[256] = {
171 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
172 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
173 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
174 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
175 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
176 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
177 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
178 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
179 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
180 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
181 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
182 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
183 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
184 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
185 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
186 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
187};
188
189void ecc1(const unsigned char *buf, unsigned char *code)
190{
191 int i;
192 const unsigned char *bp = buf;
193 unsigned char cur;
194 unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
195 unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
196 unsigned char par;
197
198 par = 0;
199 rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
200 rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
201 rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
202 rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
203
204 for (i = 0; i < 256; i++)
205 {
206 cur = *bp++;
207 par ^= cur;
208 if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
209 if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
210 if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
211 if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
212 if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
213 if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
214 if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
215 if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
216 }
217 code[0] =
218 (parity[rp7] << 7) |
219 (parity[rp6] << 6) |
220 (parity[rp5] << 5) |
221 (parity[rp4] << 4) |
222 (parity[rp3] << 3) |
223 (parity[rp2] << 2) |
224 (parity[rp1] << 1) |
225 (parity[rp0]);
226 code[1] =
227 (parity[rp15] << 7) |
228 (parity[rp14] << 6) |
229 (parity[rp13] << 5) |
230 (parity[rp12] << 4) |
231 (parity[rp11] << 3) |
232 (parity[rp10] << 2) |
233 (parity[rp9] << 1) |
234 (parity[rp8]);
235 code[2] =
236 (parity[par & 0xf0] << 7) |
237 (parity[par & 0x0f] << 6) |
238 (parity[par & 0xcc] << 5) |
239 (parity[par & 0x33] << 4) |
240 (parity[par & 0xaa] << 3) |
241 (parity[par & 0x55] << 2);
242 code[0] = ~code[0];
243 code[1] = ~code[1];
244 code[2] = ~code[2];
245}
246
247Still pretty straightforward. The last three invert statements are there to
248give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
249all data is 0xff, so the checksum then matches.
250
251I also introduced the parity lookup. I expected this to be the fastest
252way to calculate the parity, but I will investigate alternatives later
253on.
254
255
256Analysis 1
257==========
258
259The code works, but is not terribly efficient. On my system it took
260almost 4 times as much time as the linux driver code. But hey, if it was
261*that* easy this would have been done long before.
262No pain. no gain.
263
264Fortunately there is plenty of room for improvement.
265
266In step 1 we moved from bit-wise calculation to byte-wise calculation.
267However in C we can also use the unsigned long data type and virtually
268every modern microprocessor supports 32 bit operations, so why not try
269to write our code in such a way that we process data in 32 bit chunks.
270
271Of course this means some modification as the row parity is byte by
272byte. A quick analysis:
273for the column parity we use the par variable. When extending to 32 bits
274we can in the end easily calculate p0 and p1 from it.
275(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
276respectively)
277also rp2 and rp3 can be easily retrieved from par as rp3 covers the
278first two bytes and rp2 the last two bytes.
279
280Note that of course now the loop is executed only 64 times (256/4).
281And note that care must taken wrt byte ordering. The way bytes are
282ordered in a long is machine dependent, and might affect us.
283Anyway, if there is an issue: this code is developed on x86 (to be
284precise: a DELL PC with a D920 Intel CPU)
285
286And of course the performance might depend on alignment, but I expect
287that the I/O buffers in the nand driver are aligned properly (and
288otherwise that should be fixed to get maximum performance).
289
290Let's give it a try...
291
292
293Attempt 2
294=========
295
296extern const char parity[256];
297
298void ecc2(const unsigned char *buf, unsigned char *code)
299{
300 int i;
301 const unsigned long *bp = (unsigned long *)buf;
302 unsigned long cur;
303 unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
304 unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
305 unsigned long par;
306
307 par = 0;
308 rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
309 rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
310 rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
311 rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
312
313 for (i = 0; i < 64; i++)
314 {
315 cur = *bp++;
316 par ^= cur;
317 if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
318 if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
319 if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
320 if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
321 if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
322 if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
323 }
324 /*
325 we need to adapt the code generation for the fact that rp vars are now
326 long; also the column parity calculation needs to be changed.
327 we'll bring rp4 to 15 back to single byte entities by shifting and
328 xoring
329 */
330 rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
331 rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
332 rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
333 rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
334 rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
335 rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
336 rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
337 rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
338 rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
339 rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
340 rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
341 rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
342 rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
343 rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
344 par ^= (par >> 16);
345 rp1 = (par >> 8); rp1 &= 0xff;
346 rp0 = (par & 0xff);
347 par ^= (par >> 8); par &= 0xff;
348
349 code[0] =
350 (parity[rp7] << 7) |
351 (parity[rp6] << 6) |
352 (parity[rp5] << 5) |
353 (parity[rp4] << 4) |
354 (parity[rp3] << 3) |
355 (parity[rp2] << 2) |
356 (parity[rp1] << 1) |
357 (parity[rp0]);
358 code[1] =
359 (parity[rp15] << 7) |
360 (parity[rp14] << 6) |
361 (parity[rp13] << 5) |
362 (parity[rp12] << 4) |
363 (parity[rp11] << 3) |
364 (parity[rp10] << 2) |
365 (parity[rp9] << 1) |
366 (parity[rp8]);
367 code[2] =
368 (parity[par & 0xf0] << 7) |
369 (parity[par & 0x0f] << 6) |
370 (parity[par & 0xcc] << 5) |
371 (parity[par & 0x33] << 4) |
372 (parity[par & 0xaa] << 3) |
373 (parity[par & 0x55] << 2);
374 code[0] = ~code[0];
375 code[1] = ~code[1];
376 code[2] = ~code[2];
377}
378
379The parity array is not shown any more. Note also that for these
380examples I kinda deviated from my regular programming style by allowing
381multiple statements on a line, not using { } in then and else blocks
382with only a single statement and by using operators like ^=
383
384
385Analysis 2
386==========
387
388The code (of course) works, and hurray: we are a little bit faster than
389the linux driver code (about 15%). But wait, don't cheer too quickly.
390THere is more to be gained.
391If we look at e.g. rp14 and rp15 we see that we either xor our data with
392rp14 or with rp15. However we also have par which goes over all data.
393This means there is no need to calculate rp14 as it can be calculated from
394rp15 through rp14 = par ^ rp15;
395(or if desired we can avoid calculating rp15 and calculate it from
396rp14). That is why some places refer to inverse parity.
397Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
398Effectively this means we can eliminate the else clause from the if
399statements. Also we can optimise the calculation in the end a little bit
400by going from long to byte first. Actually we can even avoid the table
401lookups
402
403Attempt 3
404=========
405
406Odd replaced:
407 if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
408 if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
409 if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
410 if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
411 if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
412 if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
413with
414 if (i & 0x01) rp5 ^= cur;
415 if (i & 0x02) rp7 ^= cur;
416 if (i & 0x04) rp9 ^= cur;
417 if (i & 0x08) rp11 ^= cur;
418 if (i & 0x10) rp13 ^= cur;
419 if (i & 0x20) rp15 ^= cur;
420
421 and outside the loop added:
422 rp4 = par ^ rp5;
423 rp6 = par ^ rp7;
424 rp8 = par ^ rp9;
425 rp10 = par ^ rp11;
426 rp12 = par ^ rp13;
427 rp14 = par ^ rp15;
428
429And after that the code takes about 30% more time, although the number of
430statements is reduced. This is also reflected in the assembly code.
431
432
433Analysis 3
434==========
435
436Very weird. Guess it has to do with caching or instruction parallellism
437or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
438observation was that this one is only 30% slower (according to time)
439executing the code as my 3Ghz D920 processor.
440
441Well, it was expected not to be easy so maybe instead move to a
442different track: let's move back to the code from attempt2 and do some
443loop unrolling. This will eliminate a few if statements. I'll try
444different amounts of unrolling to see what works best.
445
446
447Attempt 4
448=========
449
450Unrolled the loop 1, 2, 3 and 4 times.
451For 4 the code starts with:
452
453 for (i = 0; i < 4; i++)
454 {
455 cur = *bp++;
456 par ^= cur;
457 rp4 ^= cur;
458 rp6 ^= cur;
459 rp8 ^= cur;
460 rp10 ^= cur;
461 if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
462 if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
463 cur = *bp++;
464 par ^= cur;
465 rp5 ^= cur;
466 rp6 ^= cur;
467 ...
468
469
470Analysis 4
471==========
472
473Unrolling once gains about 15%
474Unrolling twice keeps the gain at about 15%
475Unrolling three times gives a gain of 30% compared to attempt 2.
476Unrolling four times gives a marginal improvement compared to unrolling
477three times.
478
479I decided to proceed with a four time unrolled loop anyway. It was my gut
480feeling that in the next steps I would obtain additional gain from it.
481
482The next step was triggered by the fact that par contains the xor of all
483bytes and rp4 and rp5 each contain the xor of half of the bytes.
484So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
485that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
486eliminate rp5 (or rp4, but I already foresaw another optimisation).
487The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
488
489
490Attempt 5
491=========
492
493Effectively so all odd digit rp assignments in the loop were removed.
494This included the else clause of the if statements.
495Of course after the loop we need to correct things by adding code like:
496 rp5 = par ^ rp4;
497Also the initial assignments (rp5 = 0; etc) could be removed.
498Along the line I also removed the initialisation of rp0/1/2/3.
499
500
501Analysis 5
502==========
503
504Measurements showed this was a good move. The run-time roughly halved
505compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
506of the processor time compared to the current code in the linux kernel.
507
508However, still I thought there was more. I didn't like all the if
509statements. Why not keep a running parity and only keep the last if
510statement. Time for yet another version!
511
512
513Attempt 6
514=========
515
516THe code within the for loop was changed to:
517
518 for (i = 0; i < 4; i++)
519 {
520 cur = *bp++; tmppar = cur; rp4 ^= cur;
521 cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
522 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
523 cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
524
525 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
526 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
527 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
528 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
529
530 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
531 cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
532 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
533 cur = *bp++; tmppar ^= cur; rp8 ^= cur;
534
535 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
536 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
537 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
538 cur = *bp++; tmppar ^= cur;
539
540 par ^= tmppar;
541 if ((i & 0x1) == 0) rp12 ^= tmppar;
542 if ((i & 0x2) == 0) rp14 ^= tmppar;
543 }
544
545As you can see tmppar is used to accumulate the parity within a for
546iteration. In the last 3 statements is is added to par and, if needed,
547to rp12 and rp14.
548
549While making the changes I also found that I could exploit that tmppar
550contains the running parity for this iteration. So instead of having:
551rp4 ^= cur; rp6 = cur;
552I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next
553statement. A similar change was done for rp8 and rp10
554
555
556Analysis 6
557==========
558
559Measuring this code again showed big gain. When executing the original
560linux code 1 million times, this took about 1 second on my system.
561(using time to measure the performance). After this iteration I was back
562to 0.075 sec. Actually I had to decide to start measuring over 10
563million interations in order not to loose too much accuracy. This one
564definitely seemed to be the jackpot!
565
566There is a little bit more room for improvement though. There are three
567places with statements:
568rp4 ^= cur; rp6 ^= cur;
569It seems more efficient to also maintain a variable rp4_6 in the while
570loop; This eliminates 3 statements per loop. Of course after the loop we
571need to correct by adding:
572 rp4 ^= rp4_6;
573 rp6 ^= rp4_6
574Furthermore there are 4 sequential assingments to rp8. This can be
575encoded slightly more efficient by saving tmppar before those 4 lines
576and later do rp8 = rp8 ^ tmppar ^ notrp8;
577(where notrp8 is the value of rp8 before those 4 lines).
578Again a use of the commutative property of xor.
579Time for a new test!
580
581
582Attempt 7
583=========
584
585The new code now looks like:
586
587 for (i = 0; i < 4; i++)
588 {
589 cur = *bp++; tmppar = cur; rp4 ^= cur;
590 cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
591 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
592 cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
593
594 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
595 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
596 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
597 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
598
599 notrp8 = tmppar;
600 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
601 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
602 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
603 cur = *bp++; tmppar ^= cur;
604 rp8 = rp8 ^ tmppar ^ notrp8;
605
606 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
607 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
608 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
609 cur = *bp++; tmppar ^= cur;
610
611 par ^= tmppar;
612 if ((i & 0x1) == 0) rp12 ^= tmppar;
613 if ((i & 0x2) == 0) rp14 ^= tmppar;
614 }
615 rp4 ^= rp4_6;
616 rp6 ^= rp4_6;
617
618
619Not a big change, but every penny counts :-)
620
621
622Analysis 7
623==========
624
625Acutally this made things worse. Not very much, but I don't want to move
626into the wrong direction. Maybe something to investigate later. Could
627have to do with caching again.
628
629Guess that is what there is to win within the loop. Maybe unrolling one
630more time will help. I'll keep the optimisations from 7 for now.
631
632
633Attempt 8
634=========
635
636Unrolled the loop one more time.
637
638
639Analysis 8
640==========
641
642This makes things worse. Let's stick with attempt 6 and continue from there.
643Although it seems that the code within the loop cannot be optimised
644further there is still room to optimize the generation of the ecc codes.
645We can simply calcualate the total parity. If this is 0 then rp4 = rp5
646etc. If the parity is 1, then rp4 = !rp5;
647But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
648in the result byte and then do something like
649 code[0] |= (code[0] << 1);
650Lets test this.
651
652
653Attempt 9
654=========
655
656Changed the code but again this slightly degrades performance. Tried all
657kind of other things, like having dedicated parity arrays to avoid the
658shift after parity[rp7] << 7; No gain.
659Change the lookup using the parity array by using shift operators (e.g.
660replace parity[rp7] << 7 with:
661rp7 ^= (rp7 << 4);
662rp7 ^= (rp7 << 2);
663rp7 ^= (rp7 << 1);
664rp7 &= 0x80;
665No gain.
666
667The only marginal change was inverting the parity bits, so we can remove
668the last three invert statements.
669
670Ah well, pity this does not deliver more. Then again 10 million
671iterations using the linux driver code takes between 13 and 13.5
672seconds, whereas my code now takes about 0.73 seconds for those 10
673million iterations. So basically I've improved the performance by a
674factor 18 on my system. Not that bad. Of course on different hardware
675you will get different results. No warranties!
676
677But of course there is no such thing as a free lunch. The codesize almost
678tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
679
680
Correcting errors
=================

For correcting errors I again used the ST application note as a starter,
but I also peeked at the existing code.
The algorithm itself is pretty straightforward. Just xor the given and
the calculated ecc. If all bytes are 0 there is no problem. If exactly 11
bits are 1 we have one correctable bit error. If exactly 1 bit is 1, we
have an error in the given ecc code. In all other cases there are multiple
bit errors, which cannot be corrected.
It proved to be fastest to do some table lookups. The performance gain
introduced by this is about a factor of 2 on my system when a repair had to
be done, and 1% or so if no repair had to be done.
Code size increased from 330 bytes to 686 bytes for this function.
(gcc 4.2, -O3)
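
To make the description concrete, here is an added sketch of the correction
step in C. It assumes the rp/cp layout from the figure earlier; the helper
names (countbits, correct_data) are illustrative only, and the real
nand_ecc.c uses table lookups rather than the bit-counting loop shown here.

static int countbits(unsigned int n)
{
	int bits = 0;

	for (; n; n >>= 1)
		bits += n & 1;
	return bits;
}

/* returns 0: data ok, 1: one bit repaired (or ecc damaged), -1: uncorrectable */
static int correct_data(unsigned char *buf, const unsigned char *read_ecc,
			const unsigned char *calc_ecc)
{
	unsigned char d0 = read_ecc[0] ^ calc_ecc[0];	/* rp0 .. rp7  */
	unsigned char d1 = read_ecc[1] ^ calc_ecc[1];	/* rp8 .. rp15 */
	unsigned char d2 = read_ecc[2] ^ calc_ecc[2];	/* cp0 .. cp5  */
	int nbits = countbits((d0 << 16) | (d1 << 8) | d2);
	unsigned int byte, bit;

	if (nbits == 0)
		return 0;		/* no error at all */
	if (nbits == 11) {		/* a single correctable bit error */
		/* the odd rp/cp bits of the difference form the address */
		byte = (((d0 >> 1) & 1) << 0) | (((d0 >> 3) & 1) << 1) |
		       (((d0 >> 5) & 1) << 2) | (((d0 >> 7) & 1) << 3) |
		       (((d1 >> 1) & 1) << 4) | (((d1 >> 3) & 1) << 5) |
		       (((d1 >> 5) & 1) << 6) | (((d1 >> 7) & 1) << 7);
		bit = (((d2 >> 3) & 1) << 0) | (((d2 >> 5) & 1) << 1) |
		      (((d2 >> 7) & 1) << 2);
		buf[byte] ^= 1 << bit;	/* flip the failing bit back */
		return 1;
	}
	if (nbits == 1)
		return 1;		/* the error was in the ecc itself */
	return -1;			/* multiple bit errors: uncorrectable */
}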


Conclusion
==========

The gain when calculating the ecc is tremendous. On my development hardware
a speedup of a factor of 18 for ecc calculation was achieved. In a test on an
embedded system with a MIPS core a factor of 7 was obtained.
In a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
of 5 (big endian mode, gcc 4.1.2, -O3).
For correction not much gain could be obtained (as bitflips are rare). Then
again there are also far fewer cycles spent there.

It seems there is not much more gain possible in this, at least when
programmed in C. Of course it might be possible to squeeze something more
out of it with an assembly program, but due to pipeline behaviour etc. this
is very tricky (at least for Intel hardware).

Author: Frans Meulenbroeks
Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa065..49378a9f2b5f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
 
 'p'	- Will dump the current registers and flags to your console.
 
-'q'	- Will dump a list of all running timers.
+'q'	- Will dump a list of all running hrtimers.
+	  WARNING: Does not cover any other timers
 
 'r'	- Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644
index 000000000000..125eed560e5a
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.txt
@@ -0,0 +1,615 @@
1
2This document describes the Linux memory management "Unevictable LRU"
3infrastructure and the use of this infrastructure to manage several types
4of "unevictable" pages. The document attempts to provide the overall
5rationale behind this mechanism and the rationale for some of the design
6decisions that drove the implementation. The latter design rationale is
7discussed in the context of an implementation description. Admittedly, one
8can obtain the implementation details--the "what does it do?"--by reading the
9code. One hopes that the descriptions below add value by provide the answer
10to "why does it do that?".
11
12Unevictable LRU Infrastructure:
13
14The Unevictable LRU adds an additional LRU list to track unevictable pages
15and to hide these pages from vmscan. This mechanism is based on a patch by
16Larry Woodman of Red Hat to address several scalability problems with page
17reclaim in Linux. The problems have been observed at customer sites on large
18memory x86_64 systems. For example, a non-numal x86_64 platform with 128GB
19of main memory will have over 32 million 4k pages in a single zone. When a
20large fraction of these pages are not evictable for any reason [see below],
21vmscan will spend a lot of time scanning the LRU lists looking for the small
22fraction of pages that are evictable. This can result in a situation where
23all cpus are spending 100% of their time in vmscan for hours or days on end,
24with the system completely unresponsive.
25
26The Unevictable LRU infrastructure addresses the following classes of
27unevictable pages:
28
29+ page owned by ramfs
30+ page mapped into SHM_LOCKed shared memory regions
31+ page mapped into VM_LOCKED [mlock()ed] vmas
32
33The infrastructure might be able to handle other conditions that make pages
34unevictable, either by definition or by circumstance, in the future.
35
36
37The Unevictable LRU List
38
39The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
40called the "unevictable" list and an associated page flag, PG_unevictable, to
41indicate that the page is being managed on the unevictable list. The
42PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
43flag in that it indicates on which LRU list a page resides when PG_lru is set.
44The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
45Kconfig option.
46
47The Unevictable LRU infrastructure maintains unevictable pages on an additional
48LRU list for a few reasons:
49
501) We get to "treat unevictable pages just like we treat other pages in the
51 system, which means we get to use the same code to manipulate them, the
52 same code to isolate them (for migrate, etc.), the same code to keep track
53 of the statistics, etc..." [Rik van Riel]
54
552) We want to be able to migrate unevictable pages between nodes--for memory
56 defragmentation, workload management and memory hotplug. The linux kernel
57 can only migrate pages that it can successfully isolate from the lru lists.
58 If we were to maintain pages somewhere other than on an lru-like list,
59 where they can be found by isolate_lru_page(), we would prevent their
60 migration, unless we reworked migration code to find the unevictable pages.
61
62
63The unevictable LRU list does not differentiate between file backed and swap
64backed [anon] pages. This differentiation is only important while the pages
65are, in fact, evictable.
66
67The unevictable LRU list benefits from the "arrayification" of the per-zone
68LRU lists and statistics originally proposed and posted by Christoph Lameter.
69
70The unevictable list does not use the lru pagevec mechanism. Rather,
71unevictable pages are placed directly on the page's zone's unevictable
72list under the zone lru_lock. The reason for this is to prevent stranding
73of pages on the unevictable list when one task has the page isolated from the
74lru and other tasks are changing the "evictability" state of the page.
75
76
77Unevictable LRU and Memory Controller Interaction
78
79The memory controller data structure automatically gets a per zone unevictable
80lru list as a result of the "arrayification" of the per-zone LRU lists. The
81memory controller tracks the movement of pages to and from the unevictable list.
82When a memory control group comes under memory pressure, the controller will
83not attempt to reclaim pages on the unevictable list. This has a couple of
84effects. Because the pages are "hidden" from reclaim on the unevictable list,
85the reclaim process can be more efficient, dealing only with pages that have
86a chance of being reclaimed. On the other hand, if too many of the pages
87charged to the control group are unevictable, the evictable portion of the
88working set of the tasks in the control group may not fit into the available
89memory. This can cause the control group to thrash or to oom-kill tasks.
90
91
92Unevictable LRU: Detecting Unevictable Pages
93
94The function page_evictable(page, vma) in vmscan.c determines whether a
95page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
96page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
97address space using a wrapper function. Wrapper functions are used to set,
98clear and test the flag to reduce the requirement for #ifdef's throughout the
99source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
100This flag remains for the life of the inode.
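
For illustration, the wrappers amount to little more than bit operations on
the mapping's flags word. [A condensed sketch; see pagemap.h for the real
definitions.]

	#ifdef CONFIG_UNEVICTABLE_LRU
	static inline void mapping_set_unevictable(struct address_space *mapping)
	{
		set_bit(AS_UNEVICTABLE, &mapping->flags);
	}
	static inline void mapping_clear_unevictable(struct address_space *mapping)
	{
		clear_bit(AS_UNEVICTABLE, &mapping->flags);
	}
	static inline int mapping_unevictable(struct address_space *mapping)
	{
		return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
	}
	#else
	/* compiled out: the test collapses to a constant, so callers
	 * need no #ifdefs of their own */
	static inline int mapping_unevictable(struct address_space *mapping)
	{
		return 0;
	}
	#endif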
101
102For shared memory regions, AS_UNEVICTABLE is set when an application
103successfully SHM_LOCKs the region and is removed when the region is
104SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
105tables for the region as does, for example, mlock(). So, we make no special
106effort to push any pages in the SHM_LOCKed region to the unevictable list.
107Vmscan will do this when/if it encounters the pages during reclaim. On
108SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
109unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
110region is destroyed, the pages are also "rescued" from the unevictable list in
111the process of freeing them.
112
113page_evictable() detects mlock()ed pages by testing an additional page flag,
114PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a
115non-NULL vma is supplied, page_evictable() will check whether the vma is
116VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and
117update the appropriate statistics if the vma is VM_LOCKED. This method allows
118efficient "culling" of pages in the fault path that are being faulted in to
119VM_LOCKED vmas.
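
Putting the two tests together, page_evictable() reduces to something like
the following sketch [simplified from the description above, not the verbatim
source]:

	static int page_evictable(struct page *page, struct vm_area_struct *vma)
	{
		if (mapping_unevictable(page_mapping(page)))
			return 0;	/* ramfs or SHM_LOCKed mapping */

		if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
			return 0;	/* mlocked, possibly just culled */

		return 1;
	}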
120
121
122Unevictable Pages and Vmscan [shrink_*_list()]
123
124If unevictable pages are culled in the fault path, or moved to the unevictable
125list at mlock() or mmap() time, vmscan will never encounter the pages until
126they have become evictable again, for example, via munlock() and have been
127"rescued" from the unevictable list. However, there may be situations where we
128decide, for the sake of expediency, to leave an unevictable page on one of the
129regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for
130such pages in all of the shrink_{active|inactive|page}_list() functions and
131will "cull" such pages that it encounters--that is, it diverts those pages to
132the unevictable list for the zone being scanned.
133
134There may be situations where a page is mapped into a VM_LOCKED vma, but the
135page is not marked as PageMlocked. Such pages will make it all the way to
136shrink_page_list() where they will be detected when vmscan walks the reverse
137map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
138will cull the page at that point.
139
140Note that for anonymous pages, shrink_page_list() attempts to add the page to
141the swap cache before it tries to unmap the page. To avoid this unnecessary
142consumption of swap space, shrink_page_list() calls try_to_munlock() to check
143whether any VM_LOCKED vmas map the page without attempting to unmap the page.
144If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
145without consuming swap space. try_to_munlock() will be described below.
146
147To "cull" an unevictable page, vmscan simply puts the page back on the lru
148list using putback_lru_page()--the inverse operation to isolate_lru_page()--
149after dropping the page lock. Because the condition which makes the page
150unevictable may change once the page is unlocked, putback_lru_page() will
151recheck the unevictable state of a page that it places on the unevictable lru
152list. If the page has become evictable again, putback_lru_page() removes it
153from the list and retries, including the page_evictable() test. Because such
154a race is a rare event and movement of pages onto the unevictable list should
155be rare, these extra evictability checks should not occur in the majority of
156calls to putback_lru_page().
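
In pseudocode, the retry described above looks roughly like this [reference
counting and statistics are elided, and helper names such as
add_page_to_unevictable_list() are illustrative]:

	void putback_lru_page(struct page *page)
	{
	redo:
		if (page_evictable(page, NULL)) {
			ClearPageUnevictable(page);
			lru_cache_add_lru(page, page_lru(page));
		} else {
			SetPageUnevictable(page);
			add_page_to_unevictable_list(page);
		}

		/* The page is not locked here, so its state may already
		 * have changed. If we put it on the unevictable list but
		 * it has since become evictable, pull it back off and
		 * redo the decision. */
		if (PageUnevictable(page) && page_evictable(page, NULL) &&
		    isolate_lru_page(page) == 0)
			goto redo;
	}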
157
158
159Mlocked Pages: Prior Work
160
161The "Unevictable Mlocked Pages" infrastructure is based on work originally
162posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
163Nick posted his patch as an alternative to a patch posted by Christoph
164Lameter to achieve the same objective--hiding mlocked pages from vmscan.
165In Nick's patch, he used one of the struct page lru list link fields as a count
166of VM_LOCKED vmas that map the page. This use of the link field for a count
167prevented the management of the pages on an LRU list. Thus, mlocked pages were
168not migratable as isolate_lru_page() could not find them and the lru list link
169field was not available to the migration subsystem. Nick resolved this by
170putting mlocked pages back on the lru list before attempting to isolate them,
171thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated
172with the Unevictable LRU work, the count was replaced by walking the reverse
173map to determine whether any VM_LOCKED vmas mapped the page. More on this
174below.
175
176
177Mlocked Pages: Basic Management
178
179Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
180unevictable pages. When such a page has been "noticed" by the memory
181management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
182flag. A PageMlocked() page will be placed on the unevictable LRU list when
183it is added to the LRU. Pages can be "noticed" by memory management in
184several places:
185
1861) in the mlock()/mlockall() system call handlers.
1872) in the mmap() system call handler when mmap()ing a region with the
188 MAP_LOCKED flag, or mmap()ing a region in a task that has called
189 mlockall() with the MCL_FUTURE flag. Both of these conditions result
190 in the VM_LOCKED flag being set for the vma.
1913) in the fault path, if mlocked pages are "culled" there, and when a
192 VM_LOCKED stack segment is expanded.
1934) as mentioned above, in vmscan:shrink_page_list() when attempting to
194 reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
195
196Mlocked pages become unlocked and rescued from the unevictable list when:
197
1981) mapped in a range unlocked via the munlock()/munlockall() system calls.
1992) munmap()ed out of the last VM_LOCKED vma that maps the page, including
200 unmapping at task exit.
2013) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
2024) before a page is COWed in a VM_LOCKED vma.
203
204
205Mlocked Pages: mlock()/mlockall() System Call Handling
206
207Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
208for each vma in the range specified by the call. In the case of mlockall(),
209this is the entire active address space of the task. Note that mlock_fixup()
210is used for both mlock()ing and munlock()ing a range of memory. A call to
211mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
212is treated as a no-op--mlock_fixup() simply returns.
213
214If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas"
215below, mlock_fixup() will attempt to merge the vma with its neighbors or split
216off a subset of the vma if the range does not cover the entire vma. Once the
217vma has been merged, split, or left as-is, mlock_fixup() will call
218__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
219to mark the pages as mlocked via mlock_vma_page().
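
The overall flow can be sketched as follows. [The argument list and the
merge/split details are abbreviated; the flag passed to
__mlock_vma_pages_range() selects mlock versus munlock processing, as
described in the munlock section below.]

	static int mlock_fixup(struct vm_area_struct *vma,
			       unsigned long start, unsigned long end,
			       unsigned int newflags)
	{
		if (vma->vm_flags == newflags)
			return 0;	/* already in the requested state */

		/* merge with neighbors via vma_merge(), or split_vma()
		 * so that [start, end) exactly covers one vma */

		return __mlock_vma_pages_range(vma, start, end,
					       newflags & VM_LOCKED);
	}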
220
221Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
222get_user_pages() will be unable to fault in the pages. That's OK. If pages
223do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
224fault path or in vmscan.
225
226Also note that a page returned by get_user_pages() could be truncated or
227migrated out from under us, while we're trying to mlock it. To detect
228this, __mlock_vma_pages_range() tests the page_mapping after acquiring
229the page lock. If the page is still associated with its mapping, we'll
230go ahead and call mlock_vma_page(). If the mapping is gone, we just
231unlock the page and move on. Worst case, this results in a page mapped
232in a VM_LOCKED vma remaining on a normal LRU list without being
233PageMlocked(). Again, vmscan will detect and cull such pages.
234
235mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
236TestSetPageMlocked() for each page returned by get_user_pages(). We use
237TestSetPageMlocked() because the page might already be mlocked by another
238task/vma and we don't want to do extra work. We especially do not want to
239count an mlocked page more than once in the statistics. If the page was
240already mlocked, mlock_vma_page() is done.
241
242If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
243page from the LRU, as it is likely on the appropriate active or inactive list
244at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
245putback the page--putback_lru_page()--which will notice that the page is now
246mlocked and divert the page to the zone's unevictable LRU list. If
247mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
248it later if/when it attempts to reclaim the page.
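
A condensed sketch of mlock_vma_page() [simplified; the page must be locked
on entry, and the statistics counter name is illustrative]:

	static void mlock_vma_page(struct page *page)
	{
		BUG_ON(!PageLocked(page));

		if (!TestSetPageMlocked(page)) {
			/* first locker: count the page exactly once */
			inc_zone_page_state(page, NR_MLOCK);
			if (isolate_lru_page(page) == 0)
				/* putback now diverts the page to the
				 * zone's unevictable list */
				putback_lru_page(page);
		}
	}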
249
250
251Mlocked Pages: Filtering Special Vmas
252
253mlock_fixup() filters several classes of "special" vmas:
254
2551) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
256 these mappings are inherently pinned, so we don't need to mark them as
257 mlocked. In any case, most of these pages have no struct page in which
258 to mark them. Because of this, get_user_pages() will fail for these
259 vmas, so there is no sense in attempting to visit them.
260
2612) vmas mapping hugetlbfs pages are already effectively pinned into memory.
262 We don't need nor want to mlock() these pages. However, to preserve the
263 prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
264 will call make_pages_present() in the hugetlbfs vma range to allocate the
265 huge pages and populate the ptes.
266
2673) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
268 kernel pages, such as the vdso page, relay channel pages, etc. These pages
269 are inherently unevictable and are not managed on the LRU lists.
270 mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls
271 make_pages_present() to populate the ptes.
272
273Note that for all of these special vmas, mlock_fixup() does not set the
274VM_LOCKED flag. Therefore, we won't have to deal with them later during
275munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
276account these vmas against the task's "locked_vm".
277
278Mlocked Pages: Downgrading the Mmap Semaphore
279
280mlock_fixup() must be called with the mmap semaphore held for write, because
281it may have to merge or split vmas. However, mlocking a large region of
282memory can take a long time--especially if vmscan must reclaim pages to
283satisfy the region's requirements. Faulting in a large region with the mmap
284semaphore held for write can hold off other faults on the address space, in
285the case of a multi-threaded task. It can also hold off scans of the task's
286address space via /proc. While testing under heavy load, it was observed that
287the ps(1) command could be held off for many minutes while a large segment was
288mlock()ed down.
289
290To address this issue, and to make the system more responsive during mlock()ing
291of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
292during the call to __mlock_vma_pages_range(). This works fine. However, the
293callers of mlock_fixup() expect the semaphore to be returned in write mode.
294So, mlock_fixup() "upgrades" the semaphore to write mode. Linux does not
295support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
296and reacquire it in write mode. In a multi-threaded task, it is possible for
297the task memory map to change while the semaphore is dropped. Therefore,
298mlock_fixup() looks up the vma at the range start address after reacquiring
299the semaphore in write mode and verifies that it still covers the original
300range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
301mlock_fixup() have been changed to deal with this new error condition.
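
In outline, the dance looks like this [a sketch assuming local variables mm,
vma, start, end and ret; the real code threads error handling through each
step]:

	downgrade_write(&mm->mmap_sem);		/* write -> read */
	ret = __mlock_vma_pages_range(vma, start, end, 1);
	up_read(&mm->mmap_sem);

	/* no atomic upgrade primitive: reacquire for write, revalidate */
	down_write(&mm->mmap_sem);
	vma = find_vma(mm, start);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		ret = -EAGAIN;	/* the map changed while we slept */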
302
303Note: when munlocking a region, all of the pages should already be resident--
304unless we have racing threads mlock()ing and munlock()ing regions. So,
305unlocking should not have to wait for page allocations nor faults of any kind.
306Therefore mlock_fixup() does not downgrade the semaphore for munlock().
307
308
309Mlocked Pages: munlock()/munlockall() System Call Handling
310
311The munlock() and munlockall() system calls are handled by the same functions--
312do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
313vs lock operation indicated by an argument. So, these system calls are also
314handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
315mlock_fixup() simply returns. Because of the vma filtering discussed above,
316VM_LOCKED will not be set in any "special" vmas. So, these vmas will be
317ignored for munlock.
318
319If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
320the specified range. The range is then munlocked via the function
321__mlock_vma_pages_range()--the same function used to mlock a vma range--
322passing a flag to indicate that munlock() is being performed.
323
324Because the vma access protections could have been changed to PROT_NONE after
325faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
326these pages for munlocking. Because we don't want to leave pages mlocked,
327get_user_pages() was enhanced to accept a flag to ignore the permissions when
328fetching the pages--all of which should be resident as a result of previous
329mlock()ing.
330
331For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
332munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
333flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page()
334uses the Test*PageMlocked() function to handle the case where the page might
335have already been unlocked by another task. If the page was mlocked,
336munlock_vma_page() updates the zone statistics for the number of mlocked
337pages. Note, however, that at this point we haven't checked whether the page
338is mapped by other VM_LOCKED vmas.
339
340We can't call try_to_munlock(), the function that walks the reverse map to check
341for other VM_LOCKED vmas, without first isolating the page from the LRU.
342try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
343not be on an lru list. [More on these below.] However, the call to
344isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
345So, we go ahead and clear PG_mlocked up front, as this might be the only chance
346we have. If we can successfully isolate the page, we go ahead and
347try_to_munlock(), which will restore the PG_mlocked flag and update the zone
348page statistics if it finds another vma holding the page mlocked. If we fail
349to isolate the page, we'll have left a potentially mlocked page on the LRU.
350This is fine, because we'll catch it later when/if vmscan tries to reclaim the
351page. This should be relatively rare.
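
A condensed sketch of the munlock sequence just described [simplified; the
page must be locked on entry, and the statistics counter name is
illustrative]:

	static void munlock_vma_page(struct page *page)
	{
		BUG_ON(!PageLocked(page));

		if (TestClearPageMlocked(page)) {
			dec_zone_page_state(page, NR_MLOCK);
			if (isolate_lru_page(page) == 0) {
				/* re-sets PG_mlocked and the statistics
				 * if another VM_LOCKED vma maps the page */
				try_to_munlock(page);
				putback_lru_page(page);
			}
			/* isolation failed: the page stays on a normal
			 * LRU list; vmscan will recheck it at reclaim */
		}
	}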
352
353Mlocked Pages: Migrating Them...
354
355A page that is being migrated has been isolated from the lru lists and is
356held locked across unmapping of the page, updating the page's mapping
357[address_space] entry and copying the contents and state, until the
358page table entry has been replaced with an entry that refers to the new
359page. Linux supports migration of mlocked pages and other unevictable
360pages. This involves simply moving the PageMlocked and PageUnevictable states
361from the old page to the new page.
362
363Note that page migration can race with mlocking or munlocking of the same
364page. This has been discussed from the mlock/munlock perspective in the
365respective sections above. Both processes [migration, m[un]locking] hold
366the page locked. This provides the first level of synchronization. Page
367migration zeros out the page_mapping of the old page before unlocking it,
368so m[un]lock can skip these pages by testing the page mapping under page
369lock.
370
371When completing page migration, we place the new and old pages back onto the
372lru after dropping the page lock. The "unneeded" page--old page on success,
373new page on failure--will be freed when the reference count held by the
374migration process is released. To ensure that we don't strand pages on the
375unevictable list because of a race between munlock and migration, page
376migration uses the putback_lru_page() function to add migrated pages back to
377the lru.
378
379
380Mlocked Pages: mmap(MAP_LOCKED) System Call Handling
381
382In addition to the mlock()/mlockall() system calls, an application can request
383that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
384call. Furthermore, any mmap() call, or brk() call that expands the heap, by a
385task that has previously called mlockall() with the MCL_FUTURE flag will result
386in the newly mapped memory being mlocked. Before the unevictable/mlock changes,
387the kernel simply called make_pages_present() to allocate pages and populate
388the page table.
389
390To mlock a range of memory under the unevictable/mlock infrastructure, the
391mmap() handler and task address space expansion functions call
392mlock_vma_pages_range() specifying the vma and the address range to mlock.
393mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
394"Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will
395have already been set by the caller, in filtered vmas. Thus these vmas need
396not be visited for munlock when the region is unmapped.
397
398For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
399fault/allocate the pages and mlock them. Again, like mlock_fixup(),
400mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
401attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
402back to write mode before returning.
403
404The callers of mlock_vma_pages_range() will have already added the memory
405range to be mlocked to the task's "locked_vm". To account for filtered vmas,
406mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
407callers then subtract a non-negative return value from the task's locked_vm.
408A negative return value represents an error--for example, from get_user_pages()
409attempting to fault in a vma with PROT_NONE access. In this case, we leave
410the memory range accounted as locked_vm, as the protections could be changed
411later and pages allocated into that region.
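
Caller-side, the accounting reduces to the following sketch [hypothetical
caller code illustrating the convention above]:

	mm->locked_vm += (end - start) >> PAGE_SHIFT;
	nr_failed = mlock_vma_pages_range(vma, start, end);
	if (nr_failed >= 0)
		mm->locked_vm -= nr_failed;	/* filtered, not mlocked */
	/* nr_failed < 0: an error; leave locked_vm accounted, since the
	 * protections may change and pages be faulted in later */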
412
413
414Mlocked Pages: munmap()/exit()/exec() System Call Handling
415
416When unmapping an mlocked region of memory, whether by an explicit call to
417munmap() or via an internal unmap from exit() or exec() processing, we must
418munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
419Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
420so unmapping them required no processing.
421
422To munlock a range of memory under the unevictable/mlock infrastructure, the
423munmap() handler and task address space teardown function call
424munlock_vma_pages_all(). The name reflects the observation that one always
425specifies the entire vma range when munlock()ing during unmap of a region.
426Because of the vma filtering when mlock()ing regions, only "normal" vmas that
427actually contain mlocked pages will be passed to munlock_vma_pages_all().
428
429munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
430for the munlock case, calls __munlock_vma_pages_range() to walk the page table
431for the vma's memory range and munlock_vma_page() each resident page mapped by
432the vma. This effectively munlocks the page, but only if this is the last
433VM_LOCKED vma that maps the page.
434
435
436Mlocked Pages: try_to_unmap()
437
438[Note: the code changes represented by this section are really quite small
439compared to the text needed to describe what is happening and why, and to
440discuss the implications.]
441
442Pages can, of course, be mapped into multiple vmas. Some of these vmas may
443have VM_LOCKED flag set. It is possible for a page mapped into one or more
444VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
445of the active or inactive LRU lists. This could happen if, for example, a
446task in the process of munlock()ing the page could not isolate the page from
447the LRU. As a result, vmscan/shrink_page_list() might encounter such a page
448as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To
449handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
450vmas while it is walking a page's reverse map.
451
452try_to_unmap() is always called--whether by vmscan for reclaim or for page
453migration--with the argument page locked and isolated from the LRU. BUG_ON()
454assertions enforce this requirement. Separate functions handle anonymous and
455mapped file pages, as these types of pages have different reverse map
456mechanisms.
457
458 try_to_unmap_anon()
459
460To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
461visited--at least until a VM_LOCKED vma is encountered. If the page is being
462unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
463pages are migratable. However, for reclaim, if the page is mapped into a
464VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap
465semaphore of the mm_struct to which the vma belongs in read mode. If this is
466successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
467wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
468will return SWAP_MLOCK, indicating that the page is unevictable. If the
469mmap semaphore cannot be acquired, we are not sure whether the page is really
470unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN.
471
472 try_to_unmap_file() -- linear mappings
473
474Unmapping of a mapped file page works the same, except that the scan visits
475all vmas that map the page's index/page offset in the page's mapping's
476reverse map priority search tree. It must also visit each vma in the page's
477mapping's non-linear list, if the list is non-empty. As for anonymous pages,
478on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
479attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
480returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
481
482 try_to_unmap_file() -- non-linear mappings
483
484If a page's mapping contains a non-empty non-linear mapping vma list, then
485try_to_un{map|lock}() must also visit each vma in that list to determine
486whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit
487all vmas in the non-linear list to ensure that the page is not/should not be
488mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate.
489However, there is no easy way to determine whether the page is actually mapped
490in a given vma--either for unmapping or testing whether the VM_LOCKED vma
491actually pins the page.
492
493So, try_to_unmap_file() handles non-linear mappings by scanning a certain
494number of pages--a "cluster"--in each non-linear vma associated with the page's
495mapping, for each file mapped page that vmscan tries to unmap. If this happens
496to unmap the page we're trying to unmap, try_to_unmap() will notice this on
497return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it
498will return SWAP_AGAIN, causing vmscan to recirculate this page. We take
499advantage of the cluster scan in try_to_unmap_cluster() as follows:
500
501For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
502semaphore of the associated mm_struct for read without blocking. If this
503attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
504retain the mmap semaphore for the scan; otherwise it drops it here. Then,
505for each page in the cluster, if we're holding the mmap semaphore for a locked
506vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This
507call is a no-op if the page is already locked, but will mlock any pages in
508the non-linear mapping that happen to be unlocked. If one of the pages so
509mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
510return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan
511to cull the page, rather than recirculating it on the inactive list. Again,
512if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
513SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
514couldn't be mlocked.
515
516
517Mlocked Pages: try_to_munlock() Reverse Map Scan
518
519TODO/FIXME: a better name might be page_mlocked()--analogous to the
520page_referenced() reverse map walker--especially if we continue to call this
521from shrink_page_list(). See related TODO/FIXME below.
522
523When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System
524Call Handling" above--tries to munlock a page, or when shrink_page_list()
525encounters an anonymous page that is not yet in the swap cache, they need to
526determine whether or not the page is mapped by any VM_LOCKED vma, without
527actually attempting to unmap all ptes from the page. For this purpose, the
528unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
529try_to_munlock().
530
531try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
532mapped file pages with an additional argument specifying unlock versus unmap
533processing. Again, these functions walk the respective reverse maps looking
534for VM_LOCKED vmas. When such a vma is found for anonymous pages and file
535pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
536attempt to acquire the associated mmap semaphore, mlock the page via
537mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
538pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
539shrink_page_list() that the anonymous page should be culled rather than added
540to the swap cache in preparation for a try_to_unmap() that will almost
541certainly fail.
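
A condensed sketch of try_to_munlock() [the unlock-flag convention is
simplified here; the page must be locked and off the LRU on entry]:

	int try_to_munlock(struct page *page)
	{
		BUG_ON(!PageLocked(page) || PageLRU(page));

		if (PageAnon(page))
			return try_to_unmap_anon(page, 1);  /* 1 == unlock */
		return try_to_unmap_file(page, 1);
	}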
542
543If try_to_munlock() is unable to acquire a VM_LOCKED vma's associated mmap
544semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list()
545to recycle the page on the inactive list and hope that it has better luck
546with the page next time.
547
548For file pages mapped into non-linear vmas, the try_to_munlock() logic works
549slightly differently. On encountering a VM_LOCKED non-linear vma that might
550map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
551the page. munlock_vma_page() will just leave the page unlocked and let
552vmscan deal with it--the usual fallback position.
553
554Note that try_to_munlock()'s reverse map walk must visit every vma in a page's
555reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
556However, the scan can terminate when it encounters a VM_LOCKED vma and can
557successfully acquire the vma's mmap semaphore for read and mlock the page.
558Although try_to_munlock() can be called many [very many!] times when
559munlock()ing a large region or tearing down a large address space that has been
560mlocked via mlockall(), overall this is a fairly rare event. In addition,
561although shrink_page_list() calls try_to_munlock() for every anonymous page that
562it handles that is not yet in the swap cache, on average anonymous pages will
563have very short reverse map lists.
564
565Mlocked Pages: Page Reclaim in shrink_*_list()
566
567shrink_active_list() culls any obviously unevictable pages--i.e.,
568!page_evictable(page, NULL)--diverting these to the unevictable lru
569list. However, shrink_active_list() only sees unevictable pages that
570made it onto the active/inactive lru lists. Note that these pages do not
571have PageUnevictable set--otherwise, they would be on the unevictable list and
572shrink_active_list() would never see them.
573
574Some examples of these unevictable pages on the LRU lists are:
575
5761) ramfs pages that have been placed on the lru lists when first allocated.
577
5782) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to
579 allocate or fault in the pages in the shared memory region. The pages
580 are allocated and faulted in when an application first accesses them
581 after SHM_LOCKing the segment.
582
5833) Mlocked pages that could not be isolated from the lru and moved to the
584 unevictable list in mlock_vma_page().
585
5864) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
587 acquire the vma's mmap semaphore to test the flags and set PageMlocked.
588 munlock_vma_page() was forced to let the page back on to the normal
589 LRU list for vmscan to handle.
590
591shrink_inactive_list() also culls any unevictable pages that it finds
592on the inactive lists, again diverting them to the appropriate zone's unevictable
593lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became
594SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
595pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
596the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice
597the latter, but will pass them on to shrink_page_list().
598
599shrink_page_list() again culls obviously unevictable pages that it could
600encounter, for reasons similar to shrink_inactive_list(). As already discussed,
601shrink_page_list() proactively looks for anonymous pages that should have
602PG_mlocked set but don't--these would not be detected by page_evictable()--to
603avoid adding them to the swap cache unnecessarily. File pages mapped into
604VM_LOCKED vmas but without PG_mlocked set will make it all the way to
605try_to_unmap(). shrink_page_list() will divert them to the unevictable list when
606try_to_unmap() returns SWAP_MLOCK, as discussed above.
607
608TODO/FIXME: If we can enhance the swap cache to reliably remove entries
609with page_count(page) > 2, as long as all ptes are mapped to the page and
610not the swap entry, we can probably remove the call to try_to_munlock() in
611shrink_page_list() and just remove the page from the swap cache when
612try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page()
613doesn't seem to allow that.
614
615
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index a0f642b6a4b9..6110197757a3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
70 default y 70 default y
71 71
72source "init/Kconfig" 72source "init/Kconfig"
73source "kernel/Kconfig.freezer"
73 74
74 75
75menu "System setup" 76menu "System setup"
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 15fda4344424..d069526bd767 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
74#define TIF_UAC_SIGBUS 7 74#define TIF_UAC_SIGBUS 7
75#define TIF_MEMDIE 8 75#define TIF_MEMDIE 8
76#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ 76#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
77#define TIF_FREEZE 16 /* is freezing for suspend */
77 78
78#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 79#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
79#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) 80#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
80#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 81#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
81#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 82#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
82#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 83#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
84#define _TIF_FREEZE (1<<TIF_FREEZE)
83 85
84/* Work to do on interrupt/exception return. */ 86/* Work to do on interrupt/exception return. */
85#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED) 87#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 04dcc5e5d4c1..9cd8dca742a7 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
655 655
656 case 0x71: /* RTC_PORT(1) */ 656 case 0x71: /* RTC_PORT(1) */
657 rtc_access.index = index; 657 rtc_access.index = index;
658 rtc_access.data = BCD_TO_BIN(b); 658 rtc_access.data = bcd2bin(b);
659 rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */ 659 rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */
660 660
661#ifdef CONFIG_SMP 661#ifdef CONFIG_SMP
@@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
668#else 668#else
669 __marvel_access_rtc(&rtc_access); 669 __marvel_access_rtc(&rtc_access);
670#endif 670#endif
671 ret = BIN_TO_BCD(rtc_access.data); 671 ret = bin2bcd(rtc_access.data);
672 break; 672 break;
673 673
674 default: 674 default:
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 75480cab0893..e6a231435cba 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -346,12 +346,12 @@ time_init(void)
346 year = CMOS_READ(RTC_YEAR); 346 year = CMOS_READ(RTC_YEAR);
347 347
348 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 348 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
349 BCD_TO_BIN(sec); 349 sec = bcd2bin(sec);
350 BCD_TO_BIN(min); 350 min = bcd2bin(min);
351 BCD_TO_BIN(hour); 351 hour = bcd2bin(hour);
352 BCD_TO_BIN(day); 352 day = bcd2bin(day);
353 BCD_TO_BIN(mon); 353 mon = bcd2bin(mon);
354 BCD_TO_BIN(year); 354 year = bcd2bin(year);
355 } 355 }
356 356
357 /* PC-like is standard; used for year >= 70 */ 357 /* PC-like is standard; used for year >= 70 */
@@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
525 525
526 cmos_minutes = CMOS_READ(RTC_MINUTES); 526 cmos_minutes = CMOS_READ(RTC_MINUTES);
527 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 527 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
528 BCD_TO_BIN(cmos_minutes); 528 cmos_minutes = bcd2bin(cmos_minutes);
529 529
530 /* 530 /*
531 * since we're only adjusting minutes and seconds, 531 * since we're only adjusting minutes and seconds,
@@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
543 543
544 if (abs(real_minutes - cmos_minutes) < 30) { 544 if (abs(real_minutes - cmos_minutes) < 30) {
545 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 545 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
546 BIN_TO_BCD(real_seconds); 546 real_seconds = bin2bcd(real_seconds);
547 BIN_TO_BCD(real_minutes); 547 real_minutes = bin2bcd(real_minutes);
548 } 548 }
549 CMOS_WRITE(real_seconds,RTC_SECONDS); 549 CMOS_WRITE(real_seconds,RTC_SECONDS);
550 CMOS_WRITE(real_minutes,RTC_MINUTES); 550 CMOS_WRITE(real_minutes,RTC_MINUTES);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4853f9df37bd..df39d20f7425 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -192,6 +192,8 @@ config VECTORS_BASE
192 192
193source "init/Kconfig" 193source "init/Kconfig"
194 194
195source "kernel/Kconfig.freezer"
196
195menu "System Type" 197menu "System Type"
196 198
197choice 199choice
diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
index eb4b190b6657..eb35fca9aea5 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
@@ -4,6 +4,43 @@
4#include <linux/mtd/mtd.h> 4#include <linux/mtd/mtd.h>
5#include <linux/mtd/partitions.h> 5#include <linux/mtd/partitions.h>
6 6
7struct pxa3xx_nand_timing {
8 unsigned int tCH; /* Enable signal hold time */
9 unsigned int tCS; /* Enable signal setup time */
10 unsigned int tWH; /* ND_nWE high duration */
11 unsigned int tWP; /* ND_nWE pulse time */
12 unsigned int tRH; /* ND_nRE high duration */
13 unsigned int tRP; /* ND_nRE pulse width */
14 unsigned int tR; /* ND_nWE high to ND_nRE low for read */
15 unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */
16 unsigned int tAR; /* ND_ALE low to ND_nRE low delay */
17};
18
19struct pxa3xx_nand_cmdset {
20 uint16_t read1;
21 uint16_t read2;
22 uint16_t program;
23 uint16_t read_status;
24 uint16_t read_id;
25 uint16_t erase;
26 uint16_t reset;
27 uint16_t lock;
28 uint16_t unlock;
29 uint16_t lock_status;
30};
31
32struct pxa3xx_nand_flash {
33 const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
34 const struct pxa3xx_nand_cmdset *cmdset;
35
36 uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
37 uint32_t page_size; /* Page size in bytes (PAGE_SZ) */
38 uint32_t flash_width; /* Width of Flash memory (DWIDTH_M) */
39 uint32_t dfc_width; /* Width of flash controller(DWIDTH_C) */
40 uint32_t num_blocks; /* Number of physical blocks in Flash */
41 uint32_t chip_id;
42};
43
7struct pxa3xx_nand_platform_data { 44struct pxa3xx_nand_platform_data {
8 45
9 /* the data flash bus is shared between the Static Memory 46 /* the data flash bus is shared between the Static Memory
@@ -12,8 +49,11 @@ struct pxa3xx_nand_platform_data {
12 */ 49 */
13 int enable_arbiter; 50 int enable_arbiter;
14 51
15 struct mtd_partition *parts; 52 const struct mtd_partition *parts;
16 unsigned int nr_parts; 53 unsigned int nr_parts;
54
55 const struct pxa3xx_nand_flash * flash;
56 size_t num_flash;
17}; 57};
18 58
19extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info); 59extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
diff --git a/arch/arm/plat-mxc/include/mach/mxc_nand.h b/arch/arm/plat-mxc/include/mach/mxc_nand.h
new file mode 100644
index 000000000000..2b972df22d12
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/mxc_nand.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
3 * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * MA 02110-1301, USA.
18 */
19
20#ifndef __ASM_ARCH_NAND_H
21#define __ASM_ARCH_NAND_H
22
23struct mxc_nand_platform_data {
24 int width; /* data bus width in bytes */
25 int hw_ecc; /* 0 if supress hardware ECC */
26};
27#endif /* __ASM_ARCH_NAND_H */
diff --git a/arch/arm/plat-omap/include/mach/onenand.h b/arch/arm/plat-omap/include/mach/onenand.h
index d57f20226b28..4649d302c263 100644
--- a/arch/arm/plat-omap/include/mach/onenand.h
+++ b/arch/arm/plat-omap/include/mach/onenand.h
@@ -16,6 +16,10 @@ struct omap_onenand_platform_data {
16 int gpio_irq; 16 int gpio_irq;
17 struct mtd_partition *parts; 17 struct mtd_partition *parts;
18 int nr_parts; 18 int nr_parts;
19 int (*onenand_setup)(void __iomem *); 19 int (*onenand_setup)(void __iomem *, int freq);
20 int dma_channel; 20 int dma_channel;
21}; 21};
22
23int omap2_onenand_rephase(void);
24
25#define ONENAND_MAX_PARTITIONS 8
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 7c239a916275..33a5b2969eb4 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -72,6 +72,8 @@ config GENERIC_BUG
72 72
73source "init/Kconfig" 73source "init/Kconfig"
74 74
75source "kernel/Kconfig.freezer"
76
75menu "System Type and features" 77menu "System Type and features"
76 78
77source "kernel/time/Kconfig" 79source "kernel/time/Kconfig"
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 294b25f9323d..4442f8d2d423 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
96#define _TIF_MEMDIE (1 << TIF_MEMDIE) 96#define _TIF_MEMDIE (1 << TIF_MEMDIE)
97#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 97#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
98#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP) 98#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
99#define _TIF_FREEZE (1 << TIF_FREEZE)
99 100
100/* Note: The masks below must never span more than 16 bits! */ 101/* Note: The masks below must never span more than 16 bits! */
101 102
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 8102c79aaa94..29e71ed6b8a7 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -64,8 +64,11 @@ config HARDWARE_PM
64 depends on OPROFILE 64 depends on OPROFILE
65 65
66source "init/Kconfig" 66source "init/Kconfig"
67
67source "kernel/Kconfig.preempt" 68source "kernel/Kconfig.preempt"
68 69
70source "kernel/Kconfig.freezer"
71
69menu "Blackfin Processor Options" 72menu "Blackfin Processor Options"
70 73
71comment "Processor and Board Settings" 74comment "Processor and Board Settings"
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 9389d38f222f..07335e719bf8 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -62,6 +62,8 @@ config HZ
62 62
63source "init/Kconfig" 63source "init/Kconfig"
64 64
65source "kernel/Kconfig.freezer"
66
65menu "General setup" 67menu "General setup"
66 68
67source "fs/Kconfig.binfmt" 69source "fs/Kconfig.binfmt"
diff --git a/arch/cris/arch-v10/drivers/ds1302.c b/arch/cris/arch-v10/drivers/ds1302.c
index c9aa3904be05..3bdfaf43390c 100644
--- a/arch/cris/arch-v10/drivers/ds1302.c
+++ b/arch/cris/arch-v10/drivers/ds1302.c
@@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
215 215
216 local_irq_restore(flags); 216 local_irq_restore(flags);
217 217
218 BCD_TO_BIN(rtc_tm->tm_sec); 218 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
219 BCD_TO_BIN(rtc_tm->tm_min); 219 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
220 BCD_TO_BIN(rtc_tm->tm_hour); 220 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
221 BCD_TO_BIN(rtc_tm->tm_mday); 221 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
222 BCD_TO_BIN(rtc_tm->tm_mon); 222 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
223 BCD_TO_BIN(rtc_tm->tm_year); 223 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
224 224
225 /* 225 /*
226 * Account for differences between how the RTC uses the values 226 * Account for differences between how the RTC uses the values
@@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
295 else 295 else
296 yrs -= 1900; /* RTC (70, 71, ... 99) */ 296 yrs -= 1900; /* RTC (70, 71, ... 99) */
297 297
298 BIN_TO_BCD(sec); 298 sec = bin2bcd(sec);
299 BIN_TO_BCD(min); 299 min = bin2bcd(min);
300 BIN_TO_BCD(hrs); 300 hrs = bin2bcd(hrs);
301 BIN_TO_BCD(day); 301 day = bin2bcd(day);
302 BIN_TO_BCD(mon); 302 mon = bin2bcd(mon);
303 BIN_TO_BCD(yrs); 303 yrs = bin2bcd(yrs);
304 304
305 local_irq_save(flags); 305 local_irq_save(flags);
306 CMOS_WRITE(yrs, RTC_YEAR); 306 CMOS_WRITE(yrs, RTC_YEAR);
diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c
index 8769dc914073..1e90c1a9c849 100644
--- a/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/arch/cris/arch-v10/drivers/pcf8563.c
@@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
122 "information is no longer guaranteed!\n", PCF8563_NAME); 122 "information is no longer guaranteed!\n", PCF8563_NAME);
123 } 123 }
124 124
125 tm->tm_year = BCD_TO_BIN(tm->tm_year) + 125 tm->tm_year = bcd2bin(tm->tm_year) +
126 ((tm->tm_mon & 0x80) ? 100 : 0); 126 ((tm->tm_mon & 0x80) ? 100 : 0);
127 tm->tm_sec &= 0x7F; 127 tm->tm_sec &= 0x7F;
128 tm->tm_min &= 0x7F; 128 tm->tm_min &= 0x7F;
@@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
131 tm->tm_wday &= 0x07; /* Not coded in BCD. */ 131 tm->tm_wday &= 0x07; /* Not coded in BCD. */
132 tm->tm_mon &= 0x1F; 132 tm->tm_mon &= 0x1F;
133 133
134 BCD_TO_BIN(tm->tm_sec); 134 tm->tm_sec = bcd2bin(tm->tm_sec);
135 BCD_TO_BIN(tm->tm_min); 135 tm->tm_min = bcd2bin(tm->tm_min);
136 BCD_TO_BIN(tm->tm_hour); 136 tm->tm_hour = bcd2bin(tm->tm_hour);
137 BCD_TO_BIN(tm->tm_mday); 137 tm->tm_mday = bcd2bin(tm->tm_mday);
138 BCD_TO_BIN(tm->tm_mon); 138 tm->tm_mon = bcd2bin(tm->tm_mon);
139 tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */ 139 tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
140} 140}
141 141
@@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
282 century = (tm.tm_year >= 2000) ? 0x80 : 0; 282 century = (tm.tm_year >= 2000) ? 0x80 : 0;
283 tm.tm_year = tm.tm_year % 100; 283 tm.tm_year = tm.tm_year % 100;
284 284
285 BIN_TO_BCD(tm.tm_year); 285 tm.tm_year = bin2bcd(tm.tm_year);
286 BIN_TO_BCD(tm.tm_mon); 286 tm.tm_mon = bin2bcd(tm.tm_mon);
287 BIN_TO_BCD(tm.tm_mday); 287 tm.tm_mday = bin2bcd(tm.tm_mday);
288 BIN_TO_BCD(tm.tm_hour); 288 tm.tm_hour = bin2bcd(tm.tm_hour);
289 BIN_TO_BCD(tm.tm_min); 289 tm.tm_min = bin2bcd(tm.tm_min);
290 BIN_TO_BCD(tm.tm_sec); 290 tm.tm_sec = bin2bcd(tm.tm_sec);
291 tm.tm_mon |= century; 291 tm.tm_mon |= century;
292 292
293 mutex_lock(&rtc_lock); 293 mutex_lock(&rtc_lock);
diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c
index f263ab571221..f4478506e52c 100644
--- a/arch/cris/arch-v32/drivers/pcf8563.c
+++ b/arch/cris/arch-v32/drivers/pcf8563.c
@@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
118 "information is no longer guaranteed!\n", PCF8563_NAME); 118 "information is no longer guaranteed!\n", PCF8563_NAME);
119 } 119 }
120 120
121 tm->tm_year = BCD_TO_BIN(tm->tm_year) + 121 tm->tm_year = bcd2bin(tm->tm_year) +
122 ((tm->tm_mon & 0x80) ? 100 : 0); 122 ((tm->tm_mon & 0x80) ? 100 : 0);
123 tm->tm_sec &= 0x7F; 123 tm->tm_sec &= 0x7F;
124 tm->tm_min &= 0x7F; 124 tm->tm_min &= 0x7F;
@@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
127 tm->tm_wday &= 0x07; /* Not coded in BCD. */ 127 tm->tm_wday &= 0x07; /* Not coded in BCD. */
128 tm->tm_mon &= 0x1F; 128 tm->tm_mon &= 0x1F;
129 129
130 BCD_TO_BIN(tm->tm_sec); 130 tm->tm_sec = bcd2bin(tm->tm_sec);
131 BCD_TO_BIN(tm->tm_min); 131 tm->tm_min = bcd2bin(tm->tm_min);
132 BCD_TO_BIN(tm->tm_hour); 132 tm->tm_hour = bcd2bin(tm->tm_hour);
133 BCD_TO_BIN(tm->tm_mday); 133 tm->tm_mday = bcd2bin(tm->tm_mday);
134 BCD_TO_BIN(tm->tm_mon); 134 tm->tm_mon = bcd2bin(tm->tm_mon);
135 tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */ 135 tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
136} 136}
137 137
@@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
279 century = (tm.tm_year >= 2000) ? 0x80 : 0; 279 century = (tm.tm_year >= 2000) ? 0x80 : 0;
280 tm.tm_year = tm.tm_year % 100; 280 tm.tm_year = tm.tm_year % 100;
281 281
282 BIN_TO_BCD(tm.tm_year); 282 tm.tm_year = bin2bcd(tm.tm_year);
283 BIN_TO_BCD(tm.tm_mon); 283 tm.tm_mon = bin2bcd(tm.tm_mon);
284 BIN_TO_BCD(tm.tm_mday); 284 tm.tm_mday = bin2bcd(tm.tm_mday);
285 BIN_TO_BCD(tm.tm_hour); 285 tm.tm_hour = bin2bcd(tm.tm_hour);
286 BIN_TO_BCD(tm.tm_min); 286 tm.tm_min = bin2bcd(tm.tm_min);
287 BIN_TO_BCD(tm.tm_sec); 287 tm.tm_sec = bin2bcd(tm.tm_sec);
288 tm.tm_mon |= century; 288 tm.tm_mon |= century;
289 289
290 mutex_lock(&rtc_lock); 290 mutex_lock(&rtc_lock);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index ff4c6aa75def..074fe7dea96b 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
127 return 0; 127 return 0;
128 128
129 cmos_minutes = CMOS_READ(RTC_MINUTES); 129 cmos_minutes = CMOS_READ(RTC_MINUTES);
130 BCD_TO_BIN(cmos_minutes); 130 cmos_minutes = bcd2bin(cmos_minutes);
131 131
132 /* 132 /*
133 * since we're only adjusting minutes and seconds, 133 * since we're only adjusting minutes and seconds,
@@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
142 real_minutes %= 60; 142 real_minutes %= 60;
143 143
144 if (abs(real_minutes - cmos_minutes) < 30) { 144 if (abs(real_minutes - cmos_minutes) < 30) {
145 BIN_TO_BCD(real_seconds); 145 real_seconds = bin2bcd(real_seconds);
146 BIN_TO_BCD(real_minutes); 146 real_minutes = bin2bcd(real_minutes);
147 CMOS_WRITE(real_seconds,RTC_SECONDS); 147 CMOS_WRITE(real_seconds,RTC_SECONDS);
148 CMOS_WRITE(real_minutes,RTC_MINUTES); 148 CMOS_WRITE(real_minutes,RTC_MINUTES);
149 } else { 149 } else {
@@ -170,12 +170,12 @@ get_cmos_time(void)
170 mon = CMOS_READ(RTC_MONTH); 170 mon = CMOS_READ(RTC_MONTH);
171 year = CMOS_READ(RTC_YEAR); 171 year = CMOS_READ(RTC_YEAR);
172 172
173 BCD_TO_BIN(sec); 173 sec = bcd2bin(sec);
174 BCD_TO_BIN(min); 174 min = bcd2bin(min);
175 BCD_TO_BIN(hour); 175 hour = bcd2bin(hour);
176 BCD_TO_BIN(day); 176 day = bcd2bin(day);
177 BCD_TO_BIN(mon); 177 mon = bcd2bin(mon);
178 BCD_TO_BIN(year); 178 year = bcd2bin(year);
179 179
180 if ((year += 1900) < 1970) 180 if ((year += 1900) < 1970)
181 year += 100; 181 year += 100;
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a5aac1b07562..9d1552a9ee2c 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
66 66
67source "init/Kconfig" 67source "init/Kconfig"
68 68
69source "kernel/Kconfig.freezer"
70
69 71
70menu "Fujitsu FR-V system setup" 72menu "Fujitsu FR-V system setup"
71 73
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c7966746fbfe..bd1995403c67 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -90,6 +90,8 @@ config HZ
90 90
91source "init/Kconfig" 91source "init/Kconfig"
92 92
93source "kernel/Kconfig.freezer"
94
93source "arch/h8300/Kconfig.cpu" 95source "arch/h8300/Kconfig.cpu"
94 96
95menu "Executable file formats" 97menu "Executable file formats"
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index aafd4d322ec3..700014d2155f 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
89 TIF_NEED_RESCHED */ 89 TIF_NEED_RESCHED */
90#define TIF_MEMDIE 4 90#define TIF_MEMDIE 4
91#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ 91#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
92#define TIF_FREEZE 16 /* is freezing for suspend */
92 93
93/* as above, but as bit values */ 94/* as above, but as bit values */
94#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 95#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
96#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 97#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
97#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 98#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
98#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 99#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
100#define _TIF_FREEZE (1<<TIF_FREEZE)
99 101
100#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ 102#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
101 103
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b7aa38254a8..912c57db2d21 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
7 7
8source "init/Kconfig" 8source "init/Kconfig"
9 9
10source "kernel/Kconfig.freezer"
11
10menu "Processor type and features" 12menu "Processor type and features"
11 13
12config IA64 14config IA64
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4956be40d7b5..d98f0f4ff83f 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2070,14 +2070,13 @@ sba_init(void)
2070 if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb")) 2070 if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
2071 return 0; 2071 return 0;
2072 2072
2073#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \ 2073#if defined(CONFIG_IA64_GENERIC)
2074 defined(CONFIG_PROC_FS)
2075 /* If we are booting a kdump kernel, the sba_iommu will 2074 /* If we are booting a kdump kernel, the sba_iommu will
2076 * cause devices that were not shutdown properly to MCA 2075 * cause devices that were not shutdown properly to MCA
2077 * as soon as they are turned back on. Our only option for 2076 * as soon as they are turned back on. Our only option for
2078 * a successful kdump kernel boot is to use the swiotlb. 2077 * a successful kdump kernel boot is to use the swiotlb.
2079 */ 2078 */
2080 if (elfcorehdr_addr < ELFCORE_ADDR_MAX) { 2079 if (is_kdump_kernel()) {
2081 if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) 2080 if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
2082 panic("Unable to initialize software I/O TLB:" 2081 panic("Unable to initialize software I/O TLB:"
2083 " Try machvec=dig boot option"); 2082 " Try machvec=dig boot option");
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index da60e90eeeb1..23e91290e41f 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -8,10 +8,14 @@
8 8
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/crash_dump.h>
11 12
12#include <asm/page.h> 13#include <asm/page.h>
13#include <asm/uaccess.h> 14#include <asm/uaccess.h>
14 15
16/* Stores the physical address of elf header of crash image. */
17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
18
15/** 19/**
16 * copy_oldmem_page - copy one page from "oldmem" 20 * copy_oldmem_page - copy one page from "oldmem"
17 * @pfn: page frame number to be copied 21 * @pfn: page frame number to be copied
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 51b75cea7018..efaff15d8cf1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
1335} 1335}
1336#endif 1336#endif
1337 1337
1338#ifdef CONFIG_PROC_VMCORE 1338#ifdef CONFIG_CRASH_DUMP
1339/* locate the size of the descriptor at a certain address */ 1339/* locate the size of the descriptor at a certain address */
1340unsigned long __init 1340unsigned long __init
1341vmcore_find_descriptor_size (unsigned long address) 1341vmcore_find_descriptor_size (unsigned long address)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index de636b215677..916ba898237f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -352,7 +352,7 @@ reserve_memory (void)
352 } 352 }
353#endif 353#endif
354 354
355#ifdef CONFIG_PROC_VMCORE 355#ifdef CONFIG_CRASH_DUMP
356 if (reserve_elfcorehdr(&rsvd_region[n].start, 356 if (reserve_elfcorehdr(&rsvd_region[n].start,
357 &rsvd_region[n].end) == 0) 357 &rsvd_region[n].end) == 0)
358 n++; 358 n++;
@@ -478,7 +478,12 @@ static __init int setup_nomca(char *s)
478} 478}
479early_param("nomca", setup_nomca); 479early_param("nomca", setup_nomca);
480 480
481#ifdef CONFIG_PROC_VMCORE 481/*
482 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
483 * is_kdump_kernel() to determine if we are booting after a panic. Hence
484 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
485 */
486#ifdef CONFIG_CRASH_DUMP
482/* elfcorehdr= specifies the location of elf core header 487/* elfcorehdr= specifies the location of elf core header
483 * stored by the crashed kernel. 488 * stored by the crashed kernel.
484 */ 489 */
@@ -502,11 +507,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
502 * to work properly. 507 * to work properly.
503 */ 508 */
504 509
505 if (elfcorehdr_addr >= ELFCORE_ADDR_MAX) 510 if (!is_vmcore_usable())
506 return -EINVAL; 511 return -EINVAL;
507 512
508 if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { 513 if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
509 elfcorehdr_addr = ELFCORE_ADDR_MAX; 514 vmcore_unusable();
510 return -EINVAL; 515 return -EINVAL;
511 } 516 }
512 517
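is_vmcore_usable() and vmcore_unusable() are the new accessors for the same state; a sketch of their presumed <linux/crash_dump.h> definitions, where ELFCORE_ADDR_ERR marks a header that was passed in but turned out to be invalid:

    static inline int is_vmcore_usable(void)
    {
            return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR;
    }

    static inline void vmcore_unusable(void)
    {
            if (is_kdump_kernel())
                    elfcorehdr_addr = ELFCORE_ADDR_ERR;
    }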
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index f482a9098e32..054bcd9439aa 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
700 700
701 return ret; 701 return ret;
702} 702}
703#ifdef CONFIG_MEMORY_HOTREMOVE
704int remove_memory(u64 start, u64 size)
705{
706 unsigned long start_pfn, end_pfn;
707 unsigned long timeout = 120 * HZ;
708 int ret;
709 start_pfn = start >> PAGE_SHIFT;
710 end_pfn = start_pfn + (size >> PAGE_SHIFT);
711 ret = offline_pages(start_pfn, end_pfn, timeout);
712 if (ret)
713 goto out;
714 /* we can free mem_map at this point */
715out:
716 return ret;
717}
718EXPORT_SYMBOL_GPL(remove_memory);
719#endif /* CONFIG_MEMORY_HOTREMOVE */
720#endif 703#endif
721 704
722/* 705/*
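The per-architecture remove_memory() implementations deleted here (and in the powerpc and s390 hunks below) are superseded by a single generic copy in mm/memory_hotplug.c, presumably mirroring the s390 variant being removed:

    int remove_memory(u64 start, u64 size)
    {
            unsigned long start_pfn, end_pfn;

            start_pfn = PFN_DOWN(start);
            end_pfn = start_pfn + PFN_DOWN(size);
            return offline_pages(start_pfn, end_pfn, 120 * HZ);
    }
    EXPORT_SYMBOL_GPL(remove_memory);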
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 00289c178f89..dbaed4a63815 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -42,6 +42,8 @@ config HZ
42 42
43source "init/Kconfig" 43source "init/Kconfig"
44 44
45source "kernel/Kconfig.freezer"
46
45 47
46menu "Processor type and features" 48menu "Processor type and features"
47 49
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 677c93a490f6..836fb66f080d 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
62 62
63source "init/Kconfig" 63source "init/Kconfig"
64 64
65source "kernel/Kconfig.freezer"
66
65menu "Platform dependent setup" 67menu "Platform dependent setup"
66 68
67config EISA 69config EISA
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index 808c9018b115..c50bec8aabb1 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -18,7 +18,6 @@
18#include <linux/poll.h> 18#include <linux/poll.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */ 20#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
21#include <linux/smp_lock.h>
22#include <linux/bcd.h> 21#include <linux/bcd.h>
23#include <asm/bvme6000hw.h> 22#include <asm/bvme6000hw.h>
24 23
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 0a8998315e5e..76b66feb74df 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -75,6 +75,8 @@ config NO_IOPORT
75 75
76source "init/Kconfig" 76source "init/Kconfig"
77 77
78source "kernel/Kconfig.freezer"
79
78menu "Processor type and features" 80menu "Processor type and features"
79 81
80choice 82choice
diff --git a/arch/m68knommu/include/asm/thread_info.h b/arch/m68knommu/include/asm/thread_info.h
index 0c9bc095f3f0..82529f424ea3 100644
--- a/arch/m68knommu/include/asm/thread_info.h
+++ b/arch/m68knommu/include/asm/thread_info.h
@@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
84#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling 84#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
85 TIF_NEED_RESCHED */ 85 TIF_NEED_RESCHED */
86#define TIF_MEMDIE 4 86#define TIF_MEMDIE 4
87#define TIF_FREEZE 16 /* is freezing for suspend */
87 88
88/* as above, but as bit values */ 89/* as above, but as bit values */
89#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 90#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
90#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) 91#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
91#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 92#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
92#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 93#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
94#define _TIF_FREEZE (1<<TIF_FREEZE)
93 95
94#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ 96#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
95 97
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b905744d7915..5f149b030c0f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
1885 add initrd or initramfs image to the kernel image. 1885 add initrd or initramfs image to the kernel image.
1886 Otherwise, say N. 1886 Otherwise, say N.
1887 1887
1888source "kernel/Kconfig.freezer"
1889
1888menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" 1890menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
1889 1891
1890config HW_HAS_EISA 1892config HW_HAS_EISA
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 3965fda94a89..1359c03ded51 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
45 spin_unlock_irqrestore(&rtc_lock, flags); 45 spin_unlock_irqrestore(&rtc_lock, flags);
46 46
47 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 47 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
48 sec = BCD2BIN(sec); 48 sec = bcd2bin(sec);
49 min = BCD2BIN(min); 49 min = bcd2bin(min);
50 hour = BCD2BIN(hour); 50 hour = bcd2bin(hour);
51 day = BCD2BIN(day); 51 day = bcd2bin(day);
52 mon = BCD2BIN(mon); 52 mon = bcd2bin(mon);
53 year = BCD2BIN(year); 53 year = bcd2bin(year);
54 } 54 }
55 55
56 year += real_year - 72 + 2000; 56 year += real_year - 72 + 2000;
@@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
83 83
84 cmos_minutes = CMOS_READ(RTC_MINUTES); 84 cmos_minutes = CMOS_READ(RTC_MINUTES);
85 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 85 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
86 cmos_minutes = BCD2BIN(cmos_minutes); 86 cmos_minutes = bcd2bin(cmos_minutes);
87 87
88 /* 88 /*
89 * since we're only adjusting minutes and seconds, 89 * since we're only adjusting minutes and seconds,
@@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
99 99
100 if (abs(real_minutes - cmos_minutes) < 30) { 100 if (abs(real_minutes - cmos_minutes) < 30) {
101 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 101 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
102 real_seconds = BIN2BCD(real_seconds); 102 real_seconds = bin2bcd(real_seconds);
103 real_minutes = BIN2BCD(real_minutes); 103 real_minutes = bin2bcd(real_minutes);
104 } 104 }
105 CMOS_WRITE(real_seconds, RTC_SECONDS); 105 CMOS_WRITE(real_seconds, RTC_SECONDS);
106 CMOS_WRITE(real_minutes, RTC_MINUTES); 106 CMOS_WRITE(real_minutes, RTC_MINUTES);
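This hunk and the RTC hunks that follow are one mechanical conversion: the old BCD_TO_BIN()/BIN_TO_BCD() macros modified their argument in place (and could evaluate it more than once), while bcd2bin()/bin2bcd() from <linux/bcd.h> are plain value-returning functions, so every call site becomes an explicit assignment. For comparison, a sketch of both forms, with the function bodies assumed from lib/bcd.c:

    /* old macro style, removed: evaluates and writes back its argument */
    #define BCD2BIN(val)    (((val) & 0x0f) + ((val) >> 4) * 10)
    #define BCD_TO_BIN(val) ((val) = BCD2BIN(val))

    /* new: single evaluation, explicit types */
    unsigned bcd2bin(unsigned char val)
    {
            return (val & 0x0f) + (val >> 4) * 10;
    }

    unsigned char bin2bcd(unsigned val)
    {
            return ((val / 10) << 4) + val % 10;
    }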
diff --git a/arch/mips/include/asm/mc146818-time.h b/arch/mips/include/asm/mc146818-time.h
index cdc379a0a94e..199b45733a95 100644
--- a/arch/mips/include/asm/mc146818-time.h
+++ b/arch/mips/include/asm/mc146818-time.h
@@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
44 44
45 cmos_minutes = CMOS_READ(RTC_MINUTES); 45 cmos_minutes = CMOS_READ(RTC_MINUTES);
46 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 46 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
47 BCD_TO_BIN(cmos_minutes); 47 cmos_minutes = bcd2bin(cmos_minutes);
48 48
49 /* 49 /*
50 * since we're only adjusting minutes and seconds, 50 * since we're only adjusting minutes and seconds,
@@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
60 60
61 if (abs(real_minutes - cmos_minutes) < 30) { 61 if (abs(real_minutes - cmos_minutes) < 30) {
62 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 62 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
63 BIN_TO_BCD(real_seconds); 63 real_seconds = bin2bcd(real_seconds);
64 BIN_TO_BCD(real_minutes); 64 real_minutes = bin2bcd(real_minutes);
65 } 65 }
66 CMOS_WRITE(real_seconds, RTC_SECONDS); 66 CMOS_WRITE(real_seconds, RTC_SECONDS);
67 CMOS_WRITE(real_minutes, RTC_MINUTES); 67 CMOS_WRITE(real_minutes, RTC_MINUTES);
@@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
103 } while (sec != CMOS_READ(RTC_SECONDS)); 103 } while (sec != CMOS_READ(RTC_SECONDS));
104 104
105 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 105 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
106 BCD_TO_BIN(sec); 106 sec = bcd2bin(sec);
107 BCD_TO_BIN(min); 107 min = bcd2bin(min);
108 BCD_TO_BIN(hour); 108 hour = bcd2bin(hour);
109 BCD_TO_BIN(day); 109 day = bcd2bin(day);
110 BCD_TO_BIN(mon); 110 mon = bcd2bin(mon);
111 BCD_TO_BIN(year); 111 year = bcd2bin(year);
112 } 112 }
113 spin_unlock_irqrestore(&rtc_lock, flags); 113 spin_unlock_irqrestore(&rtc_lock, flags);
114 year = mc146818_decode_year(year); 114 year = mc146818_decode_year(year);
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 6537d90a25bb..2d3c0dca275d 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
79 /* Stop the update to the time */ 79 /* Stop the update to the time */
80 m48t37_base->control = 0x40; 80 m48t37_base->control = 0x40;
81 81
82 year = BCD2BIN(m48t37_base->year); 82 year = bcd2bin(m48t37_base->year);
83 year += BCD2BIN(m48t37_base->century) * 100; 83 year += bcd2bin(m48t37_base->century) * 100;
84 84
85 month = BCD2BIN(m48t37_base->month); 85 month = bcd2bin(m48t37_base->month);
86 day = BCD2BIN(m48t37_base->date); 86 day = bcd2bin(m48t37_base->date);
87 hour = BCD2BIN(m48t37_base->hour); 87 hour = bcd2bin(m48t37_base->hour);
88 min = BCD2BIN(m48t37_base->min); 88 min = bcd2bin(m48t37_base->min);
89 sec = BCD2BIN(m48t37_base->sec); 89 sec = bcd2bin(m48t37_base->sec);
90 90
91 /* Start the update to the time again */ 91 /* Start the update to the time again */
92 m48t37_base->control = 0x00; 92 m48t37_base->control = 0x00;
@@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
113 m48t37_base->control = 0x80; 113 m48t37_base->control = 0x80;
114 114
115 /* year */ 115 /* year */
116 m48t37_base->year = BIN2BCD(tm.tm_year % 100); 116 m48t37_base->year = bin2bcd(tm.tm_year % 100);
117 m48t37_base->century = BIN2BCD(tm.tm_year / 100); 117 m48t37_base->century = bin2bcd(tm.tm_year / 100);
118 118
119 /* month */ 119 /* month */
120 m48t37_base->month = BIN2BCD(tm.tm_mon); 120 m48t37_base->month = bin2bcd(tm.tm_mon);
121 121
122 /* day */ 122 /* day */
123 m48t37_base->date = BIN2BCD(tm.tm_mday); 123 m48t37_base->date = bin2bcd(tm.tm_mday);
124 124
125 /* hour/min/sec */ 125 /* hour/min/sec */
126 m48t37_base->hour = BIN2BCD(tm.tm_hour); 126 m48t37_base->hour = bin2bcd(tm.tm_hour);
127 m48t37_base->min = BIN2BCD(tm.tm_min); 127 m48t37_base->min = bin2bcd(tm.tm_min);
128 m48t37_base->sec = BIN2BCD(tm.tm_sec); 128 m48t37_base->sec = bin2bcd(tm.tm_sec);
129 129
130 /* day of week -- not really used, but let's keep it up-to-date */ 130 /* day of week -- not really used, but let's keep it up-to-date */
131 m48t37_base->day = BIN2BCD(tm.tm_wday + 1); 131 m48t37_base->day = bin2bcd(tm.tm_wday + 1);
132 132
133 /* disable writing */ 133 /* disable writing */
134 m48t37_base->control = 0x00; 134 m48t37_base->control = 0x00;
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index 26fbff4c15b1..b732600b47f5 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
156 */ 156 */
157 157
158 spin_lock_irqsave(&rtc_lock, flags); 158 spin_lock_irqsave(&rtc_lock, flags);
159 tm.tm_sec = BIN2BCD(tm.tm_sec); 159 tm.tm_sec = bin2bcd(tm.tm_sec);
160 m41t81_write(M41T81REG_SC, tm.tm_sec); 160 m41t81_write(M41T81REG_SC, tm.tm_sec);
161 161
162 tm.tm_min = BIN2BCD(tm.tm_min); 162 tm.tm_min = bin2bcd(tm.tm_min);
163 m41t81_write(M41T81REG_MN, tm.tm_min); 163 m41t81_write(M41T81REG_MN, tm.tm_min);
164 164
165 tm.tm_hour = BIN2BCD(tm.tm_hour); 165 tm.tm_hour = bin2bcd(tm.tm_hour);
166 tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0); 166 tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
167 m41t81_write(M41T81REG_HR, tm.tm_hour); 167 m41t81_write(M41T81REG_HR, tm.tm_hour);
168 168
169 /* tm_wday starts from 0 to 6 */ 169 /* tm_wday starts from 0 to 6 */
170 if (tm.tm_wday == 0) tm.tm_wday = 7; 170 if (tm.tm_wday == 0) tm.tm_wday = 7;
171 tm.tm_wday = BIN2BCD(tm.tm_wday); 171 tm.tm_wday = bin2bcd(tm.tm_wday);
172 m41t81_write(M41T81REG_DY, tm.tm_wday); 172 m41t81_write(M41T81REG_DY, tm.tm_wday);
173 173
174 tm.tm_mday = BIN2BCD(tm.tm_mday); 174 tm.tm_mday = bin2bcd(tm.tm_mday);
175 m41t81_write(M41T81REG_DT, tm.tm_mday); 175 m41t81_write(M41T81REG_DT, tm.tm_mday);
176 176
177 /* tm_mon starts from 0, *ick* */ 177 /* tm_mon starts from 0, *ick* */
178 tm.tm_mon ++; 178 tm.tm_mon ++;
179 tm.tm_mon = BIN2BCD(tm.tm_mon); 179 tm.tm_mon = bin2bcd(tm.tm_mon);
180 m41t81_write(M41T81REG_MO, tm.tm_mon); 180 m41t81_write(M41T81REG_MO, tm.tm_mon);
181 181
182 /* we don't do century, everything is beyond 2000 */ 182 /* we don't do century, everything is beyond 2000 */
183 tm.tm_year %= 100; 183 tm.tm_year %= 100;
184 tm.tm_year = BIN2BCD(tm.tm_year); 184 tm.tm_year = bin2bcd(tm.tm_year);
185 m41t81_write(M41T81REG_YR, tm.tm_year); 185 m41t81_write(M41T81REG_YR, tm.tm_year);
186 spin_unlock_irqrestore(&rtc_lock, flags); 186 spin_unlock_irqrestore(&rtc_lock, flags);
187 187
@@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
209 year = m41t81_read(M41T81REG_YR); 209 year = m41t81_read(M41T81REG_YR);
210 spin_unlock_irqrestore(&rtc_lock, flags); 210 spin_unlock_irqrestore(&rtc_lock, flags);
211 211
212 sec = BCD2BIN(sec); 212 sec = bcd2bin(sec);
213 min = BCD2BIN(min); 213 min = bcd2bin(min);
214 hour = BCD2BIN(hour); 214 hour = bcd2bin(hour);
215 day = BCD2BIN(day); 215 day = bcd2bin(day);
216 mon = BCD2BIN(mon); 216 mon = bcd2bin(mon);
217 year = BCD2BIN(year); 217 year = bcd2bin(year);
218 218
219 year += 2000; 219 year += 2000;
220 220
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index ff3e5dabb348..4438b2195c44 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
124 xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL); 124 xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
125 125
126 /* trivial ones */ 126 /* trivial ones */
127 tm.tm_sec = BIN2BCD(tm.tm_sec); 127 tm.tm_sec = bin2bcd(tm.tm_sec);
128 xicor_write(X1241REG_SC, tm.tm_sec); 128 xicor_write(X1241REG_SC, tm.tm_sec);
129 129
130 tm.tm_min = BIN2BCD(tm.tm_min); 130 tm.tm_min = bin2bcd(tm.tm_min);
131 xicor_write(X1241REG_MN, tm.tm_min); 131 xicor_write(X1241REG_MN, tm.tm_min);
132 132
133 tm.tm_mday = BIN2BCD(tm.tm_mday); 133 tm.tm_mday = bin2bcd(tm.tm_mday);
134 xicor_write(X1241REG_DT, tm.tm_mday); 134 xicor_write(X1241REG_DT, tm.tm_mday);
135 135
136 /* tm_mon starts from 0, *ick* */ 136 /* tm_mon starts from 0, *ick* */
137 tm.tm_mon ++; 137 tm.tm_mon ++;
138 tm.tm_mon = BIN2BCD(tm.tm_mon); 138 tm.tm_mon = bin2bcd(tm.tm_mon);
139 xicor_write(X1241REG_MO, tm.tm_mon); 139 xicor_write(X1241REG_MO, tm.tm_mon);
140 140
141 /* year is split */ 141 /* year is split */
@@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
148 tmp = xicor_read(X1241REG_HR); 148 tmp = xicor_read(X1241REG_HR);
149 if (tmp & X1241REG_HR_MIL) { 149 if (tmp & X1241REG_HR_MIL) {
150 /* 24 hour format */ 150 /* 24 hour format */
151 tm.tm_hour = BIN2BCD(tm.tm_hour); 151 tm.tm_hour = bin2bcd(tm.tm_hour);
152 tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f); 152 tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
153 } else { 153 } else {
154 /* 12 hour format, with 0x2 for pm */ 154 /* 12 hour format, with 0x2 for pm */
@@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
157 tmp |= 0x20; 157 tmp |= 0x20;
158 tm.tm_hour -= 12; 158 tm.tm_hour -= 12;
159 } 159 }
160 tm.tm_hour = BIN2BCD(tm.tm_hour); 160 tm.tm_hour = bin2bcd(tm.tm_hour);
161 tmp |= tm.tm_hour; 161 tmp |= tm.tm_hour;
162 } 162 }
163 xicor_write(X1241REG_HR, tmp); 163 xicor_write(X1241REG_HR, tmp);
@@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
191 y2k = xicor_read(X1241REG_Y2K); 191 y2k = xicor_read(X1241REG_Y2K);
192 spin_unlock_irqrestore(&rtc_lock, flags); 192 spin_unlock_irqrestore(&rtc_lock, flags);
193 193
194 sec = BCD2BIN(sec); 194 sec = bcd2bin(sec);
195 min = BCD2BIN(min); 195 min = bcd2bin(min);
196 hour = BCD2BIN(hour); 196 hour = bcd2bin(hour);
197 day = BCD2BIN(day); 197 day = bcd2bin(day);
198 mon = BCD2BIN(mon); 198 mon = bcd2bin(mon);
199 year = BCD2BIN(year); 199 year = bcd2bin(year);
200 y2k = BCD2BIN(y2k); 200 y2k = bcd2bin(y2k);
201 201
202 year += (y2k * 100); 202 year += (y2k * 100);
203 203
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index dd557c9cf001..9a9f43358879 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
68 68
69source "init/Kconfig" 69source "init/Kconfig"
70 70
71source "kernel/Kconfig.freezer"
72
71 73
72menu "Matsushita MN10300 system setup" 74menu "Matsushita MN10300 system setup"
73 75
diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
index 042f792d8430..7978470b5749 100644
--- a/arch/mn10300/kernel/rtc.c
+++ b/arch/mn10300/kernel/rtc.c
@@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
67 67
68 cmos_minutes = CMOS_READ(RTC_MINUTES); 68 cmos_minutes = CMOS_READ(RTC_MINUTES);
69 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 69 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
70 BCD_TO_BIN(cmos_minutes); 70 cmos_minutes = bcd2bin(cmos_minutes);
71 71
72 /* 72 /*
73 * since we're only adjusting minutes and seconds, 73 * since we're only adjusting minutes and seconds,
@@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
84 84
85 if (abs(real_minutes - cmos_minutes) < 30) { 85 if (abs(real_minutes - cmos_minutes) < 30) {
86 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 86 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
87 BIN_TO_BCD(real_seconds); 87 real_seconds = bin2bcd(real_seconds);
88 BIN_TO_BCD(real_minutes); 88 real_minutes = bin2bcd(real_minutes);
89 } 89 }
90 CMOS_WRITE(real_seconds, RTC_SECONDS); 90 CMOS_WRITE(real_seconds, RTC_SECONDS);
91 CMOS_WRITE(real_minutes, RTC_MINUTES); 91 CMOS_WRITE(real_minutes, RTC_MINUTES);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8313fccced5e..2bd1f6ef5db0 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -90,6 +90,8 @@ config ARCH_MAY_HAVE_PC_FDC
90 90
91source "init/Kconfig" 91source "init/Kconfig"
92 92
93source "kernel/Kconfig.freezer"
94
93 95
94menu "Processor type and features" 96menu "Processor type and features"
95 97
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 380baa1780e9..9391199d9e77 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI
230 230
231source "init/Kconfig" 231source "init/Kconfig"
232 232
233source "kernel/Kconfig.freezer"
234
233source "arch/powerpc/sysdev/Kconfig" 235source "arch/powerpc/sysdev/Kconfig"
234source "arch/powerpc/platforms/Kconfig" 236source "arch/powerpc/platforms/Kconfig"
235 237
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
index fda98715cd35..5aa22cffdbd6 100644
--- a/arch/powerpc/include/asm/ps3av.h
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -678,6 +678,8 @@ struct ps3av_pkt_avb_param {
678 u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE]; 678 u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE];
679}; 679};
680 680
681/* channel status */
682extern u8 ps3av_mode_cs_info[];
681 683
682/** command status **/ 684/** command status **/
683#define PS3AV_STATUS_SUCCESS 0x0000 /* success */ 685#define PS3AV_STATUS_SUCCESS 0x0000 /* success */
@@ -735,6 +737,7 @@ extern int ps3av_get_mode(void);
735extern int ps3av_video_mode2res(u32, u32 *, u32 *); 737extern int ps3av_video_mode2res(u32, u32 *, u32 *);
736extern int ps3av_video_mute(int); 738extern int ps3av_video_mute(int);
737extern int ps3av_audio_mute(int); 739extern int ps3av_audio_mute(int);
740extern int ps3av_audio_mute_analog(int);
738extern int ps3av_dev_open(void); 741extern int ps3av_dev_open(void);
739extern int ps3av_dev_close(void); 742extern int ps3av_dev_close(void);
740extern void ps3av_register_flip_ctl(void (*flip_ctl)(int on, void *data), 743extern void ps3av_register_flip_ctl(void (*flip_ctl)(int on, void *data),
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b32ee1..97e056379728 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,9 @@
27#define DBG(fmt...) 27#define DBG(fmt...)
28#endif 28#endif
29 29
30/* Stores the physical address of elf header of crash image. */
31unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
32
30void __init reserve_kdump_trampoline(void) 33void __init reserve_kdump_trampoline(void)
31{ 34{
32 lmb_reserve(0, KDUMP_RESERVE_LIMIT); 35 lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -66,7 +69,11 @@ void __init setup_kdump_trampoline(void)
66 DBG(" <- setup_kdump_trampoline()\n"); 69 DBG(" <- setup_kdump_trampoline()\n");
67} 70}
68 71
69#ifdef CONFIG_PROC_VMCORE 72/*
73 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
74 * is_kdump_kernel() to determine if we are booting after a panic. Hence
75 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
76 */
70static int __init parse_elfcorehdr(char *p) 77static int __init parse_elfcorehdr(char *p)
71{ 78{
72 if (p) 79 if (p)
@@ -75,7 +82,6 @@ static int __init parse_elfcorehdr(char *p)
75 return 1; 82 return 1;
76} 83}
77__setup("elfcorehdr=", parse_elfcorehdr); 84__setup("elfcorehdr=", parse_elfcorehdr);
78#endif
79 85
80static int __init parse_savemaxmem(char *p) 86static int __init parse_savemaxmem(char *p)
81{ 87{
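On powerpc the #ifdef CONFIG_PROC_VMCORE / #endif pair around parse_elfcorehdr() is dropped outright rather than switched to CONFIG_CRASH_DUMP: the file is only compiled when that option is set anyway, presumably via the usual Makefile rule:

    obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o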
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 98d7bf99533a..b9e1a1da6e52 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
134 134
135 return __add_pages(zone, start_pfn, nr_pages); 135 return __add_pages(zone, start_pfn, nr_pages);
136} 136}
137
138#ifdef CONFIG_MEMORY_HOTREMOVE
139int remove_memory(u64 start, u64 size)
140{
141 unsigned long start_pfn, end_pfn;
142 int ret;
143
144 start_pfn = start >> PAGE_SHIFT;
145 end_pfn = start_pfn + (size >> PAGE_SHIFT);
146 ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
147 if (ret)
148 goto out;
149 /* Arch-specific calls go here - next patch */
150out:
151 return ret;
152}
153#endif /* CONFIG_MEMORY_HOTREMOVE */
154#endif /* CONFIG_MEMORY_HOTPLUG */ 137#endif /* CONFIG_MEMORY_HOTPLUG */
155 138
156/* 139/*
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bc581d8a7cd9..70b7645ce745 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -78,6 +78,8 @@ config S390
78 78
79source "init/Kconfig" 79source "init/Kconfig"
80 80
81source "kernel/Kconfig.freezer"
82
81menu "Base setup" 83menu "Base setup"
82 84
83comment "Processor type and features" 85comment "Processor type and features"
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ea40a9d690fc..de3fad60c682 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -99,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
99#define TIF_31BIT 18 /* 32bit process */ 99#define TIF_31BIT 18 /* 32bit process */
100#define TIF_MEMDIE 19 100#define TIF_MEMDIE 19
101#define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ 101#define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */
102#define TIF_FREEZE 21 /* thread is freezing for suspend */
102 103
103#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 104#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
104#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 105#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
112#define _TIF_USEDFPU (1<<TIF_USEDFPU) 113#define _TIF_USEDFPU (1<<TIF_USEDFPU)
113#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 114#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
114#define _TIF_31BIT (1<<TIF_31BIT) 115#define _TIF_31BIT (1<<TIF_31BIT)
116#define _TIF_FREEZE (1<<TIF_FREEZE)
115 117
116#endif /* __KERNEL__ */ 118#endif /* __KERNEL__ */
117 119
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 1169130a97ef..158b0d6d7046 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size)
189 return rc; 189 return rc;
190} 190}
191#endif /* CONFIG_MEMORY_HOTPLUG */ 191#endif /* CONFIG_MEMORY_HOTPLUG */
192
193#ifdef CONFIG_MEMORY_HOTREMOVE
194int remove_memory(u64 start, u64 size)
195{
196 unsigned long start_pfn, end_pfn;
197
198 start_pfn = PFN_DOWN(start);
199 end_pfn = start_pfn + PFN_DOWN(size);
200 return offline_pages(start_pfn, end_pfn, 120 * HZ);
201}
202#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b4aa2a03e19b..cb2c87df70ce 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -121,6 +121,8 @@ config IO_TRAPPED
121 121
122source "init/Kconfig" 122source "init/Kconfig"
123 123
124source "kernel/Kconfig.freezer"
125
124menu "System type" 126menu "System type"
125 127
126# 128#
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index 4a2ecbe27d8e..95d216255565 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -10,6 +10,9 @@
10#include <linux/io.h> 10#include <linux/io.h>
11#include <asm/uaccess.h> 11#include <asm/uaccess.h>
12 12
13/* Stores the physical address of elf header of crash image. */
14unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
15
13/** 16/**
14 * copy_oldmem_page - copy one page from "oldmem" 17 * copy_oldmem_page - copy one page from "oldmem"
15 * @pfn: page frame number to be copied 18 * @pfn: page frame number to be copied
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 97671dac12a6..e594559c8dba 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,8 @@ config HZ
37 37
38source "init/Kconfig" 38source "init/Kconfig"
39 39
40source "kernel/Kconfig.freezer"
41
40menu "General machine setup" 42menu "General machine setup"
41 43
42config SMP 44config SMP
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 29899fd5b1b2..80fe547c3f45 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -135,6 +135,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
135#define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling 135#define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling
136 * TIF_NEED_RESCHED */ 136 * TIF_NEED_RESCHED */
137#define TIF_MEMDIE 10 137#define TIF_MEMDIE 10
138#define TIF_FREEZE 11 /* is freezing for suspend */
138 139
139/* as above, but as bit values */ 140/* as above, but as bit values */
140#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 141#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -148,6 +149,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
148#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | \ 149#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | \
149 _TIF_SIGPENDING | \ 150 _TIF_SIGPENDING | \
150 _TIF_RESTORE_SIGMASK) 151 _TIF_RESTORE_SIGMASK)
152#define _TIF_FREEZE (1<<TIF_FREEZE)
151 153
152#endif /* __KERNEL__ */ 154#endif /* __KERNEL__ */
153 155
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index c0a737d7292c..639ac805448a 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
237#define TIF_ABI_PENDING 12 237#define TIF_ABI_PENDING 12
238#define TIF_MEMDIE 13 238#define TIF_MEMDIE 13
239#define TIF_POLLING_NRFLAG 14 239#define TIF_POLLING_NRFLAG 14
240#define TIF_FREEZE 15 /* is freezing for suspend */
240 241
241#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 242#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
242#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 243#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -249,6 +250,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
249#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) 250#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
250#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) 251#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
251#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 252#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
253#define _TIF_FREEZE (1<<TIF_FREEZE)
252 254
253#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \ 255#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
254 _TIF_DO_NOTIFY_RESUME_MASK | \ 256 _TIF_DO_NOTIFY_RESUME_MASK | \
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 5446e2a499b1..035b15af90d8 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -96,6 +96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
96 def_bool y 96 def_bool y
97 97
98source "init/Kconfig" 98source "init/Kconfig"
99source "kernel/Kconfig.freezer"
99 100
100menu "Processor type and features" 101menu "Processor type and features"
101 102
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 6976812cfb18..393bccfe1785 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -229,6 +229,8 @@ endmenu
229 229
230source "init/Kconfig" 230source "init/Kconfig"
231 231
232source "kernel/Kconfig.freezer"
233
232source "drivers/block/Kconfig" 234source "drivers/block/Kconfig"
233 235
234source "arch/um/Kconfig.char" 236source "arch/um/Kconfig.char"
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index fd0c25ad6af3..129647375a6c 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -179,7 +179,8 @@ static int copy_sc_from_user(struct pt_regs *regs,
179 if (have_fpx_regs) { 179 if (have_fpx_regs) {
180 struct user_fxsr_struct fpx; 180 struct user_fxsr_struct fpx;
181 181
182 err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0], 182 err = copy_from_user(&fpx,
183 &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
183 sizeof(struct user_fxsr_struct)); 184 sizeof(struct user_fxsr_struct));
184 if (err) 185 if (err)
185 return 1; 186 return 1;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd3c2c53873e..49349ba77d80 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -193,6 +193,7 @@ config X86_TRAMPOLINE
193config KTIME_SCALAR 193config KTIME_SCALAR
194 def_bool X86_32 194 def_bool X86_32
195source "init/Kconfig" 195source "init/Kconfig"
196source "kernel/Kconfig.freezer"
196 197
197menu "Processor type and features" 198menu "Processor type and features"
198 199
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 72d0c56c1b48..f7cdb3b457aa 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,6 +13,9 @@
13 13
14static void *kdump_buf_page; 14static void *kdump_buf_page;
15 15
16/* Stores the physical address of elf header of crash image. */
17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
18
16/** 19/**
17 * copy_oldmem_page - copy one page from "oldmem" 20 * copy_oldmem_page - copy one page from "oldmem"
18 * @pfn: page frame number to be copied 21 * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c2..045b36cada65 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,9 @@
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/io.h> 11#include <linux/io.h>
12 12
13/* Stores the physical address of elf header of crash image. */
14unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
15
13/** 16/**
14 * copy_oldmem_page - copy one page from "oldmem" 17 * copy_oldmem_page - copy one page from "oldmem"
15 * @pfn: page frame number to be copied 18 * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0a23b5795b25..dd6f2b71561b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
52 52
53 cmos_minutes = CMOS_READ(RTC_MINUTES); 53 cmos_minutes = CMOS_READ(RTC_MINUTES);
54 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 54 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
55 BCD_TO_BIN(cmos_minutes); 55 cmos_minutes = bcd2bin(cmos_minutes);
56 56
57 /* 57 /*
58 * since we're only adjusting minutes and seconds, 58 * since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
69 69
70 if (abs(real_minutes - cmos_minutes) < 30) { 70 if (abs(real_minutes - cmos_minutes) < 30) {
71 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 71 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
72 BIN_TO_BCD(real_seconds); 72 real_seconds = bin2bcd(real_seconds);
73 BIN_TO_BCD(real_minutes); 73 real_minutes = bin2bcd(real_minutes);
74 } 74 }
75 CMOS_WRITE(real_seconds,RTC_SECONDS); 75 CMOS_WRITE(real_seconds,RTC_SECONDS);
76 CMOS_WRITE(real_minutes,RTC_MINUTES); 76 CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
124 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); 124 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
125 125
126 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { 126 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
127 BCD_TO_BIN(sec); 127 sec = bcd2bin(sec);
128 BCD_TO_BIN(min); 128 min = bcd2bin(min);
129 BCD_TO_BIN(hour); 129 hour = bcd2bin(hour);
130 BCD_TO_BIN(day); 130 day = bcd2bin(day);
131 BCD_TO_BIN(mon); 131 mon = bcd2bin(mon);
132 BCD_TO_BIN(year); 132 year = bcd2bin(year);
133 } 133 }
134 134
135 if (century) { 135 if (century) {
136 BCD_TO_BIN(century); 136 century = bcd2bin(century);
137 year += century * 100; 137 year += century * 100;
138 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); 138 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
139 } else 139 } else
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4b..b2c97874ec0f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
561 561
562} 562}
563 563
564#ifdef CONFIG_PROC_VMCORE 564/*
565 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
566 * is_kdump_kernel() to determine if we are booting after a panic. Hence
567 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
568 */
569
570#ifdef CONFIG_CRASH_DUMP
565/* elfcorehdr= specifies the location of elf core header 571/* elfcorehdr= specifies the location of elf core header
566 * stored by the crashed kernel. This option will be passed 572 * stored by the crashed kernel. This option will be passed
567 * by kexec loader to the capture kernel. 573 * by kexec loader to the capture kernel.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a9ec89c3fbca..407d8784f669 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -792,6 +792,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
792 /* Must avoid aliasing mappings in the highmem code */ 792 /* Must avoid aliasing mappings in the highmem code */
793 kmap_flush_unused(); 793 kmap_flush_unused();
794 794
795 vm_unmap_aliases();
796
795 cpa.vaddr = addr; 797 cpa.vaddr = addr;
796 cpa.numpages = numpages; 798 cpa.numpages = numpages;
797 cpa.mask_set = mask_set; 799 cpa.mask_set = mask_set;
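vm_unmap_aliases() comes with the new lazy vmap unmapping in mm/vmalloc.c: TLB flushes for unmapped vmap areas may be deferred, so aliases of a page can linger after vm_unmap_ram(). Here, and in the two Xen hunks below, it is called next to kmap_flush_unused() so that no stale alias keeps the old page attributes, or a writable mapping of a page about to be pinned. Usage sketch:

    #include <linux/vmalloc.h>

    kmap_flush_unused();    /* drop unused highmem kmaps */
    vm_unmap_aliases();     /* flush lazily-deferred vmap aliases */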
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0013a729b41d..b61534c7a4c4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -871,6 +871,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
871 /* make sure there are no stray mappings of 871 /* make sure there are no stray mappings of
872 this page */ 872 this page */
873 kmap_flush_unused(); 873 kmap_flush_unused();
874 vm_unmap_aliases();
874 } 875 }
875} 876}
876 877
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ae173f6edd8b..d4d52f5a1cf7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -846,6 +846,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
846 /* re-enable interrupts for kmap_flush_unused */ 846 /* re-enable interrupts for kmap_flush_unused */
847 xen_mc_issue(0); 847 xen_mc_issue(0);
848 kmap_flush_unused(); 848 kmap_flush_unused();
849 vm_unmap_aliases();
849 xen_mc_batch(); 850 xen_mc_batch();
850 } 851 }
851 852
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 02e417d3d8e9..a213260b51e5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -55,6 +55,7 @@ config HZ
55 default 100 55 default 100
56 56
57source "init/Kconfig" 57source "init/Kconfig"
58source "kernel/Kconfig.freezer"
58 59
59menu "Processor type and features" 60menu "Processor type and features"
60 61
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b1c723f9f58d..70f7f60929ca 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -431,7 +431,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
431} 431}
432 432
433static struct device_attribute alarm_attr = { 433static struct device_attribute alarm_attr = {
434 .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE}, 434 .attr = {.name = "alarm", .mode = 0644},
435 .show = acpi_battery_alarm_show, 435 .show = acpi_battery_alarm_show,
436 .store = acpi_battery_alarm_store, 436 .store = acpi_battery_alarm_store,
437}; 437};
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 10a36512647c..7b011e7e29fe 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -463,7 +463,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
463} 463}
464 464
465static struct device_attribute alarm_attr = { 465static struct device_attribute alarm_attr = {
466 .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE}, 466 .attr = {.name = "alarm", .mode = 0644},
467 .show = acpi_battery_alarm_show, 467 .show = acpi_battery_alarm_show,
468 .store = acpi_battery_alarm_store, 468 .store = acpi_battery_alarm_store,
469}; 469};
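These .owner = THIS_MODULE deletions, in the battery.c hunk above and the system.c, aoeblk.c and nbd.c hunks below, reflect struct attribute losing its owner field: sysfs now handles module pinning itself, so filling the field is dead weight. A hand-rolled attribute like the one above then becomes equivalent to the standard helper, e.g. (sketch reusing the nbd names from this series):

    static DEVICE_ATTR(pid, S_IRUGO, pid_show, NULL);
    /* yields dev_attr_pid with .attr = { .name = "pid",
     * .mode = S_IRUGO } and .show = pid_show */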
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index bf5b04de02d1..631ee2ee2ca0 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c
@@ -120,13 +120,13 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
120 spin_unlock_irqrestore(&rtc_lock, flags); 120 spin_unlock_irqrestore(&rtc_lock, flags);
121 121
122 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 122 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
123 BCD_TO_BIN(sec); 123 sec = bcd2bin(sec);
124 BCD_TO_BIN(min); 124 min = bcd2bin(min);
125 BCD_TO_BIN(hr); 125 hr = bcd2bin(hr);
126 BCD_TO_BIN(day); 126 day = bcd2bin(day);
127 BCD_TO_BIN(mo); 127 mo = bcd2bin(mo);
128 BCD_TO_BIN(yr); 128 yr = bcd2bin(yr);
129 BCD_TO_BIN(cent); 129 cent = bcd2bin(cent);
130 } 130 }
131 131
132 /* we're trusting the FADT (see above) */ 132 /* we're trusting the FADT (see above) */
@@ -204,7 +204,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
204{ 204{
205 u32 val = CMOS_READ(offset); 205 u32 val = CMOS_READ(offset);
206 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 206 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
207 BCD_TO_BIN(val); 207 val = bcd2bin(val);
208 return val; 208 return val;
209} 209}
210 210
@@ -212,7 +212,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
212static void cmos_bcd_write(u32 val, int offset, int rtc_control) 212static void cmos_bcd_write(u32 val, int offset, int rtc_control)
213{ 213{
214 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 214 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
215 BIN_TO_BCD(val); 215 val = bin2bcd(val);
216 CMOS_WRITE(val, offset); 216 CMOS_WRITE(val, offset);
217} 217}
218 218
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 91dec448b3ed..24e80fd927e2 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -115,7 +115,6 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
115 table_attr->attr.read = acpi_table_show; 115 table_attr->attr.read = acpi_table_show;
116 table_attr->attr.attr.name = table_attr->name; 116 table_attr->attr.attr.name = table_attr->name;
117 table_attr->attr.attr.mode = 0444; 117 table_attr->attr.attr.mode = 0444;
118 table_attr->attr.attr.owner = THIS_MODULE;
119 118
120 return; 119 return;
121} 120}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index af0d175c025d..5260e9e0df48 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -21,6 +21,8 @@
21#include <linux/memory_hotplug.h> 21#include <linux/memory_hotplug.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/stat.h>
25
24#include <asm/atomic.h> 26#include <asm/atomic.h>
25#include <asm/uaccess.h> 27#include <asm/uaccess.h>
26 28
@@ -325,7 +327,7 @@ memory_probe_store(struct class *class, const char *buf, size_t count)
325 327
326 return count; 328 return count;
327} 329}
328static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 330static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
329 331
330static int memory_probe_init(void) 332static int memory_probe_init(void)
331{ 333{
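The probe mode change is a cleanup and a small fix at once: S_IWUSR from the newly included <linux/stat.h> is 0200, so the attribute becomes write-only for root, where the old literal 0700 also set meaningless read and execute bits on a pure trigger file.

    #include <linux/stat.h>         /* S_IWUSR == 0200 */
    static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);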
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c6325..f5207090885a 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -13,6 +13,7 @@
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/device.h> 15#include <linux/device.h>
16#include <linux/swap.h>
16 17
17static struct sysdev_class node_class = { 18static struct sysdev_class node_class = {
18 .name = "node", 19 .name = "node",
@@ -61,34 +62,52 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
61 si_meminfo_node(&i, nid); 62 si_meminfo_node(&i, nid);
62 63
63 n = sprintf(buf, "\n" 64 n = sprintf(buf, "\n"
64 "Node %d MemTotal: %8lu kB\n" 65 "Node %d MemTotal: %8lu kB\n"
65 "Node %d MemFree: %8lu kB\n" 66 "Node %d MemFree: %8lu kB\n"
66 "Node %d MemUsed: %8lu kB\n" 67 "Node %d MemUsed: %8lu kB\n"
67 "Node %d Active: %8lu kB\n" 68 "Node %d Active: %8lu kB\n"
68 "Node %d Inactive: %8lu kB\n" 69 "Node %d Inactive: %8lu kB\n"
70 "Node %d Active(anon): %8lu kB\n"
71 "Node %d Inactive(anon): %8lu kB\n"
72 "Node %d Active(file): %8lu kB\n"
73 "Node %d Inactive(file): %8lu kB\n"
74#ifdef CONFIG_UNEVICTABLE_LRU
75 "Node %d Unevictable: %8lu kB\n"
76 "Node %d Mlocked: %8lu kB\n"
77#endif
69#ifdef CONFIG_HIGHMEM 78#ifdef CONFIG_HIGHMEM
70 "Node %d HighTotal: %8lu kB\n" 79 "Node %d HighTotal: %8lu kB\n"
71 "Node %d HighFree: %8lu kB\n" 80 "Node %d HighFree: %8lu kB\n"
72 "Node %d LowTotal: %8lu kB\n" 81 "Node %d LowTotal: %8lu kB\n"
73 "Node %d LowFree: %8lu kB\n" 82 "Node %d LowFree: %8lu kB\n"
74#endif 83#endif
75 "Node %d Dirty: %8lu kB\n" 84 "Node %d Dirty: %8lu kB\n"
76 "Node %d Writeback: %8lu kB\n" 85 "Node %d Writeback: %8lu kB\n"
77 "Node %d FilePages: %8lu kB\n" 86 "Node %d FilePages: %8lu kB\n"
78 "Node %d Mapped: %8lu kB\n" 87 "Node %d Mapped: %8lu kB\n"
79 "Node %d AnonPages: %8lu kB\n" 88 "Node %d AnonPages: %8lu kB\n"
80 "Node %d PageTables: %8lu kB\n" 89 "Node %d PageTables: %8lu kB\n"
81 "Node %d NFS_Unstable: %8lu kB\n" 90 "Node %d NFS_Unstable: %8lu kB\n"
82 "Node %d Bounce: %8lu kB\n" 91 "Node %d Bounce: %8lu kB\n"
83 "Node %d WritebackTmp: %8lu kB\n" 92 "Node %d WritebackTmp: %8lu kB\n"
84 "Node %d Slab: %8lu kB\n" 93 "Node %d Slab: %8lu kB\n"
85 "Node %d SReclaimable: %8lu kB\n" 94 "Node %d SReclaimable: %8lu kB\n"
86 "Node %d SUnreclaim: %8lu kB\n", 95 "Node %d SUnreclaim: %8lu kB\n",
87 nid, K(i.totalram), 96 nid, K(i.totalram),
88 nid, K(i.freeram), 97 nid, K(i.freeram),
89 nid, K(i.totalram - i.freeram), 98 nid, K(i.totalram - i.freeram),
90 nid, K(node_page_state(nid, NR_ACTIVE)), 99 nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
91 nid, K(node_page_state(nid, NR_INACTIVE)), 100 node_page_state(nid, NR_ACTIVE_FILE)),
101 nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
102 node_page_state(nid, NR_INACTIVE_FILE)),
103 nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
104 nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
105 nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
106 nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
107#ifdef CONFIG_UNEVICTABLE_LRU
108 nid, K(node_page_state(nid, NR_UNEVICTABLE)),
109 nid, K(node_page_state(nid, NR_MLOCK)),
110#endif
92#ifdef CONFIG_HIGHMEM 111#ifdef CONFIG_HIGHMEM
93 nid, K(i.totalhigh), 112 nid, K(i.totalhigh),
94 nid, K(i.freehigh), 113 nid, K(i.freehigh),
@@ -173,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
173 sysdev_create_file(&node->sysdev, &attr_meminfo); 192 sysdev_create_file(&node->sysdev, &attr_meminfo);
174 sysdev_create_file(&node->sysdev, &attr_numastat); 193 sysdev_create_file(&node->sysdev, &attr_numastat);
175 sysdev_create_file(&node->sysdev, &attr_distance); 194 sysdev_create_file(&node->sysdev, &attr_distance);
195
196 scan_unevictable_register_node(node);
176 } 197 }
177 return error; 198 return error;
178} 199}
@@ -192,6 +213,8 @@ void unregister_node(struct node *node)
192 sysdev_remove_file(&node->sysdev, &attr_numastat); 213 sysdev_remove_file(&node->sysdev, &attr_numastat);
193 sysdev_remove_file(&node->sysdev, &attr_distance); 214 sysdev_remove_file(&node->sysdev, &attr_distance);
194 215
216 scan_unevictable_unregister_node(node);
217
195 sysdev_unregister(&node->sysdev); 218 sysdev_unregister(&node->sysdev);
196} 219}
197 220
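With the split-LRU series, per-node meminfo now reports the anonymous and file-backed LRU lists separately and, under CONFIG_UNEVICTABLE_LRU, adds the unevictable and mlocked counts. An illustrative /sys/devices/system/node/node0/meminfo excerpt (values invented for the example):

    Node 0 Active(anon):      123456 kB
    Node 0 Inactive(anon):     23456 kB
    Node 0 Active(file):      345678 kB
    Node 0 Inactive(file):     45678 kB
    Node 0 Unevictable:            0 kB
    Node 0 Mlocked:                0 kB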
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b82654e883a7..d876ad861237 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -90,7 +90,7 @@ static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
90static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); 90static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
91static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL); 91static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
92static struct device_attribute dev_attr_firmware_version = { 92static struct device_attribute dev_attr_firmware_version = {
93 .attr = { .name = "firmware-version", .mode = S_IRUGO, .owner = THIS_MODULE }, 93 .attr = { .name = "firmware-version", .mode = S_IRUGO },
94 .show = aoedisk_show_fwver, 94 .show = aoedisk_show_fwver,
95}; 95};
96 96
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b3351260d56..9034ca585afd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -391,7 +391,7 @@ static ssize_t pid_show(struct device *dev,
391} 391}
392 392
393static struct device_attribute pid_attr = { 393static struct device_attribute pid_attr = {
394 .attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE }, 394 .attr = { .name = "pid", .mode = S_IRUGO},
395 .show = pid_show, 395 .show = pid_show,
396}; 396};
397 397
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index 5329d482b582..0a826d7be10e 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -210,8 +210,8 @@ static int ds1286_ioctl(struct inode *inode, struct file *file,
210 if (sec != 0) 210 if (sec != 0)
211 return -EINVAL; 211 return -EINVAL;
212 212
213 min = BIN2BCD(min); 213 min = bin2bcd(min);
214 min = BIN2BCD(hrs); 214 hrs = bin2bcd(hrs);
215 215
216 spin_lock(&ds1286_lock); 216 spin_lock(&ds1286_lock);
217 rtc_write(hrs, RTC_HOURS_ALARM); 217 rtc_write(hrs, RTC_HOURS_ALARM);
@@ -353,7 +353,7 @@ static int ds1286_proc_output(char *buf)
353 353
354 ds1286_get_time(&tm); 354 ds1286_get_time(&tm);
355 hundredth = rtc_read(RTC_HUNDREDTH_SECOND); 355 hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
356 BCD_TO_BIN(hundredth); 356 hundredth = bcd2bin(hundredth);
357 357
358 p += sprintf(p, 358 p += sprintf(p,
359 "rtc_time\t: %02d:%02d:%02d.%02d\n" 359 "rtc_time\t: %02d:%02d:%02d.%02d\n"
@@ -477,12 +477,12 @@ static void ds1286_get_time(struct rtc_time *rtc_tm)
477 rtc_write(save_control, RTC_CMD); 477 rtc_write(save_control, RTC_CMD);
478 spin_unlock_irqrestore(&ds1286_lock, flags); 478 spin_unlock_irqrestore(&ds1286_lock, flags);
479 479
480 BCD_TO_BIN(rtc_tm->tm_sec); 480 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
481 BCD_TO_BIN(rtc_tm->tm_min); 481 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
482 BCD_TO_BIN(rtc_tm->tm_hour); 482 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
483 BCD_TO_BIN(rtc_tm->tm_mday); 483 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
484 BCD_TO_BIN(rtc_tm->tm_mon); 484 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
485 BCD_TO_BIN(rtc_tm->tm_year); 485 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
486 486
487 /* 487 /*
488 * Account for differences between how the RTC uses the values 488 * Account for differences between how the RTC uses the values
@@ -531,12 +531,12 @@ static int ds1286_set_time(struct rtc_time *rtc_tm)
531 if (yrs >= 100) 531 if (yrs >= 100)
532 yrs -= 100; 532 yrs -= 100;
533 533
534 BIN_TO_BCD(sec); 534 sec = bin2bcd(sec);
535 BIN_TO_BCD(min); 535 min = bin2bcd(min);
536 BIN_TO_BCD(hrs); 536 hrs = bin2bcd(hrs);
537 BIN_TO_BCD(day); 537 day = bin2bcd(day);
538 BIN_TO_BCD(mon); 538 mon = bin2bcd(mon);
539 BIN_TO_BCD(yrs); 539 yrs = bin2bcd(yrs);
540 540
541 spin_lock_irqsave(&ds1286_lock, flags); 541 spin_lock_irqsave(&ds1286_lock, flags);
542 save_control = rtc_read(RTC_CMD); 542 save_control = rtc_read(RTC_CMD);
@@ -572,8 +572,8 @@ static void ds1286_get_alm_time(struct rtc_time *alm_tm)
572 cmd = rtc_read(RTC_CMD); 572 cmd = rtc_read(RTC_CMD);
573 spin_unlock_irqrestore(&ds1286_lock, flags); 573 spin_unlock_irqrestore(&ds1286_lock, flags);
574 574
575 BCD_TO_BIN(alm_tm->tm_min); 575 alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
576 BCD_TO_BIN(alm_tm->tm_hour); 576 alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
577 alm_tm->tm_sec = 0; 577 alm_tm->tm_sec = 0;
578} 578}
579 579
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index c5e67a623951..170693c93c73 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -131,12 +131,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
131 131
132 local_irq_restore(flags); 132 local_irq_restore(flags);
133 133
134 BCD_TO_BIN(rtc_tm->tm_sec); 134 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
135 BCD_TO_BIN(rtc_tm->tm_min); 135 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
136 BCD_TO_BIN(rtc_tm->tm_hour); 136 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
137 BCD_TO_BIN(rtc_tm->tm_mday); 137 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
138 BCD_TO_BIN(rtc_tm->tm_mon); 138 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
139 BCD_TO_BIN(rtc_tm->tm_year); 139 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
140 140
141 /* 141 /*
142 * Account for differences between how the RTC uses the values 142 * Account for differences between how the RTC uses the values
@@ -211,12 +211,12 @@ static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
211 else 211 else
212 yrs -= 1900; /* RTC (70, 71, ... 99) */ 212 yrs -= 1900; /* RTC (70, 71, ... 99) */
213 213
214 BIN_TO_BCD(sec); 214 sec = bin2bcd(sec);
215 BIN_TO_BCD(min); 215 min = bin2bcd(min);
216 BIN_TO_BCD(hrs); 216 hrs = bin2bcd(hrs);
217 BIN_TO_BCD(day); 217 day = bin2bcd(day);
218 BIN_TO_BCD(mon); 218 mon = bin2bcd(mon);
219 BIN_TO_BCD(yrs); 219 yrs = bin2bcd(yrs);
220 220
221 lock_kernel(); 221 lock_kernel();
222 local_irq_save(flags); 222 local_irq_save(flags);
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index ec9d0443d92c..2abd881b4cbc 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -130,12 +130,12 @@ static long rtc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
130 if (yrs >= 100) 130 if (yrs >= 100)
131 yrs -= 100; 131 yrs -= 100;
132 132
133 sec = BIN2BCD(sec); 133 sec = bin2bcd(sec);
134 min = BIN2BCD(min); 134 min = bin2bcd(min);
135 hrs = BIN2BCD(hrs); 135 hrs = bin2bcd(hrs);
136 day = BIN2BCD(day); 136 day = bin2bcd(day);
137 mon = BIN2BCD(mon); 137 mon = bin2bcd(mon);
138 yrs = BIN2BCD(yrs); 138 yrs = bin2bcd(yrs);
139 139
140 spin_lock_irq(&rtc_lock); 140 spin_lock_irq(&rtc_lock);
141 rtc->control |= M48T35_RTC_SET; 141 rtc->control |= M48T35_RTC_SET;
@@ -311,12 +311,12 @@ static void get_rtc_time(struct rtc_time *rtc_tm)
311 rtc->control &= ~M48T35_RTC_READ; 311 rtc->control &= ~M48T35_RTC_READ;
312 spin_unlock_irq(&rtc_lock); 312 spin_unlock_irq(&rtc_lock);
313 313
314 rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec); 314 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
315 rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min); 315 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
316 rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour); 316 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
317 rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday); 317 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
318 rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon); 318 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
319 rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year); 319 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
320 320
321 /* 321 /*
322 * Account for differences between how the RTC uses the values 322 * Account for differences between how the RTC uses the values
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index b930de50407a..3f7da8cf3a80 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -41,7 +41,8 @@ static u8 pc8736x_gpio_shadow[4];
41#define SIO_BASE2 0x4E /* alt command-reg to check */ 41#define SIO_BASE2 0x4E /* alt command-reg to check */
42 42
43#define SIO_SID 0x20 /* SuperI/O ID Register */ 43#define SIO_SID 0x20 /* SuperI/O ID Register */
44#define SIO_SID_VALUE 0xe9 /* Expected value in SuperI/O ID Register */ 44#define SIO_SID_PC87365 0xe5 /* Expected value in ID Register for PC87365 */
45#define SIO_SID_PC87366 0xe9 /* Expected value in ID Register for PC87366 */
45 46
46#define SIO_CF1 0x21 /* chip config, bit0 is chip enable */ 47#define SIO_CF1 0x21 /* chip config, bit0 is chip enable */
47 48
@@ -91,13 +92,17 @@ static inline int superio_inb(int addr)
91 92
92static int pc8736x_superio_present(void) 93static int pc8736x_superio_present(void)
93{ 94{
95 int id;
96
94 /* try the 2 possible values, read a hardware reg to verify */ 97 /* try the 2 possible values, read a hardware reg to verify */
95 superio_cmd = SIO_BASE1; 98 superio_cmd = SIO_BASE1;
96 if (superio_inb(SIO_SID) == SIO_SID_VALUE) 99 id = superio_inb(SIO_SID);
100 if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
97 return superio_cmd; 101 return superio_cmd;
98 102
99 superio_cmd = SIO_BASE2; 103 superio_cmd = SIO_BASE2;
100 if (superio_inb(SIO_SID) == SIO_SID_VALUE) 104 id = superio_inb(SIO_SID);
105 if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
101 return superio_cmd; 106 return superio_cmd;
102 107
103 return 0; 108 return 0;
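The probe above now accepts either chip ID at either of the two candidate SuperI/O command-register addresses. The pattern generalizes to any "try N base addresses, verify by ID" detection; a hedged stand-alone sketch follows, with superio_inb() stubbed (real code talks to the index/data port pair) and the IDs taken from the patch:

#include <stdio.h>

#define SIO_SID          0x20
#define SIO_SID_PC87365  0xe5
#define SIO_SID_PC87366  0xe9

static const int sio_bases[] = { 0x2e, 0x4e };	/* SIO_BASE1, SIO_BASE2 */

/* Stub: real code does outb(reg, base); return inb(base + 1). */
static int superio_inb(int base, int reg)
{
	return (base == 0x4e && reg == SIO_SID) ? SIO_SID_PC87366 : 0xff;
}

static int superio_present(void)
{
	unsigned i;
	for (i = 0; i < sizeof(sio_bases) / sizeof(sio_bases[0]); i++) {
		int id = superio_inb(sio_bases[i], SIO_SID);
		if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
			return sio_bases[i];	/* found: command port */
	}
	return 0;	/* not present */
}

int main(void)
{
	printf("SuperI/O command port: 0x%x\n", superio_present());
	return 0;
}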
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 17683de95717..32dc89720d58 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -518,17 +518,17 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
518 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || 518 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
519 RTC_ALWAYS_BCD) { 519 RTC_ALWAYS_BCD) {
520 if (sec < 60) 520 if (sec < 60)
521 BIN_TO_BCD(sec); 521 sec = bin2bcd(sec);
522 else 522 else
523 sec = 0xff; 523 sec = 0xff;
524 524
525 if (min < 60) 525 if (min < 60)
526 BIN_TO_BCD(min); 526 min = bin2bcd(min);
527 else 527 else
528 min = 0xff; 528 min = 0xff;
529 529
530 if (hrs < 24) 530 if (hrs < 24)
531 BIN_TO_BCD(hrs); 531 hrs = bin2bcd(hrs);
532 else 532 else
533 hrs = 0xff; 533 hrs = 0xff;
534 } 534 }
@@ -614,12 +614,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
614 614
615 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) 615 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
616 || RTC_ALWAYS_BCD) { 616 || RTC_ALWAYS_BCD) {
617 BIN_TO_BCD(sec); 617 sec = bin2bcd(sec);
618 BIN_TO_BCD(min); 618 min = bin2bcd(min);
619 BIN_TO_BCD(hrs); 619 hrs = bin2bcd(hrs);
620 BIN_TO_BCD(day); 620 day = bin2bcd(day);
621 BIN_TO_BCD(mon); 621 mon = bin2bcd(mon);
622 BIN_TO_BCD(yrs); 622 yrs = bin2bcd(yrs);
623 } 623 }
624 624
625 save_control = CMOS_READ(RTC_CONTROL); 625 save_control = CMOS_READ(RTC_CONTROL);
@@ -1099,7 +1099,7 @@ no_irq:
1099 spin_unlock_irq(&rtc_lock); 1099 spin_unlock_irq(&rtc_lock);
1100 1100
1101 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 1101 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
1102 BCD_TO_BIN(year); /* This should never happen... */ 1102 year = bcd2bin(year); /* This should never happen... */
1103 1103
1104 if (year < 20) { 1104 if (year < 20) {
1105 epoch = 2000; 1105 epoch = 2000;
@@ -1352,13 +1352,13 @@ static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
1352 spin_unlock_irqrestore(&rtc_lock, flags); 1352 spin_unlock_irqrestore(&rtc_lock, flags);
1353 1353
1354 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 1354 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
1355 BCD_TO_BIN(rtc_tm->tm_sec); 1355 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
1356 BCD_TO_BIN(rtc_tm->tm_min); 1356 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
1357 BCD_TO_BIN(rtc_tm->tm_hour); 1357 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
1358 BCD_TO_BIN(rtc_tm->tm_mday); 1358 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
1359 BCD_TO_BIN(rtc_tm->tm_mon); 1359 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
1360 BCD_TO_BIN(rtc_tm->tm_year); 1360 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
1361 BCD_TO_BIN(rtc_tm->tm_wday); 1361 rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
1362 } 1362 }
1363 1363
1364#ifdef CONFIG_MACH_DECSTATION 1364#ifdef CONFIG_MACH_DECSTATION
@@ -1392,9 +1392,9 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
1392 spin_unlock_irq(&rtc_lock); 1392 spin_unlock_irq(&rtc_lock);
1393 1393
1394 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 1394 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
1395 BCD_TO_BIN(alm_tm->tm_sec); 1395 alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
1396 BCD_TO_BIN(alm_tm->tm_min); 1396 alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
1397 BCD_TO_BIN(alm_tm->tm_hour); 1397 alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
1398 } 1398 }
1399} 1399}
1400 1400
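In the alarm path above, out-of-range fields are stored as 0xff because the MC146818 treats alarm register values with the two top bits set (0xc0-0xff) as "don't care", so 0xff matches every time value. A small sketch of that validate-then-encode step under that assumption:

#include <stdio.h>

#define RTC_DONT_CARE 0xff	/* MC146818 alarm wildcard (>= 0xc0) */

static unsigned char bin2bcd(unsigned val)
{
	return ((val / 10) << 4) | (val % 10);
}

/* Encode one alarm field: in-range values become BCD, others wildcard. */
static unsigned char alarm_field(unsigned val, unsigned limit)
{
	return val < limit ? bin2bcd(val) : RTC_DONT_CARE;
}

int main(void)
{
	printf("sec=%02x min=%02x hrs=%02x\n",
	       alarm_field(30, 60),	/* 0x30 */
	       alarm_field(75, 60),	/* 0xff: never constrains the match */
	       alarm_field(23, 24));	/* 0x23 */
	return 0;
}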
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 5b8d7a1aa3e6..ba4e86281fbf 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -2504,7 +2504,7 @@ static void __devexit sx_remove_card(struct sx_board *board,
2504 del_timer(&board->timer); 2504 del_timer(&board->timer);
2505 if (pdev) { 2505 if (pdev) {
2506#ifdef CONFIG_PCI 2506#ifdef CONFIG_PCI
2507 pci_iounmap(pdev, board->base2); 2507 iounmap(board->base2);
2508 pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2); 2508 pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2);
2509#endif 2509#endif
2510 } else { 2510 } else {
@@ -2677,7 +2677,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
2677 } 2677 }
2678 board->hw_base = pci_resource_start(pdev, reg); 2678 board->hw_base = pci_resource_start(pdev, reg);
2679 board->base2 = 2679 board->base2 =
2680 board->base = pci_iomap(pdev, reg, WINDOW_LEN(board)); 2680 board->base = ioremap_nocache(board->hw_base, WINDOW_LEN(board));
2681 if (!board->base) { 2681 if (!board->base) {
2682 dev_err(&pdev->dev, "ioremap failed\n"); 2682 dev_err(&pdev->dev, "ioremap failed\n");
2683 goto err_reg; 2683 goto err_reg;
@@ -2703,7 +2703,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
2703 2703
2704 return 0; 2704 return 0;
2705err_unmap: 2705err_unmap:
2706 pci_iounmap(pdev, board->base2); 2706 iounmap(board->base2);
2707err_reg: 2707err_reg:
2708 pci_release_region(pdev, reg); 2708 pci_release_region(pdev, reg);
2709err_flag: 2709err_flag:
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e6953..d0c0d64ed366 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
168static struct sysrq_key_op sysrq_show_timers_op = { 168static struct sysrq_key_op sysrq_show_timers_op = {
169 .handler = sysrq_handle_show_timers, 169 .handler = sysrq_handle_show_timers,
170 .help_msg = "show-all-timers(Q)", 170 .help_msg = "show-all-timers(Q)",
171 .action_msg = "Show Pending Timers", 171 .action_msg = "Show pending hrtimers (no others)",
172}; 172};
173 173
174static void sysrq_handle_mountro(int key, struct tty_struct *tty) 174static void sysrq_handle_mountro(int key, struct tty_struct *tty)
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index e70d13defde4..9c47dc48c9fd 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1157,7 +1157,7 @@ EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
1157 * Once all references to platform device are down to 0, 1157 * Once all references to platform device are down to 0,
1158 * release all allocated structures. 1158 * release all allocated structures.
1159 */ 1159 */
1160static void tpm_dev_release(struct device *dev) 1160void tpm_dev_release(struct device *dev)
1161{ 1161{
1162 struct tpm_chip *chip = dev_get_drvdata(dev); 1162 struct tpm_chip *chip = dev_get_drvdata(dev);
1163 1163
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 0e024fe2d8c4..887072f5dc8b 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -142,7 +142,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
142 csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT; 142 csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
143 csrow->last_page = csrow->first_page + csrow->nr_pages - 1; 143 csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
144 csrow->mtype = MEM_XDR; 144 csrow->mtype = MEM_XDR;
145 csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED; 145 csrow->edac_mode = EDAC_SECDED;
146 dev_dbg(mci->dev, 146 dev_dbg(mci->dev,
147 "Initialized on node %d, chanmask=0x%x," 147 "Initialized on node %d, chanmask=0x%x,"
148 " first_page=0x%lx, nr_pages=0x%x\n", 148 " first_page=0x%lx, nr_pages=0x%x\n",
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index deb154aa47c4..4353414a0b77 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -732,7 +732,6 @@ static int __init ibft_create_attribute(struct ibft_kobject *kobj_data,
732 732
733 attr->attr.name = name; 733 attr->attr.name = name;
734 attr->attr.mode = S_IRUSR; 734 attr->attr.mode = S_IRUSR;
735 attr->attr.owner = THIS_MODULE;
736 735
737 attr->hdr = hdr; 736 attr->hdr = hdr;
738 attr->show = show; 737 attr->show = show;
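This hunk and the matching one-line removals below (at24.c, ds1682.c, mspro_block.c, infiniband/core/cm.c) are the same tree-wide cleanup: struct attribute no longer carries an owner field, since sysfs stopped relying on it for module lifetime tracking, so initializers simply drop ".owner = THIS_MODULE". A reduced model of the resulting shape, with simplified stand-in types rather than the kernel headers:

#include <stdio.h>

struct attribute {
	const char *name;
	unsigned short mode;
	/* .owner is gone; nothing for initializers to set */
};

struct bin_attribute {
	struct attribute attr;
	size_t size;
};

int main(void)
{
	struct bin_attribute eeprom = {
		.attr = { .name = "eeprom", .mode = 0444 },
		.size = 256,
	};
	printf("%s mode %o size %zu\n", eeprom.attr.name,
	       eeprom.attr.mode, eeprom.size);
	return 0;
}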
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9112830107a5..22edc4273ef6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -248,7 +248,7 @@ static ssize_t gpio_value_show(struct device *dev,
248 if (!test_bit(FLAG_EXPORT, &desc->flags)) 248 if (!test_bit(FLAG_EXPORT, &desc->flags))
249 status = -EIO; 249 status = -EIO;
250 else 250 else
251 status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio)); 251 status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
252 252
253 mutex_unlock(&sysfs_lock); 253 mutex_unlock(&sysfs_lock);
254 return status; 254 return status;
@@ -1105,7 +1105,7 @@ int gpio_get_value_cansleep(unsigned gpio)
1105 1105
1106 might_sleep_if(extra_checks); 1106 might_sleep_if(extra_checks);
1107 chip = gpio_to_chip(gpio); 1107 chip = gpio_to_chip(gpio);
1108 return chip->get(chip, gpio - chip->base); 1108 return chip->get ? chip->get(chip, gpio - chip->base) : 0;
1109} 1109}
1110EXPORT_SYMBOL_GPL(gpio_get_value_cansleep); 1110EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
1111 1111
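Both gpiolib hunks are defensive normalization: the sysfs path squashes whatever the driver returns down to 0/1 with !!, and gpio_get_value_cansleep() now tolerates a chip that never provided a ->get() hook (presumably an output-only controller). The same two idioms in isolation, as a self-contained sketch:

#include <stdio.h>

struct chip {
	/* Optional read hook; output-only controllers may leave it NULL. */
	int (*get)(struct chip *c, unsigned offset);
};

static int read_raw(struct chip *c, unsigned offset)
{
	return 0x80;	/* drivers may return a raw bit mask, not 0/1 */
}

static int chip_get(struct chip *c, unsigned offset)
{
	return c->get ? c->get(c, offset) : 0;	/* NULL-safe default */
}

int main(void)
{
	struct chip c = { .get = read_raw };
	printf("raw=%d normalized=%d\n", chip_get(&c, 0), !!chip_get(&c, 0));
	c.get = NULL;
	printf("no hook -> %d\n", chip_get(&c, 0));
	return 0;
}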
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b06b8e090a27..bc011da79e14 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -49,6 +49,9 @@
49 49
50#define APPLESMC_MAX_DATA_LENGTH 32 50#define APPLESMC_MAX_DATA_LENGTH 32
51 51
52#define APPLESMC_MIN_WAIT 0x0040
53#define APPLESMC_MAX_WAIT 0x8000
54
52#define APPLESMC_STATUS_MASK 0x0f 55#define APPLESMC_STATUS_MASK 0x0f
53#define APPLESMC_READ_CMD 0x10 56#define APPLESMC_READ_CMD 0x10
54#define APPLESMC_WRITE_CMD 0x11 57#define APPLESMC_WRITE_CMD 0x11
@@ -57,8 +60,8 @@
57 60
58#define KEY_COUNT_KEY "#KEY" /* r-o ui32 */ 61#define KEY_COUNT_KEY "#KEY" /* r-o ui32 */
59 62
60#define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6 bytes) */ 63#define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6-10 bytes) */
61#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */ 64#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6-10 bytes) */
62#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */ 65#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */
63 66
64#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */ 67#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */
@@ -104,6 +107,15 @@ static const char* temperature_sensors_sets[][36] = {
104/* Set 6: Macbook3 set */ 107/* Set 6: Macbook3 set */
105 { "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H", 108 { "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H",
106 "Th0S", "Th1H", NULL }, 109 "Th0S", "Th1H", NULL },
110/* Set 7: Macbook Air */
111 { "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
112 "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
113/* Set 8: Macbook Pro 4,1 (Penryn) */
114 { "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
115 "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
116/* Set 9: Macbook Pro 3,1 (Santa Rosa) */
117 { "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P",
118 "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
107}; 119};
108 120
109/* List of keys used to read/write fan speeds */ 121/* List of keys used to read/write fan speeds */
@@ -163,25 +175,25 @@ static unsigned int key_at_index;
163static struct workqueue_struct *applesmc_led_wq; 175static struct workqueue_struct *applesmc_led_wq;
164 176
165/* 177/*
166 * __wait_status - Wait up to 2ms for the status port to get a certain value 178 * __wait_status - Wait up to 32ms for the status port to get a certain value
167 * (masked with 0x0f), returning zero if the value is obtained. Callers must 179 * (masked with 0x0f), returning zero if the value is obtained. Callers must
168 * hold applesmc_lock. 180 * hold applesmc_lock.
169 */ 181 */
170static int __wait_status(u8 val) 182static int __wait_status(u8 val)
171{ 183{
172 unsigned int i; 184 int us;
173 185
174 val = val & APPLESMC_STATUS_MASK; 186 val = val & APPLESMC_STATUS_MASK;
175 187
176 for (i = 0; i < 200; i++) { 188 for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
189 udelay(us);
177 if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) { 190 if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
178 if (debug) 191 if (debug)
179 printk(KERN_DEBUG 192 printk(KERN_DEBUG
180 "Waited %d us for status %x\n", 193 "Waited %d us for status %x\n",
181 i*10, val); 194 2 * us - APPLESMC_MIN_WAIT, val);
182 return 0; 195 return 0;
183 } 196 }
184 udelay(10);
185 } 197 }
186 198
187 printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n", 199 printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n",
@@ -191,6 +203,25 @@ static int __wait_status(u8 val)
191} 203}
192 204
193/* 205/*
206 * special treatment of command port - on newer macbooks, it seems necessary
207 * to resend the command byte before polling the status again. Callers must
208 * hold applesmc_lock.
209 */
210static int send_command(u8 cmd)
211{
212 int us;
213 for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
214 outb(cmd, APPLESMC_CMD_PORT);
215 udelay(us);
216 if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
217 return 0;
218 }
219 printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
220 cmd, inb(APPLESMC_CMD_PORT));
221 return -EIO;
222}
223
224/*
194 * applesmc_read_key - reads len bytes from a given key, and put them in buffer. 225 * applesmc_read_key - reads len bytes from a given key, and put them in buffer.
195 * Returns zero on success or a negative error on failure. Callers must 226 * Returns zero on success or a negative error on failure. Callers must
196 * hold applesmc_lock. 227 * hold applesmc_lock.
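The old loop polled every 10 us for a fixed 2 ms; the new one doubles the delay from APPLESMC_MIN_WAIT (64 us) up to APPLESMC_MAX_WAIT (32768 us), so the total wait is roughly 32 ms while slow SMCs get exponentially more patience and fast ones still answer after one short delay. A stand-alone model of the backoff arithmetic, with the port I/O stubbed:

#include <stdio.h>

#define MIN_WAIT 0x0040		/* 64 us */
#define MAX_WAIT 0x8000		/* 32768 us */

/* Stub: pretend the status port becomes ready after ~1 ms. */
static int status_ready(unsigned waited_us)
{
	return waited_us >= 1000;
}

int main(void)
{
	unsigned us, waited = 0;

	for (us = MIN_WAIT; us < MAX_WAIT; us <<= 1) {
		waited += us;	/* udelay(us) in the driver */
		if (status_ready(waited)) {
			/* matches the driver's "2 * us - MIN_WAIT" report:
			 * 64 + 128 + ... + us == 2 * us - MIN_WAIT */
			printf("ready after %u us (loop says %u)\n",
			       waited, 2 * us - MIN_WAIT);
			return 0;
		}
	}
	printf("timed out after %u us\n", waited);
	return 1;
}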
@@ -205,8 +236,7 @@ static int applesmc_read_key(const char* key, u8* buffer, u8 len)
205 return -EINVAL; 236 return -EINVAL;
206 } 237 }
207 238
208 outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT); 239 if (send_command(APPLESMC_READ_CMD))
209 if (__wait_status(0x0c))
210 return -EIO; 240 return -EIO;
211 241
212 for (i = 0; i < 4; i++) { 242 for (i = 0; i < 4; i++) {
@@ -249,8 +279,7 @@ static int applesmc_write_key(const char* key, u8* buffer, u8 len)
249 return -EINVAL; 279 return -EINVAL;
250 } 280 }
251 281
252 outb(APPLESMC_WRITE_CMD, APPLESMC_CMD_PORT); 282 if (send_command(APPLESMC_WRITE_CMD))
253 if (__wait_status(0x0c))
254 return -EIO; 283 return -EIO;
255 284
256 for (i = 0; i < 4; i++) { 285 for (i = 0; i < 4; i++) {
@@ -284,8 +313,7 @@ static int applesmc_get_key_at_index(int index, char* key)
284 readkey[2] = index >> 8; 313 readkey[2] = index >> 8;
285 readkey[3] = index; 314 readkey[3] = index;
286 315
287 outb(APPLESMC_GET_KEY_BY_INDEX_CMD, APPLESMC_CMD_PORT); 316 if (send_command(APPLESMC_GET_KEY_BY_INDEX_CMD))
288 if (__wait_status(0x0c))
289 return -EIO; 317 return -EIO;
290 318
291 for (i = 0; i < 4; i++) { 319 for (i = 0; i < 4; i++) {
@@ -315,8 +343,7 @@ static int applesmc_get_key_type(char* key, char* type)
315{ 343{
316 int i; 344 int i;
317 345
318 outb(APPLESMC_GET_KEY_TYPE_CMD, APPLESMC_CMD_PORT); 346 if (send_command(APPLESMC_GET_KEY_TYPE_CMD))
319 if (__wait_status(0x0c))
320 return -EIO; 347 return -EIO;
321 348
322 for (i = 0; i < 4; i++) { 349 for (i = 0; i < 4; i++) {
@@ -325,7 +352,7 @@ static int applesmc_get_key_type(char* key, char* type)
325 return -EIO; 352 return -EIO;
326 } 353 }
327 354
328 outb(5, APPLESMC_DATA_PORT); 355 outb(6, APPLESMC_DATA_PORT);
329 356
330 for (i = 0; i < 6; i++) { 357 for (i = 0; i < 6; i++) {
331 if (__wait_status(0x05)) 358 if (__wait_status(0x05))
@@ -527,17 +554,27 @@ out:
527static ssize_t applesmc_light_show(struct device *dev, 554static ssize_t applesmc_light_show(struct device *dev,
528 struct device_attribute *attr, char *sysfsbuf) 555 struct device_attribute *attr, char *sysfsbuf)
529{ 556{
557 static int data_length;
530 int ret; 558 int ret;
531 u8 left = 0, right = 0; 559 u8 left = 0, right = 0;
532 u8 buffer[6]; 560 u8 buffer[10], query[6];
533 561
534 mutex_lock(&applesmc_lock); 562 mutex_lock(&applesmc_lock);
535 563
536 ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, 6); 564 if (!data_length) {
565 ret = applesmc_get_key_type(LIGHT_SENSOR_LEFT_KEY, query);
566 if (ret)
567 goto out;
568 data_length = clamp_val(query[0], 0, 10);
569 printk(KERN_INFO "applesmc: light sensor data length set to "
570 "%d\n", data_length);
571 }
572
573 ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
537 left = buffer[2]; 574 left = buffer[2];
538 if (ret) 575 if (ret)
539 goto out; 576 goto out;
540 ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, 6); 577 ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
541 right = buffer[2]; 578 right = buffer[2];
542 579
543out: 580out:
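applesmc_light_show() now probes the sensor's type descriptor once and caches the payload length in a function-local static, because newer SMCs return 10-byte ALV0/ALV1 records instead of 6. A sketch of that lazy, clamped one-time probe; the probe function and clamp bounds here are stand-ins, and the driver holds applesmc_lock around this, so the static is safe there (this sketch is single-threaded):

#include <stdio.h>

static int query_type_length(void)
{
	return 10;	/* stand-in for applesmc_get_key_type() */
}

static int clamp(int v, int lo, int hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

static int read_light(void)
{
	static int data_length;	/* 0 until first successful probe */

	if (!data_length) {
		data_length = clamp(query_type_length(), 0, 10);
		printf("light sensor data length set to %d\n", data_length);
	}
	return data_length;	/* would size the read buffer */
}

int main(void)
{
	read_light();
	read_light();	/* second call skips the probe */
	return 0;
}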
@@ -1233,39 +1270,57 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
1233 { .accelerometer = 0, .light = 0, .temperature_set = 5 }, 1270 { .accelerometer = 0, .light = 0, .temperature_set = 5 },
1234/* MacBook3: accelerometer and temperature set 6 */ 1271/* MacBook3: accelerometer and temperature set 6 */
1235 { .accelerometer = 1, .light = 0, .temperature_set = 6 }, 1272 { .accelerometer = 1, .light = 0, .temperature_set = 6 },
1273/* MacBook Air: accelerometer, backlight and temperature set 7 */
1274 { .accelerometer = 1, .light = 1, .temperature_set = 7 },
1275/* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
1276 { .accelerometer = 1, .light = 1, .temperature_set = 8 },
1277/* MacBook Pro 3: accelerometer, backlight and temperature set 9 */
1278 { .accelerometer = 1, .light = 1, .temperature_set = 9 },
1236}; 1279};
1237 1280
1238/* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". 1281/* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
1239 * So we need to put "Apple MacBook Pro" before "Apple MacBook". */ 1282 * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
1240static __initdata struct dmi_system_id applesmc_whitelist[] = { 1283static __initdata struct dmi_system_id applesmc_whitelist[] = {
1284 { applesmc_dmi_match, "Apple MacBook Air", {
1285 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
1286 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
1287 &applesmc_dmi_data[7]},
1288 { applesmc_dmi_match, "Apple MacBook Pro 4", {
1289 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
1290 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
1291 &applesmc_dmi_data[8]},
1292 { applesmc_dmi_match, "Apple MacBook Pro 3", {
1293 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
1294 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3") },
1295 &applesmc_dmi_data[9]},
1241 { applesmc_dmi_match, "Apple MacBook Pro", { 1296 { applesmc_dmi_match, "Apple MacBook Pro", {
1242 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1297 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1243 DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") }, 1298 DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
1244 (void*)&applesmc_dmi_data[0]}, 1299 &applesmc_dmi_data[0]},
1245 { applesmc_dmi_match, "Apple MacBook (v2)", { 1300 { applesmc_dmi_match, "Apple MacBook (v2)", {
1246 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1301 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1247 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") }, 1302 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") },
1248 (void*)&applesmc_dmi_data[1]}, 1303 &applesmc_dmi_data[1]},
1249 { applesmc_dmi_match, "Apple MacBook (v3)", { 1304 { applesmc_dmi_match, "Apple MacBook (v3)", {
1250 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1305 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1251 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") }, 1306 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") },
1252 (void*)&applesmc_dmi_data[6]}, 1307 &applesmc_dmi_data[6]},
1253 { applesmc_dmi_match, "Apple MacBook", { 1308 { applesmc_dmi_match, "Apple MacBook", {
1254 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1309 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1255 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") }, 1310 DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") },
1256 (void*)&applesmc_dmi_data[2]}, 1311 &applesmc_dmi_data[2]},
1257 { applesmc_dmi_match, "Apple Macmini", { 1312 { applesmc_dmi_match, "Apple Macmini", {
1258 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1313 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1259 DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") }, 1314 DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") },
1260 (void*)&applesmc_dmi_data[3]}, 1315 &applesmc_dmi_data[3]},
1261 { applesmc_dmi_match, "Apple MacPro2", { 1316 { applesmc_dmi_match, "Apple MacPro2", {
1262 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1317 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1263 DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") }, 1318 DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") },
1264 (void*)&applesmc_dmi_data[4]}, 1319 &applesmc_dmi_data[4]},
1265 { applesmc_dmi_match, "Apple iMac", { 1320 { applesmc_dmi_match, "Apple iMac", {
1266 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), 1321 DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
1267 DMI_MATCH(DMI_PRODUCT_NAME,"iMac") }, 1322 DMI_MATCH(DMI_PRODUCT_NAME,"iMac") },
1268 (void*)&applesmc_dmi_data[5]}, 1323 &applesmc_dmi_data[5]},
1269 { .ident = NULL } 1324 { .ident = NULL }
1270}; 1325};
1271 1326
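The whitelist comment above is the whole trick: DMI product matching is a substring test, so the more specific entries ("MacBookAir", "MacBookPro4", "MacBookPro", ...) must precede the generic "MacBook" or they are shadowed by the first hit. A condensed demonstration, with strstr() standing in for DMI_MATCH() and first-hit-wins scanning as in dmi_check_system():

#include <stdio.h>
#include <string.h>

struct entry { const char *match; const char *ident; };

/* Order matters: the first substring hit wins. */
static const struct entry whitelist[] = {
	{ "MacBookAir",  "MacBook Air"   },
	{ "MacBookPro4", "MacBook Pro 4" },
	{ "MacBookPro",  "MacBook Pro"   },
	{ "MacBook",     "MacBook"       },
	{ NULL, NULL }
};

static const char *identify(const char *product)
{
	const struct entry *e;
	for (e = whitelist; e->match; e++)
		if (strstr(product, e->match))
			return e->ident;
	return "unknown";
}

int main(void)
{
	/* With "MacBook" listed first, this would misreport as "MacBook". */
	printf("%s\n", identify("MacBookPro4,1"));
	return 0;
}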
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 9b462bb13fa3..5fbfa34c110e 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -75,7 +75,8 @@ MODULE_PARM_DESC(force_id, "Override the detected device ID");
75#define FSCM 0x09 /* Logical device: fans */ 75#define FSCM 0x09 /* Logical device: fans */
76#define VLM 0x0d /* Logical device: voltages */ 76#define VLM 0x0d /* Logical device: voltages */
77#define TMS 0x0e /* Logical device: temperatures */ 77#define TMS 0x0e /* Logical device: temperatures */
78static const u8 logdev[3] = { FSCM, VLM, TMS }; 78#define LDNI_MAX 3
79static const u8 logdev[LDNI_MAX] = { FSCM, VLM, TMS };
79 80
80#define LD_FAN 0 81#define LD_FAN 0
81#define LD_IN 1 82#define LD_IN 1
@@ -489,11 +490,66 @@ static struct sensor_device_attribute in_max[] = {
489 SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10), 490 SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10),
490}; 491};
491 492
493/* (temp & vin) channel status register alarm bits (pdf sec.11.5.12) */
494#define CHAN_ALM_MIN 0x02 /* min limit crossed */
495#define CHAN_ALM_MAX 0x04 /* max limit exceeded */
496#define TEMP_ALM_CRIT 0x08 /* temp crit exceeded (temp only) */
497
498/* show_in_min/max_alarm() reads data from the per-channel status
499 register (sec 11.5.12), not the vin event status registers (sec
 500 11.5.2) that (legacy) show_in_alarm() reads (via data->in_alarms) */

501
502static ssize_t show_in_min_alarm(struct device *dev,
503 struct device_attribute *devattr, char *buf)
504{
505 struct pc87360_data *data = pc87360_update_device(dev);
506 unsigned nr = to_sensor_dev_attr(devattr)->index;
507
508 return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
509}
510static ssize_t show_in_max_alarm(struct device *dev,
511 struct device_attribute *devattr, char *buf)
512{
513 struct pc87360_data *data = pc87360_update_device(dev);
514 unsigned nr = to_sensor_dev_attr(devattr)->index;
515
516 return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
517}
518
519static struct sensor_device_attribute in_min_alarm[] = {
520 SENSOR_ATTR(in0_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 0),
521 SENSOR_ATTR(in1_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 1),
522 SENSOR_ATTR(in2_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 2),
523 SENSOR_ATTR(in3_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 3),
524 SENSOR_ATTR(in4_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 4),
525 SENSOR_ATTR(in5_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 5),
526 SENSOR_ATTR(in6_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 6),
527 SENSOR_ATTR(in7_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 7),
528 SENSOR_ATTR(in8_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 8),
529 SENSOR_ATTR(in9_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 9),
530 SENSOR_ATTR(in10_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 10),
531};
532static struct sensor_device_attribute in_max_alarm[] = {
533 SENSOR_ATTR(in0_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 0),
534 SENSOR_ATTR(in1_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 1),
535 SENSOR_ATTR(in2_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 2),
536 SENSOR_ATTR(in3_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 3),
537 SENSOR_ATTR(in4_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 4),
538 SENSOR_ATTR(in5_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 5),
539 SENSOR_ATTR(in6_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 6),
540 SENSOR_ATTR(in7_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 7),
541 SENSOR_ATTR(in8_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 8),
542 SENSOR_ATTR(in9_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 9),
543 SENSOR_ATTR(in10_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 10),
544};
545
492#define VIN_UNIT_ATTRS(X) \ 546#define VIN_UNIT_ATTRS(X) \
493 &in_input[X].dev_attr.attr, \ 547 &in_input[X].dev_attr.attr, \
494 &in_status[X].dev_attr.attr, \ 548 &in_status[X].dev_attr.attr, \
495 &in_min[X].dev_attr.attr, \ 549 &in_min[X].dev_attr.attr, \
496 &in_max[X].dev_attr.attr 550 &in_max[X].dev_attr.attr, \
551 &in_min_alarm[X].dev_attr.attr, \
552 &in_max_alarm[X].dev_attr.attr
497 553
498static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf) 554static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf)
499{ 555{
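Each new *_alarm attribute above is a one-bit view of a cached status byte: the show routine masks the per-channel status with one alarm bit and applies !! so sysfs always reports exactly "0" or "1". The masking step, stand-alone:

#include <stdio.h>

#define CHAN_ALM_MIN  0x02	/* min limit crossed */
#define CHAN_ALM_MAX  0x04	/* max limit exceeded */
#define TEMP_ALM_CRIT 0x08	/* critical limit exceeded */

/* What each show_*_alarm() body reduces to for one channel. */
static int show_alarm(unsigned char status, unsigned char bit)
{
	return !!(status & bit);	/* sysfs wants 0 or 1, not 0 or 4 */
}

int main(void)
{
	unsigned char in_status = CHAN_ALM_MAX | 0x81;	/* ready + enabled */

	printf("in0_min_alarm=%u\n", show_alarm(in_status, CHAN_ALM_MIN));
	printf("in0_max_alarm=%u\n", show_alarm(in_status, CHAN_ALM_MAX));
	return 0;
}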
@@ -658,12 +714,68 @@ static struct sensor_device_attribute therm_crit[] = {
658 show_therm_crit, set_therm_crit, 2+11), 714 show_therm_crit, set_therm_crit, 2+11),
659}; 715};
660 716
717/* show_therm_min/max_alarm() reads data from the per-channel voltage
718 status register (sec 11.5.12) */
719
720static ssize_t show_therm_min_alarm(struct device *dev,
721 struct device_attribute *devattr, char *buf)
722{
723 struct pc87360_data *data = pc87360_update_device(dev);
724 unsigned nr = to_sensor_dev_attr(devattr)->index;
725
726 return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
727}
728static ssize_t show_therm_max_alarm(struct device *dev,
729 struct device_attribute *devattr, char *buf)
730{
731 struct pc87360_data *data = pc87360_update_device(dev);
732 unsigned nr = to_sensor_dev_attr(devattr)->index;
733
734 return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
735}
736static ssize_t show_therm_crit_alarm(struct device *dev,
737 struct device_attribute *devattr, char *buf)
738{
739 struct pc87360_data *data = pc87360_update_device(dev);
740 unsigned nr = to_sensor_dev_attr(devattr)->index;
741
742 return sprintf(buf, "%u\n", !!(data->in_status[nr] & TEMP_ALM_CRIT));
743}
744
745static struct sensor_device_attribute therm_min_alarm[] = {
746 SENSOR_ATTR(temp4_min_alarm, S_IRUGO,
747 show_therm_min_alarm, NULL, 0+11),
748 SENSOR_ATTR(temp5_min_alarm, S_IRUGO,
749 show_therm_min_alarm, NULL, 1+11),
750 SENSOR_ATTR(temp6_min_alarm, S_IRUGO,
751 show_therm_min_alarm, NULL, 2+11),
752};
753static struct sensor_device_attribute therm_max_alarm[] = {
754 SENSOR_ATTR(temp4_max_alarm, S_IRUGO,
755 show_therm_max_alarm, NULL, 0+11),
756 SENSOR_ATTR(temp5_max_alarm, S_IRUGO,
757 show_therm_max_alarm, NULL, 1+11),
758 SENSOR_ATTR(temp6_max_alarm, S_IRUGO,
759 show_therm_max_alarm, NULL, 2+11),
760};
761static struct sensor_device_attribute therm_crit_alarm[] = {
762 SENSOR_ATTR(temp4_crit_alarm, S_IRUGO,
763 show_therm_crit_alarm, NULL, 0+11),
764 SENSOR_ATTR(temp5_crit_alarm, S_IRUGO,
765 show_therm_crit_alarm, NULL, 1+11),
766 SENSOR_ATTR(temp6_crit_alarm, S_IRUGO,
767 show_therm_crit_alarm, NULL, 2+11),
768};
769
661#define THERM_UNIT_ATTRS(X) \ 770#define THERM_UNIT_ATTRS(X) \
662 &therm_input[X].dev_attr.attr, \ 771 &therm_input[X].dev_attr.attr, \
663 &therm_status[X].dev_attr.attr, \ 772 &therm_status[X].dev_attr.attr, \
664 &therm_min[X].dev_attr.attr, \ 773 &therm_min[X].dev_attr.attr, \
665 &therm_max[X].dev_attr.attr, \ 774 &therm_max[X].dev_attr.attr, \
666 &therm_crit[X].dev_attr.attr 775 &therm_crit[X].dev_attr.attr, \
776 &therm_min_alarm[X].dev_attr.attr, \
777 &therm_max_alarm[X].dev_attr.attr, \
778 &therm_crit_alarm[X].dev_attr.attr
667 779
668static struct attribute * pc8736x_therm_attr_array[] = { 780static struct attribute * pc8736x_therm_attr_array[] = {
669 THERM_UNIT_ATTRS(0), 781 THERM_UNIT_ATTRS(0),
@@ -790,12 +902,76 @@ static ssize_t show_temp_alarms(struct device *dev, struct device_attribute *att
790} 902}
791static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL); 903static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL);
792 904
905/* show_temp_min/max_alarm() reads data from the per-channel status
906 register (sec 12.3.7), not the temp event status registers (sec
907 12.3.2) that show_temp_alarm() reads (via data->temp_alarms) */
908
909static ssize_t show_temp_min_alarm(struct device *dev,
910 struct device_attribute *devattr, char *buf)
911{
912 struct pc87360_data *data = pc87360_update_device(dev);
913 unsigned nr = to_sensor_dev_attr(devattr)->index;
914
915 return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MIN));
916}
917static ssize_t show_temp_max_alarm(struct device *dev,
918 struct device_attribute *devattr, char *buf)
919{
920 struct pc87360_data *data = pc87360_update_device(dev);
921 unsigned nr = to_sensor_dev_attr(devattr)->index;
922
923 return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MAX));
924}
925static ssize_t show_temp_crit_alarm(struct device *dev,
926 struct device_attribute *devattr, char *buf)
927{
928 struct pc87360_data *data = pc87360_update_device(dev);
929 unsigned nr = to_sensor_dev_attr(devattr)->index;
930
931 return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_ALM_CRIT));
932}
933
934static struct sensor_device_attribute temp_min_alarm[] = {
935 SENSOR_ATTR(temp1_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 0),
936 SENSOR_ATTR(temp2_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 1),
937 SENSOR_ATTR(temp3_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 2),
938};
939static struct sensor_device_attribute temp_max_alarm[] = {
940 SENSOR_ATTR(temp1_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 0),
941 SENSOR_ATTR(temp2_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 1),
942 SENSOR_ATTR(temp3_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 2),
943};
944static struct sensor_device_attribute temp_crit_alarm[] = {
945 SENSOR_ATTR(temp1_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 0),
946 SENSOR_ATTR(temp2_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 1),
947 SENSOR_ATTR(temp3_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 2),
948};
949
950#define TEMP_FAULT 0x40 /* open diode */
951static ssize_t show_temp_fault(struct device *dev,
952 struct device_attribute *devattr, char *buf)
953{
954 struct pc87360_data *data = pc87360_update_device(dev);
955 unsigned nr = to_sensor_dev_attr(devattr)->index;
956
957 return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_FAULT));
958}
959static struct sensor_device_attribute temp_fault[] = {
960 SENSOR_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0),
961 SENSOR_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1),
962 SENSOR_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2),
963};
964
793#define TEMP_UNIT_ATTRS(X) \ 965#define TEMP_UNIT_ATTRS(X) \
794 &temp_input[X].dev_attr.attr, \ 966 &temp_input[X].dev_attr.attr, \
795 &temp_status[X].dev_attr.attr, \ 967 &temp_status[X].dev_attr.attr, \
796 &temp_min[X].dev_attr.attr, \ 968 &temp_min[X].dev_attr.attr, \
797 &temp_max[X].dev_attr.attr, \ 969 &temp_max[X].dev_attr.attr, \
798 &temp_crit[X].dev_attr.attr 970 &temp_crit[X].dev_attr.attr, \
971 &temp_min_alarm[X].dev_attr.attr, \
972 &temp_max_alarm[X].dev_attr.attr, \
973 &temp_crit_alarm[X].dev_attr.attr, \
974 &temp_fault[X].dev_attr.attr
799 975
800static struct attribute * pc8736x_temp_attr_array[] = { 976static struct attribute * pc8736x_temp_attr_array[] = {
801 TEMP_UNIT_ATTRS(0), 977 TEMP_UNIT_ATTRS(0),
@@ -809,8 +985,8 @@ static const struct attribute_group pc8736x_temp_group = {
809 .attrs = pc8736x_temp_attr_array, 985 .attrs = pc8736x_temp_attr_array,
810}; 986};
811 987
812static ssize_t show_name(struct device *dev, struct device_attribute 988static ssize_t show_name(struct device *dev,
813 *devattr, char *buf) 989 struct device_attribute *devattr, char *buf)
814{ 990{
815 struct pc87360_data *data = dev_get_drvdata(dev); 991 struct pc87360_data *data = dev_get_drvdata(dev);
816 return sprintf(buf, "%s\n", data->name); 992 return sprintf(buf, "%s\n", data->name);
@@ -955,7 +1131,7 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
955 mutex_init(&data->update_lock); 1131 mutex_init(&data->update_lock);
956 platform_set_drvdata(pdev, data); 1132 platform_set_drvdata(pdev, data);
957 1133
958 for (i = 0; i < 3; i++) { 1134 for (i = 0; i < LDNI_MAX; i++) {
959 if (((data->address[i] = extra_isa[i])) 1135 if (((data->address[i] = extra_isa[i]))
960 && !request_region(extra_isa[i], PC87360_EXTENT, 1136 && !request_region(extra_isa[i], PC87360_EXTENT,
961 pc87360_driver.driver.name)) { 1137 pc87360_driver.driver.name)) {
@@ -1031,7 +1207,15 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
1031 || (err = device_create_file(dev, 1207 || (err = device_create_file(dev,
1032 &temp_crit[i].dev_attr)) 1208 &temp_crit[i].dev_attr))
1033 || (err = device_create_file(dev, 1209 || (err = device_create_file(dev,
1034 &temp_status[i].dev_attr))) 1210 &temp_status[i].dev_attr))
1211 || (err = device_create_file(dev,
1212 &temp_min_alarm[i].dev_attr))
1213 || (err = device_create_file(dev,
1214 &temp_max_alarm[i].dev_attr))
1215 || (err = device_create_file(dev,
1216 &temp_crit_alarm[i].dev_attr))
1217 || (err = device_create_file(dev,
1218 &temp_fault[i].dev_attr)))
1035 goto ERROR3; 1219 goto ERROR3;
1036 } 1220 }
1037 if ((err = device_create_file(dev, &dev_attr_alarms_temp))) 1221 if ((err = device_create_file(dev, &dev_attr_alarms_temp)))
@@ -1131,6 +1315,16 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
1131 mutex_unlock(&(data->lock)); 1315 mutex_unlock(&(data->lock));
1132} 1316}
1133 1317
1318/* (temp & vin) channel conversion status register flags (pdf sec.11.5.12) */
1319#define CHAN_CNVRTD 0x80 /* new data ready */
1320#define CHAN_ENA 0x01 /* enabled channel (temp or vin) */
1321#define CHAN_ALM_ENA 0x10 /* propagate to alarms-reg ?? (chk val!) */
1322#define CHAN_READY (CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
1323
1324#define TEMP_OTS_OE 0x20 /* OTS Output Enable */
1325#define VIN_RW1C_MASK (CHAN_READY|CHAN_ALM_MAX|CHAN_ALM_MIN) /* 0x87 */
1326#define TEMP_RW1C_MASK (VIN_RW1C_MASK|TEMP_ALM_CRIT|TEMP_FAULT) /* 0xCF */
1327
1134static void pc87360_init_device(struct platform_device *pdev, 1328static void pc87360_init_device(struct platform_device *pdev,
1135 int use_thermistors) 1329 int use_thermistors)
1136{ 1330{
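The *_RW1C_MASK values collect the status bits that are write-one-to-clear: writing back the byte just read acknowledges (clears) exactly the latched event bits while leaving the rest alone, which is why pc87360_update_device() writes each status byte straight back after sampling it. A toy register model of the idiom, mirroring the patch's 0x87 vin mask:

#include <stdio.h>

#define CHAN_CNVRTD   0x80	/* new data ready (latched) */
#define CHAN_ENA      0x01	/* channel enabled */
#define CHAN_ALM_MIN  0x02	/* latched alarm */
#define CHAN_ALM_MAX  0x04	/* latched alarm */
#define CHAN_READY    (CHAN_ENA | CHAN_CNVRTD)
#define VIN_RW1C_MASK (CHAN_READY | CHAN_ALM_MAX | CHAN_ALM_MIN)  /* 0x87 */

static unsigned char reg = 0x87;	/* pretend hardware latched events */

/* Write-1-to-clear: 1 bits in the written value clear latched bits. */
static void reg_write(unsigned char val)
{
	reg &= ~(val & VIN_RW1C_MASK);
}

int main(void)
{
	unsigned char status = reg;	/* read ... */
	reg_write(status);		/* ... then acknowledge */
	printf("read 0x%02x, after ack 0x%02x\n", status, reg);
	return 0;
}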
@@ -1152,11 +1346,12 @@ static void pc87360_init_device(struct platform_device *pdev,
1152 1346
1153 nr = data->innr < 11 ? data->innr : 11; 1347 nr = data->innr < 11 ? data->innr : 11;
1154 for (i = 0; i < nr; i++) { 1348 for (i = 0; i < nr; i++) {
1349 reg = pc87360_read_value(data, LD_IN, i,
1350 PC87365_REG_IN_STATUS);
1351 dev_dbg(&pdev->dev, "bios in%d status:0x%02x\n", i, reg);
1155 if (init >= init_in[i]) { 1352 if (init >= init_in[i]) {
1156 /* Forcibly enable voltage channel */ 1353 /* Forcibly enable voltage channel */
1157 reg = pc87360_read_value(data, LD_IN, i, 1354 if (!(reg & CHAN_ENA)) {
1158 PC87365_REG_IN_STATUS);
1159 if (!(reg & 0x01)) {
1160 dev_dbg(&pdev->dev, "Forcibly " 1355 dev_dbg(&pdev->dev, "Forcibly "
1161 "enabling in%d\n", i); 1356 "enabling in%d\n", i);
1162 pc87360_write_value(data, LD_IN, i, 1357 pc87360_write_value(data, LD_IN, i,
@@ -1168,19 +1363,24 @@ static void pc87360_init_device(struct platform_device *pdev,
1168 1363
1169 /* We can't blindly trust the Super-I/O space configuration bit, 1364 /* We can't blindly trust the Super-I/O space configuration bit,
1170 most BIOS won't set it properly */ 1365 most BIOS won't set it properly */
1366 dev_dbg(&pdev->dev, "bios thermistors:%d\n", use_thermistors);
1171 for (i = 11; i < data->innr; i++) { 1367 for (i = 11; i < data->innr; i++) {
1172 reg = pc87360_read_value(data, LD_IN, i, 1368 reg = pc87360_read_value(data, LD_IN, i,
1173 PC87365_REG_TEMP_STATUS); 1369 PC87365_REG_TEMP_STATUS);
1174 use_thermistors = use_thermistors || (reg & 0x01); 1370 use_thermistors = use_thermistors || (reg & CHAN_ENA);
1371 /* thermistors are temp[4-6], measured on vin[11-14] */
1372 dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i-7, reg);
1175 } 1373 }
1374 dev_dbg(&pdev->dev, "using thermistors:%d\n", use_thermistors);
1176 1375
1177 i = use_thermistors ? 2 : 0; 1376 i = use_thermistors ? 2 : 0;
1178 for (; i < data->tempnr; i++) { 1377 for (; i < data->tempnr; i++) {
1378 reg = pc87360_read_value(data, LD_TEMP, i,
1379 PC87365_REG_TEMP_STATUS);
1380 dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i+1, reg);
1179 if (init >= init_temp[i]) { 1381 if (init >= init_temp[i]) {
1180 /* Forcibly enable temperature channel */ 1382 /* Forcibly enable temperature channel */
1181 reg = pc87360_read_value(data, LD_TEMP, i, 1383 if (!(reg & CHAN_ENA)) {
1182 PC87365_REG_TEMP_STATUS);
1183 if (!(reg & 0x01)) {
1184 dev_dbg(&pdev->dev, "Forcibly " 1384 dev_dbg(&pdev->dev, "Forcibly "
1185 "enabling temp%d\n", i+1); 1385 "enabling temp%d\n", i+1);
1186 pc87360_write_value(data, LD_TEMP, i, 1386 pc87360_write_value(data, LD_TEMP, i,
@@ -1197,7 +1397,7 @@ static void pc87360_init_device(struct platform_device *pdev,
1197 diodes */ 1397 diodes */
1198 reg = pc87360_read_value(data, LD_TEMP, 1398 reg = pc87360_read_value(data, LD_TEMP,
1199 (i-11)/2, PC87365_REG_TEMP_STATUS); 1399 (i-11)/2, PC87365_REG_TEMP_STATUS);
1200 if (reg & 0x01) { 1400 if (reg & CHAN_ENA) {
1201 dev_dbg(&pdev->dev, "Skipping " 1401 dev_dbg(&pdev->dev, "Skipping "
1202 "temp%d, pin already in use " 1402 "temp%d, pin already in use "
1203 "by temp%d\n", i-7, (i-11)/2); 1403 "by temp%d\n", i-7, (i-11)/2);
@@ -1207,7 +1407,7 @@ static void pc87360_init_device(struct platform_device *pdev,
1207 /* Forcibly enable thermistor channel */ 1407 /* Forcibly enable thermistor channel */
1208 reg = pc87360_read_value(data, LD_IN, i, 1408 reg = pc87360_read_value(data, LD_IN, i,
1209 PC87365_REG_IN_STATUS); 1409 PC87365_REG_IN_STATUS);
1210 if (!(reg & 0x01)) { 1410 if (!(reg & CHAN_ENA)) {
1211 dev_dbg(&pdev->dev, "Forcibly " 1411 dev_dbg(&pdev->dev, "Forcibly "
1212 "enabling temp%d\n", i-7); 1412 "enabling temp%d\n", i-7);
1213 pc87360_write_value(data, LD_IN, i, 1413 pc87360_write_value(data, LD_IN, i,
@@ -1221,7 +1421,8 @@ static void pc87360_init_device(struct platform_device *pdev,
1221 if (data->innr) { 1421 if (data->innr) {
1222 reg = pc87360_read_value(data, LD_IN, NO_BANK, 1422 reg = pc87360_read_value(data, LD_IN, NO_BANK,
1223 PC87365_REG_IN_CONFIG); 1423 PC87365_REG_IN_CONFIG);
1224 if (reg & 0x01) { 1424 dev_dbg(&pdev->dev, "bios vin-cfg:0x%02x\n", reg);
1425 if (reg & CHAN_ENA) {
1225 dev_dbg(&pdev->dev, "Forcibly " 1426 dev_dbg(&pdev->dev, "Forcibly "
1226 "enabling monitoring (VLM)\n"); 1427 "enabling monitoring (VLM)\n");
1227 pc87360_write_value(data, LD_IN, NO_BANK, 1428 pc87360_write_value(data, LD_IN, NO_BANK,
@@ -1233,7 +1434,8 @@ static void pc87360_init_device(struct platform_device *pdev,
1233 if (data->tempnr) { 1434 if (data->tempnr) {
1234 reg = pc87360_read_value(data, LD_TEMP, NO_BANK, 1435 reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
1235 PC87365_REG_TEMP_CONFIG); 1436 PC87365_REG_TEMP_CONFIG);
1236 if (reg & 0x01) { 1437 dev_dbg(&pdev->dev, "bios temp-cfg:0x%02x\n", reg);
1438 if (reg & CHAN_ENA) {
1237 dev_dbg(&pdev->dev, "Forcibly enabling " 1439 dev_dbg(&pdev->dev, "Forcibly enabling "
1238 "monitoring (TMS)\n"); 1440 "monitoring (TMS)\n");
1239 pc87360_write_value(data, LD_TEMP, NO_BANK, 1441 pc87360_write_value(data, LD_TEMP, NO_BANK,
@@ -1336,11 +1538,11 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
1336 pc87360_write_value(data, LD_IN, i, 1538 pc87360_write_value(data, LD_IN, i,
1337 PC87365_REG_IN_STATUS, 1539 PC87365_REG_IN_STATUS,
1338 data->in_status[i]); 1540 data->in_status[i]);
1339 if ((data->in_status[i] & 0x81) == 0x81) { 1541 if ((data->in_status[i] & CHAN_READY) == CHAN_READY) {
1340 data->in[i] = pc87360_read_value(data, LD_IN, 1542 data->in[i] = pc87360_read_value(data, LD_IN,
1341 i, PC87365_REG_IN); 1543 i, PC87365_REG_IN);
1342 } 1544 }
1343 if (data->in_status[i] & 0x01) { 1545 if (data->in_status[i] & CHAN_ENA) {
1344 data->in_min[i] = pc87360_read_value(data, 1546 data->in_min[i] = pc87360_read_value(data,
1345 LD_IN, i, 1547 LD_IN, i,
1346 PC87365_REG_IN_MIN); 1548 PC87365_REG_IN_MIN);
@@ -1373,12 +1575,12 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
1373 pc87360_write_value(data, LD_TEMP, i, 1575 pc87360_write_value(data, LD_TEMP, i,
1374 PC87365_REG_TEMP_STATUS, 1576 PC87365_REG_TEMP_STATUS,
1375 data->temp_status[i]); 1577 data->temp_status[i]);
1376 if ((data->temp_status[i] & 0x81) == 0x81) { 1578 if ((data->temp_status[i] & CHAN_READY) == CHAN_READY) {
1377 data->temp[i] = pc87360_read_value(data, 1579 data->temp[i] = pc87360_read_value(data,
1378 LD_TEMP, i, 1580 LD_TEMP, i,
1379 PC87365_REG_TEMP); 1581 PC87365_REG_TEMP);
1380 } 1582 }
1381 if (data->temp_status[i] & 0x01) { 1583 if (data->temp_status[i] & CHAN_ENA) {
1382 data->temp_min[i] = pc87360_read_value(data, 1584 data->temp_min[i] = pc87360_read_value(data,
1383 LD_TEMP, i, 1585 LD_TEMP, i,
1384 PC87365_REG_TEMP_MIN); 1586 PC87365_REG_TEMP_MIN);
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
index 2a4acb269569..d4775528abc6 100644
--- a/drivers/i2c/chips/at24.c
+++ b/drivers/i2c/chips/at24.c
@@ -460,7 +460,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
460 */ 460 */
461 at24->bin.attr.name = "eeprom"; 461 at24->bin.attr.name = "eeprom";
462 at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR; 462 at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
463 at24->bin.attr.owner = THIS_MODULE;
464 at24->bin.read = at24_bin_read; 463 at24->bin.read = at24_bin_read;
465 at24->bin.size = chip.byte_len; 464 at24->bin.size = chip.byte_len;
466 465
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 23be4d42cb02..f3ee4a1abb77 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -190,7 +190,6 @@ static struct bin_attribute ds1682_eeprom_attr = {
190 .attr = { 190 .attr = {
191 .name = "eeprom", 191 .name = "eeprom",
192 .mode = S_IRUGO | S_IWUSR, 192 .mode = S_IRUGO | S_IWUSR,
193 .owner = THIS_MODULE,
194 }, 193 },
195 .size = DS1682_EEPROM_SIZE, 194 .size = DS1682_EEPROM_SIZE,
196 .read = ds1682_eeprom_read, 195 .read = ds1682_eeprom_read,
diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c
index 176126d3a01d..4b364bae6b3e 100644
--- a/drivers/i2c/chips/menelaus.c
+++ b/drivers/i2c/chips/menelaus.c
@@ -832,52 +832,52 @@ static irqreturn_t menelaus_irq(int irq, void *_menelaus)
832 832
833static void menelaus_to_time(char *regs, struct rtc_time *t) 833static void menelaus_to_time(char *regs, struct rtc_time *t)
834{ 834{
835 t->tm_sec = BCD2BIN(regs[0]); 835 t->tm_sec = bcd2bin(regs[0]);
836 t->tm_min = BCD2BIN(regs[1]); 836 t->tm_min = bcd2bin(regs[1]);
837 if (the_menelaus->rtc_control & RTC_CTRL_MODE12) { 837 if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
838 t->tm_hour = BCD2BIN(regs[2] & 0x1f) - 1; 838 t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
839 if (regs[2] & RTC_HR_PM) 839 if (regs[2] & RTC_HR_PM)
840 t->tm_hour += 12; 840 t->tm_hour += 12;
841 } else 841 } else
842 t->tm_hour = BCD2BIN(regs[2] & 0x3f); 842 t->tm_hour = bcd2bin(regs[2] & 0x3f);
843 t->tm_mday = BCD2BIN(regs[3]); 843 t->tm_mday = bcd2bin(regs[3]);
844 t->tm_mon = BCD2BIN(regs[4]) - 1; 844 t->tm_mon = bcd2bin(regs[4]) - 1;
845 t->tm_year = BCD2BIN(regs[5]) + 100; 845 t->tm_year = bcd2bin(regs[5]) + 100;
846} 846}
847 847
848static int time_to_menelaus(struct rtc_time *t, int regnum) 848static int time_to_menelaus(struct rtc_time *t, int regnum)
849{ 849{
850 int hour, status; 850 int hour, status;
851 851
852 status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_sec)); 852 status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
853 if (status < 0) 853 if (status < 0)
854 goto fail; 854 goto fail;
855 855
856 status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_min)); 856 status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
857 if (status < 0) 857 if (status < 0)
858 goto fail; 858 goto fail;
859 859
860 if (the_menelaus->rtc_control & RTC_CTRL_MODE12) { 860 if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
861 hour = t->tm_hour + 1; 861 hour = t->tm_hour + 1;
862 if (hour > 12) 862 if (hour > 12)
863 hour = RTC_HR_PM | BIN2BCD(hour - 12); 863 hour = RTC_HR_PM | bin2bcd(hour - 12);
864 else 864 else
865 hour = BIN2BCD(hour); 865 hour = bin2bcd(hour);
866 } else 866 } else
867 hour = BIN2BCD(t->tm_hour); 867 hour = bin2bcd(t->tm_hour);
868 status = menelaus_write_reg(regnum++, hour); 868 status = menelaus_write_reg(regnum++, hour);
869 if (status < 0) 869 if (status < 0)
870 goto fail; 870 goto fail;
871 871
872 status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mday)); 872 status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
873 if (status < 0) 873 if (status < 0)
874 goto fail; 874 goto fail;
875 875
876 status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mon + 1)); 876 status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
877 if (status < 0) 877 if (status < 0)
878 goto fail; 878 goto fail;
879 879
880 status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_year - 100)); 880 status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
881 if (status < 0) 881 if (status < 0)
882 goto fail; 882 goto fail;
883 883
@@ -914,7 +914,7 @@ static int menelaus_read_time(struct device *dev, struct rtc_time *t)
914 } 914 }
915 915
916 menelaus_to_time(regs, t); 916 menelaus_to_time(regs, t);
917 t->tm_wday = BCD2BIN(regs[6]); 917 t->tm_wday = bcd2bin(regs[6]);
918 918
919 return 0; 919 return 0;
920} 920}
@@ -927,7 +927,7 @@ static int menelaus_set_time(struct device *dev, struct rtc_time *t)
927 status = time_to_menelaus(t, MENELAUS_RTC_SEC); 927 status = time_to_menelaus(t, MENELAUS_RTC_SEC);
928 if (status < 0) 928 if (status < 0)
929 return status; 929 return status;
930 status = menelaus_write_reg(MENELAUS_RTC_WKDAY, BIN2BCD(t->tm_wday)); 930 status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
931 if (status < 0) { 931 if (status < 0) {
932 dev_err(&the_menelaus->client->dev, "rtc write reg %02x " 932 dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
933 "err %d\n", MENELAUS_RTC_WKDAY, status); 933 "err %d\n", MENELAUS_RTC_WKDAY, status);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a78d35aecee3..f1e82a92e61e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -122,7 +122,7 @@ struct cm_counter_attribute {
122 122
123#define CM_COUNTER_ATTR(_name, _index) \ 123#define CM_COUNTER_ATTR(_name, _index) \
124struct cm_counter_attribute cm_##_name##_counter_attr = { \ 124struct cm_counter_attribute cm_##_name##_counter_attr = { \
125 .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ 125 .attr = { .name = __stringify(_name), .mode = 0444 }, \
126 .index = _index \ 126 .index = _index \
127} 127}
128 128
diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c
index c7c770c28988..aa1ff524256e 100644
--- a/drivers/media/dvb/ttpci/av7110.c
+++ b/drivers/media/dvb/ttpci/av7110.c
@@ -36,7 +36,6 @@
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/timer.h> 37#include <linux/timer.h>
38#include <linux/poll.h> 38#include <linux/poll.h>
39#include <linux/byteorder/swabb.h>
40#include <linux/smp_lock.h> 39#include <linux/smp_lock.h>
41 40
42#include <linux/kernel.h> 41#include <linux/kernel.h>
@@ -52,6 +51,7 @@
52#include <linux/i2c.h> 51#include <linux/i2c.h>
53#include <linux/kthread.h> 52#include <linux/kthread.h>
54#include <asm/unaligned.h> 53#include <asm/unaligned.h>
54#include <asm/byteorder.h>
55 55
56#include <asm/system.h> 56#include <asm/system.h>
57 57
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index fa8be0731a3f..a4b1708fafe7 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -41,6 +41,7 @@
41#include <linux/pagemap.h> 41#include <linux/pagemap.h>
42#include <linux/workqueue.h> 42#include <linux/workqueue.h>
43#include <linux/mutex.h> 43#include <linux/mutex.h>
44#include <asm/byteorder.h>
44 45
45#include <linux/dvb/video.h> 46#include <linux/dvb/video.h>
46#include <linux/dvb/audio.h> 47#include <linux/dvb/audio.h>
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index bc29436e8a3c..3733b2afec5f 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -55,6 +55,7 @@
55#include <linux/mutex.h> 55#include <linux/mutex.h>
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57#include <asm/system.h> 57#include <asm/system.h>
58#include <asm/byteorder.h>
58 59
59#include <linux/dvb/video.h> 60#include <linux/dvb/video.h>
60#include <linux/dvb/audio.h> 61#include <linux/dvb/audio.h>
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 6e291bf8237a..5263913e0c69 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1044,7 +1044,6 @@ static int mspro_block_read_attributes(struct memstick_dev *card)
1044 1044
1045 s_attr->dev_attr.attr.name = s_attr->name; 1045 s_attr->dev_attr.attr.name = s_attr->name;
1046 s_attr->dev_attr.attr.mode = S_IRUGO; 1046 s_attr->dev_attr.attr.mode = S_IRUGO;
1047 s_attr->dev_attr.attr.owner = THIS_MODULE;
1048 s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id); 1047 s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
1049 1048
1050 if (!rc) 1049 if (!rc)
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
index 5dabfb69ee53..4b7c24c519c3 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/misc/hp-wmi.c
@@ -82,6 +82,7 @@ static struct key_entry hp_wmi_keymap[] = {
82 {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN}, 82 {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
83 {KE_KEY, 0x20e6, KEY_PROG1}, 83 {KE_KEY, 0x20e6, KEY_PROG1},
84 {KE_KEY, 0x2142, KEY_MEDIA}, 84 {KE_KEY, 0x2142, KEY_MEDIA},
85 {KE_KEY, 0x213b, KEY_INFO},
85 {KE_KEY, 0x231b, KEY_HELP}, 86 {KE_KEY, 0x231b, KEY_HELP},
86 {KE_END, 0} 87 {KE_END, 0}
87}; 88};
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 14f11f8b9e5f..a90d50c2c3e5 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -172,6 +172,11 @@ config MTD_CHAR
172 memory chips, and also use ioctl() to obtain information about 172 memory chips, and also use ioctl() to obtain information about
173 the device, or to erase parts of it. 173 the device, or to erase parts of it.
174 174
175config HAVE_MTD_OTP
176 bool
177 help
178 Enable access to OTP regions using MTD_CHAR.
179
175config MTD_BLKDEVS 180config MTD_BLKDEVS
176 tristate "Common interface to block layer for MTD 'translation layers'" 181 tristate "Common interface to block layer for MTD 'translation layers'"
177 depends on BLOCK 182 depends on BLOCK
diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 479d32b57a1e..9408099eec48 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -6,6 +6,7 @@ menu "RAM/ROM/Flash chip drivers"
6config MTD_CFI 6config MTD_CFI
7 tristate "Detect flash chips by Common Flash Interface (CFI) probe" 7 tristate "Detect flash chips by Common Flash Interface (CFI) probe"
8 select MTD_GEN_PROBE 8 select MTD_GEN_PROBE
9 select MTD_CFI_UTIL
9 help 10 help
10 The Common Flash Interface specification was developed by Intel, 11 The Common Flash Interface specification was developed by Intel,
 11 AMD and other flash manufacturers that provides a universal method 12 AMD and other flash manufacturers that provides a universal method
@@ -154,6 +155,7 @@ config MTD_CFI_I8
154config MTD_OTP 155config MTD_OTP
155 bool "Protection Registers aka one-time programmable (OTP) bits" 156 bool "Protection Registers aka one-time programmable (OTP) bits"
156 depends on MTD_CFI_ADV_OPTIONS 157 depends on MTD_CFI_ADV_OPTIONS
158 select HAVE_MTD_OTP
157 default n 159 default n
158 help 160 help
159 This enables support for reading, writing and locking so called 161 This enables support for reading, writing and locking so called
@@ -187,7 +189,7 @@ config MTD_CFI_INTELEXT
187 StrataFlash and other parts. 189 StrataFlash and other parts.
188 190
189config MTD_CFI_AMDSTD 191config MTD_CFI_AMDSTD
190 tristate "Support for AMD/Fujitsu flash chips" 192 tristate "Support for AMD/Fujitsu/Spansion flash chips"
191 depends on MTD_GEN_PROBE 193 depends on MTD_GEN_PROBE
192 select MTD_CFI_UTIL 194 select MTD_CFI_UTIL
193 help 195 help
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5f1b472137a0..c93a8be5d5f1 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
478 else 478 else
479 cfi->chips[i].erase_time = 2000000; 479 cfi->chips[i].erase_time = 2000000;
480 480
481 if (cfi->cfiq->WordWriteTimeoutTyp &&
482 cfi->cfiq->WordWriteTimeoutMax)
483 cfi->chips[i].word_write_time_max =
484 1<<(cfi->cfiq->WordWriteTimeoutTyp +
485 cfi->cfiq->WordWriteTimeoutMax);
486 else
487 cfi->chips[i].word_write_time_max = 50000 * 8;
488
489 if (cfi->cfiq->BufWriteTimeoutTyp &&
490 cfi->cfiq->BufWriteTimeoutMax)
491 cfi->chips[i].buffer_write_time_max =
492 1<<(cfi->cfiq->BufWriteTimeoutTyp +
493 cfi->cfiq->BufWriteTimeoutMax);
494
495 if (cfi->cfiq->BlockEraseTimeoutTyp &&
496 cfi->cfiq->BlockEraseTimeoutMax)
497 cfi->chips[i].erase_time_max =
498 1000<<(cfi->cfiq->BlockEraseTimeoutTyp +
499 cfi->cfiq->BlockEraseTimeoutMax);
500 else
501 cfi->chips[i].erase_time_max = 2000000 * 8;
502
481 cfi->chips[i].ref_point_counter = 0; 503 cfi->chips[i].ref_point_counter = 0;
482 init_waitqueue_head(&(cfi->chips[i].wq)); 504 init_waitqueue_head(&(cfi->chips[i].wq));
483 } 505 }
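The CFI query fields used here are exponents: the typical word-write time is 2^WordWriteTimeoutTyp us and the worst case is the typical time times 2^WordWriteTimeoutMax, so the maximum is 1 << (Typ + Max) us (block erase has the same shape in ms, hence the 1000 << form). When the query fields are zero, the code keeps the old "8x the typical value" heuristic. Worked numerically with sample exponents:

#include <stdio.h>

int main(void)
{
	unsigned typ = 4, max = 4;	/* sample CFI exponents */

	unsigned typical = 1u << typ;		/* 16 us typical */
	unsigned worst = 1u << (typ + max);	/* 16 us * 16 = 256 us */

	printf("typical %u us, worst case %u us\n", typical, worst);

	/* Fallback when the query fields are zero: old 8x heuristic. */
	printf("fallback word-write max: %u us\n", 50000 * 8);
	return 0;
}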
@@ -703,6 +725,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
703 struct cfi_pri_intelext *cfip = cfi->cmdset_priv; 725 struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
704 unsigned long timeo = jiffies + HZ; 726 unsigned long timeo = jiffies + HZ;
705 727
728 /* Prevent setting state FL_SYNCING for chip in suspended state. */
729 if (mode == FL_SYNCING && chip->oldstate != FL_READY)
730 goto sleep;
731
706 switch (chip->state) { 732 switch (chip->state) {
707 733
708 case FL_STATUS: 734 case FL_STATUS:
@@ -808,8 +834,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
808 DECLARE_WAITQUEUE(wait, current); 834 DECLARE_WAITQUEUE(wait, current);
809 835
810 retry: 836 retry:
811 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING 837 if (chip->priv &&
812 || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) { 838 (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE
839 || mode == FL_SHUTDOWN) && chip->state != FL_SYNCING) {
813 /* 840 /*
814 * OK. We have possibility for contention on the write/erase 841 * OK. We have possibility for contention on the write/erase
815 * operations which are global to the real chip and not per 842 * operations which are global to the real chip and not per
@@ -859,6 +886,14 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
859 return ret; 886 return ret;
860 } 887 }
861 spin_lock(&shared->lock); 888 spin_lock(&shared->lock);
889
 890 /* We should not own the chip if it is already
891 * in FL_SYNCING state. Put contender and retry. */
892 if (chip->state == FL_SYNCING) {
893 put_chip(map, contender, contender->start);
894 spin_unlock(contender->mutex);
895 goto retry;
896 }
862 spin_unlock(contender->mutex); 897 spin_unlock(contender->mutex);
863 } 898 }
864 899
@@ -1012,7 +1047,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
1012 1047
1013static int __xipram xip_wait_for_operation( 1048static int __xipram xip_wait_for_operation(
1014 struct map_info *map, struct flchip *chip, 1049 struct map_info *map, struct flchip *chip,
1015 unsigned long adr, unsigned int chip_op_time ) 1050 unsigned long adr, unsigned int chip_op_time_max)
1016{ 1051{
1017 struct cfi_private *cfi = map->fldrv_priv; 1052 struct cfi_private *cfi = map->fldrv_priv;
1018 struct cfi_pri_intelext *cfip = cfi->cmdset_priv; 1053 struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1021,7 +1056,7 @@ static int __xipram xip_wait_for_operation(
1021 flstate_t oldstate, newstate; 1056 flstate_t oldstate, newstate;
1022 1057
1023 start = xip_currtime(); 1058 start = xip_currtime();
1024 usec = chip_op_time * 8; 1059 usec = chip_op_time_max;
1025 if (usec == 0) 1060 if (usec == 0)
1026 usec = 500000; 1061 usec = 500000;
1027 done = 0; 1062 done = 0;
@@ -1131,8 +1166,8 @@ static int __xipram xip_wait_for_operation(
1131#define XIP_INVAL_CACHED_RANGE(map, from, size) \ 1166#define XIP_INVAL_CACHED_RANGE(map, from, size) \
1132 INVALIDATE_CACHED_RANGE(map, from, size) 1167 INVALIDATE_CACHED_RANGE(map, from, size)
1133 1168
1134#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \ 1169#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
1135 xip_wait_for_operation(map, chip, cmd_adr, usec) 1170 xip_wait_for_operation(map, chip, cmd_adr, usec_max)
1136 1171
1137#else 1172#else
1138 1173
@@ -1144,7 +1179,7 @@ static int __xipram xip_wait_for_operation(
1144static int inval_cache_and_wait_for_operation( 1179static int inval_cache_and_wait_for_operation(
1145 struct map_info *map, struct flchip *chip, 1180 struct map_info *map, struct flchip *chip,
1146 unsigned long cmd_adr, unsigned long inval_adr, int inval_len, 1181 unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
1147 unsigned int chip_op_time) 1182 unsigned int chip_op_time, unsigned int chip_op_time_max)
1148{ 1183{
1149 struct cfi_private *cfi = map->fldrv_priv; 1184 struct cfi_private *cfi = map->fldrv_priv;
1150 map_word status, status_OK = CMD(0x80); 1185 map_word status, status_OK = CMD(0x80);
@@ -1156,8 +1191,7 @@ static int inval_cache_and_wait_for_operation(
1156 INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len); 1191 INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
1157 spin_lock(chip->mutex); 1192 spin_lock(chip->mutex);
1158 1193
1159 /* set our timeout to 8 times the expected delay */ 1194 timeo = chip_op_time_max;
1160 timeo = chip_op_time * 8;
1161 if (!timeo) 1195 if (!timeo)
1162 timeo = 500000; 1196 timeo = 500000;
1163 reset_timeo = timeo; 1197 reset_timeo = timeo;
@@ -1217,8 +1251,8 @@ static int inval_cache_and_wait_for_operation(
1217 1251
1218#endif 1252#endif
1219 1253
1220#define WAIT_TIMEOUT(map, chip, adr, udelay) \ 1254#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \
1221 INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay); 1255 INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max);
1222 1256
1223 1257
1224static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len) 1258static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1452,7 +1486,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
1452 1486
1453 ret = INVAL_CACHE_AND_WAIT(map, chip, adr, 1487 ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
1454 adr, map_bankwidth(map), 1488 adr, map_bankwidth(map),
1455 chip->word_write_time); 1489 chip->word_write_time,
1490 chip->word_write_time_max);
1456 if (ret) { 1491 if (ret) {
1457 xip_enable(map, chip, adr); 1492 xip_enable(map, chip, adr);
1458 printk(KERN_ERR "%s: word write error (status timeout)\n", map->name); 1493 printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1623,7 +1658,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
1623 1658
1624 chip->state = FL_WRITING_TO_BUFFER; 1659 chip->state = FL_WRITING_TO_BUFFER;
1625 map_write(map, write_cmd, cmd_adr); 1660 map_write(map, write_cmd, cmd_adr);
1626 ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0); 1661 ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0);
1627 if (ret) { 1662 if (ret) {
1628 /* Argh. Not ready for write to buffer */ 1663 /* Argh. Not ready for write to buffer */
1629 map_word Xstatus = map_read(map, cmd_adr); 1664 map_word Xstatus = map_read(map, cmd_adr);
@@ -1640,7 +1675,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
1640 1675
1641 /* Figure out the number of words to write */ 1676 /* Figure out the number of words to write */
1642 word_gap = (-adr & (map_bankwidth(map)-1)); 1677 word_gap = (-adr & (map_bankwidth(map)-1));
1643 words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map); 1678 words = DIV_ROUND_UP(len - word_gap, map_bankwidth(map));
1644 if (!word_gap) { 1679 if (!word_gap) {
1645 words--; 1680 words--;
1646 } else { 1681 } else {
@@ -1692,7 +1727,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
1692 1727
1693 ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, 1728 ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
1694 initial_adr, initial_len, 1729 initial_adr, initial_len,
1695 chip->buffer_write_time); 1730 chip->buffer_write_time,
1731 chip->buffer_write_time_max);
1696 if (ret) { 1732 if (ret) {
1697 map_write(map, CMD(0x70), cmd_adr); 1733 map_write(map, CMD(0x70), cmd_adr);
1698 chip->state = FL_STATUS; 1734 chip->state = FL_STATUS;
@@ -1827,7 +1863,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
1827 1863
1828 ret = INVAL_CACHE_AND_WAIT(map, chip, adr, 1864 ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
1829 adr, len, 1865 adr, len,
1830 chip->erase_time); 1866 chip->erase_time,
1867 chip->erase_time_max);
1831 if (ret) { 1868 if (ret) {
1832 map_write(map, CMD(0x70), adr); 1869 map_write(map, CMD(0x70), adr);
1833 chip->state = FL_STATUS; 1870 chip->state = FL_STATUS;
@@ -2006,7 +2043,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
2006 */ 2043 */
2007 udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0; 2044 udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0;
2008 2045
2009 ret = WAIT_TIMEOUT(map, chip, adr, udelay); 2046 ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100);
2010 if (ret) { 2047 if (ret) {
2011 map_write(map, CMD(0x70), adr); 2048 map_write(map, CMD(0x70), adr);
2012 chip->state = FL_STATUS; 2049 chip->state = FL_STATUS;
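
For the lock/unlock wait above there is no CFI-specified maximum, so the worst case is taken as a fixed multiple of the typical delay (100x here), and passing 0 for both values, as the write-to-buffer wait does, falls back to the 500 ms default inside the wait loops. The rule restated as a standalone sketch (wait_budget_us is a hypothetical name):

    #include <stdio.h>

    static unsigned int wait_budget_us(unsigned int typ_us, unsigned int factor)
    {
        return typ_us ? typ_us * factor : 500000;   /* 500 ms default */
    }

    int main(void)
    {
        printf("%u\n", wait_budget_us(10000, 100)); /* udelay * 100 case */
        printf("%u\n", wait_budget_us(0, 100));     /* 0,0 -> default    */
        return 0;
    }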
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a972cc6be436..3e6f5d8609e8 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -13,6 +13,8 @@
13 * XIP support hooks by Vitaly Wool (based on code for Intel flash 13 * XIP support hooks by Vitaly Wool (based on code for Intel flash
14 * by Nicolas Pitre) 14 * by Nicolas Pitre)
15 * 15 *
 16 * 25/09/2008 Christopher Moore: TopBottom fixup for many Macronix chips with CFI V1.0
17 *
16 * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com 18 * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com
17 * 19 *
18 * This code is GPL 20 * This code is GPL
@@ -43,6 +45,7 @@
43 45
44#define MANUFACTURER_AMD 0x0001 46#define MANUFACTURER_AMD 0x0001
45#define MANUFACTURER_ATMEL 0x001F 47#define MANUFACTURER_ATMEL 0x001F
48#define MANUFACTURER_MACRONIX 0x00C2
46#define MANUFACTURER_SST 0x00BF 49#define MANUFACTURER_SST 0x00BF
47#define SST49LF004B 0x0060 50#define SST49LF004B 0x0060
48#define SST49LF040B 0x0050 51#define SST49LF040B 0x0050
@@ -144,12 +147,44 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
144 147
145 if (((major << 8) | minor) < 0x3131) { 148 if (((major << 8) | minor) < 0x3131) {
146 /* CFI version 1.0 => don't trust bootloc */ 149 /* CFI version 1.0 => don't trust bootloc */
150
151 DEBUG(MTD_DEBUG_LEVEL1,
152 "%s: JEDEC Vendor ID is 0x%02X Device ID is 0x%02X\n",
153 map->name, cfi->mfr, cfi->id);
154
155 /* AFAICS all 29LV400 with a bottom boot block have a device ID
156 * of 0x22BA in 16-bit mode and 0xBA in 8-bit mode.
157 * These were badly detected as they have the 0x80 bit set
158 * so treat them as a special case.
159 */
160 if (((cfi->id == 0xBA) || (cfi->id == 0x22BA)) &&
161
162 /* Macronix added CFI to their 2nd generation
163 * MX29LV400C B/T but AFAICS no other 29LV400 (AMD,
164 * Fujitsu, Spansion, EON, ESI and older Macronix)
165 * has CFI.
166 *
167 * Therefore also check the manufacturer.
168 * This reduces the risk of false detection due to
169 * the 8-bit device ID.
170 */
171 (cfi->mfr == MANUFACTURER_MACRONIX)) {
172 DEBUG(MTD_DEBUG_LEVEL1,
173 "%s: Macronix MX29LV400C with bottom boot block"
174 " detected\n", map->name);
175 extp->TopBottom = 2; /* bottom boot */
176 } else
147 if (cfi->id & 0x80) { 177 if (cfi->id & 0x80) {
148 printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id); 178 printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id);
149 extp->TopBottom = 3; /* top boot */ 179 extp->TopBottom = 3; /* top boot */
150 } else { 180 } else {
151 extp->TopBottom = 2; /* bottom boot */ 181 extp->TopBottom = 2; /* bottom boot */
152 } 182 }
183
184 DEBUG(MTD_DEBUG_LEVEL1,
185 "%s: AMD CFI PRI V%c.%c has no boot block field;"
186 " deduced %s from Device ID\n", map->name, major, minor,
187 extp->TopBottom == 2 ? "bottom" : "top");
153 } 188 }
154} 189}
155#endif 190#endif
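
The fixup above narrows the workaround: a CFI 1.0 part whose JEDEC device ID is 0xBA (8-bit) or 0x22BA (16-bit) is treated as a bottom-boot MX29LV400C only when the manufacturer ID is also Macronix, which keeps the ambiguous 0x80-bit heuristic from misfiring on other vendors' parts. The decision in isolation (is_mx29lv400cb is a hypothetical helper):

    #include <stdint.h>
    #include <stdio.h>

    #define MANUFACTURER_MACRONIX 0x00C2

    /* Bottom-boot MX29LV400C iff the device ID matches in either bus
     * width *and* the manufacturer is Macronix. */
    static int is_mx29lv400cb(uint16_t mfr, uint16_t id)
    {
        return (id == 0xBA || id == 0x22BA) && mfr == MANUFACTURER_MACRONIX;
    }

    int main(void)
    {
        printf("%d\n", is_mx29lv400cb(0x00C2, 0x22BA)); /* 1: bottom boot  */
        printf("%d\n", is_mx29lv400cb(0x0001, 0x22BA)); /* 0: AMD part     */
        return 0;
    }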
@@ -178,10 +213,18 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
178 if (atmel_pri.Features & 0x02) 213 if (atmel_pri.Features & 0x02)
179 extp->EraseSuspend = 2; 214 extp->EraseSuspend = 2;
180 215
181 if (atmel_pri.BottomBoot) 216 /* Some chips got it backwards... */
182 extp->TopBottom = 2; 217 if (cfi->id == AT49BV6416) {
183 else 218 if (atmel_pri.BottomBoot)
184 extp->TopBottom = 3; 219 extp->TopBottom = 3;
220 else
221 extp->TopBottom = 2;
222 } else {
223 if (atmel_pri.BottomBoot)
224 extp->TopBottom = 2;
225 else
226 extp->TopBottom = 3;
227 }
185 228
186 /* burst write mode not supported */ 229 /* burst write mode not supported */
187 cfi->cfiq->BufWriteTimeoutTyp = 0; 230 cfi->cfiq->BufWriteTimeoutTyp = 0;
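
The Atmel PRI conversion now special-cases the AT49BV6416, which reports its BottomBoot bit inverted; the bit is flipped before translating to the CFI TopBottom convention (2 = bottom boot, 3 = top boot). A standalone restatement (topbottom is a hypothetical helper):

    #include <stdio.h>

    static int topbottom(int is_at49bv6416, int bottom_boot)
    {
        if (is_at49bv6416)
            bottom_boot = !bottom_boot;     /* chip got it backwards */
        return bottom_boot ? 2 : 3;         /* 2 = bottom, 3 = top   */
    }

    int main(void)
    {
        printf("%d\n", topbottom(1, 1));    /* 3: AT49BV6416, bit set */
        printf("%d\n", topbottom(0, 1));    /* 2: normal chip, bit set */
        return 0;
    }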
@@ -243,6 +286,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
243 { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL }, 286 { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
244#ifdef AMD_BOOTLOC_BUG 287#ifdef AMD_BOOTLOC_BUG
245 { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL }, 288 { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
289 { MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
246#endif 290#endif
247 { CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, }, 291 { CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
248 { CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, }, 292 { CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
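
The new table row reuses fixup_amd_bootblock for Macronix parts. For context, cfi_fixup() walks such a table and runs every hook whose manufacturer and device ID match the probed chip, with 0xffff-style wildcards; roughly, as a compilable sketch with a stub hook, not the driver's code:

    #include <stdint.h>
    #include <stdio.h>

    #define CFI_MFR_ANY 0xffff
    #define CFI_ID_ANY  0xffff

    struct fixup {
        uint16_t mfr, id;
        void (*hook)(void *mtd, void *param);
        void *param;
    };

    /* Run every entry matching (mfr, id), treating 0xffff as wildcard. */
    static void run_fixups(const struct fixup *t, uint16_t mfr, uint16_t id,
                           void *mtd)
    {
        for (; t->hook; t++)
            if ((t->mfr == CFI_MFR_ANY || t->mfr == mfr) &&
                (t->id == CFI_ID_ANY || t->id == id))
                t->hook(mtd, t->param);
    }

    static void bootblock_stub(void *mtd, void *param)
    {
        (void)mtd; (void)param;
        puts("fixup_amd_bootblock would run here");
    }

    int main(void)
    {
        const struct fixup table[] = {
            { 0x00C2 /* MANUFACTURER_MACRONIX */, CFI_ID_ANY,
              bootblock_stub, NULL },
            { 0, 0, NULL, NULL },
        };
        run_fixups(table, 0x00C2, 0x22BA, NULL);
        return 0;
    }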
diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index c418e92e1d92..e63e6749429a 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -44,17 +44,14 @@ do { \
44 44
45#define xip_enable(base, map, cfi) \ 45#define xip_enable(base, map, cfi) \
46do { \ 46do { \
47 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \ 47 cfi_qry_mode_off(base, map, cfi); \
48 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
49 xip_allowed(base, map); \ 48 xip_allowed(base, map); \
50} while (0) 49} while (0)
51 50
52#define xip_disable_qry(base, map, cfi) \ 51#define xip_disable_qry(base, map, cfi) \
53do { \ 52do { \
54 xip_disable(); \ 53 xip_disable(); \
55 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \ 54 cfi_qry_mode_on(base, map, cfi); \
56 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
57 cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \
58} while (0) 55} while (0)
59 56
60#else 57#else
@@ -70,32 +67,6 @@ do { \
70 in: interleave,type,mode 67 in: interleave,type,mode
71 ret: table index, <0 for error 68 ret: table index, <0 for error
72 */ 69 */
73static int __xipram qry_present(struct map_info *map, __u32 base,
74 struct cfi_private *cfi)
75{
76 int osf = cfi->interleave * cfi->device_type; // scale factor
77 map_word val[3];
78 map_word qry[3];
79
80 qry[0] = cfi_build_cmd('Q', map, cfi);
81 qry[1] = cfi_build_cmd('R', map, cfi);
82 qry[2] = cfi_build_cmd('Y', map, cfi);
83
84 val[0] = map_read(map, base + osf*0x10);
85 val[1] = map_read(map, base + osf*0x11);
86 val[2] = map_read(map, base + osf*0x12);
87
88 if (!map_word_equal(map, qry[0], val[0]))
89 return 0;
90
91 if (!map_word_equal(map, qry[1], val[1]))
92 return 0;
93
94 if (!map_word_equal(map, qry[2], val[2]))
95 return 0;
96
97 return 1; // "QRY" found
98}
99 70
100static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, 71static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
101 unsigned long *chip_map, struct cfi_private *cfi) 72 unsigned long *chip_map, struct cfi_private *cfi)
@@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
116 } 87 }
117 88
118 xip_disable(); 89 xip_disable();
119 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); 90 if (!cfi_qry_mode_on(base, map, cfi)) {
120 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
121 cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
122
123 if (!qry_present(map,base,cfi)) {
124 xip_enable(base, map, cfi); 91 xip_enable(base, map, cfi);
125 return 0; 92 return 0;
126 } 93 }
@@ -141,14 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
141 start = i << cfi->chipshift; 108 start = i << cfi->chipshift;
142 /* This chip should be in read mode if it's one 109 /* This chip should be in read mode if it's one
143 we've already touched. */ 110 we've already touched. */
144 if (qry_present(map, start, cfi)) { 111 if (cfi_qry_present(map, start, cfi)) {
145 /* Eep. This chip also had the QRY marker. 112 /* Eep. This chip also had the QRY marker.
146 * Is it an alias for the new one? */ 113 * Is it an alias for the new one? */
147 cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL); 114 cfi_qry_mode_off(start, map, cfi);
148 cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
149 115
150 /* If the QRY marker goes away, it's an alias */ 116 /* If the QRY marker goes away, it's an alias */
151 if (!qry_present(map, start, cfi)) { 117 if (!cfi_qry_present(map, start, cfi)) {
152 xip_allowed(base, map); 118 xip_allowed(base, map);
153 printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", 119 printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
154 map->name, base, start); 120 map->name, base, start);
@@ -158,10 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
158 * unfortunate. Stick the new chip in read mode 124 * unfortunate. Stick the new chip in read mode
159 * too and if it's the same, assume it's an alias. */ 125 * too and if it's the same, assume it's an alias. */
160 /* FIXME: Use other modes to do a proper check */ 126 /* FIXME: Use other modes to do a proper check */
161 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); 127 cfi_qry_mode_off(base, map, cfi);
162 cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
163 128
164 if (qry_present(map, base, cfi)) { 129 if (cfi_qry_present(map, base, cfi)) {
165 xip_allowed(base, map); 130 xip_allowed(base, map);
166 printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", 131 printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
167 map->name, base, start); 132 map->name, base, start);
@@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
176 cfi->numchips++; 141 cfi->numchips++;
177 142
178 /* Put it back into Read Mode */ 143 /* Put it back into Read Mode */
179 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); 144 cfi_qry_mode_off(base, map, cfi);
180 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
181 xip_allowed(base, map); 145 xip_allowed(base, map);
182 146
183 printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", 147 printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
237 cfi_read_query(map, base + 0xf * ofs_factor); 201 cfi_read_query(map, base + 0xf * ofs_factor);
238 202
239 /* Put it back into Read Mode */ 203 /* Put it back into Read Mode */
240 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); 204 cfi_qry_mode_off(base, map, cfi);
241 /* ... even if it's an Intel chip */
242 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
243 xip_allowed(base, map); 205 xip_allowed(base, map);
244 206
245 /* Do any necessary byteswapping */ 207 /* Do any necessary byteswapping */
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 0ee457018016..34d40e25d312 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -24,6 +24,66 @@
24#include <linux/mtd/cfi.h> 24#include <linux/mtd/cfi.h>
25#include <linux/mtd/compatmac.h> 25#include <linux/mtd/compatmac.h>
26 26
27int __xipram cfi_qry_present(struct map_info *map, __u32 base,
28 struct cfi_private *cfi)
29{
30 int osf = cfi->interleave * cfi->device_type; /* scale factor */
31 map_word val[3];
32 map_word qry[3];
33
34 qry[0] = cfi_build_cmd('Q', map, cfi);
35 qry[1] = cfi_build_cmd('R', map, cfi);
36 qry[2] = cfi_build_cmd('Y', map, cfi);
37
38 val[0] = map_read(map, base + osf*0x10);
39 val[1] = map_read(map, base + osf*0x11);
40 val[2] = map_read(map, base + osf*0x12);
41
42 if (!map_word_equal(map, qry[0], val[0]))
43 return 0;
44
45 if (!map_word_equal(map, qry[1], val[1]))
46 return 0;
47
48 if (!map_word_equal(map, qry[2], val[2]))
49 return 0;
50
51 return 1; /* "QRY" found */
52}
53EXPORT_SYMBOL_GPL(cfi_qry_present);
54
55int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
56 struct cfi_private *cfi)
57{
58 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
59 cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
60 if (cfi_qry_present(map, base, cfi))
61 return 1;
 62 /* QRY not found: probably we are dealing with some odd CFI chips */
63 /* Some revisions of some old Intel chips? */
64 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
65 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
66 cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
67 if (cfi_qry_present(map, base, cfi))
68 return 1;
69 /* ST M29DW chips */
70 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
71 cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
72 if (cfi_qry_present(map, base, cfi))
73 return 1;
74 /* QRY not found */
75 return 0;
76}
77EXPORT_SYMBOL_GPL(cfi_qry_mode_on);
78
79void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
80 struct cfi_private *cfi)
81{
82 cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
83 cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
84}
85EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
86
27struct cfi_extquery * 87struct cfi_extquery *
28__xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name) 88__xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
29{ 89{
@@ -48,8 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
48#endif 108#endif
49 109
50 /* Switch it into Query Mode */ 110 /* Switch it into Query Mode */
51 cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); 111 cfi_qry_mode_on(base, map, cfi);
52
53 /* Read in the Extended Query Table */ 112 /* Read in the Extended Query Table */
54 for (i=0; i<size; i++) { 113 for (i=0; i<size; i++) {
55 ((unsigned char *)extp)[i] = 114 ((unsigned char *)extp)[i] =
@@ -57,8 +116,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
57 } 116 }
58 117
59 /* Make sure it returns to read mode */ 118 /* Make sure it returns to read mode */
60 cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL); 119 cfi_qry_mode_off(base, map, cfi);
61 cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL);
62 120
63#ifdef CONFIG_MTD_XIP 121#ifdef CONFIG_MTD_XIP
64 (void) map_read(map, base); 122 (void) map_read(map, base);
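
cfi_qry_mode_on() centralizes query-mode entry and tries three command sequences before giving up: the plain CFI entry, an entry preceded by an extra 0xFF reset for some odd older chips, and the 0x98-at-0x555 variant needed by ST M29DW parts. The probe order as a standalone sketch, with the map/chip plumbing stubbed out:

    #include <stdio.h>

    struct attempt { const char *resets; unsigned qry_addr; };

    static const struct attempt attempts[] = {
        { "F0",    0x55  },     /* plain CFI entry */
        { "F0 FF", 0x55  },     /* some odd/older Intel chips */
        { "F0",    0x555 },     /* ST M29DW */
    };

    static int qry_present(void) { return 0; }  /* stub: never found */

    int main(void)
    {
        for (unsigned i = 0; i < sizeof(attempts) / sizeof(attempts[0]); i++) {
            printf("send %s reset, then 0x98 @ 0x%x; look for \"QRY\"\n",
                   attempts[i].resets, attempts[i].qry_addr);
            if (qry_present())
                return 0;       /* chip is in query mode */
        }
        return 1;               /* no CFI-capable chip at this base */
    }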
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index f061885b2812..e2dc96441e05 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -111,7 +111,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
111 max_chips = 1; 111 max_chips = 1;
112 } 112 }
113 113
114 mapsize = sizeof(long) * ( (max_chips + BITS_PER_LONG-1) / BITS_PER_LONG ); 114 mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);
115 chip_map = kzalloc(mapsize, GFP_KERNEL); 115 chip_map = kzalloc(mapsize, GFP_KERNEL);
116 if (!chip_map) { 116 if (!chip_map) {
117 printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name); 117 printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name);
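
DIV_ROUND_UP(n, d) is the kernel's ceiling-division helper, so this hunk (like the do_write_buffer() change above) is a behavior-neutral cleanup. Its definition and the bitmap sizing it performs here, with a hypothetical chip count:

    #include <stddef.h>
    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(long))
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned max_chips = 70;    /* hypothetical bank */
        size_t mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);

        /* 70 bits need 2 longs on LP64, i.e. 16 bytes */
        printf("%zu bytes to bitmap %u chips\n", mapsize, max_chips);
        return 0;
    }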
diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index 71bc07f149b7..50a340388e74 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -7,6 +7,7 @@
7 * 7 *
8 * mtdparts=<mtddef>[;<mtddef] 8 * mtdparts=<mtddef>[;<mtddef]
9 * <mtddef> := <mtd-id>:<partdef>[,<partdef>] 9 * <mtddef> := <mtd-id>:<partdef>[,<partdef>]
10 * where <mtd-id> is the name from the "cat /proc/mtd" command
10 * <partdef> := <size>[@offset][<name>][ro][lk] 11 * <partdef> := <size>[@offset][<name>][ro][lk]
11 * <mtd-id> := unique name used in mapping driver/device (mtd->name) 12 * <mtd-id> := unique name used in mapping driver/device (mtd->name)
12 * <size> := standard linux memsize OR "-" to denote all remaining space 13 * <size> := standard linux memsize OR "-" to denote all remaining space
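
As a worked example of the syntax documented above, using the hypothetical mtd-id "physmap-flash.0" (the name that would appear in /proc/mtd), the following command-line fragment creates a read-only 256 KiB bootloader partition, a 64 KiB environment partition, and hands the remaining space to a rootfs partition:

    mtdparts=physmap-flash.0:256k(u-boot)ro,64k(env),-(rootfs)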
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 9c613f06623c..6fde0a2e3567 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -59,6 +59,27 @@ config MTD_DATAFLASH
59 Sometimes DataFlash chips are packaged inside MMC-format 59 Sometimes DataFlash chips are packaged inside MMC-format
60 cards; at this writing, the MMC stack won't handle those. 60 cards; at this writing, the MMC stack won't handle those.
61 61
62config MTD_DATAFLASH_WRITE_VERIFY
63 bool "Verify DataFlash page writes"
64 depends on MTD_DATAFLASH
65 help
66 This adds an extra check when data is written to the flash.
67 It may help if you are verifying chip setup (timings etc) on
68 your board. There is a rare possibility that even though the
69 device thinks the write was successful, a bit could have been
70 flipped accidentally due to device wear or something else.
71
72config MTD_DATAFLASH_OTP
73 bool "DataFlash OTP support (Security Register)"
74 depends on MTD_DATAFLASH
75 select HAVE_MTD_OTP
76 help
77 Newer DataFlash chips (revisions C and D) support 128 bytes of
78 one-time-programmable (OTP) data. The first half may be written
79 (once) with up to 64 bytes of data, such as a serial number or
80 other key product data. The second half is programmed with a
81 unique-to-each-chip bit pattern at the factory.
82
62config MTD_M25P80 83config MTD_M25P80
63 tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)" 84 tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
64 depends on SPI_MASTER && EXPERIMENTAL 85 depends on SPI_MASTER && EXPERIMENTAL
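
Both new options are off by default; a board configuration enabling them would carry, for example:

    CONFIG_MTD_DATAFLASH=y
    CONFIG_MTD_DATAFLASH_WRITE_VERIFY=y
    CONFIG_MTD_DATAFLASH_OTP=y

(the last line also pulls in HAVE_MTD_OTP via the select above).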
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index b35c3333e210..76a76751da36 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -39,6 +39,7 @@
39#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */ 39#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */
40#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */ 40#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */
41#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */ 41#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */
 42#define OPCODE_BE 0xc7 /* Erase whole flash chip */
42#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */ 43#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */
43#define OPCODE_RDID 0x9f /* Read JEDEC ID */ 44#define OPCODE_RDID 0x9f /* Read JEDEC ID */
44 45
@@ -161,6 +162,31 @@ static int wait_till_ready(struct m25p *flash)
161 return 1; 162 return 1;
162} 163}
163 164
165/*
166 * Erase the whole flash memory
167 *
168 * Returns 0 if successful, non-zero otherwise.
169 */
170static int erase_block(struct m25p *flash)
171{
172 DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
173 flash->spi->dev.bus_id, __func__,
174 flash->mtd.size / 1024);
175
176 /* Wait until finished previous write command. */
177 if (wait_till_ready(flash))
178 return 1;
179
180 /* Send write enable, then erase commands. */
181 write_enable(flash);
182
183 /* Set up command buffer. */
184 flash->command[0] = OPCODE_BE;
185
186 spi_write(flash->spi, flash->command, 1);
187
188 return 0;
189}
164 190
165/* 191/*
166 * Erase one sector of flash memory at offset ``offset'' which is any 192 * Erase one sector of flash memory at offset ``offset'' which is any
@@ -229,15 +255,21 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
229 */ 255 */
230 256
231 /* now erase those sectors */ 257 /* now erase those sectors */
232 while (len) { 258 if (len == flash->mtd.size && erase_block(flash)) {
233 if (erase_sector(flash, addr)) { 259 instr->state = MTD_ERASE_FAILED;
234 instr->state = MTD_ERASE_FAILED; 260 mutex_unlock(&flash->lock);
235 mutex_unlock(&flash->lock); 261 return -EIO;
 236 return -EIO; 262 } else if (len != flash->mtd.size) {
237 } 263 while (len) {
264 if (erase_sector(flash, addr)) {
265 instr->state = MTD_ERASE_FAILED;
266 mutex_unlock(&flash->lock);
267 return -EIO;
268 }
238 269
239 addr += mtd->erasesize; 270 addr += mtd->erasesize;
240 len -= mtd->erasesize; 271 len -= mtd->erasesize;
272 }
241 } 273 }
242 274
243 mutex_unlock(&flash->lock); 275 mutex_unlock(&flash->lock);
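
The erase path now dispatches on request size: a request covering the whole device issues the single bulk-erase opcode once instead of looping over every sector (and, per the else-if above, does not also run the sector loop on success). The dispatch in isolation, with hypothetical sizes and the opcodes defined earlier in this file:

    #include <stdint.h>
    #include <stdio.h>

    #define OPCODE_BE 0xc7  /* whole-chip erase */
    #define OPCODE_SE 0xd8  /* sector erase */

    static void erase(uint32_t addr, uint32_t len,
                      uint32_t chip_size, uint32_t sector_size)
    {
        if (addr == 0 && len == chip_size) {
            printf("cmd 0x%02x (bulk erase)\n", OPCODE_BE);
            return;
        }
        for (; len; addr += sector_size, len -= sector_size)
            printf("cmd 0x%02x @ 0x%06x\n", OPCODE_SE, (unsigned)addr);
    }

    int main(void)
    {
        erase(0, 2048 * 1024, 2048 * 1024, 64 * 1024); /* one bulk erase   */
        erase(0, 128 * 1024, 2048 * 1024, 64 * 1024);  /* two sector erases */
        return 0;
    }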
@@ -437,6 +469,7 @@ struct flash_info {
437 * then a two byte device id. 469 * then a two byte device id.
438 */ 470 */
439 u32 jedec_id; 471 u32 jedec_id;
472 u16 ext_id;
440 473
441 /* The size listed here is what works with OPCODE_SE, which isn't 474 /* The size listed here is what works with OPCODE_SE, which isn't
442 * necessarily called a "sector" by the vendor. 475 * necessarily called a "sector" by the vendor.
@@ -456,72 +489,75 @@ struct flash_info {
456static struct flash_info __devinitdata m25p_data [] = { 489static struct flash_info __devinitdata m25p_data [] = {
457 490
458 /* Atmel -- some are (confusingly) marketed as "DataFlash" */ 491 /* Atmel -- some are (confusingly) marketed as "DataFlash" */
459 { "at25fs010", 0x1f6601, 32 * 1024, 4, SECT_4K, }, 492 { "at25fs010", 0x1f6601, 0, 32 * 1024, 4, SECT_4K, },
460 { "at25fs040", 0x1f6604, 64 * 1024, 8, SECT_4K, }, 493 { "at25fs040", 0x1f6604, 0, 64 * 1024, 8, SECT_4K, },
461 494
462 { "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, }, 495 { "at25df041a", 0x1f4401, 0, 64 * 1024, 8, SECT_4K, },
463 { "at25df641", 0x1f4800, 64 * 1024, 128, SECT_4K, }, 496 { "at25df641", 0x1f4800, 0, 64 * 1024, 128, SECT_4K, },
464 497
465 { "at26f004", 0x1f0400, 64 * 1024, 8, SECT_4K, }, 498 { "at26f004", 0x1f0400, 0, 64 * 1024, 8, SECT_4K, },
466 { "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, }, 499 { "at26df081a", 0x1f4501, 0, 64 * 1024, 16, SECT_4K, },
467 { "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, }, 500 { "at26df161a", 0x1f4601, 0, 64 * 1024, 32, SECT_4K, },
468 { "at26df321", 0x1f4701, 64 * 1024, 64, SECT_4K, }, 501 { "at26df321", 0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
469 502
470 /* Spansion -- single (large) sector size only, at least 503 /* Spansion -- single (large) sector size only, at least
471 * for the chips listed here (without boot sectors). 504 * for the chips listed here (without boot sectors).
472 */ 505 */
473 { "s25sl004a", 0x010212, 64 * 1024, 8, }, 506 { "s25sl004a", 0x010212, 0, 64 * 1024, 8, },
474 { "s25sl008a", 0x010213, 64 * 1024, 16, }, 507 { "s25sl008a", 0x010213, 0, 64 * 1024, 16, },
475 { "s25sl016a", 0x010214, 64 * 1024, 32, }, 508 { "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
476 { "s25sl032a", 0x010215, 64 * 1024, 64, }, 509 { "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
477 { "s25sl064a", 0x010216, 64 * 1024, 128, }, 510 { "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
511 { "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
512 { "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
478 513
479 /* SST -- large erase sizes are "overlays", "sectors" are 4K */ 514 /* SST -- large erase sizes are "overlays", "sectors" are 4K */
480 { "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, }, 515 { "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
481 { "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, }, 516 { "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
482 { "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, }, 517 { "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
483 { "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, }, 518 { "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
484 519
485 /* ST Microelectronics -- newer production may have feature updates */ 520 /* ST Microelectronics -- newer production may have feature updates */
486 { "m25p05", 0x202010, 32 * 1024, 2, }, 521 { "m25p05", 0x202010, 0, 32 * 1024, 2, },
487 { "m25p10", 0x202011, 32 * 1024, 4, }, 522 { "m25p10", 0x202011, 0, 32 * 1024, 4, },
488 { "m25p20", 0x202012, 64 * 1024, 4, }, 523 { "m25p20", 0x202012, 0, 64 * 1024, 4, },
489 { "m25p40", 0x202013, 64 * 1024, 8, }, 524 { "m25p40", 0x202013, 0, 64 * 1024, 8, },
490 { "m25p80", 0, 64 * 1024, 16, }, 525 { "m25p80", 0, 0, 64 * 1024, 16, },
491 { "m25p16", 0x202015, 64 * 1024, 32, }, 526 { "m25p16", 0x202015, 0, 64 * 1024, 32, },
492 { "m25p32", 0x202016, 64 * 1024, 64, }, 527 { "m25p32", 0x202016, 0, 64 * 1024, 64, },
493 { "m25p64", 0x202017, 64 * 1024, 128, }, 528 { "m25p64", 0x202017, 0, 64 * 1024, 128, },
494 { "m25p128", 0x202018, 256 * 1024, 64, }, 529 { "m25p128", 0x202018, 0, 256 * 1024, 64, },
495 530
496 { "m45pe80", 0x204014, 64 * 1024, 16, }, 531 { "m45pe80", 0x204014, 0, 64 * 1024, 16, },
497 { "m45pe16", 0x204015, 64 * 1024, 32, }, 532 { "m45pe16", 0x204015, 0, 64 * 1024, 32, },
498 533
499 { "m25pe80", 0x208014, 64 * 1024, 16, }, 534 { "m25pe80", 0x208014, 0, 64 * 1024, 16, },
500 { "m25pe16", 0x208015, 64 * 1024, 32, SECT_4K, }, 535 { "m25pe16", 0x208015, 0, 64 * 1024, 32, SECT_4K, },
501 536
502 /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */ 537 /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
503 { "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, }, 538 { "w25x10", 0xef3011, 0, 64 * 1024, 2, SECT_4K, },
504 { "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, }, 539 { "w25x20", 0xef3012, 0, 64 * 1024, 4, SECT_4K, },
505 { "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, }, 540 { "w25x40", 0xef3013, 0, 64 * 1024, 8, SECT_4K, },
506 { "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, }, 541 { "w25x80", 0xef3014, 0, 64 * 1024, 16, SECT_4K, },
507 { "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, }, 542 { "w25x16", 0xef3015, 0, 64 * 1024, 32, SECT_4K, },
508 { "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, }, 543 { "w25x32", 0xef3016, 0, 64 * 1024, 64, SECT_4K, },
509 { "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, }, 544 { "w25x64", 0xef3017, 0, 64 * 1024, 128, SECT_4K, },
510}; 545};
511 546
512static struct flash_info *__devinit jedec_probe(struct spi_device *spi) 547static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
513{ 548{
514 int tmp; 549 int tmp;
515 u8 code = OPCODE_RDID; 550 u8 code = OPCODE_RDID;
516 u8 id[3]; 551 u8 id[5];
517 u32 jedec; 552 u32 jedec;
553 u16 ext_jedec;
518 struct flash_info *info; 554 struct flash_info *info;
519 555
520 /* JEDEC also defines an optional "extended device information" 556 /* JEDEC also defines an optional "extended device information"
521 * string for after vendor-specific data, after the three bytes 557 * string for after vendor-specific data, after the three bytes
522 * we use here. Supporting some chips might require using it. 558 * we use here. Supporting some chips might require using it.
523 */ 559 */
524 tmp = spi_write_then_read(spi, &code, 1, id, 3); 560 tmp = spi_write_then_read(spi, &code, 1, id, 5);
525 if (tmp < 0) { 561 if (tmp < 0) {
526 DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n", 562 DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
527 spi->dev.bus_id, tmp); 563 spi->dev.bus_id, tmp);
@@ -533,10 +569,14 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
533 jedec = jedec << 8; 569 jedec = jedec << 8;
534 jedec |= id[2]; 570 jedec |= id[2];
535 571
572 ext_jedec = id[3] << 8 | id[4];
573
536 for (tmp = 0, info = m25p_data; 574 for (tmp = 0, info = m25p_data;
537 tmp < ARRAY_SIZE(m25p_data); 575 tmp < ARRAY_SIZE(m25p_data);
538 tmp++, info++) { 576 tmp++, info++) {
 539 if (info->jedec_id == jedec && 577
 578 (ext_jedec == 0 ||
 579 info->ext_id == ext_jedec))
 540 return info; 580
 541 } 581
542 dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec); 582 dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
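
The extended ID makes it possible to tell apart chips that share a three-byte JEDEC ID, such as the two new Spansion entries: 0x012018 with ext 0x0300 is the s25sl12800 (256 KiB sectors), while ext 0x0301 is the s25sl12801 (64 KiB sectors). When a chip reports no extended bytes (0x0000), the first plain JEDEC match wins. Assembling both values from the five RDID bytes, as jedec_probe() does above (the example bytes are what an s25sl12801 returns):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t id[5] = { 0x01, 0x20, 0x18, 0x03, 0x01 };
        uint32_t jedec = (uint32_t)id[0] << 16 | id[1] << 8 | id[2];
        uint16_t ext = (uint16_t)(id[3] << 8 | id[4]);

        /* prints "jedec 012018 ext 0301" -> s25sl12801 */
        printf("jedec %06x ext %04x\n", (unsigned)jedec, (unsigned)ext);
        return 0;
    }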
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 8bd0dea6885f..6dd9aff8bb2d 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -30,12 +30,10 @@
30 * doesn't (yet) use these for any kind of i/o overlap or prefetching. 30 * doesn't (yet) use these for any kind of i/o overlap or prefetching.
31 * 31 *
32 * Sometimes DataFlash is packaged in MMC-format cards, although the 32 * Sometimes DataFlash is packaged in MMC-format cards, although the
33 * MMC stack can't use SPI (yet), or distinguish between MMC and DataFlash 33 * MMC stack can't (yet?) distinguish between MMC and DataFlash
34 * protocols during enumeration. 34 * protocols during enumeration.
35 */ 35 */
36 36
37#define CONFIG_DATAFLASH_WRITE_VERIFY
38
39/* reads can bypass the buffers */ 37/* reads can bypass the buffers */
40#define OP_READ_CONTINUOUS 0xE8 38#define OP_READ_CONTINUOUS 0xE8
41#define OP_READ_PAGE 0xD2 39#define OP_READ_PAGE 0xD2
@@ -80,7 +78,8 @@
80 */ 78 */
81#define OP_READ_ID 0x9F 79#define OP_READ_ID 0x9F
82#define OP_READ_SECURITY 0x77 80#define OP_READ_SECURITY 0x77
83#define OP_WRITE_SECURITY 0x9A /* OTP bits */ 81#define OP_WRITE_SECURITY_REVC 0x9A
82#define OP_WRITE_SECURITY 0x9B /* revision D */
84 83
85 84
86struct dataflash { 85struct dataflash {
@@ -402,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
402 (void) dataflash_waitready(priv->spi); 401 (void) dataflash_waitready(priv->spi);
403 402
404 403
 405#ifdef CONFIG_DATAFLASH_WRITE_VERIFY 404#ifdef CONFIG_MTD_DATAFLASH_WRITE_VERIFY
406 405
407 /* (3) Compare to Buffer1 */ 406 /* (3) Compare to Buffer1 */
408 addr = pageaddr << priv->page_offset; 407 addr = pageaddr << priv->page_offset;
@@ -431,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
431 } else 430 } else
432 status = 0; 431 status = 0;
433 432
 434#endif /* CONFIG_DATAFLASH_WRITE_VERIFY */ 433#endif /* CONFIG_MTD_DATAFLASH_WRITE_VERIFY */
435 434
436 remaining = remaining - writelen; 435 remaining = remaining - writelen;
437 pageaddr++; 436 pageaddr++;
@@ -451,16 +450,192 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
451 450
452/* ......................................................................... */ 451/* ......................................................................... */
453 452
453#ifdef CONFIG_MTD_DATAFLASH_OTP
454
455static int dataflash_get_otp_info(struct mtd_info *mtd,
456 struct otp_info *info, size_t len)
457{
 458 /* Report both blocks as identical: bytes 0..63, locked.
459 * Unless the user block changed from all-ones, we can't
460 * tell whether it's still writable; so we assume it isn't.
461 */
462 info->start = 0;
463 info->length = 64;
464 info->locked = 1;
465 return sizeof(*info);
466}
467
468static ssize_t otp_read(struct spi_device *spi, unsigned base,
469 uint8_t *buf, loff_t off, size_t len)
470{
471 struct spi_message m;
472 size_t l;
473 uint8_t *scratch;
474 struct spi_transfer t;
475 int status;
476
477 if (off > 64)
478 return -EINVAL;
479
480 if ((off + len) > 64)
481 len = 64 - off;
482 if (len == 0)
483 return len;
484
485 spi_message_init(&m);
486
487 l = 4 + base + off + len;
488 scratch = kzalloc(l, GFP_KERNEL);
489 if (!scratch)
490 return -ENOMEM;
491
492 /* OUT: OP_READ_SECURITY, 3 don't-care bytes, zeroes
493 * IN: ignore 4 bytes, data bytes 0..N (max 127)
494 */
495 scratch[0] = OP_READ_SECURITY;
496
497 memset(&t, 0, sizeof t);
498 t.tx_buf = scratch;
499 t.rx_buf = scratch;
500 t.len = l;
501 spi_message_add_tail(&t, &m);
502
503 dataflash_waitready(spi);
504
505 status = spi_sync(spi, &m);
506 if (status >= 0) {
507 memcpy(buf, scratch + 4 + base + off, len);
508 status = len;
509 }
510
511 kfree(scratch);
512 return status;
513}
514
515static int dataflash_read_fact_otp(struct mtd_info *mtd,
516 loff_t from, size_t len, size_t *retlen, u_char *buf)
517{
518 struct dataflash *priv = (struct dataflash *)mtd->priv;
519 int status;
520
521 /* 64 bytes, from 0..63 ... start at 64 on-chip */
522 mutex_lock(&priv->lock);
523 status = otp_read(priv->spi, 64, buf, from, len);
524 mutex_unlock(&priv->lock);
525
526 if (status < 0)
527 return status;
528 *retlen = status;
529 return 0;
530}
531
532static int dataflash_read_user_otp(struct mtd_info *mtd,
533 loff_t from, size_t len, size_t *retlen, u_char *buf)
534{
535 struct dataflash *priv = (struct dataflash *)mtd->priv;
536 int status;
537
538 /* 64 bytes, from 0..63 ... start at 0 on-chip */
539 mutex_lock(&priv->lock);
540 status = otp_read(priv->spi, 0, buf, from, len);
541 mutex_unlock(&priv->lock);
542
543 if (status < 0)
544 return status;
545 *retlen = status;
546 return 0;
547}
548
549static int dataflash_write_user_otp(struct mtd_info *mtd,
550 loff_t from, size_t len, size_t *retlen, u_char *buf)
551{
552 struct spi_message m;
553 const size_t l = 4 + 64;
554 uint8_t *scratch;
555 struct spi_transfer t;
556 struct dataflash *priv = (struct dataflash *)mtd->priv;
557 int status;
558
559 if (len > 64)
560 return -EINVAL;
561
562 /* Strictly speaking, we *could* truncate the write ... but
563 * let's not do that for the only write that's ever possible.
564 */
565 if ((from + len) > 64)
566 return -EINVAL;
567
568 /* OUT: OP_WRITE_SECURITY, 3 zeroes, 64 data-or-zero bytes
569 * IN: ignore all
570 */
571 scratch = kzalloc(l, GFP_KERNEL);
572 if (!scratch)
573 return -ENOMEM;
574 scratch[0] = OP_WRITE_SECURITY;
575 memcpy(scratch + 4 + from, buf, len);
576
577 spi_message_init(&m);
578
579 memset(&t, 0, sizeof t);
580 t.tx_buf = scratch;
581 t.len = l;
582 spi_message_add_tail(&t, &m);
583
584 /* Write the OTP bits, if they've not yet been written.
585 * This modifies SRAM buffer1.
586 */
587 mutex_lock(&priv->lock);
588 dataflash_waitready(priv->spi);
589 status = spi_sync(priv->spi, &m);
590 mutex_unlock(&priv->lock);
591
592 kfree(scratch);
593
594 if (status >= 0) {
595 status = 0;
596 *retlen = len;
597 }
598 return status;
599}
600
601static char *otp_setup(struct mtd_info *device, char revision)
602{
603 device->get_fact_prot_info = dataflash_get_otp_info;
604 device->read_fact_prot_reg = dataflash_read_fact_otp;
605 device->get_user_prot_info = dataflash_get_otp_info;
606 device->read_user_prot_reg = dataflash_read_user_otp;
607
608 /* rev c parts (at45db321c and at45db1281 only!) use a
609 * different write procedure; not (yet?) implemented.
610 */
611 if (revision > 'c')
612 device->write_user_prot_reg = dataflash_write_user_otp;
613
614 return ", OTP";
615}
616
617#else
618
619static char *otp_setup(struct mtd_info *device, char revision)
620{
621 return " (OTP)";
622}
623
624#endif
625
626/* ......................................................................... */
627
454/* 628/*
455 * Register DataFlash device with MTD subsystem. 629 * Register DataFlash device with MTD subsystem.
456 */ 630 */
457static int __devinit 631static int __devinit
458add_dataflash(struct spi_device *spi, char *name, 632add_dataflash_otp(struct spi_device *spi, char *name,
459 int nr_pages, int pagesize, int pageoffset) 633 int nr_pages, int pagesize, int pageoffset, char revision)
460{ 634{
461 struct dataflash *priv; 635 struct dataflash *priv;
462 struct mtd_info *device; 636 struct mtd_info *device;
463 struct flash_platform_data *pdata = spi->dev.platform_data; 637 struct flash_platform_data *pdata = spi->dev.platform_data;
638 char *otp_tag = "";
464 639
465 priv = kzalloc(sizeof *priv, GFP_KERNEL); 640 priv = kzalloc(sizeof *priv, GFP_KERNEL);
466 if (!priv) 641 if (!priv)
@@ -489,8 +664,12 @@ add_dataflash(struct spi_device *spi, char *name,
489 device->write = dataflash_write; 664 device->write = dataflash_write;
490 device->priv = priv; 665 device->priv = priv;
491 666
492 dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes\n", 667 if (revision >= 'c')
493 name, DIV_ROUND_UP(device->size, 1024), pagesize); 668 otp_tag = otp_setup(device, revision);
669
670 dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n",
671 name, DIV_ROUND_UP(device->size, 1024),
672 pagesize, otp_tag);
494 dev_set_drvdata(&spi->dev, priv); 673 dev_set_drvdata(&spi->dev, priv);
495 674
496 if (mtd_has_partitions()) { 675 if (mtd_has_partitions()) {
@@ -519,6 +698,14 @@ add_dataflash(struct spi_device *spi, char *name,
519 return add_mtd_device(device) == 1 ? -ENODEV : 0; 698 return add_mtd_device(device) == 1 ? -ENODEV : 0;
520} 699}
521 700
701static inline int __devinit
702add_dataflash(struct spi_device *spi, char *name,
703 int nr_pages, int pagesize, int pageoffset)
704{
705 return add_dataflash_otp(spi, name, nr_pages, pagesize,
706 pageoffset, 0);
707}
708
522struct flash_info { 709struct flash_info {
523 char *name; 710 char *name;
524 711
@@ -664,13 +851,16 @@ static int __devinit dataflash_probe(struct spi_device *spi)
664 * Try to detect dataflash by JEDEC ID. 851 * Try to detect dataflash by JEDEC ID.
665 * If it succeeds we know we have either a C or D part. 852 * If it succeeds we know we have either a C or D part.
666 * D will support power of 2 pagesize option. 853 * D will support power of 2 pagesize option.
854 * Both support the security register, though with different
855 * write procedures.
667 */ 856 */
668 info = jedec_probe(spi); 857 info = jedec_probe(spi);
669 if (IS_ERR(info)) 858 if (IS_ERR(info))
670 return PTR_ERR(info); 859 return PTR_ERR(info);
671 if (info != NULL) 860 if (info != NULL)
672 return add_dataflash(spi, info->name, info->nr_pages, 861 return add_dataflash_otp(spi, info->name, info->nr_pages,
673 info->pagesize, info->pageoffset); 862 info->pagesize, info->pageoffset,
863 (info->flags & SUP_POW2PS) ? 'd' : 'c');
674 864
675 /* 865 /*
 676 * Older chips support only legacy commands, identifying 866
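
The OTP helpers above assume one 128-byte security register: the user-programmable half occupies bytes 0..63 (written once via OP_WRITE_SECURITY) and the factory-programmed half occupies bytes 64..127, which is why the read paths pass base 0 and 64 respectively and clamp every access to a 64-byte window. A standalone sketch of that layout and clamp (macro names are hypothetical):

    #include <stdio.h>

    #define OTP_USER_BASE 0     /* bytes 0..63: one-time user area  */
    #define OTP_FACT_BASE 64    /* bytes 64..127: factory area      */

    int main(void)
    {
        int off = 60, len = 16;

        /* same clamp as otp_read() above: never cross the 64-byte window */
        if (off + len > 64)
            len = 64 - off;
        printf("user read: off %d len %d -> register bytes %d..%d\n",
               off, len, OTP_USER_BASE + off, OTP_USER_BASE + off + len - 1);
        printf("factory reads start at register byte %d\n", OTP_FACT_BASE);
        return 0;
    }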
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index c4f9d3378b24..50ce13887f63 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -388,6 +388,10 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
388 if (thisEUN == targetEUN) 388 if (thisEUN == targetEUN)
389 break; 389 break;
390 390
391 /* Unlink the last block from the chain. */
392 inftl->PUtable[prevEUN] = BLOCK_NIL;
393
394 /* Now try to erase it. */
391 if (INFTL_formatblock(inftl, thisEUN) < 0) { 395 if (INFTL_formatblock(inftl, thisEUN) < 0) {
392 /* 396 /*
393 * Could not erase : mark block as reserved. 397 * Could not erase : mark block as reserved.
@@ -396,7 +400,6 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
396 } else { 400 } else {
397 /* Correctly erased : mark it as free */ 401 /* Correctly erased : mark it as free */
398 inftl->PUtable[thisEUN] = BLOCK_FREE; 402 inftl->PUtable[thisEUN] = BLOCK_FREE;
399 inftl->PUtable[prevEUN] = BLOCK_NIL;
400 inftl->numfreeEUNs++; 403 inftl->numfreeEUNs++;
401 } 404 }
402 } 405 }
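
Unlinking the tail block from the chain before erasing it means a failed or power-interrupted erase can no longer leave a freed or bad block reachable from the virtual-unit chain; previously the unlink happened only on the success path. The ordering restated as a compilable sketch (the BLOCK_* values here are illustrative, not INFTL's exact constants):

    #include <stdio.h>

    #define BLOCK_NIL      0xffff  /* chain terminator */
    #define BLOCK_FREE     0xfffe
    #define BLOCK_RESERVED 0xfffd

    static unsigned short PUtable[16];

    static int formatblock(int eun) { (void)eun; return 0; } /* stub: erase OK */

    static void fold_tail(int prevEUN, int thisEUN)
    {
        PUtable[prevEUN] = BLOCK_NIL;               /* 1: unlink first  */
        if (formatblock(thisEUN) < 0)
            PUtable[thisEUN] = BLOCK_RESERVED;      /* 2a: failed erase */
        else
            PUtable[thisEUN] = BLOCK_FREE;          /* 2b: reusable     */
    }

    int main(void)
    {
        fold_tail(1, 2);
        printf("PUtable[2] = %#x\n", PUtable[2]);
        return 0;
    }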
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index df8e00bba07b..5ea169362164 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -332,30 +332,6 @@ config MTD_CFI_FLAGADM
332 Mapping for the Flaga digital module. If you don't have one, ignore 332 Mapping for the Flaga digital module. If you don't have one, ignore
333 this setting. 333 this setting.
334 334
335config MTD_WALNUT
336 tristate "Flash device mapped on IBM 405GP Walnut"
337 depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
338 help
339 This enables access routines for the flash chips on the IBM 405GP
340 Walnut board. If you have one of these boards and would like to
341 use the flash chips on it, say 'Y'.
342
343config MTD_EBONY
344 tristate "Flash devices mapped on IBM 440GP Ebony"
345 depends on MTD_JEDECPROBE && EBONY && !PPC_MERGE
346 help
347 This enables access routines for the flash chips on the IBM 440GP
348 Ebony board. If you have one of these boards and would like to
349 use the flash chips on it, say 'Y'.
350
351config MTD_OCOTEA
352 tristate "Flash devices mapped on IBM 440GX Ocotea"
353 depends on MTD_CFI && OCOTEA && !PPC_MERGE
354 help
355 This enables access routines for the flash chips on the IBM 440GX
356 Ocotea board. If you have one of these boards and would like to
357 use the flash chips on it, say 'Y'.
358
359config MTD_REDWOOD 335config MTD_REDWOOD
360 tristate "CFI Flash devices mapped on IBM Redwood" 336 tristate "CFI Flash devices mapped on IBM Redwood"
361 depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 ) 337 depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
@@ -458,13 +434,6 @@ config MTD_CEIVA
458 PhotoMax Digital Picture Frame. 434 PhotoMax Digital Picture Frame.
459 If you have such a device, say 'Y'. 435 If you have such a device, say 'Y'.
460 436
461config MTD_NOR_TOTO
462 tristate "NOR Flash device on TOTO board"
463 depends on ARCH_OMAP && OMAP_TOTO
464 help
465 This enables access to the NOR flash on the Texas Instruments
466 TOTO board.
467
468config MTD_H720X 437config MTD_H720X
469 tristate "Hynix evaluation board mappings" 438 tristate "Hynix evaluation board mappings"
470 depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 ) 439 depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 )
@@ -522,7 +491,7 @@ config MTD_BFIN_ASYNC
522 491
523config MTD_UCLINUX 492config MTD_UCLINUX
524 tristate "Generic uClinux RAM/ROM filesystem support" 493 tristate "Generic uClinux RAM/ROM filesystem support"
525 depends on MTD_PARTITIONS && !MMU 494 depends on MTD_PARTITIONS && MTD_RAM && !MMU
526 help 495 help
527 Map driver to support image based filesystems for uClinux. 496 Map driver to support image based filesystems for uClinux.
528 497
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 6cda6df973e5..6d9ba35caf11 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -50,12 +50,8 @@ obj-$(CONFIG_MTD_REDWOOD) += redwood.o
50obj-$(CONFIG_MTD_UCLINUX) += uclinux.o 50obj-$(CONFIG_MTD_UCLINUX) += uclinux.o
51obj-$(CONFIG_MTD_NETtel) += nettel.o 51obj-$(CONFIG_MTD_NETtel) += nettel.o
52obj-$(CONFIG_MTD_SCB2_FLASH) += scb2_flash.o 52obj-$(CONFIG_MTD_SCB2_FLASH) += scb2_flash.o
53obj-$(CONFIG_MTD_EBONY) += ebony.o
54obj-$(CONFIG_MTD_OCOTEA) += ocotea.o
55obj-$(CONFIG_MTD_WALNUT) += walnut.o
56obj-$(CONFIG_MTD_H720X) += h720x-flash.o 53obj-$(CONFIG_MTD_H720X) += h720x-flash.o
57obj-$(CONFIG_MTD_SBC8240) += sbc8240.o 54obj-$(CONFIG_MTD_SBC8240) += sbc8240.o
58obj-$(CONFIG_MTD_NOR_TOTO) += omap-toto-flash.o
59obj-$(CONFIG_MTD_IXP4XX) += ixp4xx.o 55obj-$(CONFIG_MTD_IXP4XX) += ixp4xx.o
60obj-$(CONFIG_MTD_IXP2000) += ixp2000.o 56obj-$(CONFIG_MTD_IXP2000) += ixp2000.o
61obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o 57obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
deleted file mode 100644
index d92b7c70d3ed..000000000000
--- a/drivers/mtd/maps/ebony.c
+++ /dev/null
@@ -1,163 +0,0 @@
1/*
2 * Mapping for Ebony user flash
3 *
4 * Matt Porter <mporter@kernel.crashing.org>
5 *
6 * Copyright 2002-2004 MontaVista Software Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/mtd/mtd.h>
19#include <linux/mtd/map.h>
20#include <linux/mtd/partitions.h>
21#include <asm/io.h>
22#include <asm/ibm44x.h>
23#include <platforms/4xx/ebony.h>
24
25static struct mtd_info *flash;
26
27static struct map_info ebony_small_map = {
28 .name = "Ebony small flash",
29 .size = EBONY_SMALL_FLASH_SIZE,
30 .bankwidth = 1,
31};
32
33static struct map_info ebony_large_map = {
34 .name = "Ebony large flash",
35 .size = EBONY_LARGE_FLASH_SIZE,
36 .bankwidth = 1,
37};
38
39static struct mtd_partition ebony_small_partitions[] = {
40 {
41 .name = "OpenBIOS",
42 .offset = 0x0,
43 .size = 0x80000,
44 }
45};
46
47static struct mtd_partition ebony_large_partitions[] = {
48 {
49 .name = "fs",
50 .offset = 0,
51 .size = 0x380000,
52 },
53 {
54 .name = "firmware",
55 .offset = 0x380000,
56 .size = 0x80000,
57 }
58};
59
60int __init init_ebony(void)
61{
62 u8 fpga0_reg;
63 u8 __iomem *fpga0_adr;
64 unsigned long long small_flash_base, large_flash_base;
65
66 fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16);
67 if (!fpga0_adr)
68 return -ENOMEM;
69
70 fpga0_reg = readb(fpga0_adr);
71 iounmap(fpga0_adr);
72
73 if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
74 !EBONY_FLASH_SEL(fpga0_reg))
75 small_flash_base = EBONY_SMALL_FLASH_HIGH2;
76 else if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
77 EBONY_FLASH_SEL(fpga0_reg))
78 small_flash_base = EBONY_SMALL_FLASH_HIGH1;
79 else if (!EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
80 !EBONY_FLASH_SEL(fpga0_reg))
81 small_flash_base = EBONY_SMALL_FLASH_LOW2;
82 else
83 small_flash_base = EBONY_SMALL_FLASH_LOW1;
84
85 if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
86 !EBONY_ONBRD_FLASH_EN(fpga0_reg))
87 large_flash_base = EBONY_LARGE_FLASH_LOW;
88 else
89 large_flash_base = EBONY_LARGE_FLASH_HIGH;
90
91 ebony_small_map.phys = small_flash_base;
92 ebony_small_map.virt = ioremap64(small_flash_base,
93 ebony_small_map.size);
94
95 if (!ebony_small_map.virt) {
96 printk("Failed to ioremap flash\n");
97 return -EIO;
98 }
99
100 simple_map_init(&ebony_small_map);
101
102 flash = do_map_probe("jedec_probe", &ebony_small_map);
103 if (flash) {
104 flash->owner = THIS_MODULE;
105 add_mtd_partitions(flash, ebony_small_partitions,
106 ARRAY_SIZE(ebony_small_partitions));
107 } else {
108 printk("map probe failed for flash\n");
109 iounmap(ebony_small_map.virt);
110 return -ENXIO;
111 }
112
113 ebony_large_map.phys = large_flash_base;
114 ebony_large_map.virt = ioremap64(large_flash_base,
115 ebony_large_map.size);
116
117 if (!ebony_large_map.virt) {
118 printk("Failed to ioremap flash\n");
119 iounmap(ebony_small_map.virt);
120 return -EIO;
121 }
122
123 simple_map_init(&ebony_large_map);
124
125 flash = do_map_probe("jedec_probe", &ebony_large_map);
126 if (flash) {
127 flash->owner = THIS_MODULE;
128 add_mtd_partitions(flash, ebony_large_partitions,
129 ARRAY_SIZE(ebony_large_partitions));
130 } else {
131 printk("map probe failed for flash\n");
132 iounmap(ebony_small_map.virt);
133 iounmap(ebony_large_map.virt);
134 return -ENXIO;
135 }
136
137 return 0;
138}
139
140static void __exit cleanup_ebony(void)
141{
142 if (flash) {
143 del_mtd_partitions(flash);
144 map_destroy(flash);
145 }
146
147 if (ebony_small_map.virt) {
148 iounmap(ebony_small_map.virt);
149 ebony_small_map.virt = NULL;
150 }
151
152 if (ebony_large_map.virt) {
153 iounmap(ebony_large_map.virt);
154 ebony_large_map.virt = NULL;
155 }
156}
157
158module_init(init_ebony);
159module_exit(cleanup_ebony);
160
161MODULE_LICENSE("GPL");
162MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
163MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards");
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
deleted file mode 100644
index 5522eac8c980..000000000000
--- a/drivers/mtd/maps/ocotea.c
+++ /dev/null
@@ -1,154 +0,0 @@
1/*
2 * Mapping for Ocotea user flash
3 *
4 * Matt Porter <mporter@kernel.crashing.org>
5 *
6 * Copyright 2002-2004 MontaVista Software Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/mtd/mtd.h>
19#include <linux/mtd/map.h>
20#include <linux/mtd/partitions.h>
21#include <asm/io.h>
22#include <asm/ibm44x.h>
23#include <platforms/4xx/ocotea.h>
24
25static struct mtd_info *flash;
26
27static struct map_info ocotea_small_map = {
28 .name = "Ocotea small flash",
29 .size = OCOTEA_SMALL_FLASH_SIZE,
30 .buswidth = 1,
31};
32
33static struct map_info ocotea_large_map = {
34 .name = "Ocotea large flash",
35 .size = OCOTEA_LARGE_FLASH_SIZE,
36 .buswidth = 1,
37};
38
39static struct mtd_partition ocotea_small_partitions[] = {
40 {
41 .name = "pibs",
42 .offset = 0x0,
43 .size = 0x100000,
44 }
45};
46
47static struct mtd_partition ocotea_large_partitions[] = {
48 {
49 .name = "fs",
50 .offset = 0,
51 .size = 0x300000,
52 },
53 {
54 .name = "firmware",
55 .offset = 0x300000,
56 .size = 0x100000,
57 }
58};
59
60int __init init_ocotea(void)
61{
62 u8 fpga0_reg;
63 u8 *fpga0_adr;
64 unsigned long long small_flash_base, large_flash_base;
65
66 fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16);
67 if (!fpga0_adr)
68 return -ENOMEM;
69
70 fpga0_reg = readb((unsigned long)fpga0_adr);
71 iounmap(fpga0_adr);
72
73 if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) {
74 small_flash_base = OCOTEA_SMALL_FLASH_HIGH;
75 large_flash_base = OCOTEA_LARGE_FLASH_LOW;
76 }
77 else {
78 small_flash_base = OCOTEA_SMALL_FLASH_LOW;
79 large_flash_base = OCOTEA_LARGE_FLASH_HIGH;
80 }
81
82 ocotea_small_map.phys = small_flash_base;
83 ocotea_small_map.virt = ioremap64(small_flash_base,
84 ocotea_small_map.size);
85
86 if (!ocotea_small_map.virt) {
87 printk("Failed to ioremap flash\n");
88 return -EIO;
89 }
90
91 simple_map_init(&ocotea_small_map);
92
93 flash = do_map_probe("map_rom", &ocotea_small_map);
94 if (flash) {
95 flash->owner = THIS_MODULE;
96 add_mtd_partitions(flash, ocotea_small_partitions,
97 ARRAY_SIZE(ocotea_small_partitions));
98 } else {
99 printk("map probe failed for flash\n");
100 iounmap(ocotea_small_map.virt);
101 return -ENXIO;
102 }
103
104 ocotea_large_map.phys = large_flash_base;
105 ocotea_large_map.virt = ioremap64(large_flash_base,
106 ocotea_large_map.size);
107
108 if (!ocotea_large_map.virt) {
109 printk("Failed to ioremap flash\n");
110 iounmap(ocotea_small_map.virt);
111 return -EIO;
112 }
113
114 simple_map_init(&ocotea_large_map);
115
116 flash = do_map_probe("cfi_probe", &ocotea_large_map);
117 if (flash) {
118 flash->owner = THIS_MODULE;
119 add_mtd_partitions(flash, ocotea_large_partitions,
120 ARRAY_SIZE(ocotea_large_partitions));
121 } else {
122 printk("map probe failed for flash\n");
123 iounmap(ocotea_small_map.virt);
124 iounmap(ocotea_large_map.virt);
125 return -ENXIO;
126 }
127
128 return 0;
129}
130
131static void __exit cleanup_ocotea(void)
132{
133 if (flash) {
134 del_mtd_partitions(flash);
135 map_destroy(flash);
136 }
137
138 if (ocotea_small_map.virt) {
139 iounmap((void *)ocotea_small_map.virt);
140 ocotea_small_map.virt = 0;
141 }
142
143 if (ocotea_large_map.virt) {
144 iounmap((void *)ocotea_large_map.virt);
145 ocotea_large_map.virt = 0;
146 }
147}
148
149module_init(init_ocotea);
150module_exit(cleanup_ocotea);
151
152MODULE_LICENSE("GPL");
153MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
154MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards");
diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c
deleted file mode 100644
index 0a60ebbc2175..000000000000
--- a/drivers/mtd/maps/omap-toto-flash.c
+++ /dev/null
@@ -1,133 +0,0 @@
1/*
2 * NOR Flash memory access on TI Toto board
3 *
4 * jzhang@ti.com (C) 2003 Texas Instruments.
5 *
6 * (C) 2002 MontVista Software, Inc.
7 */
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/init.h>
14#include <linux/slab.h>
15
16#include <linux/mtd/mtd.h>
17#include <linux/mtd/map.h>
18#include <linux/mtd/partitions.h>
19
20#include <asm/hardware.h>
21#include <asm/io.h>
22
23
24#ifndef CONFIG_ARCH_OMAP
25#error This is for OMAP architecture only
26#endif
27
28//these lines need be moved to a hardware header file
29#define OMAP_TOTO_FLASH_BASE 0xd8000000
30#define OMAP_TOTO_FLASH_SIZE 0x80000
31
32static struct map_info omap_toto_map_flash = {
33 .name = "OMAP Toto flash",
34 .bankwidth = 2,
35 .virt = (void __iomem *)OMAP_TOTO_FLASH_BASE,
36};
37
38
39static struct mtd_partition toto_flash_partitions[] = {
40 {
41 .name = "BootLoader",
42 .size = 0x00040000, /* hopefully u-boot will stay 128k + 128*/
43 .offset = 0,
44 .mask_flags = MTD_WRITEABLE, /* force read-only */
45 }, {
46 .name = "ReservedSpace",
47 .size = 0x00030000,
48 .offset = MTDPART_OFS_APPEND,
49 //mask_flags: MTD_WRITEABLE, /* force read-only */
50 }, {
51 .name = "EnvArea", /* bottom 64KiB for env vars */
52 .size = MTDPART_SIZ_FULL,
53 .offset = MTDPART_OFS_APPEND,
54 }
55};
56
57static struct mtd_partition *parsed_parts;
58
59static struct mtd_info *flash_mtd;
60
61static int __init init_flash (void)
62{
63
64 struct mtd_partition *parts;
65 int nb_parts = 0;
66 int parsed_nr_parts = 0;
67 const char *part_type;
68
69 /*
70 * Static partition definition selection
71 */
72 part_type = "static";
73
74 parts = toto_flash_partitions;
75 nb_parts = ARRAY_SIZE(toto_flash_partitions);
76 omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE;
77 omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE);
78
79 simple_map_init(&omap_toto_map_flash);
80 /*
81 * Now let's probe for the actual flash. Do it here since
82 * specific machine settings might have been set above.
83 */
84 printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n",
85 omap_toto_map_flash.bankwidth*8);
86 flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash);
87 if (!flash_mtd)
88 return -ENXIO;
89
90 if (parsed_nr_parts > 0) {
91 parts = parsed_parts;
92 nb_parts = parsed_nr_parts;
93 }
94
95 if (nb_parts == 0) {
96 printk(KERN_NOTICE "OMAP toto flash: no partition info available,"
97 "registering whole flash at once\n");
98 if (add_mtd_device(flash_mtd)){
99 return -ENXIO;
100 }
101 } else {
102 printk(KERN_NOTICE "Using %s partition definition\n",
103 part_type);
104 return add_mtd_partitions(flash_mtd, parts, nb_parts);
105 }
106 return 0;
107}
108
109int __init omap_toto_mtd_init(void)
110{
111 int status;
112
113 if (status = init_flash()) {
114 printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n");
115 }
116 return status;
117}
118
119static void __exit omap_toto_mtd_cleanup(void)
120{
121 if (flash_mtd) {
122 del_mtd_partitions(flash_mtd);
123 map_destroy(flash_mtd);
124 kfree(parsed_parts);
125 }
126}
127
128module_init(omap_toto_mtd_init);
129module_exit(omap_toto_mtd_cleanup);
130
131MODULE_AUTHOR("Jian Zhang");
132MODULE_DESCRIPTION("OMAP Toto board map driver");
133MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c
index 5c6a25c90380..48f4cf5cb9d1 100644
--- a/drivers/mtd/maps/pci.c
+++ b/drivers/mtd/maps/pci.c
@@ -203,15 +203,8 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
	 * not enabled, should we be allocating a new resource for it
	 * or simply enabling it?
	 */
-	if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
-	      IORESOURCE_ROM_ENABLE)) {
-		u32 val;
-		pci_resource_flags(dev, PCI_ROM_RESOURCE) |= IORESOURCE_ROM_ENABLE;
-		pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-		val |= PCI_ROM_ADDRESS_ENABLE;
-		pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
-		printk("%s: enabling expansion ROM\n", pci_name(dev));
-	}
+	pci_enable_rom(dev);
+	printk("%s: enabling expansion ROM\n", pci_name(dev));
 	}
 
 	if (!len || !base)
@@ -232,18 +225,13 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 static void
 intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
 {
-	u32 val;
-
 	if (map->base)
 		iounmap(map->base);
 
 	/*
	 * We need to undo the PCI BAR2/PCI ROM BAR address alteration.
	 */
-	pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~IORESOURCE_ROM_ENABLE;
-	pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-	val &= ~PCI_ROM_ADDRESS_ENABLE;
-	pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+	pci_disable_rom(dev);
 }
 
 static unsigned long
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 49acd4171893..5fcfec034a94 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -230,8 +230,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (err == 0) {
-		err = of_mtd_parse_partitions(&dev->dev, info->mtd,
-					      dp, &info->parts);
+		err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts);
 		if (err < 0)
 			return err;
 	}
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
deleted file mode 100644
index e243476c8171..000000000000
--- a/drivers/mtd/maps/walnut.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * Mapping for Walnut flash
3 * (used ebony.c as a "framework")
4 *
5 * Heikki Lindholm <holindho@infradead.org>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/mtd/mtd.h>
19#include <linux/mtd/map.h>
20#include <linux/mtd/partitions.h>
21#include <asm/io.h>
22#include <asm/ibm4xx.h>
23#include <platforms/4xx/walnut.h>
24
25/* these should be in platforms/4xx/walnut.h ? */
26#define WALNUT_FLASH_ONBD_N(x) (x & 0x02)
27#define WALNUT_FLASH_SRAM_SEL(x) (x & 0x01)
28#define WALNUT_FLASH_LOW 0xFFF00000
29#define WALNUT_FLASH_HIGH 0xFFF80000
30#define WALNUT_FLASH_SIZE 0x80000
31
32static struct mtd_info *flash;
33
34static struct map_info walnut_map = {
35 .name = "Walnut flash",
36 .size = WALNUT_FLASH_SIZE,
37 .bankwidth = 1,
38};
39
40/* Actually, OpenBIOS is the last 128 KiB of the flash - better
41 * partitioning could be made */
42static struct mtd_partition walnut_partitions[] = {
43 {
44 .name = "OpenBIOS",
45 .offset = 0x0,
46 .size = WALNUT_FLASH_SIZE,
47 /*.mask_flags = MTD_WRITEABLE, */ /* force read-only */
48 }
49};
50
51int __init init_walnut(void)
52{
53 u8 fpga_brds1;
54 void *fpga_brds1_adr;
55 void *fpga_status_adr;
56 unsigned long flash_base;
57
58 /* this should already be mapped (platform/4xx/walnut.c) */
59 fpga_status_adr = ioremap(WALNUT_FPGA_BASE, 8);
60 if (!fpga_status_adr)
61 return -ENOMEM;
62
63 fpga_brds1_adr = fpga_status_adr+5;
64 fpga_brds1 = readb(fpga_brds1_adr);
65 /* iounmap(fpga_status_adr); */
66
67 if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
68 printk("The on-board flash is disabled (U79 sw 5)!");
69 iounmap(fpga_status_adr);
70 return -EIO;
71 }
72 if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
73 flash_base = WALNUT_FLASH_LOW;
74 else
75 flash_base = WALNUT_FLASH_HIGH;
76
77 walnut_map.phys = flash_base;
78 walnut_map.virt =
79 (void __iomem *)ioremap(flash_base, walnut_map.size);
80
81 if (!walnut_map.virt) {
82 printk("Failed to ioremap flash.\n");
83 iounmap(fpga_status_adr);
84 return -EIO;
85 }
86
87 simple_map_init(&walnut_map);
88
89 flash = do_map_probe("jedec_probe", &walnut_map);
90 if (flash) {
91 flash->owner = THIS_MODULE;
92 add_mtd_partitions(flash, walnut_partitions,
93 ARRAY_SIZE(walnut_partitions));
94 } else {
95 printk("map probe failed for flash\n");
96 iounmap(fpga_status_adr);
97 return -ENXIO;
98 }
99
100 iounmap(fpga_status_adr);
101 return 0;
102}
103
104static void __exit cleanup_walnut(void)
105{
106 if (flash) {
107 del_mtd_partitions(flash);
108 map_destroy(flash);
109 }
110
111 if (walnut_map.virt) {
112 iounmap((void *)walnut_map.virt);
113 walnut_map.virt = 0;
114 }
115}
116
117module_init(init_walnut);
118module_exit(cleanup_walnut);
119
120MODULE_LICENSE("GPL");
121MODULE_AUTHOR("Heikki Lindholm <holindho@infradead.org>");
122MODULE_DESCRIPTION("MTD map and partitions for IBM 405GP Walnut boards");
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 1c74762dec89..963840e9b5bf 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -348,7 +348,7 @@ static void mtdchar_erase_callback (struct erase_info *instr)
 	wake_up((wait_queue_head_t *)instr->priv);
 }
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
 	struct mtd_info *mtd = mfi->mtd;
@@ -665,7 +665,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 	case OTPSELECT:
 	{
 		int mode;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 2972a5edb73d..789842d0e6f2 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* make a local copy of instr to avoid modifying the caller's struct */
 	erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL);
@@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 			/* sanity check: should never happen since
			 * block alignment has been checked above */
 			BUG_ON(err == -EINVAL);
-			if (erase->fail_addr != 0xffffffff)
+			if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 				instr->fail_addr = erase->fail_addr + offset;
 			break;
 		}
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 5a680e1e61f1..aebb3b27edbd 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -33,6 +33,7 @@
 #include <linux/interrupt.h>
 #include <linux/mtd/mtd.h>
 
+#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
 #define OOPS_PAGE_SIZE 4096
 
 static struct mtdoops_context {
@@ -99,7 +100,7 @@ static void mtdoops_inc_counter(struct mtdoops_context *cxt)
 	int ret;
 
 	cxt->nextpage++;
-	if (cxt->nextpage > cxt->oops_pages)
+	if (cxt->nextpage >= cxt->oops_pages)
 		cxt->nextpage = 0;
 	cxt->nextcount++;
 	if (cxt->nextcount == 0xffffffff)
@@ -141,7 +142,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 	mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
 	if (mod != 0) {
 		cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 	}
 
@@ -158,7 +159,7 @@ badblock:
 			cxt->nextpage * OOPS_PAGE_SIZE);
 		i++;
 		cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
-		if (cxt->nextpage > cxt->oops_pages)
+		if (cxt->nextpage >= cxt->oops_pages)
 			cxt->nextpage = 0;
 		if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
 			printk(KERN_ERR "mtdoops: All blocks bad!\n");
@@ -224,40 +225,40 @@ static void find_next_position(struct mtdoops_context *cxt)
 {
 	struct mtd_info *mtd = cxt->mtd;
 	int ret, page, maxpos = 0;
-	u32 count, maxcount = 0xffffffff;
+	u32 count[2], maxcount = 0xffffffff;
 	size_t retlen;
 
 	for (page = 0; page < cxt->oops_pages; page++) {
-		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
-		if ((retlen != 4) || ((ret < 0) && (ret != -EUCLEAN))) {
-			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
+		ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 8, &retlen, (u_char *) &count[0]);
+		if ((retlen != 8) || ((ret < 0) && (ret != -EUCLEAN))) {
+			printk(KERN_ERR "mtdoops: Read failure at %d (%td of 8 read)"
 				", err %d.\n", page * OOPS_PAGE_SIZE, retlen, ret);
 			continue;
 		}
 
-		if (count == 0xffffffff)
+		if (count[1] != MTDOOPS_KERNMSG_MAGIC)
+			continue;
+		if (count[0] == 0xffffffff)
 			continue;
 		if (maxcount == 0xffffffff) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] < 0x40000000) && (maxcount > 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count < 0xc0000000)) {
-			maxcount = count;
+		} else if ((count[0] > maxcount) && (count[0] < 0xc0000000)) {
+			maxcount = count[0];
 			maxpos = page;
-		} else if ((count > maxcount) && (count > 0xc0000000)
+		} else if ((count[0] > maxcount) && (count[0] > 0xc0000000)
 			   && (maxcount > 0x80000000)) {
-			maxcount = count;
+			maxcount = count[0];
 			maxpos = page;
 		}
 	}
 	if (maxcount == 0xffffffff) {
 		cxt->nextpage = 0;
 		cxt->nextcount = 1;
-		cxt->ready = 1;
-		printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
-		       cxt->nextpage, cxt->nextcount);
+		schedule_work(&cxt->work_erase);
 		return;
 	}
 
@@ -358,8 +359,9 @@ mtdoops_console_write(struct console *co, const char *s, unsigned int count)
 
 	if (cxt->writecount == 0) {
 		u32 *stamp = cxt->oops_buf;
-		*stamp = cxt->nextcount;
-		cxt->writecount = 4;
+		*stamp++ = cxt->nextcount;
+		*stamp = MTDOOPS_KERNMSG_MAGIC;
+		cxt->writecount = 8;
 	}
 
 	if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9a06dc93ee0d..3728913fa5fa 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	instr->addr += part->offset;
 	ret = part->master->erase(part->master, instr);
 	if (ret) {
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
@@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr)
 	if (instr->mtd->erase == part_erase) {
 		struct mtd_part *part = PART(instr->mtd);
 
-		if (instr->fail_addr != 0xffffffff)
+		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
 		instr->addr -= part->offset;
 	}
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 41f361c49b32..1c2e9450d663 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -56,6 +56,12 @@ config MTD_NAND_H1900
 	help
 	  This enables the driver for the iPAQ h1900 flash.
 
+config MTD_NAND_GPIO
+	tristate "GPIO NAND Flash driver"
+	depends on GENERIC_GPIO && ARM
+	help
+	  This enables a GPIO based NAND flash driver.
+
 config MTD_NAND_SPIA
 	tristate "NAND Flash device on SPIA board"
 	depends on ARCH_P720T
@@ -68,12 +74,6 @@ config MTD_NAND_AMS_DELTA
 	help
 	  Support for NAND flash on Amstrad E3 (Delta).
 
-config MTD_NAND_TOTO
-	tristate "NAND Flash device on TOTO board"
-	depends on ARCH_OMAP && BROKEN
-	help
-	  Support for NAND flash on Texas Instruments Toto platform.
-
 config MTD_NAND_TS7250
 	tristate "NAND Flash device on TS-7250 board"
 	depends on MACH_TS72XX
@@ -163,13 +163,6 @@ config MTD_NAND_S3C2410_HWECC
 	  incorrect ECC generation, and if using these, the default of
 	  software ECC is preferable.
 
-config MTD_NAND_NDFC
-	tristate "NDFC NanD Flash Controller"
-	depends on 4xx && !PPC_MERGE
-	select MTD_NAND_ECC_SMC
-	help
-	  NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
-
 config MTD_NAND_S3C2410_CLKSTOP
 	bool "S3C2410 NAND IDLE clock stop"
 	depends on MTD_NAND_S3C2410
@@ -340,6 +333,13 @@ config MTD_NAND_PXA3xx
 	  This enables the driver for the NAND flash device found on
 	  PXA3xx processors
 
+config MTD_NAND_PXA3xx_BUILTIN
+	bool "Use builtin definitions for some NAND chips (deprecated)"
+	depends on MTD_NAND_PXA3xx
+	help
+	  This enables builtin definitions for some NAND chips. This
+	  is deprecated in favor of platform specific data.
+
 config MTD_NAND_CM_X270
 	tristate "Support for NAND Flash on CM-X270 modules"
 	depends on MTD_NAND && MACH_ARMCORE
@@ -400,10 +400,24 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_UPM
 	tristate "Support for NAND on Freescale UPM"
-	depends on MTD_NAND && OF_GPIO && (PPC_83xx || PPC_85xx)
+	depends on MTD_NAND && (PPC_83xx || PPC_85xx)
 	select FSL_LBC
 	help
 	  Enables support for NAND Flash chips wired onto Freescale PowerPC
 	  processor localbus with User-Programmable Machine support.
 
+config MTD_NAND_MXC
+	tristate "MXC NAND support"
+	depends on ARCH_MX2
+	help
+	  This enables the driver for the NAND flash controller on the
+	  MXC processors.
+
+config MTD_NAND_SH_FLCTL
+	tristate "Support for NAND on Renesas SuperH FLCTL"
+	depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+	help
+	  Several Renesas SuperH CPUs have FLCTL. This option enables
+	  support for NAND Flash using FLCTL. This driver supports SH7723.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index b786c5da82da..b661586afbfc 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_MTD_NAND_IDS) += nand_ids.o
 obj-$(CONFIG_MTD_NAND_CAFE)		+= cafe_nand.o
 obj-$(CONFIG_MTD_NAND_SPIA)		+= spia.o
 obj-$(CONFIG_MTD_NAND_AMS_DELTA)	+= ams-delta.o
-obj-$(CONFIG_MTD_NAND_TOTO)		+= toto.o
 obj-$(CONFIG_MTD_NAND_AUTCPU12)		+= autcpu12.o
 obj-$(CONFIG_MTD_NAND_EDB7312)		+= edb7312.o
 obj-$(CONFIG_MTD_NAND_AU1550)		+= au1550nd.o
@@ -24,6 +23,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM) += nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)		+= cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)		+= ndfc.o
 obj-$(CONFIG_MTD_NAND_ATMEL)		+= atmel_nand.o
+obj-$(CONFIG_MTD_NAND_GPIO)		+= gpio.o
 obj-$(CONFIG_MTD_NAND_CM_X270)		+= cmx270_nand.o
 obj-$(CONFIG_MTD_NAND_BASLER_EXCITE)	+= excite_nandflash.o
 obj-$(CONFIG_MTD_NAND_PXA3xx)		+= pxa3xx_nand.o
@@ -34,5 +34,7 @@ obj-$(CONFIG_MTD_NAND_PASEMI) += pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)		+= fsl_upm.o
+obj-$(CONFIG_MTD_NAND_SH_FLCTL)		+= sh_flctl.o
+obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 3387e0d5076b..c98c1570a40b 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -174,48 +174,6 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
 }
 
 /*
- * write oob for small pages
- */
-static int atmel_nand_write_oob_512(struct mtd_info *mtd,
-				    struct nand_chip *chip, int page)
-{
-	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
-	int eccsize = chip->ecc.size, length = mtd->oobsize;
-	int len, pos, status = 0;
-	const uint8_t *bufpoi = chip->oob_poi;
-
-	pos = eccsize + chunk;
-
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
-	len = min_t(int, length, chunk);
-	chip->write_buf(mtd, bufpoi, len);
-	bufpoi += len;
-	length -= len;
-	if (length > 0)
-		chip->write_buf(mtd, bufpoi, length);
-
-	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-
-	return status & NAND_STATUS_FAIL ? -EIO : 0;
-
-}
-
-/*
- * read oob for small pages
- */
-static int atmel_nand_read_oob_512(struct mtd_info *mtd,
-				   struct nand_chip *chip, int page, int sndcmd)
-{
-	if (sndcmd) {
-		chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-		sndcmd = 0;
-	}
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return sndcmd;
-}
-
-/*
  * Calculate HW ECC
 *
 * function called after a write
@@ -235,14 +193,14 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
 	/* get the first 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, PR);
 
-	ecc_code[eccpos[0]] = ecc_value & 0xFF;
-	ecc_code[eccpos[1]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[0] = ecc_value & 0xFF;
+	ecc_code[1] = (ecc_value >> 8) & 0xFF;
 
 	/* get the last 2 ECC bytes */
 	ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
 
-	ecc_code[eccpos[2]] = ecc_value & 0xFF;
-	ecc_code[eccpos[3]] = (ecc_value >> 8) & 0xFF;
+	ecc_code[2] = ecc_value & 0xFF;
+	ecc_code[3] = (ecc_value >> 8) & 0xFF;
 
 	return 0;
 }
@@ -476,14 +434,12 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 			res = -EIO;
 			goto err_ecc_ioremap;
 		}
-		nand_chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+		nand_chip->ecc.mode = NAND_ECC_HW;
 		nand_chip->ecc.calculate = atmel_nand_calculate;
 		nand_chip->ecc.correct = atmel_nand_correct;
 		nand_chip->ecc.hwctl = atmel_nand_hwctl;
 		nand_chip->ecc.read_page = atmel_nand_read_page;
 		nand_chip->ecc.bytes = 4;
-		nand_chip->ecc.prepad = 0;
-		nand_chip->ecc.postpad = 0;
 	}
 
 	nand_chip->chip_delay = 20;	/* 20us command delay time */
@@ -514,7 +470,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		goto err_scan_ident;
 	}
 
-	if (nand_chip->ecc.mode == NAND_ECC_HW_SYNDROME) {
+	if (nand_chip->ecc.mode == NAND_ECC_HW) {
 		/* ECC is calculated for the whole page (1 step) */
 		nand_chip->ecc.size = mtd->writesize;
 
@@ -522,8 +478,6 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 		switch (mtd->writesize) {
 		case 512:
 			nand_chip->ecc.layout = &atmel_oobinfo_small;
-			nand_chip->ecc.read_oob = atmel_nand_read_oob_512;
-			nand_chip->ecc.write_oob = atmel_nand_write_oob_512;
 			ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
 			break;
 		case 1024:
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 3370a800fd36..9f1b451005ca 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -289,8 +289,10 @@ static int __init cs553x_init(void)
 	int i;
 	uint64_t val;
 
+#ifdef CONFIG_MTD_PARTITIONS
 	int mtd_parts_nb = 0;
 	struct mtd_partition *mtd_parts = NULL;
+#endif
 
 	/* If the CPU isn't a Geode GX or LX, abort */
 	if (!is_geode())
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 98ad3cefcaf4..4aa5bd6158da 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -918,8 +918,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
 
 #ifdef CONFIG_MTD_OF_PARTS
 	if (ret == 0) {
-		ret = of_mtd_parse_partitions(priv->dev, &priv->mtd,
-					      node, &parts);
+		ret = of_mtd_parse_partitions(priv->dev, node, &parts);
 		if (ret < 0)
 			goto err;
 	}
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index 1ebfd87f00b4..024e3fffd4bb 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -36,8 +37,6 @@ struct fsl_upm_nand {
 	uint8_t upm_cmd_offset;
 	void __iomem *io_base;
 	int rnb_gpio;
-	const uint32_t *wait_pattern;
-	const uint32_t *wait_write;
 	int chip_delay;
 };
 
@@ -61,10 +60,11 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
 	if (fun->rnb_gpio >= 0) {
 		while (--cnt && !fun_chip_ready(&fun->mtd))
 			cpu_relax();
+		if (!cnt)
+			dev_err(fun->dev, "tired waiting for RNB\n");
+	} else {
+		ndelay(100);
 	}
-
-	if (!cnt)
-		dev_err(fun->dev, "tired waiting for RNB\n");
 }
 
 static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -89,8 +89,7 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 
 	fsl_upm_run_pattern(&fun->upm, fun->io_base, cmd);
 
-	if (fun->wait_pattern)
-		fun_wait_rnb(fun);
+	fun_wait_rnb(fun);
 }
 
 static uint8_t fun_read_byte(struct mtd_info *mtd)
@@ -116,14 +115,16 @@ static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 
 	for (i = 0; i < len; i++) {
 		out_8(fun->chip.IO_ADDR_W, buf[i]);
-		if (fun->wait_write)
-			fun_wait_rnb(fun);
+		fun_wait_rnb(fun);
 	}
 }
 
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
+static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+				   const struct device_node *upm_np,
+				   const struct resource *io_res)
 {
 	int ret;
+	struct device_node *flash_np;
 #ifdef CONFIG_MTD_PARTITIONS
 	static const char *part_types[] = { "cmdlinepart", NULL, };
 #endif
@@ -143,18 +144,37 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
 	fun->mtd.priv = &fun->chip;
 	fun->mtd.owner = THIS_MODULE;
 
+	flash_np = of_get_next_child(upm_np, NULL);
+	if (!flash_np)
+		return -ENODEV;
+
+	fun->mtd.name = kasprintf(GFP_KERNEL, "%x.%s", io_res->start,
+				  flash_np->name);
+	if (!fun->mtd.name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	ret = nand_scan(&fun->mtd, 1);
 	if (ret)
-		return ret;
-
-	fun->mtd.name = fun->dev->bus_id;
+		goto err;
 
 #ifdef CONFIG_MTD_PARTITIONS
 	ret = parse_mtd_partitions(&fun->mtd, part_types, &fun->parts, 0);
+
+#ifdef CONFIG_MTD_OF_PARTS
+	if (ret == 0)
+		ret = of_mtd_parse_partitions(fun->dev, &fun->mtd,
+					      flash_np, &fun->parts);
+#endif
 	if (ret > 0)
-		return add_mtd_partitions(&fun->mtd, fun->parts, ret);
+		ret = add_mtd_partitions(&fun->mtd, fun->parts, ret);
+	else
 #endif
-	return add_mtd_device(&fun->mtd);
+	ret = add_mtd_device(&fun->mtd);
+err:
	of_node_put(flash_np);
+	return ret;
 }
 
 static int __devinit fun_probe(struct of_device *ofdev,
@@ -211,6 +231,12 @@ static int __devinit fun_probe(struct of_device *ofdev,
 		goto err2;
 	}
 
+	prop = of_get_property(ofdev->node, "chip-delay", NULL);
+	if (prop)
+		fun->chip_delay = *prop;
+	else
+		fun->chip_delay = 50;
+
 	fun->io_base = devm_ioremap_nocache(&ofdev->dev, io_res.start,
 					    io_res.end - io_res.start + 1);
 	if (!fun->io_base) {
@@ -220,17 +246,8 @@ static int __devinit fun_probe(struct of_device *ofdev,
 
 	fun->dev = &ofdev->dev;
 	fun->last_ctrl = NAND_CLE;
-	fun->wait_pattern = of_get_property(ofdev->node, "fsl,wait-pattern",
-					    NULL);
-	fun->wait_write = of_get_property(ofdev->node, "fsl,wait-write", NULL);
-
-	prop = of_get_property(ofdev->node, "chip-delay", NULL);
-	if (prop)
-		fun->chip_delay = *prop;
-	else
-		fun->chip_delay = 50;
 
-	ret = fun_chip_init(fun);
+	ret = fun_chip_init(fun, ofdev->node, &io_res);
 	if (ret)
 		goto err2;
 
@@ -251,6 +268,7 @@ static int __devexit fun_remove(struct of_device *ofdev)
 	struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
 
 	nand_release(&fun->mtd);
+	kfree(fun->mtd.name);
 
 	if (fun->rnb_gpio >= 0)
 		gpio_free(fun->rnb_gpio);
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
new file mode 100644
index 000000000000..8f902e75aa85
--- /dev/null
+++ b/drivers/mtd/nand/gpio.c
@@ -0,0 +1,375 @@
1/*
2 * drivers/mtd/nand/gpio.c
3 *
4 * Updated, and converted to generic GPIO based driver by Russell King.
5 *
6 * Written by Ben Dooks <ben@simtec.co.uk>
7 * Based on 2.4 version by Mark Whittaker
8 *
9 * © 2004 Simtec Electronics
10 *
11 * Device driver for NAND connected via GPIO
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2 as
15 * published by the Free Software Foundation.
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/init.h>
21#include <linux/slab.h>
22#include <linux/module.h>
23#include <linux/platform_device.h>
24#include <linux/gpio.h>
25#include <linux/io.h>
26#include <linux/mtd/mtd.h>
27#include <linux/mtd/nand.h>
28#include <linux/mtd/partitions.h>
29#include <linux/mtd/nand-gpio.h>
30
31struct gpiomtd {
32 void __iomem *io_sync;
33 struct mtd_info mtd_info;
34 struct nand_chip nand_chip;
35 struct gpio_nand_platdata plat;
36};
37
38#define gpio_nand_getpriv(x) container_of(x, struct gpiomtd, mtd_info)
39
40
41#ifdef CONFIG_ARM
42/* gpio_nand_dosync()
43 *
44 * Make sure the GPIO state changes occur in-order with writes to NAND
45 * memory region.
46 * Needed on PXA due to bus-reordering within the SoC itself (see the section
47 * on I/O ordering in the PXA manual, section 2.3, p35).
48 */
49static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
50{
51 unsigned long tmp;
52
53 if (gpiomtd->io_sync) {
54 /*
55 * Linux memory barriers don't cater for what's required here.
56 * What's required is what's here - a read from a separate
57 * region with a dependency on that read.
58 */
59 tmp = readl(gpiomtd->io_sync);
60 asm volatile("mov %1, %0\n" : "=r" (tmp) : "r" (tmp));
61 }
62}
63#else
64static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
65#endif
66
67static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
68{
69 struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
70
71 gpio_nand_dosync(gpiomtd);
72
73 if (ctrl & NAND_CTRL_CHANGE) {
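	/* nCE is active-low (hence the inversion below); CLE and ALE are active-high */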
74 gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
75 gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
76 gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
77 gpio_nand_dosync(gpiomtd);
78 }
79 if (cmd == NAND_CMD_NONE)
80 return;
81
82 writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
83 gpio_nand_dosync(gpiomtd);
84}
85
86static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
87{
88 struct nand_chip *this = mtd->priv;
89
90 writesb(this->IO_ADDR_W, buf, len);
91}
92
93static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
94{
95 struct nand_chip *this = mtd->priv;
96
97 readsb(this->IO_ADDR_R, buf, len);
98}
99
100static int gpio_nand_verifybuf(struct mtd_info *mtd, const u_char *buf, int len)
101{
102 struct nand_chip *this = mtd->priv;
103 unsigned char read, *p = (unsigned char *) buf;
104 int i, err = 0;
105
106 for (i = 0; i < len; i++) {
107 read = readb(this->IO_ADDR_R);
108 if (read != p[i]) {
109 pr_debug("%s: err at %d (read %04x vs %04x)\n",
110 __func__, i, read, p[i]);
111 err = -EFAULT;
112 }
113 }
114 return err;
115}
116
117static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
118 int len)
119{
120 struct nand_chip *this = mtd->priv;
121
122 if (IS_ALIGNED((unsigned long)buf, 2)) {
123 writesw(this->IO_ADDR_W, buf, len>>1);
124 } else {
125 int i;
126 unsigned short *ptr = (unsigned short *)buf;
127
128 for (i = 0; i < len; i += 2, ptr++)
129 writew(*ptr, this->IO_ADDR_W);
130 }
131}
132
133static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
134{
135 struct nand_chip *this = mtd->priv;
136
137 if (IS_ALIGNED((unsigned long)buf, 2)) {
138 readsw(this->IO_ADDR_R, buf, len>>1);
139 } else {
140 int i;
141 unsigned short *ptr = (unsigned short *)buf;
142
143 for (i = 0; i < len; i += 2, ptr++)
144 *ptr = readw(this->IO_ADDR_R);
145 }
146}
147
148static int gpio_nand_verifybuf16(struct mtd_info *mtd, const u_char *buf,
149 int len)
150{
151 struct nand_chip *this = mtd->priv;
152 unsigned short read, *p = (unsigned short *) buf;
153 int i, err = 0;
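	/* len is given in bytes; the comparison below is word-at-a-time, so halve it */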
154 len >>= 1;
155
156 for (i = 0; i < len; i++) {
157 read = readw(this->IO_ADDR_R);
158 if (read != p[i]) {
159 pr_debug("%s: err at %d (read %04x vs %04x)\n",
160 __func__, i, read, p[i]);
161 err = -EFAULT;
162 }
163 }
164 return err;
165}
166
167
168static int gpio_nand_devready(struct mtd_info *mtd)
169{
170 struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
171 return gpio_get_value(gpiomtd->plat.gpio_rdy);
172}
173
174static int __devexit gpio_nand_remove(struct platform_device *dev)
175{
176 struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
177 struct resource *res;
178
179 nand_release(&gpiomtd->mtd_info);
180
181 res = platform_get_resource(dev, IORESOURCE_MEM, 1);
182 iounmap(gpiomtd->io_sync);
183 if (res)
184 release_mem_region(res->start, res->end - res->start + 1);
185
186 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
187 iounmap(gpiomtd->nand_chip.IO_ADDR_R);
188 release_mem_region(res->start, res->end - res->start + 1);
189
190 if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
191 gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
192 gpio_set_value(gpiomtd->plat.gpio_nce, 1);
193
194 gpio_free(gpiomtd->plat.gpio_cle);
195 gpio_free(gpiomtd->plat.gpio_ale);
196 gpio_free(gpiomtd->plat.gpio_nce);
197 if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
198 gpio_free(gpiomtd->plat.gpio_nwp);
199 gpio_free(gpiomtd->plat.gpio_rdy);
200
201 kfree(gpiomtd);
202
203 return 0;
204}
205
206static void __iomem *request_and_remap(struct resource *res, size_t size,
207 const char *name, int *err)
208{
209 void __iomem *ptr;
210
211 if (!request_mem_region(res->start, res->end - res->start + 1, name)) {
212 *err = -EBUSY;
213 return NULL;
214 }
215
216 ptr = ioremap(res->start, size);
217 if (!ptr) {
218 release_mem_region(res->start, res->end - res->start + 1);
219 *err = -ENOMEM;
220 }
221 return ptr;
222}
223
224static int __devinit gpio_nand_probe(struct platform_device *dev)
225{
226 struct gpiomtd *gpiomtd;
227 struct nand_chip *this;
228 struct resource *res0, *res1;
229 int ret;
230
231 if (!dev->dev.platform_data)
232 return -EINVAL;
233
234 res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
235 if (!res0)
236 return -EINVAL;
237
238 gpiomtd = kzalloc(sizeof(*gpiomtd), GFP_KERNEL);
239 if (gpiomtd == NULL) {
240 dev_err(&dev->dev, "failed to create NAND MTD\n");
241 return -ENOMEM;
242 }
243
244 this = &gpiomtd->nand_chip;
245 this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
246 if (!this->IO_ADDR_R) {
247 dev_err(&dev->dev, "unable to map NAND\n");
248 goto err_map;
249 }
250
251 res1 = platform_get_resource(dev, IORESOURCE_MEM, 1);
252 if (res1) {
253 gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
254 if (!gpiomtd->io_sync) {
255 dev_err(&dev->dev, "unable to map sync NAND\n");
256 goto err_sync;
257 }
258 }
259
260 memcpy(&gpiomtd->plat, dev->dev.platform_data, sizeof(gpiomtd->plat));
261
262 ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
263 if (ret)
264 goto err_nce;
265 gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
266 if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
267 ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
268 if (ret)
269 goto err_nwp;
270 gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
271 }
272 ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
273 if (ret)
274 goto err_ale;
275 gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
276 ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
277 if (ret)
278 goto err_cle;
279 gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
280 ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
281 if (ret)
282 goto err_rdy;
283 gpio_direction_input(gpiomtd->plat.gpio_rdy);
284
285
286 this->IO_ADDR_W = this->IO_ADDR_R;
287 this->ecc.mode = NAND_ECC_SOFT;
288 this->options = gpiomtd->plat.options;
289 this->chip_delay = gpiomtd->plat.chip_delay;
290
291 /* install our routines */
292 this->cmd_ctrl = gpio_nand_cmd_ctrl;
293 this->dev_ready = gpio_nand_devready;
294
295 if (this->options & NAND_BUSWIDTH_16) {
296 this->read_buf = gpio_nand_readbuf16;
297 this->write_buf = gpio_nand_writebuf16;
298 this->verify_buf = gpio_nand_verifybuf16;
299 } else {
300 this->read_buf = gpio_nand_readbuf;
301 this->write_buf = gpio_nand_writebuf;
302 this->verify_buf = gpio_nand_verifybuf;
303 }
304
305 /* set the mtd private data for the nand driver */
306 gpiomtd->mtd_info.priv = this;
307 gpiomtd->mtd_info.owner = THIS_MODULE;
308
309 if (nand_scan(&gpiomtd->mtd_info, 1)) {
310 dev_err(&dev->dev, "no nand chips found?\n");
311 ret = -ENXIO;
312 goto err_wp;
313 }
314
315 if (gpiomtd->plat.adjust_parts)
316 gpiomtd->plat.adjust_parts(&gpiomtd->plat,
317 gpiomtd->mtd_info.size);
318
319 add_mtd_partitions(&gpiomtd->mtd_info, gpiomtd->plat.parts,
320 gpiomtd->plat.num_parts);
321 platform_set_drvdata(dev, gpiomtd);
322
323 return 0;
324
325err_wp:
326 if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
327 gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
328 gpio_free(gpiomtd->plat.gpio_rdy);
329err_rdy:
330 gpio_free(gpiomtd->plat.gpio_cle);
331err_cle:
332 gpio_free(gpiomtd->plat.gpio_ale);
333err_ale:
334 if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
335 gpio_free(gpiomtd->plat.gpio_nwp);
336err_nwp:
337 gpio_free(gpiomtd->plat.gpio_nce);
338err_nce:
339 iounmap(gpiomtd->io_sync);
340 if (res1)
341 release_mem_region(res1->start, res1->end - res1->start + 1);
342err_sync:
343 iounmap(gpiomtd->nand_chip.IO_ADDR_R);
344 release_mem_region(res0->start, res0->end - res0->start + 1);
345err_map:
346 kfree(gpiomtd);
347 return ret;
348}
349
350static struct platform_driver gpio_nand_driver = {
351 .probe = gpio_nand_probe,
352 .remove = gpio_nand_remove,
353 .driver = {
354 .name = "gpio-nand",
355 },
356};
357
358static int __init gpio_nand_init(void)
359{
360 printk(KERN_INFO "GPIO NAND driver, © 2004 Simtec Electronics\n");
361
362 return platform_driver_register(&gpio_nand_driver);
363}
364
365static void __exit gpio_nand_exit(void)
366{
367 platform_driver_unregister(&gpio_nand_driver);
368}
369
370module_init(gpio_nand_init);
371module_exit(gpio_nand_exit);
372
373MODULE_LICENSE("GPL");
374MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
375MODULE_DESCRIPTION("GPIO NAND Driver");
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
new file mode 100644
index 000000000000..21fd4f1c4806
--- /dev/null
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -0,0 +1,1077 @@
1/*
2 * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
3 * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * MA 02110-1301, USA.
18 */
19
20#include <linux/delay.h>
21#include <linux/slab.h>
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/mtd/mtd.h>
25#include <linux/mtd/nand.h>
26#include <linux/mtd/partitions.h>
27#include <linux/interrupt.h>
28#include <linux/device.h>
29#include <linux/platform_device.h>
30#include <linux/clk.h>
31#include <linux/err.h>
32#include <linux/io.h>
33
34#include <asm/mach/flash.h>
35#include <mach/mxc_nand.h>
36
37#define DRIVER_NAME "mxc_nand"
38
39/* Addresses for NFC registers */
40#define NFC_BUF_SIZE 0xE00
41#define NFC_BUF_ADDR 0xE04
42#define NFC_FLASH_ADDR 0xE06
43#define NFC_FLASH_CMD 0xE08
44#define NFC_CONFIG 0xE0A
45#define NFC_ECC_STATUS_RESULT 0xE0C
46#define NFC_RSLTMAIN_AREA 0xE0E
47#define NFC_RSLTSPARE_AREA 0xE10
48#define NFC_WRPROT 0xE12
49#define NFC_UNLOCKSTART_BLKADDR 0xE14
50#define NFC_UNLOCKEND_BLKADDR 0xE16
51#define NFC_NF_WRPRST 0xE18
52#define NFC_CONFIG1 0xE1A
53#define NFC_CONFIG2 0xE1C
54
55/* Addresses for NFC RAM BUFFER Main area 0 */
56#define MAIN_AREA0 0x000
57#define MAIN_AREA1 0x200
58#define MAIN_AREA2 0x400
59#define MAIN_AREA3 0x600
60
61/* Addresses for NFC SPARE BUFFER Spare area 0 */
62#define SPARE_AREA0 0x800
63#define SPARE_AREA1 0x810
64#define SPARE_AREA2 0x820
65#define SPARE_AREA3 0x830
66
67/* Set INT to 0, FCMD to 1, rest to 0 in NFC_CONFIG2 Register
68 * for Command operation */
69#define NFC_CMD 0x1
70
71/* Set INT to 0, FADD to 1, rest to 0 in NFC_CONFIG2 Register
72 * for Address operation */
73#define NFC_ADDR 0x2
74
75/* Set INT to 0, FDI to 1, rest to 0 in NFC_CONFIG2 Register
76 * for Input operation */
77#define NFC_INPUT 0x4
78
79/* Set INT to 0, FDO to 001, rest to 0 in NFC_CONFIG2 Register
80 * for Data Output operation */
81#define NFC_OUTPUT 0x8
82
83/* Set INT to 0, FD0 to 010, rest to 0 in NFC_CONFIG2 Register
84 * for Read ID operation */
85#define NFC_ID 0x10
86
87/* Set INT to 0, FDO to 100, rest to 0 in NFC_CONFIG2 Register
88 * for Read Status operation */
89#define NFC_STATUS 0x20
90
91/* Set INT to 1, rest to 0 in NFC_CONFIG2 Register for Read
92 * Status operation */
93#define NFC_INT 0x8000
94
95#define NFC_SP_EN (1 << 2)
96#define NFC_ECC_EN (1 << 3)
97#define NFC_INT_MSK (1 << 4)
98#define NFC_BIG (1 << 5)
99#define NFC_RST (1 << 6)
100#define NFC_CE (1 << 7)
101#define NFC_ONE_CYCLE (1 << 8)
102
103struct mxc_nand_host {
104 struct mtd_info mtd;
105 struct nand_chip nand;
106 struct mtd_partition *parts;
107 struct device *dev;
108
109 void __iomem *regs;
110 int spare_only;
111 int status_request;
112 int pagesize_2k;
113 uint16_t col_addr;
114 struct clk *clk;
115 int clk_act;
116 int irq;
117
118 wait_queue_head_t irq_waitq;
119};
120
121/* Define delays in microsec for NAND device operations */
122#define TROP_US_DELAY 2000
123/* Macros to get byte and bit positions of ECC */
124#define COLPOS(x) ((x) >> 3)
125#define BITPOS(x) ((x) & 0xf)
126
127/* Define single bit Error positions in Main & Spare area */
128#define MAIN_SINGLEBIT_ERROR 0x4
129#define SPARE_SINGLEBIT_ERROR 0x1
130
131/* OOB placement block for use with hardware ecc generation */
132static struct nand_ecclayout nand_hw_eccoob_8 = {
133 .eccbytes = 5,
134 .eccpos = {6, 7, 8, 9, 10},
135 .oobfree = {{0, 5}, {11, 5}, }
136};
137
138static struct nand_ecclayout nand_hw_eccoob_16 = {
139 .eccbytes = 5,
140 .eccpos = {6, 7, 8, 9, 10},
141 .oobfree = {{0, 6}, {12, 4}, }
142};
143
144#ifdef CONFIG_MTD_PARTITIONS
145static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
146#endif
147
148static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
149{
150 struct mxc_nand_host *host = dev_id;
151
152 uint16_t tmp;
153
154 tmp = readw(host->regs + NFC_CONFIG1);
155 tmp |= NFC_INT_MSK; /* Disable interrupt */
156 writew(tmp, host->regs + NFC_CONFIG1);
157
158 wake_up(&host->irq_waitq);
159
160 return IRQ_HANDLED;
161}
162
163/* This function polls the NANDFC to wait for the basic operation to
164 * complete by checking the INT bit of the config2 register.
165 */
166static void wait_op_done(struct mxc_nand_host *host, int max_retries,
167 uint16_t param, int useirq)
168{
169 uint32_t tmp;
170
171 if (useirq) {
172 if ((readw(host->regs + NFC_CONFIG2) & NFC_INT) == 0) {
173
174 tmp = readw(host->regs + NFC_CONFIG1);
175 tmp &= ~NFC_INT_MSK; /* Enable interrupt */
176 writew(tmp, host->regs + NFC_CONFIG1);
177
178 wait_event(host->irq_waitq,
179 readw(host->regs + NFC_CONFIG2) & NFC_INT);
180
181 tmp = readw(host->regs + NFC_CONFIG2);
182 tmp &= ~NFC_INT;
183 writew(tmp, host->regs + NFC_CONFIG2);
184 }
185 } else {
186 while (max_retries-- > 0) {
187 if (readw(host->regs + NFC_CONFIG2) & NFC_INT) {
188 tmp = readw(host->regs + NFC_CONFIG2);
189 tmp &= ~NFC_INT;
190 writew(tmp, host->regs + NFC_CONFIG2);
191 break;
192 }
193 udelay(1);
194 }
195 if (max_retries <= 0)
196 DEBUG(MTD_DEBUG_LEVEL0, "%s(%d): INT not set\n",
197 __func__, param);
198 }
199}
200
201/* This function issues the specified command to the NAND device and
202 * waits for completion. */
203static void send_cmd(struct mxc_nand_host *host, uint16_t cmd, int useirq)
204{
205 DEBUG(MTD_DEBUG_LEVEL3, "send_cmd(host, 0x%x, %d)\n", cmd, useirq);
206
207 writew(cmd, host->regs + NFC_FLASH_CMD);
208 writew(NFC_CMD, host->regs + NFC_CONFIG2);
209
210 /* Wait for operation to complete */
211 wait_op_done(host, TROP_US_DELAY, cmd, useirq);
212}
213
214/* This function sends an address (or partial address) to the
215 * NAND device. The address is used to select the source/destination for
216 * a NAND command. */
217static void send_addr(struct mxc_nand_host *host, uint16_t addr, int islast)
218{
219 DEBUG(MTD_DEBUG_LEVEL3, "send_addr(host, 0x%x %d)\n", addr, islast);
220
221 writew(addr, host->regs + NFC_FLASH_ADDR);
222 writew(NFC_ADDR, host->regs + NFC_CONFIG2);
223
224 /* Wait for operation to complete */
225 wait_op_done(host, TROP_US_DELAY, addr, islast);
226}
227
228/* This function requests the NANDFC to initiate the transfer
229 * of data currently in the NANDFC RAM buffer to the NAND device. */
230static void send_prog_page(struct mxc_nand_host *host, uint8_t buf_id,
231 int spare_only)
232{
233 DEBUG(MTD_DEBUG_LEVEL3, "send_prog_page (%d)\n", spare_only);
234
235 /* NANDFC buffer 0 is used for page read/write */
236 writew(buf_id, host->regs + NFC_BUF_ADDR);
237
238 /* Configure spare or page+spare access */
239 if (!host->pagesize_2k) {
240 uint16_t config1 = readw(host->regs + NFC_CONFIG1);
241 if (spare_only)
242 config1 |= NFC_SP_EN;
243 else
244 config1 &= ~(NFC_SP_EN);
245 writew(config1, host->regs + NFC_CONFIG1);
246 }
247
248 writew(NFC_INPUT, host->regs + NFC_CONFIG2);
249
250 /* Wait for operation to complete */
251 wait_op_done(host, TROP_US_DELAY, spare_only, true);
252}
253
254/* Requests the NANDFC to initiate the transfer of data from the
255 * NAND device into the NANDFC RAM buffer. */
256static void send_read_page(struct mxc_nand_host *host, uint8_t buf_id,
257 int spare_only)
258{
259 DEBUG(MTD_DEBUG_LEVEL3, "send_read_page (%d)\n", spare_only);
260
261 /* NANDFC buffer 0 is used for page read/write */
262 writew(buf_id, host->regs + NFC_BUF_ADDR);
263
264 /* Configure spare or page+spare access */
265 if (!host->pagesize_2k) {
266 uint32_t config1 = readw(host->regs + NFC_CONFIG1);
267 if (spare_only)
268 config1 |= NFC_SP_EN;
269 else
270 config1 &= ~NFC_SP_EN;
271 writew(config1, host->regs + NFC_CONFIG1);
272 }
273
274 writew(NFC_OUTPUT, host->regs + NFC_CONFIG2);
275
276 /* Wait for operation to complete */
277 wait_op_done(host, TROP_US_DELAY, spare_only, true);
278}
279
280/* Request the NANDFC to perform a read of the NAND device ID. */
281static void send_read_id(struct mxc_nand_host *host)
282{
283 struct nand_chip *this = &host->nand;
284 uint16_t tmp;
285
286 /* NANDFC buffer 0 is used for device ID output */
287 writew(0x0, host->regs + NFC_BUF_ADDR);
288
289 /* Read ID into main buffer */
290 tmp = readw(host->regs + NFC_CONFIG1);
291 tmp &= ~NFC_SP_EN;
292 writew(tmp, host->regs + NFC_CONFIG1);
293
294 writew(NFC_ID, host->regs + NFC_CONFIG2);
295
296 /* Wait for operation to complete */
297 wait_op_done(host, TROP_US_DELAY, 0, true);
298
299 if (this->options & NAND_BUSWIDTH_16) {
300 void __iomem *main_buf = host->regs + MAIN_AREA0;
301 /* compress the ID info */
302 writeb(readb(main_buf + 2), main_buf + 1);
303 writeb(readb(main_buf + 4), main_buf + 2);
304 writeb(readb(main_buf + 6), main_buf + 3);
305 writeb(readb(main_buf + 8), main_buf + 4);
306 writeb(readb(main_buf + 10), main_buf + 5);
307 }
308}
309
310/* This function requests the NANDFC to perform a read of the
311 * NAND device status and returns the current status. */
312static uint16_t get_dev_status(struct mxc_nand_host *host)
313{
314 void __iomem *main_buf = host->regs + MAIN_AREA1;
315 uint32_t store;
316 uint16_t ret, tmp;
317 /* Issue status request to NAND device */
318
319 /* store the first word of main area1, to be restored later */
320 store = readl(main_buf);
321 /* NANDFC buffer 1 is used for device status to prevent
322 * corruption of read/write buffer on status requests. */
323 writew(1, host->regs + NFC_BUF_ADDR);
324
325 /* Read status into main buffer */
326 tmp = readw(host->regs + NFC_CONFIG1);
327 tmp &= ~NFC_SP_EN;
328 writew(tmp, host->regs + NFC_CONFIG1);
329
330 writew(NFC_STATUS, host->regs + NFC_CONFIG2);
331
332 /* Wait for operation to complete */
333 wait_op_done(host, TROP_US_DELAY, 0, true);
334
335 /* Status is placed in first word of main buffer */
336 /* get the status, then restore the area 1 data */
337 ret = readw(main_buf);
338 writel(store, main_buf);
339
340 return ret;
341}
342
343/* This function is used by the upper layer to check if the device is ready */
344static int mxc_nand_dev_ready(struct mtd_info *mtd)
345{
346 /*
347 * NFC handles R/B internally. Therefore, this function
348 * always returns status as ready.
349 */
350 return 1;
351}
352
353static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
354{
355 /*
356 * If HW ECC is enabled, we turn it on during init. There is
357 * no need to enable again here.
358 */
359}
360
361static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
362 u_char *read_ecc, u_char *calc_ecc)
363{
364 struct nand_chip *nand_chip = mtd->priv;
365 struct mxc_nand_host *host = nand_chip->priv;
366
367 /*
368 * 1-Bit errors are automatically corrected in HW. No need for
369 * additional correction. 2-Bit errors cannot be corrected by
370 * HW ECC, so we need to return failure
371 */
372 uint16_t ecc_status = readw(host->regs + NFC_ECC_STATUS_RESULT);
373
374 if (((ecc_status & 0x3) == 2) || ((ecc_status >> 2) == 2)) {
375 DEBUG(MTD_DEBUG_LEVEL0,
376 "MXC_NAND: HWECC uncorrectable 2-bit ECC error\n");
377 return -1;
378 }
379
380 return 0;
381}
382
383static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
384 u_char *ecc_code)
385{
386 return 0;
387}
388
389static u_char mxc_nand_read_byte(struct mtd_info *mtd)
390{
391 struct nand_chip *nand_chip = mtd->priv;
392 struct mxc_nand_host *host = nand_chip->priv;
393 uint8_t ret = 0;
394 uint16_t col, rd_word;
395 uint16_t __iomem *main_buf = host->regs + MAIN_AREA0;
396 uint16_t __iomem *spare_buf = host->regs + SPARE_AREA0;
397
398 /* Check for status request */
399 if (host->status_request)
400 return get_dev_status(host) & 0xFF;
401
402 /* Get column for 16-bit access */
403 col = host->col_addr >> 1;
404
405 /* If we are accessing the spare region */
406 if (host->spare_only)
407 rd_word = readw(&spare_buf[col]);
408 else
409 rd_word = readw(&main_buf[col]);
410
411 /* Pick upper/lower byte of word from RAM buffer */
412 if (host->col_addr & 0x1)
413 ret = (rd_word >> 8) & 0xFF;
414 else
415 ret = rd_word & 0xFF;
416
417 /* Update saved column address */
418 host->col_addr++;
419
420 return ret;
421}
422
423static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
424{
425 struct nand_chip *nand_chip = mtd->priv;
426 struct mxc_nand_host *host = nand_chip->priv;
427 uint16_t col, rd_word, ret;
428 uint16_t __iomem *p;
429
430 DEBUG(MTD_DEBUG_LEVEL3,
431 "mxc_nand_read_word(col = %d)\n", host->col_addr);
432
433 col = host->col_addr;
434 /* Adjust saved column address */
435 if (col < mtd->writesize && host->spare_only)
436 col += mtd->writesize;
437
438 if (col < mtd->writesize)
439 p = (host->regs + MAIN_AREA0) + (col >> 1);
440 else
441 p = (host->regs + SPARE_AREA0) + ((col - mtd->writesize) >> 1);
442
443 if (col & 1) {
444 rd_word = readw(p);
445 ret = (rd_word >> 8) & 0xff;
446 rd_word = readw(&p[1]);
447 ret |= (rd_word << 8) & 0xff00;
448
449 } else
450 ret = readw(p);
451
452 /* Update saved column address */
453 host->col_addr = col + 2;
454
455 return ret;
456}
457
458/* Write data of length len from buffer buf to the NAND Flash. The data to
459 * be written is first copied to the RAM buffer. After the Data Input
460 * Operation by the NFC, the data is written to the NAND Flash. */
461static void mxc_nand_write_buf(struct mtd_info *mtd,
462 const u_char *buf, int len)
463{
464 struct nand_chip *nand_chip = mtd->priv;
465 struct mxc_nand_host *host = nand_chip->priv;
466 int n, col, i = 0;
467
468 DEBUG(MTD_DEBUG_LEVEL3,
469 "mxc_nand_write_buf(col = %d, len = %d)\n", host->col_addr,
470 len);
471
472 col = host->col_addr;
473
474 /* Adjust saved column address */
475 if (col < mtd->writesize && host->spare_only)
476 col += mtd->writesize;
477
478 n = mtd->writesize + mtd->oobsize - col;
479 n = min(len, n);
480
481 DEBUG(MTD_DEBUG_LEVEL3,
482 "%s:%d: col = %d, n = %d\n", __func__, __LINE__, col, n);
483
484 while (n) {
485 void __iomem *p;
486
487 if (col < mtd->writesize)
488 p = host->regs + MAIN_AREA0 + (col & ~3);
489 else
490 p = host->regs + SPARE_AREA0 -
491 mtd->writesize + (col & ~3);
492
493 DEBUG(MTD_DEBUG_LEVEL3, "%s:%d: p = %p\n", __func__,
494 __LINE__, p);
495
496 if (((col | (int)&buf[i]) & 3) || n < 16) {
497 uint32_t data = 0;
498
499 if (col & 3 || n < 4)
500 data = readl(p);
501
502 switch (col & 3) {
503 case 0:
504 if (n) {
505 data = (data & 0xffffff00) |
506 (buf[i++] << 0);
507 n--;
508 col++;
509 }
510 case 1:
511 if (n) {
512 data = (data & 0xffff00ff) |
513 (buf[i++] << 8);
514 n--;
515 col++;
516 }
517 case 2:
518 if (n) {
519 data = (data & 0xff00ffff) |
520 (buf[i++] << 16);
521 n--;
522 col++;
523 }
524 case 3:
525 if (n) {
526 data = (data & 0x00ffffff) |
527 (buf[i++] << 24);
528 n--;
529 col++;
530 }
531 }
532
533 writel(data, p);
534 } else {
535 int m = mtd->writesize - col;
536
537 if (col >= mtd->writesize)
538 m += mtd->oobsize;
539
540 m = min(n, m) & ~3;
541
542 DEBUG(MTD_DEBUG_LEVEL3,
543 "%s:%d: n = %d, m = %d, i = %d, col = %d\n",
544 __func__, __LINE__, n, m, i, col);
545
546 memcpy(p, &buf[i], m);
547 col += m;
548 i += m;
549 n -= m;
550 }
551 }
552 /* Update saved column address */
553 host->col_addr = col;
554}
555
556/* Read the data buffer from the NAND Flash. To read the data from the NAND
557 * Flash, the data output cycle is first initiated by the NFC, which copies
558 * the data to the RAM buffer. This data of length len is then copied to
559 * buffer buf. */
560static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
561{
562 struct nand_chip *nand_chip = mtd->priv;
563 struct mxc_nand_host *host = nand_chip->priv;
564 int n, col, i = 0;
565
566 DEBUG(MTD_DEBUG_LEVEL3,
567 "mxc_nand_read_buf(col = %d, len = %d)\n", host->col_addr, len);
568
569 col = host->col_addr;
570
571 /* Adjust saved column address */
572 if (col < mtd->writesize && host->spare_only)
573 col += mtd->writesize;
574
575 n = mtd->writesize + mtd->oobsize - col;
576 n = min(len, n);
577
578 while (n) {
579 void __iomem *p;
580
581 if (col < mtd->writesize)
582 p = host->regs + MAIN_AREA0 + (col & ~3);
583 else
584 p = host->regs + SPARE_AREA0 -
585 mtd->writesize + (col & ~3);
586
587 if (((col | (int)&buf[i]) & 3) || n < 16) {
588 uint32_t data;
589
590 data = readl(p);
591 switch (col & 3) {
592 case 0:
593 if (n) {
594 buf[i++] = (uint8_t) (data);
595 n--;
596 col++;
597 }
598 case 1:
599 if (n) {
600 buf[i++] = (uint8_t) (data >> 8);
601 n--;
602 col++;
603 }
604 case 2:
605 if (n) {
606 buf[i++] = (uint8_t) (data >> 16);
607 n--;
608 col++;
609 }
610 case 3:
611 if (n) {
612 buf[i++] = (uint8_t) (data >> 24);
613 n--;
614 col++;
615 }
616 }
617 } else {
618 int m = mtd->writesize - col;
619
620 if (col >= mtd->writesize)
621 m += mtd->oobsize;
622
623 m = min(n, m) & ~3;
624 memcpy(&buf[i], p, m);
625 col += m;
626 i += m;
627 n -= m;
628 }
629 }
630 /* Update saved column address */
631 host->col_addr = col;
632
633}
634
635/* Used by the upper layer to verify the data in NAND Flash
636 * with the data in the buf. */
637static int mxc_nand_verify_buf(struct mtd_info *mtd,
638 const u_char *buf, int len)
639{
640 return -EFAULT;
641}
642
643/* This function is used by the upper layer to select and
644 * deselect the NAND chip */
645static void mxc_nand_select_chip(struct mtd_info *mtd, int chip)
646{
647 struct nand_chip *nand_chip = mtd->priv;
648 struct mxc_nand_host *host = nand_chip->priv;
649
650#ifdef CONFIG_MTD_NAND_MXC_FORCE_CE
651 if (chip > 0) {
652 DEBUG(MTD_DEBUG_LEVEL0,
653 "ERROR: Illegal chip select (chip = %d)\n", chip);
654 return;
655 }
656
657 if (chip == -1) {
658 writew(readw(host->regs + NFC_CONFIG1) & ~NFC_CE,
659 host->regs + NFC_CONFIG1);
660 return;
661 }
662
663 writew(readw(host->regs + NFC_CONFIG1) | NFC_CE,
664 host->regs + NFC_CONFIG1);
665#endif
666
667 switch (chip) {
668 case -1:
669 /* Disable the NFC clock */
670 if (host->clk_act) {
671 clk_disable(host->clk);
672 host->clk_act = 0;
673 }
674 break;
675 case 0:
676 /* Enable the NFC clock */
677 if (!host->clk_act) {
678 clk_enable(host->clk);
679 host->clk_act = 1;
680 }
681 break;
682
683 default:
684 break;
685 }
686}
687
688/* Used by the upper layer to write commands to the NAND Flash for
689 * the different operations to be carried out on the NAND Flash */
690static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
691 int column, int page_addr)
692{
693 struct nand_chip *nand_chip = mtd->priv;
694 struct mxc_nand_host *host = nand_chip->priv;
695 int useirq = true;
696
697 DEBUG(MTD_DEBUG_LEVEL3,
698 "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
699 command, column, page_addr);
700
701 /* Reset command state information */
702 host->status_request = false;
703
704 /* Command pre-processing step */
705 switch (command) {
706
707 case NAND_CMD_STATUS:
708 host->col_addr = 0;
709 host->status_request = true;
710 break;
711
712 case NAND_CMD_READ0:
713 host->col_addr = column;
714 host->spare_only = false;
715 useirq = false;
716 break;
717
718 case NAND_CMD_READOOB:
719 host->col_addr = column;
720 host->spare_only = true;
721 useirq = false;
722 if (host->pagesize_2k)
723 command = NAND_CMD_READ0; /* only READ0 is valid */
724 break;
725
726 case NAND_CMD_SEQIN:
727 if (column >= mtd->writesize) {
728 /*
729 * FIXME: before sending the SEQIN command to write the OOB,
730 * we must read one page out.
731 * Since the K9F1GXX has no READ1 command to set the current HW
732 * pointer to the spare area, we must write the whole page
733 * including the OOB together.
734 */
735 if (host->pagesize_2k)
736 /* call ourself to read a page */
737 mxc_nand_command(mtd, NAND_CMD_READ0, 0,
738 page_addr);
739
740 host->col_addr = column - mtd->writesize;
741 host->spare_only = true;
742
743 /* Set program pointer to spare region */
744 if (!host->pagesize_2k)
745 send_cmd(host, NAND_CMD_READOOB, false);
746 } else {
747 host->spare_only = false;
748 host->col_addr = column;
749
750 /* Set program pointer to page start */
751 if (!host->pagesize_2k)
752 send_cmd(host, NAND_CMD_READ0, false);
753 }
754 useirq = false;
755 break;
756
757 case NAND_CMD_PAGEPROG:
758 send_prog_page(host, 0, host->spare_only);
759
760 if (host->pagesize_2k) {
761 /* for 2k pages the data is spread over 4 buffer areas */
762 send_prog_page(host, 1, host->spare_only);
763 send_prog_page(host, 2, host->spare_only);
764 send_prog_page(host, 3, host->spare_only);
765 }
766
767 break;
768
769 case NAND_CMD_ERASE1:
770 useirq = false;
771 break;
772 }
773
774 /* Write out the command to the device. */
775 send_cmd(host, command, useirq);
776
777 /* Write out column address, if necessary */
778 if (column != -1) {
779 /*
780 * The MXC NANDFC can only perform full page+spare or
781 * spare-only read/write. When the upper layers
782 * perform a read/write buf operation, we will use
783 * the saved column address to index into
784 * the full page.
785 */
786 send_addr(host, 0, page_addr == -1);
787 if (host->pagesize_2k)
788 /* another col addr cycle for 2k page */
789 send_addr(host, 0, false);
790 }
791
792 /* Write out page address, if necessary */
793 if (page_addr != -1) {
794 /* paddr_0 - p_addr_7 */
795 send_addr(host, (page_addr & 0xff), false);
796
797 if (host->pagesize_2k) {
798 send_addr(host, (page_addr >> 8) & 0xFF, false);
799 if (mtd->size >= 0x40000000)
800 send_addr(host, (page_addr >> 16) & 0xff, true);
801 } else {
802 /* One more address cycle for higher density devices */
803 if (mtd->size >= 0x4000000) {
804 /* paddr_8 - paddr_15 */
805 send_addr(host, (page_addr >> 8) & 0xff, false);
806 send_addr(host, (page_addr >> 16) & 0xff, true);
807 } else
808 /* paddr_8 - paddr_15 */
809 send_addr(host, (page_addr >> 8) & 0xff, true);
810 }
811 }
812
813 /* Command post-processing step */
814 switch (command) {
815
816 case NAND_CMD_RESET:
817 break;
818
819 case NAND_CMD_READOOB:
820 case NAND_CMD_READ0:
821 if (host->pagesize_2k) {
822 /* send read confirm command */
823 send_cmd(host, NAND_CMD_READSTART, true);
824 /* read for each AREA */
825 send_read_page(host, 0, host->spare_only);
826 send_read_page(host, 1, host->spare_only);
827 send_read_page(host, 2, host->spare_only);
828 send_read_page(host, 3, host->spare_only);
829 } else
830 send_read_page(host, 0, host->spare_only);
831 break;
832
833 case NAND_CMD_READID:
834 send_read_id(host);
835 break;
836
837 case NAND_CMD_PAGEPROG:
838 break;
839
840 case NAND_CMD_STATUS:
841 break;
842
843 case NAND_CMD_ERASE2:
844 break;
845 }
846}
847
848static int __init mxcnd_probe(struct platform_device *pdev)
849{
850 struct nand_chip *this;
851 struct mtd_info *mtd;
852 struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
853 struct mxc_nand_host *host;
854 struct resource *res;
855 uint16_t tmp;
856 int err = 0, nr_parts = 0;
857
858 /* Allocate memory for MTD device structure and private data */
859 host = kzalloc(sizeof(struct mxc_nand_host), GFP_KERNEL);
860 if (!host)
861 return -ENOMEM;
862
863 host->dev = &pdev->dev;
864 /* structures must be linked */
865 this = &host->nand;
866 mtd = &host->mtd;
867 mtd->priv = this;
868 mtd->owner = THIS_MODULE;
869
870 /* 50 us command delay time */
871 this->chip_delay = 5;
872
873 this->priv = host;
874 this->dev_ready = mxc_nand_dev_ready;
875 this->cmdfunc = mxc_nand_command;
876 this->select_chip = mxc_nand_select_chip;
877 this->read_byte = mxc_nand_read_byte;
878 this->read_word = mxc_nand_read_word;
879 this->write_buf = mxc_nand_write_buf;
880 this->read_buf = mxc_nand_read_buf;
881 this->verify_buf = mxc_nand_verify_buf;
882
883 host->clk = clk_get(&pdev->dev, "nfc_clk");
884 if (IS_ERR(host->clk))
885 goto eclk;
886
887 clk_enable(host->clk);
888 host->clk_act = 1;
889
890 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
891 if (!res) {
892 err = -ENODEV;
893 goto eres;
894 }
895
896 host->regs = ioremap(res->start, res->end - res->start + 1);
897 if (!host->regs) {
898 err = -EIO;
899 goto eres;
900 }
901
902 tmp = readw(host->regs + NFC_CONFIG1);
903 tmp |= NFC_INT_MSK;
904 writew(tmp, host->regs + NFC_CONFIG1);
905
906 init_waitqueue_head(&host->irq_waitq);
907
908 host->irq = platform_get_irq(pdev, 0);
909
910 err = request_irq(host->irq, mxc_nfc_irq, 0, "mxc_nd", host);
911 if (err)
912 goto eirq;
913
914 if (pdata->hw_ecc) {
915 this->ecc.calculate = mxc_nand_calculate_ecc;
916 this->ecc.hwctl = mxc_nand_enable_hwecc;
917 this->ecc.correct = mxc_nand_correct_data;
918 this->ecc.mode = NAND_ECC_HW;
919 this->ecc.size = 512;
920 this->ecc.bytes = 3;
921 this->ecc.layout = &nand_hw_eccoob_8;
922 tmp = readw(host->regs + NFC_CONFIG1);
923 tmp |= NFC_ECC_EN;
924 writew(tmp, host->regs + NFC_CONFIG1);
925 } else {
926 this->ecc.size = 512;
927 this->ecc.bytes = 3;
928 this->ecc.layout = &nand_hw_eccoob_8;
929 this->ecc.mode = NAND_ECC_SOFT;
930 tmp = readw(host->regs + NFC_CONFIG1);
931 tmp &= ~NFC_ECC_EN;
932 writew(tmp, host->regs + NFC_CONFIG1);
933 }
934
935 /* Reset NAND */
936 this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
937
938 /* preset operation */
939 /* Unlock the internal RAM Buffer */
940 writew(0x2, host->regs + NFC_CONFIG);
941
942 /* Blocks to be unlocked */
943 writew(0x0, host->regs + NFC_UNLOCKSTART_BLKADDR);
944 writew(0x4000, host->regs + NFC_UNLOCKEND_BLKADDR);
945
946 /* Unlock Block Command for given address range */
947 writew(0x4, host->regs + NFC_WRPROT);
948
949 /* NAND bus width determines access functions used by upper layer */
950 if (pdata->width == 2) {
951 this->options |= NAND_BUSWIDTH_16;
952 this->ecc.layout = &nand_hw_eccoob_16;
953 }
954
955 host->pagesize_2k = 0;
956
957 /* Scan to find existence of the device */
958 if (nand_scan(mtd, 1)) {
959 DEBUG(MTD_DEBUG_LEVEL0,
960 "MXC_ND: Unable to find any NAND device.\n");
961 err = -ENXIO;
962 goto escan;
963 }
964
965 /* Register the partitions */
966#ifdef CONFIG_MTD_PARTITIONS
967 nr_parts =
968 parse_mtd_partitions(mtd, part_probes, &host->parts, 0);
969 if (nr_parts > 0)
970 add_mtd_partitions(mtd, host->parts, nr_parts);
971 else
972#endif
973 {
974 pr_info("Registering %s as whole device\n", mtd->name);
975 add_mtd_device(mtd);
976 }
977
978 platform_set_drvdata(pdev, host);
979
980 return 0;
981
982escan:
983 free_irq(host->irq, host);
984eirq:
985 iounmap(host->regs);
986eres:
987 clk_put(host->clk);
988eclk:
989 kfree(host);
990
991 return err;
992}
993
994static int __devexit mxcnd_remove(struct platform_device *pdev)
995{
996 struct mxc_nand_host *host = platform_get_drvdata(pdev);
997
998 clk_put(host->clk);
999
1000 platform_set_drvdata(pdev, NULL);
1001
1002 nand_release(&host->mtd);
1003 free_irq(host->irq, host);
1004 iounmap(host->regs);
1005 kfree(host);
1006
1007 return 0;
1008}
1009
1010#ifdef CONFIG_PM
1011static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
1012{
1013 struct mtd_info *info = platform_get_drvdata(pdev);
1014 int ret = 0;
1015
1016 DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
1017 if (info)
1018 ret = info->suspend(info);
1019
1020 /* Disable the NFC clock */
1021 clk_disable(nfc_clk); /* FIXME */
1022
1023 return ret;
1024}
1025
1026static int mxcnd_resume(struct platform_device *pdev)
1027{
1028 struct mtd_info *info = platform_get_drvdata(pdev);
1029 int ret = 0;
1030
1031 DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
1032 /* Enable the NFC clock */
1033 clk_enable(nfc_clk); /* FIXME */
1034
1035 if (info)
1036 info->resume(info);
1037
1038 return ret;
1039}
1040
1041#else
1042# define mxcnd_suspend NULL
1043# define mxcnd_resume NULL
1044#endif /* CONFIG_PM */
1045
1046static struct platform_driver mxcnd_driver = {
1047 .driver = {
1048 .name = DRIVER_NAME,
1049 },
1050 .remove = __devexit_p(mxcnd_remove),
1051 .suspend = mxcnd_suspend,
1052 .resume = mxcnd_resume,
1053};
1054
1055static int __init mxc_nd_init(void)
1056{
1057 /* Register the device driver structure. */
1058 pr_info("MXC MTD nand Driver\n");
1059 if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
1060 printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
1061 return -ENODEV;
1062 }
1063 return 0;
1064}
1065
1066static void __exit mxc_nd_cleanup(void)
1067{
1068 /* Unregister the device structure */
1069 platform_driver_unregister(&mxcnd_driver);
1070}
1071
1072module_init(mxc_nd_init);
1073module_exit(mxc_nd_cleanup);
1074
1075MODULE_AUTHOR("Freescale Semiconductor, Inc.");
1076MODULE_DESCRIPTION("MXC NAND MTD driver");
1077MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d1129bae6c27..0a9c9cd33f96 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
  * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
- * @dataofs	offset of requested data within the page
- * @readlen	data length
- * @buf:	buffer to store read data
+ * @data_offs:	offset of requested data within the page
+ * @readlen:	data length
+ * @bufpoi:	buffer to store read data
  */
 static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi)
 {
@@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		return -EINVAL;
 	}
 
-	instr->fail_addr = 0xffffffff;
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
 	/* Grab the lock and see if the device is available */
 	nand_get_device(chip, mtd, FL_ERASING);
@@ -2318,6 +2318,12 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	/* Select the device */
 	chip->select_chip(mtd, 0);
 
+	/*
+	 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
+	 * after power-up
+	 */
+	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
 	/* Send the command for reading device ID */
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
@@ -2488,6 +2494,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	/* Check for a chip array */
 	for (i = 1; i < maxchips; i++) {
 		chip->select_chip(mtd, i);
+		/* See comment in nand_get_flash_type for reset */
+		chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
 		/* Send the command for reading device ID */
 		chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 		/* Read manufacturer and device IDs */
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 918a806a8471..868147acce2c 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -1,13 +1,18 @@
 /*
- * This file contains an ECC algorithm from Toshiba that detects and
- * corrects 1 bit errors in a 256 byte block of data.
+ * This file contains an ECC algorithm that detects and corrects 1 bit
+ * errors in a 256 byte block of data.
  *
  * drivers/mtd/nand/nand_ecc.c
  *
- * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
- *                         Toshiba America Electronics Components, Inc.
+ * Copyright © 2008 Koninklijke Philips Electronics NV.
+ * Author: Frans Meulenbroeks
  *
- * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ * Completely replaces the previous ECC implementation which was written by:
+ *   Steven J. Hill (sjhill@realitydiluted.com)
+ *   Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Information on how this algorithm works and how it was developed
+ * can be found in Documentation/mtd/nand_ecc.txt
  *
  * This file is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -23,174 +28,475 @@
  * with this file; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  *
- * As a special exception, if other files instantiate templates or use
- * macros or inline functions from these files, or you compile these
- * files and link them with other works to produce a work based on these
- * files, these files do not by themselves cause the resulting work to be
- * covered by the GNU General Public License. However the source code for
- * these files must still be made available in accordance with section (3)
- * of the GNU General Public License.
- *
- * This exception does not invalidate any other reasons why a work based on
- * this file might be covered by the GNU General Public License.
  */
 
+/*
+ * The STANDALONE macro is useful when running the code outside the kernel
+ * e.g. when running the code in a testbed or a benchmark program.
+ * When STANDALONE is used, the module related macros are commented out
+ * as well as the linux include files.
+ * Instead a private definition of mtd_info is given to satisfy the compiler
+ * (the code does not use mtd_info, so the code does not care)
+ */
+#ifndef STANDALONE
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
+#include <asm/byteorder.h>
+#else
+#include <stdint.h>
+struct mtd_info;
+#define EXPORT_SYMBOL(x)	/* x */
+
+#define MODULE_LICENSE(x)	/* x */
+#define MODULE_AUTHOR(x)	/* x */
+#define MODULE_DESCRIPTION(x)	/* x */
+
+#define printk printf
+#define KERN_ERR		""
+#endif
+
+/*
+ * invparity is a 256 byte table that contains the odd parity
+ * for each byte. So if the number of bits in a byte is even,
+ * the array element is 1, and when the number of bits is odd
+ * the array eleemnt is 0.
+ */
+static const char invparity[256] = {
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+/*
+ * bitsperbyte contains the number of bits per byte
+ * this is only used for testing and repairing parity
+ * (a precalculated value slightly improves performance)
+ */
+static const char bitsperbyte[256] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
 
 /*
- * Pre-calculated 256-way 1 byte column parity
+ * addressbits is a lookup table to filter out the bits from the xor-ed
+ * ecc data that identify the faulty location.
+ * this is only used for repairing parity
+ * see the comments in nand_correct_data for more details
  */
-static const u_char nand_ecc_precalc_table[] = {
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-	0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-	0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-	0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-	0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-	0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-	0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
+static const char addressbits[256] = {
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *			 block
  * @mtd:	MTD block structure
- * @dat:	raw data
- * @ecc_code:	buffer for ECC
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-		       u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+		       unsigned char *code)
 {
-	uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
 	int i;
+	const uint32_t *bp = (uint32_t *)buf;
+	/* 256 or 512 bytes/ecc */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+	uint32_t cur;		/* current value in buffer */
+	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
+	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+	uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
+	uint32_t uninitialized_var(rp17);	/* to make compiler happy */
+	uint32_t par;		/* the cumulative parity for all data */
+	uint32_t tmppar;	/* the cumulative parity for this iteration;
+				   for rp12, rp14 and rp16 at the end of the
+				   loop */
+
+	par = 0;
+	rp4 = 0;
+	rp6 = 0;
+	rp8 = 0;
+	rp10 = 0;
+	rp12 = 0;
+	rp14 = 0;
+	rp16 = 0;
+
+	/*
+	 * The loop is unrolled a number of times;
+	 * This avoids if statements to decide on which rp value to update
+	 * Also we process the data by longwords.
+	 * Note: passing unaligned data might give a performance penalty.
+	 * It is assumed that the buffers are aligned.
+	 * tmppar is the cumulative sum of this iteration.
+	 * needed for calculating rp12, rp14, rp16 and par
+	 * also used as a performance improvement for rp6, rp8 and rp10
+	 */
+	for (i = 0; i < eccsize_mult << 2; i++) {
+		cur = *bp++;
+		tmppar = cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= tmppar;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= tmppar;
 
-	/* Initialize variables */
-	reg1 = reg2 = reg3 = 0;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp10 ^= tmppar;
 
-	/* Build up column parity */
-	for(i = 0; i < 256; i++) {
-		/* Get CP0 - CP5 from table */
-		idx = nand_ecc_precalc_table[*dat++];
-		reg1 ^= (idx & 0x3f);
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp8 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp8 ^= cur;
 
-		/* All bit XOR = 1 ? */
-		if (idx & 0x40) {
-			reg3 ^= (uint8_t) i;
-			reg2 ^= ~((uint8_t) i);
-		}
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp6 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+		rp4 ^= cur;
+		cur = *bp++;
+		tmppar ^= cur;
+
+		par ^= tmppar;
+		if ((i & 0x1) == 0)
+			rp12 ^= tmppar;
+		if ((i & 0x2) == 0)
+			rp14 ^= tmppar;
+		if (eccsize_mult == 2 && (i & 0x4) == 0)
+			rp16 ^= tmppar;
 	}
 
-	/* Create non-inverted ECC code from line parity */
-	tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
-	tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
-	tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
-	tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
-	tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
-	tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
-	tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
-	tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
-
-	tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
-	tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
-	tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
-	tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
-	tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
-	tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
-	tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
-	tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
-
-	/* Calculate final ECC code */
-#ifdef CONFIG_MTD_NAND_ECC_SMC
-	ecc_code[0] = ~tmp2;
-	ecc_code[1] = ~tmp1;
+	/*
+	 * handle the fact that we use longword operations
+	 * we'll bring rp4..rp14..rp16 back to single byte entities by
+	 * shifting and xoring first fold the upper and lower 16 bits,
+	 * then the upper and lower 8 bits.
+	 */
+	rp4 ^= (rp4 >> 16);
+	rp4 ^= (rp4 >> 8);
+	rp4 &= 0xff;
+	rp6 ^= (rp6 >> 16);
+	rp6 ^= (rp6 >> 8);
+	rp6 &= 0xff;
+	rp8 ^= (rp8 >> 16);
+	rp8 ^= (rp8 >> 8);
+	rp8 &= 0xff;
+	rp10 ^= (rp10 >> 16);
+	rp10 ^= (rp10 >> 8);
+	rp10 &= 0xff;
+	rp12 ^= (rp12 >> 16);
+	rp12 ^= (rp12 >> 8);
+	rp12 &= 0xff;
+	rp14 ^= (rp14 >> 16);
+	rp14 ^= (rp14 >> 8);
+	rp14 &= 0xff;
+	if (eccsize_mult == 2) {
+		rp16 ^= (rp16 >> 16);
+		rp16 ^= (rp16 >> 8);
+		rp16 &= 0xff;
+	}
+
+	/*
+	 * we also need to calculate the row parity for rp0..rp3
+	 * This is present in par, because par is now
+	 * rp3 rp3 rp2 rp2 in little endian and
+	 * rp2 rp2 rp3 rp3 in big endian
+	 * as well as
+	 * rp1 rp0 rp1 rp0 in little endian and
+	 * rp0 rp1 rp0 rp1 in big endian
+	 * First calculate rp2 and rp3
+	 */
+#ifdef __BIG_ENDIAN
+	rp2 = (par >> 16);
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
+	rp3 = par & 0xffff;
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
 #else
-	ecc_code[0] = ~tmp1;
-	ecc_code[1] = ~tmp2;
+	rp3 = (par >> 16);
+	rp3 ^= (rp3 >> 8);
+	rp3 &= 0xff;
+	rp2 = par & 0xffff;
+	rp2 ^= (rp2 >> 8);
+	rp2 &= 0xff;
 #endif
-	ecc_code[2] = ((~reg1) << 2) | 0x03;
 
-	return 0;
-}
-EXPORT_SYMBOL(nand_calculate_ecc);
-
-static inline int countbits(uint32_t byte)
-{
-	int res = 0;
-
-	for (;byte; byte >>= 1)
-		res += byte & 0x01;
-	return res;
+	/* reduce par to 16 bits then calculate rp1 and rp0 */
+	par ^= (par >> 16);
+#ifdef __BIG_ENDIAN
+	rp0 = (par >> 8) & 0xff;
+	rp1 = (par & 0xff);
+#else
+	rp1 = (par >> 8) & 0xff;
+	rp0 = (par & 0xff);
+#endif
+
+	/* finally reduce par to 8 bits */
+	par ^= (par >> 8);
+	par &= 0xff;
+
+	/*
+	 * and calculate rp5..rp15..rp17
+	 * note that par = rp4 ^ rp5 and due to the commutative property
+	 * of the ^ operator we can say:
+	 * rp5 = (par ^ rp4);
+	 * The & 0xff seems superfluous, but benchmarking learned that
+	 * leaving it out gives slightly worse results. No idea why, probably
+	 * it has to do with the way the pipeline in pentium is organized.
+	 */
+	rp5 = (par ^ rp4) & 0xff;
+	rp7 = (par ^ rp6) & 0xff;
+	rp9 = (par ^ rp8) & 0xff;
+	rp11 = (par ^ rp10) & 0xff;
+	rp13 = (par ^ rp12) & 0xff;
+	rp15 = (par ^ rp14) & 0xff;
+	if (eccsize_mult == 2)
+		rp17 = (par ^ rp16) & 0xff;
+
+	/*
+	 * Finally calculate the ecc bits.
+	 * Again here it might seem that there are performance optimisations
+	 * possible, but benchmarks showed that on the system this is developed
+	 * the code below is the fastest
+	 */
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+	code[0] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[1] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1) |
+	    (invparity[rp8]);
+#else
+	code[1] =
+	    (invparity[rp7] << 7) |
+	    (invparity[rp6] << 6) |
+	    (invparity[rp5] << 5) |
+	    (invparity[rp4] << 4) |
+	    (invparity[rp3] << 3) |
+	    (invparity[rp2] << 2) |
+	    (invparity[rp1] << 1) |
+	    (invparity[rp0]);
+	code[0] =
+	    (invparity[rp15] << 7) |
+	    (invparity[rp14] << 6) |
+	    (invparity[rp13] << 5) |
+	    (invparity[rp12] << 4) |
+	    (invparity[rp11] << 3) |
+	    (invparity[rp10] << 2) |
+	    (invparity[rp9] << 1) |
+	    (invparity[rp8]);
+#endif
+	if (eccsize_mult == 1)
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    3;
+	else
+		code[2] =
+		    (invparity[par & 0xf0] << 7) |
+		    (invparity[par & 0x0f] << 6) |
+		    (invparity[par & 0xcc] << 5) |
+		    (invparity[par & 0x33] << 4) |
+		    (invparity[par & 0xaa] << 3) |
+		    (invparity[par & 0x55] << 2) |
+		    (invparity[rp17] << 1) |
+		    (invparity[rp16] << 0);
+	return 0;
 }
+EXPORT_SYMBOL(nand_calculate_ecc);
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
  * @mtd:	MTD block structure
- * @dat:	raw data read from the chip
+ * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat,
-		      u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	uint8_t s0, s1, s2;
+	unsigned char b0, b1, b2;
+	unsigned char byte_addr, bit_addr;
+	/* 256 or 512 bytes/ecc */
+	const uint32_t eccsize_mult =
+			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
 
+	/*
+	 * b0 to b2 indicate which bit is faulty (if any)
+	 * we might need the xor result more than once,
+	 * so keep them in a local var
+	 */
 #ifdef CONFIG_MTD_NAND_ECC_SMC
-	s0 = calc_ecc[0] ^ read_ecc[0];
-	s1 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[0] ^ calc_ecc[0];
+	b1 = read_ecc[1] ^ calc_ecc[1];
 #else
-	s1 = calc_ecc[0] ^ read_ecc[0];
-	s0 = calc_ecc[1] ^ read_ecc[1];
-	s2 = calc_ecc[2] ^ read_ecc[2];
+	b0 = read_ecc[1] ^ calc_ecc[1];
+	b1 = read_ecc[0] ^ calc_ecc[0];
 #endif
-	if ((s0 | s1 | s2) == 0)
-		return 0;
-
-	/* Check for a single bit error */
-	if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
-	    ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
-	    ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+	b2 = read_ecc[2] ^ calc_ecc[2];
 
-		uint32_t byteoffs, bitnum;
+	/* check if there are any bitfaults */
 
-		byteoffs = (s1 << 0) & 0x80;
-		byteoffs |= (s1 << 1) & 0x40;
-		byteoffs |= (s1 << 2) & 0x20;
-		byteoffs |= (s1 << 3) & 0x10;
+	/* repeated if statements are slightly more efficient than switch ... */
+	/* ordered in order of likelihood */
 
-		byteoffs |= (s0 >> 4) & 0x08;
-		byteoffs |= (s0 >> 3) & 0x04;
-		byteoffs |= (s0 >> 2) & 0x02;
-		byteoffs |= (s0 >> 1) & 0x01;
-
-		bitnum = (s2 >> 5) & 0x04;
-		bitnum |= (s2 >> 4) & 0x02;
-		bitnum |= (s2 >> 3) & 0x01;
-
-		dat[byteoffs] ^= (1 << bitnum);
+	if ((b0 | b1 | b2) == 0)
+		return 0;	/* no error */
 
+	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
+	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
+	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
+	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
+		/* single bit error */
+		/*
+		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
+		 * byte, cp 5/3/1 indicate the faulty bit.
+		 * A lookup table (called addressbits) is used to filter
+		 * the bits from the byte they are in.
+		 * A marginal optimisation is possible by having three
+		 * different lookup tables.
+		 * One as we have now (for b0), one for b2
+		 * (that would avoid the >> 1), and one for b1 (with all values
+		 * << 4). However it was felt that introducing two more tables
+		 * hardly justify the gain.
+		 *
+		 * The b2 shift is there to get rid of the lowest two bits.
+		 * We could also do addressbits[b2] >> 1 but for the
+		 * performace it does not make any difference
+		 */
+		if (eccsize_mult == 1)
+			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+		else
+			byte_addr = (addressbits[b2 & 0x3] << 8) +
+				    (addressbits[b1] << 4) + addressbits[b0];
+		bit_addr = addressbits[b2 >> 2];
+		/* flip the bit */
+		buf[byte_addr] ^= (1 << bit_addr);
 		return 1;
-	}
 
-	if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
-		return 1;
+	}
+	/* count nr of bits; use table lookup, faster than calculating it */
+	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
+		return 1;	/* error in ecc data; no action needed */
 
-	return -EBADMSG;
+	printk(KERN_ERR "uncorrectable error : ");
+	return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
+MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 MODULE_DESCRIPTION("Generic NAND ECC support");
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 556e8131ecdc..ae7c57781a68 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -38,7 +38,6 @@
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/random.h>
-#include <asm/div64.h>
 
 /* Default simulator parameters values */
 #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index a64ad15b8fdd..c0fa9c9edf08 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -115,55 +115,11 @@ enum {
 	STATE_PIO_WRITING,
 };
 
-struct pxa3xx_nand_timing {
-	unsigned int	tCH;  /* Enable signal hold time */
-	unsigned int	tCS;  /* Enable signal setup time */
-	unsigned int	tWH;  /* ND_nWE high duration */
-	unsigned int	tWP;  /* ND_nWE pulse time */
-	unsigned int	tRH;  /* ND_nRE high duration */
-	unsigned int	tRP;  /* ND_nRE pulse width */
-	unsigned int	tR;   /* ND_nWE high to ND_nRE low for read */
-	unsigned int	tWHR; /* ND_nWE high to ND_nRE low for status read */
-	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
-};
-
-struct pxa3xx_nand_cmdset {
-	uint16_t	read1;
-	uint16_t	read2;
-	uint16_t	program;
-	uint16_t	read_status;
-	uint16_t	read_id;
-	uint16_t	erase;
-	uint16_t	reset;
-	uint16_t	lock;
-	uint16_t	unlock;
-	uint16_t	lock_status;
-};
-
-struct pxa3xx_nand_flash {
-	struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
-	struct pxa3xx_nand_cmdset *cmdset;
-
-	uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
-	uint32_t page_size;	/* Page size in bytes (PAGE_SZ) */
-	uint32_t flash_width;	/* Width of Flash memory (DWIDTH_M) */
-	uint32_t dfc_width;	/* Width of flash controller(DWIDTH_C) */
-	uint32_t num_blocks;	/* Number of physical blocks in Flash */
-	uint32_t chip_id;
-
-	/* NOTE: these are automatically calculated, do not define */
-	size_t oob_size;
-	size_t read_id_bytes;
-
-	unsigned int col_addr_cycles;
-	unsigned int row_addr_cycles;
-};
-
 struct pxa3xx_nand_info {
 	struct nand_chip	nand_chip;
 
 	struct platform_device	 *pdev;
-	struct pxa3xx_nand_flash *flash_info;
+	const struct pxa3xx_nand_flash *flash_info;
 
 	struct clk		*clk;
 	void __iomem		*mmio_base;
@@ -202,12 +158,20 @@ struct pxa3xx_nand_info {
 	uint32_t ndcb0;
 	uint32_t ndcb1;
 	uint32_t ndcb2;
+
+	/* calculated from pxa3xx_nand_flash data */
+	size_t oob_size;
+	size_t read_id_bytes;
+
+	unsigned int col_addr_cycles;
+	unsigned int row_addr_cycles;
 };
 
 static int use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transfering to/from NAND HW");
 
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 static struct pxa3xx_nand_cmdset smallpage_cmdset = {
 	.read1		= 0x0000,
 	.read2		= 0x0050,
@@ -291,11 +255,35 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
 	.chip_id	= 0xb12c,
 };
 
+static struct pxa3xx_nand_timing stm2GbX16_timing = {
+	.tCH = 10,
+	.tCS = 35,
+	.tWH = 15,
+	.tWP = 25,
+	.tRH = 15,
+	.tRP = 25,
+	.tR = 25000,
+	.tWHR = 60,
+	.tAR = 10,
+};
+
+static struct pxa3xx_nand_flash stm2GbX16 = {
+	.timing = &stm2GbX16_timing,
+	.page_per_block = 64,
+	.page_size = 2048,
+	.flash_width = 16,
+	.dfc_width = 16,
+	.num_blocks = 2048,
+	.chip_id = 0xba20,
+};
+
 static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 	&samsung512MbX16,
 	&micron1GbX8,
 	&micron1GbX16,
+	&stm2GbX16,
 };
+#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
 
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
 #define NDTR0_tCS(c)	(min((c), 7) << 16)
@@ -312,7 +300,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 #define ns2cycle(ns, clk)	(int)(((ns) * (clk / 1000000) / 1000) + 1)
 
 static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
-				   struct pxa3xx_nand_timing *t)
+				   const struct pxa3xx_nand_timing *t)
 {
 	unsigned long nand_clk = clk_get_rate(info->clk);
 	uint32_t ndtr0, ndtr1;
@@ -354,8 +342,8 @@ static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
 static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 			uint16_t cmd, int column, int page_addr)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 
 	/* calculate data size */
 	switch (f->page_size) {
@@ -373,14 +361,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
 	info->ndcb2 = 0;
-	info->ndcb0 |= NDCB0_ADDR_CYC(f->row_addr_cycles + f->col_addr_cycles);
+	info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
 
-	if (f->col_addr_cycles == 2) {
+	if (info->col_addr_cycles == 2) {
 		/* large block, 2 cycles for column address
 		 * row address starts from 3rd cycle
 		 */
 		info->ndcb1 |= (page_addr << 16) | (column & 0xffff);
-		if (f->row_addr_cycles == 3)
+		if (info->row_addr_cycles == 3)
 			info->ndcb2 = (page_addr >> 16) & 0xff;
 	} else
 		/* small block, 1 cycles for column address
@@ -406,7 +394,7 @@ static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
 
 static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
 {
-	struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
+	const struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
 
 	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
 	info->ndcb1 = 0;
@@ -641,8 +629,8 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 				int column, int page_addr)
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
-	struct pxa3xx_nand_flash *flash_info = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
+	const struct pxa3xx_nand_flash *flash_info = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
 	int ret;
 
 	info->use_dma = (use_dma) ? 1 : 0;
@@ -720,7 +708,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		info->use_dma = 0;	/* force PIO read */
 		info->buf_start = 0;
 		info->buf_count = (command == NAND_CMD_READID) ?
-				flash_info->read_id_bytes : 1;
+				info->read_id_bytes : 1;
 
 		if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
 				cmdset->read_id : cmdset->read_status))
@@ -861,8 +849,8 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 
 static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
-	struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 	uint32_t ndcr;
 	uint8_t  id_buff[8];
 
@@ -891,7 +879,7 @@ fail_timeout:
 }
 
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
-				    struct pxa3xx_nand_flash *f)
+				    const struct pxa3xx_nand_flash *f)
 {
 	struct platform_device *pdev = info->pdev;
 	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -904,25 +892,25 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 		return -EINVAL;
 
 	/* calculate flash information */
-	f->oob_size = (f->page_size == 2048) ? 64 : 16;
-	f->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
+	info->oob_size = (f->page_size == 2048) ? 64 : 16;
+	info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
 	/* calculate addressing information */
-	f->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
+	info->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
 
 	if (f->num_blocks * f->page_per_block > 65536)
-		f->row_addr_cycles = 3;
+		info->row_addr_cycles = 3;
 	else
-		f->row_addr_cycles = 2;
+		info->row_addr_cycles = 2;
 
 	ndcr |= (pdata->enable_arbiter) ? NDCR_ND_ARB_EN : 0;
-	ndcr |= (f->col_addr_cycles == 2) ? NDCR_RA_START : 0;
+	ndcr |= (info->col_addr_cycles == 2) ? NDCR_RA_START : 0;
 	ndcr |= (f->page_per_block == 64) ? NDCR_PG_PER_BLK : 0;
 	ndcr |= (f->page_size == 2048) ? NDCR_PAGE_SZ : 0;
 	ndcr |= (f->flash_width == 16) ? NDCR_DWIDTH_M : 0;
 	ndcr |= (f->dfc_width == 16) ? NDCR_DWIDTH_C : 0;
 
-	ndcr |= NDCR_RD_ID_CNT(f->read_id_bytes);
+	ndcr |= NDCR_RD_ID_CNT(info->read_id_bytes);
 	ndcr |= NDCR_SPARE_EN; /* enable spare by default */
 
 	info->reg_ndcr = ndcr;
@@ -932,12 +920,27 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 	return 0;
 }
 
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
+				    const struct pxa3xx_nand_platform_data *pdata)
 {
-	struct pxa3xx_nand_flash *f;
-	uint32_t id;
+	const struct pxa3xx_nand_flash *f;
+	uint32_t id = -1;
 	int i;
 
+	for (i = 0; i<pdata->num_flash; ++i) {
+		f = pdata->flash + i;
+
+		if (pxa3xx_nand_config_flash(info, f))
+			continue;
+
+		if (__readid(info, &id))
+			continue;
+
+		if (id == f->chip_id)
+			return 0;
+	}
+
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 	for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
 
 		f = builtin_flash_types[i];
@@ -951,7 +954,11 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
 		if (id == f->chip_id)
 			return 0;
 	}
+#endif
 
+	dev_warn(&info->pdev->dev,
+		 "failed to detect configured nand flash; found %04x instead of\n",
+		 id);
 	return -ENODEV;
 }
 
@@ -1014,7 +1021,7 @@ static struct nand_ecclayout hw_largepage_ecclayout = {
 static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
 				 struct pxa3xx_nand_info *info)
 {
-	struct pxa3xx_nand_flash *f = info->flash_info;
+	const struct pxa3xx_nand_flash *f = info->flash_info;
 	struct nand_chip *this = &info->nand_chip;
 
 	this->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16: 0;
@@ -1135,7 +1142,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
 		goto fail_free_buf;
 	}
 
-	ret = pxa3xx_nand_detect_flash(info);
+	ret = pxa3xx_nand_detect_flash(info, pdata);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to detect flash\n");
 		ret = -ENODEV;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
new file mode 100644
index 000000000000..821acb08ff1c
--- /dev/null
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -0,0 +1,878 @@
1/*
2 * SuperH FLCTL nand controller
3 *
4 * Copyright © 2008 Renesas Solutions Corp.
5 * Copyright © 2008 Atom Create Engineering Co., Ltd.
6 *
7 * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; version 2 of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/delay.h>
27#include <linux/io.h>
28#include <linux/platform_device.h>
29
30#include <linux/mtd/mtd.h>
31#include <linux/mtd/nand.h>
32#include <linux/mtd/partitions.h>
33#include <linux/mtd/sh_flctl.h>
34
35static struct nand_ecclayout flctl_4secc_oob_16 = {
36 .eccbytes = 10,
37 .eccpos = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
38 .oobfree = {
39 {.offset = 12,
 40	 .length = 4} },
41};
42
43static struct nand_ecclayout flctl_4secc_oob_64 = {
44 .eccbytes = 10,
45 .eccpos = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57},
46 .oobfree = {
47 {.offset = 60,
 48	 .length = 4} },
49};
50
51static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
52
53static struct nand_bbt_descr flctl_4secc_smallpage = {
54 .options = NAND_BBT_SCAN2NDPAGE,
55 .offs = 11,
56 .len = 1,
57 .pattern = scan_ff_pattern,
58};
59
60static struct nand_bbt_descr flctl_4secc_largepage = {
61 .options = 0,
62 .offs = 58,
63 .len = 2,
64 .pattern = scan_ff_pattern,
65};
66
67static void empty_fifo(struct sh_flctl *flctl)
68{
69 writel(0x000c0000, FLINTDMACR(flctl)); /* FIFO Clear */
70 writel(0x00000000, FLINTDMACR(flctl)); /* Clear Error flags */
71}
72
73static void start_translation(struct sh_flctl *flctl)
74{
75 writeb(TRSTRT, FLTRCR(flctl));
76}
77
78static void wait_completion(struct sh_flctl *flctl)
79{
80 uint32_t timeout = LOOP_TIMEOUT_MAX;
81
82 while (timeout--) {
83 if (readb(FLTRCR(flctl)) & TREND) {
84 writeb(0x0, FLTRCR(flctl));
85 return;
86 }
87 udelay(1);
88 }
89
 90	printk(KERN_ERR "wait_completion(): Timeout occurred\n");
91 writeb(0x0, FLTRCR(flctl));
92}
93
94static void set_addr(struct mtd_info *mtd, int column, int page_addr)
95{
96 struct sh_flctl *flctl = mtd_to_flctl(mtd);
97 uint32_t addr = 0;
98
99 if (column == -1) {
100 addr = page_addr; /* ERASE1 */
101 } else if (page_addr != -1) {
102 /* SEQIN, READ0, etc.. */
103 if (flctl->page_size) {
104 addr = column & 0x0FFF;
105 addr |= (page_addr & 0xff) << 16;
106 addr |= ((page_addr >> 8) & 0xff) << 24;
 107			/* larger than 128MB */
108 if (flctl->rw_ADRCNT == ADRCNT2_E) {
109 uint32_t addr2;
110 addr2 = (page_addr >> 16) & 0xff;
111 writel(addr2, FLADR2(flctl));
112 }
113 } else {
114 addr = column;
115 addr |= (page_addr & 0xff) << 8;
116 addr |= ((page_addr >> 8) & 0xff) << 16;
117 addr |= ((page_addr >> 16) & 0xff) << 24;
118 }
119 }
120 writel(addr, FLADR(flctl));
121}
122
123static void wait_rfifo_ready(struct sh_flctl *flctl)
124{
125 uint32_t timeout = LOOP_TIMEOUT_MAX;
126
127 while (timeout--) {
128 uint32_t val;
129 /* check FIFO */
130 val = readl(FLDTCNTR(flctl)) >> 16;
131 if (val & 0xFF)
132 return;
133 udelay(1);
134 }
 135	printk(KERN_ERR "wait_rfifo_ready(): Timeout occurred\n");
136}
137
138static void wait_wfifo_ready(struct sh_flctl *flctl)
139{
140 uint32_t len, timeout = LOOP_TIMEOUT_MAX;
141
142 while (timeout--) {
143 /* check FIFO */
144 len = (readl(FLDTCNTR(flctl)) >> 16) & 0xFF;
145 if (len >= 4)
146 return;
147 udelay(1);
148 }
 149	printk(KERN_ERR "wait_wfifo_ready(): Timeout occurred\n");
150}
151
152static int wait_recfifo_ready(struct sh_flctl *flctl)
153{
154 uint32_t timeout = LOOP_TIMEOUT_MAX;
155 int checked[4];
156 void __iomem *ecc_reg[4];
157 int i;
158 uint32_t data, size;
159
160 memset(checked, 0, sizeof(checked));
161
162 while (timeout--) {
163 size = readl(FLDTCNTR(flctl)) >> 24;
164 if (size & 0xFF)
165 return 0; /* success */
166
167 if (readl(FL4ECCCR(flctl)) & _4ECCFA)
168 return 1; /* can't correct */
169
170 udelay(1);
171 if (!(readl(FL4ECCCR(flctl)) & _4ECCEND))
172 continue;
173
174 /* start error correction */
175 ecc_reg[0] = FL4ECCRESULT0(flctl);
176 ecc_reg[1] = FL4ECCRESULT1(flctl);
177 ecc_reg[2] = FL4ECCRESULT2(flctl);
178 ecc_reg[3] = FL4ECCRESULT3(flctl);
179
180 for (i = 0; i < 3; i++) {
181 data = readl(ecc_reg[i]);
182 if (data != INIT_FL4ECCRESULT_VAL && !checked[i]) {
183 uint8_t org;
184 int index;
185
186 index = data >> 16;
187 org = flctl->done_buff[index];
188 flctl->done_buff[index] = org ^ (data & 0xFF);
189 checked[i] = 1;
190 }
191 }
192
193 writel(0, FL4ECCCR(flctl));
194 }
195
 196	printk(KERN_ERR "wait_recfifo_ready(): Timeout occurred\n");
197 return 1; /* timeout */
198}
199
200static void wait_wecfifo_ready(struct sh_flctl *flctl)
201{
202 uint32_t timeout = LOOP_TIMEOUT_MAX;
203 uint32_t len;
204
205 while (timeout--) {
206 /* check FLECFIFO */
207 len = (readl(FLDTCNTR(flctl)) >> 24) & 0xFF;
208 if (len >= 4)
209 return;
210 udelay(1);
211 }
 212	printk(KERN_ERR "wait_wecfifo_ready(): Timeout occurred\n");
213}
214
215static void read_datareg(struct sh_flctl *flctl, int offset)
216{
217 unsigned long data;
218 unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
219
220 wait_completion(flctl);
221
222 data = readl(FLDATAR(flctl));
223 *buf = le32_to_cpu(data);
224}
225
226static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
227{
228 int i, len_4align;
229 unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
230 void *fifo_addr = (void *)FLDTFIFO(flctl);
231
232 len_4align = (rlen + 3) / 4;
233
234 for (i = 0; i < len_4align; i++) {
235 wait_rfifo_ready(flctl);
236 buf[i] = readl(fifo_addr);
237 buf[i] = be32_to_cpu(buf[i]);
238 }
239}
240
241static int read_ecfiforeg(struct sh_flctl *flctl, uint8_t *buff)
242{
243 int i;
244 unsigned long *ecc_buf = (unsigned long *)buff;
245 void *fifo_addr = (void *)FLECFIFO(flctl);
246
247 for (i = 0; i < 4; i++) {
248 if (wait_recfifo_ready(flctl))
249 return 1;
250 ecc_buf[i] = readl(fifo_addr);
251 ecc_buf[i] = be32_to_cpu(ecc_buf[i]);
252 }
253
254 return 0;
255}
256
257static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
258{
259 int i, len_4align;
260 unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
261 void *fifo_addr = (void *)FLDTFIFO(flctl);
262
263 len_4align = (rlen + 3) / 4;
264 for (i = 0; i < len_4align; i++) {
265 wait_wfifo_ready(flctl);
266 writel(cpu_to_be32(data[i]), fifo_addr);
267 }
268}
269
270static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
271{
272 struct sh_flctl *flctl = mtd_to_flctl(mtd);
273 uint32_t flcmncr_val = readl(FLCMNCR(flctl));
274 uint32_t flcmdcr_val, addr_len_bytes = 0;
275
 276	/* Set the SNAND bit if the page size is 2048 bytes */
277 if (flctl->page_size)
278 flcmncr_val |= SNAND_E;
279 else
280 flcmncr_val &= ~SNAND_E;
281
282 /* default FLCMDCR val */
283 flcmdcr_val = DOCMD1_E | DOADR_E;
284
285 /* Set for FLCMDCR */
286 switch (cmd) {
287 case NAND_CMD_ERASE1:
288 addr_len_bytes = flctl->erase_ADRCNT;
289 flcmdcr_val |= DOCMD2_E;
290 break;
291 case NAND_CMD_READ0:
292 case NAND_CMD_READOOB:
293 addr_len_bytes = flctl->rw_ADRCNT;
294 flcmdcr_val |= CDSRC_E;
295 break;
296 case NAND_CMD_SEQIN:
 297		/* The command issued here is actually READ0, READ1 or READOOB */
298 flcmdcr_val &= ~DOADR_E; /* ONLY execute 1st cmd */
299 break;
300 case NAND_CMD_PAGEPROG:
301 addr_len_bytes = flctl->rw_ADRCNT;
302 flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
303 break;
304 case NAND_CMD_READID:
305 flcmncr_val &= ~SNAND_E;
306 addr_len_bytes = ADRCNT_1;
307 break;
308 case NAND_CMD_STATUS:
309 case NAND_CMD_RESET:
310 flcmncr_val &= ~SNAND_E;
311 flcmdcr_val &= ~(DOADR_E | DOSR_E);
312 break;
313 default:
314 break;
315 }
316
317 /* Set address bytes parameter */
318 flcmdcr_val |= addr_len_bytes;
319
320 /* Now actually write */
321 writel(flcmncr_val, FLCMNCR(flctl));
322 writel(flcmdcr_val, FLCMDCR(flctl));
323 writel(flcmcdr_val, FLCMCDR(flctl));
324}
325
326static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
327 uint8_t *buf)
328{
329 int i, eccsize = chip->ecc.size;
330 int eccbytes = chip->ecc.bytes;
331 int eccsteps = chip->ecc.steps;
332 uint8_t *p = buf;
333 struct sh_flctl *flctl = mtd_to_flctl(mtd);
334
335 for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
336 chip->read_buf(mtd, p, eccsize);
337
338 for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
339 if (flctl->hwecc_cant_correct[i])
340 mtd->ecc_stats.failed++;
341 else
342 mtd->ecc_stats.corrected += 0;
343 }
344
345 return 0;
346}
347
348static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
349 const uint8_t *buf)
350{
351 int i, eccsize = chip->ecc.size;
352 int eccbytes = chip->ecc.bytes;
353 int eccsteps = chip->ecc.steps;
354 const uint8_t *p = buf;
355
356 for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
357 chip->write_buf(mtd, p, eccsize);
358}
359
360static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr)
361{
362 struct sh_flctl *flctl = mtd_to_flctl(mtd);
363 int sector, page_sectors;
364
365 if (flctl->page_size)
366 page_sectors = 4;
367 else
368 page_sectors = 1;
369
370 writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE | _4ECCCORRECT,
371 FLCMNCR(flctl));
372
373 set_cmd_regs(mtd, NAND_CMD_READ0,
374 (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
375
376 for (sector = 0; sector < page_sectors; sector++) {
377 int ret;
378
379 empty_fifo(flctl);
380 writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
381 writel(page_addr << 2 | sector, FLADR(flctl));
382
383 start_translation(flctl);
384 read_fiforeg(flctl, 512, 512 * sector);
385
386 ret = read_ecfiforeg(flctl,
387 &flctl->done_buff[mtd->writesize + 16 * sector]);
388
389 if (ret)
390 flctl->hwecc_cant_correct[sector] = 1;
391
392 writel(0x0, FL4ECCCR(flctl));
393 wait_completion(flctl);
394 }
395 writel(readl(FLCMNCR(flctl)) & ~(ACM_SACCES_MODE | _4ECCCORRECT),
396 FLCMNCR(flctl));
397}
398
399static void execmd_read_oob(struct mtd_info *mtd, int page_addr)
400{
401 struct sh_flctl *flctl = mtd_to_flctl(mtd);
402
403 set_cmd_regs(mtd, NAND_CMD_READ0,
404 (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
405
406 empty_fifo(flctl);
407 if (flctl->page_size) {
408 int i;
 409		/* The page size is 2k */
410 for (i = 0; i < 16 * 3; i++)
411 flctl->done_buff[i] = 0xFF;
412
413 set_addr(mtd, 3 * 528 + 512, page_addr);
414 writel(16, FLDTCNTR(flctl));
415
416 start_translation(flctl);
417 read_fiforeg(flctl, 16, 16 * 3);
418 wait_completion(flctl);
419 } else {
 420		/* The page size is 512 bytes */
421 set_addr(mtd, 512, page_addr);
422 writel(16, FLDTCNTR(flctl));
423
424 start_translation(flctl);
425 read_fiforeg(flctl, 16, 0);
426 wait_completion(flctl);
427 }
428}
429
430static void execmd_write_page_sector(struct mtd_info *mtd)
431{
432 struct sh_flctl *flctl = mtd_to_flctl(mtd);
433 int i, page_addr = flctl->seqin_page_addr;
434 int sector, page_sectors;
435
436 if (flctl->page_size)
437 page_sectors = 4;
438 else
439 page_sectors = 1;
440
441 writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE, FLCMNCR(flctl));
442
443 set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
444 (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
445
446 for (sector = 0; sector < page_sectors; sector++) {
447 empty_fifo(flctl);
448 writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
449 writel(page_addr << 2 | sector, FLADR(flctl));
450
451 start_translation(flctl);
452 write_fiforeg(flctl, 512, 512 * sector);
453
454 for (i = 0; i < 4; i++) {
455 wait_wecfifo_ready(flctl); /* wait for write ready */
456 writel(0xFFFFFFFF, FLECFIFO(flctl));
457 }
458 wait_completion(flctl);
459 }
460
461 writel(readl(FLCMNCR(flctl)) & ~ACM_SACCES_MODE, FLCMNCR(flctl));
462}
463
464static void execmd_write_oob(struct mtd_info *mtd)
465{
466 struct sh_flctl *flctl = mtd_to_flctl(mtd);
467 int page_addr = flctl->seqin_page_addr;
468 int sector, page_sectors;
469
470 if (flctl->page_size) {
471 sector = 3;
472 page_sectors = 4;
473 } else {
474 sector = 0;
475 page_sectors = 1;
476 }
477
478 set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
479 (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
480
481 for (; sector < page_sectors; sector++) {
482 empty_fifo(flctl);
483 set_addr(mtd, sector * 528 + 512, page_addr);
484 writel(16, FLDTCNTR(flctl)); /* set read size */
485
486 start_translation(flctl);
487 write_fiforeg(flctl, 16, 16 * sector);
488 wait_completion(flctl);
489 }
490}
491
492static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
493 int column, int page_addr)
494{
495 struct sh_flctl *flctl = mtd_to_flctl(mtd);
496 uint32_t read_cmd = 0;
497
498 flctl->read_bytes = 0;
499 if (command != NAND_CMD_PAGEPROG)
500 flctl->index = 0;
501
502 switch (command) {
503 case NAND_CMD_READ1:
504 case NAND_CMD_READ0:
505 if (flctl->hwecc) {
506 /* read page with hwecc */
507 execmd_read_page_sector(mtd, page_addr);
508 break;
509 }
510 empty_fifo(flctl);
511 if (flctl->page_size)
512 set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
513 | command);
514 else
515 set_cmd_regs(mtd, command, command);
516
517 set_addr(mtd, 0, page_addr);
518
519 flctl->read_bytes = mtd->writesize + mtd->oobsize;
520 flctl->index += column;
521 goto read_normal_exit;
522
523 case NAND_CMD_READOOB:
524 if (flctl->hwecc) {
525 /* read page with hwecc */
526 execmd_read_oob(mtd, page_addr);
527 break;
528 }
529
530 empty_fifo(flctl);
531 if (flctl->page_size) {
532 set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
533 | NAND_CMD_READ0);
534 set_addr(mtd, mtd->writesize, page_addr);
535 } else {
536 set_cmd_regs(mtd, command, command);
537 set_addr(mtd, 0, page_addr);
538 }
539 flctl->read_bytes = mtd->oobsize;
540 goto read_normal_exit;
541
542 case NAND_CMD_READID:
543 empty_fifo(flctl);
544 set_cmd_regs(mtd, command, command);
545 set_addr(mtd, 0, 0);
546
547 flctl->read_bytes = 4;
548 writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
549 start_translation(flctl);
550 read_datareg(flctl, 0); /* read and end */
551 break;
552
553 case NAND_CMD_ERASE1:
554 flctl->erase1_page_addr = page_addr;
555 break;
556
557 case NAND_CMD_ERASE2:
558 set_cmd_regs(mtd, NAND_CMD_ERASE1,
559 (command << 8) | NAND_CMD_ERASE1);
560 set_addr(mtd, -1, flctl->erase1_page_addr);
561 start_translation(flctl);
562 wait_completion(flctl);
563 break;
564
565 case NAND_CMD_SEQIN:
566 if (!flctl->page_size) {
567 /* output read command */
568 if (column >= mtd->writesize) {
569 column -= mtd->writesize;
570 read_cmd = NAND_CMD_READOOB;
571 } else if (column < 256) {
572 read_cmd = NAND_CMD_READ0;
573 } else {
574 column -= 256;
575 read_cmd = NAND_CMD_READ1;
576 }
577 }
578 flctl->seqin_column = column;
579 flctl->seqin_page_addr = page_addr;
580 flctl->seqin_read_cmd = read_cmd;
581 break;
582
583 case NAND_CMD_PAGEPROG:
584 empty_fifo(flctl);
585 if (!flctl->page_size) {
586 set_cmd_regs(mtd, NAND_CMD_SEQIN,
587 flctl->seqin_read_cmd);
588 set_addr(mtd, -1, -1);
589 writel(0, FLDTCNTR(flctl)); /* set 0 size */
590 start_translation(flctl);
591 wait_completion(flctl);
592 }
593 if (flctl->hwecc) {
594 /* write page with hwecc */
595 if (flctl->seqin_column == mtd->writesize)
596 execmd_write_oob(mtd);
597 else if (!flctl->seqin_column)
598 execmd_write_page_sector(mtd);
599 else
 600			printk(KERN_ERR "Invalid address!?\n");
601 break;
602 }
603 set_cmd_regs(mtd, command, (command << 8) | NAND_CMD_SEQIN);
604 set_addr(mtd, flctl->seqin_column, flctl->seqin_page_addr);
605 writel(flctl->index, FLDTCNTR(flctl)); /* set write size */
606 start_translation(flctl);
607 write_fiforeg(flctl, flctl->index, 0);
608 wait_completion(flctl);
609 break;
610
611 case NAND_CMD_STATUS:
612 set_cmd_regs(mtd, command, command);
613 set_addr(mtd, -1, -1);
614
615 flctl->read_bytes = 1;
616 writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
617 start_translation(flctl);
618 read_datareg(flctl, 0); /* read and end */
619 break;
620
621 case NAND_CMD_RESET:
622 set_cmd_regs(mtd, command, command);
623 set_addr(mtd, -1, -1);
624
625 writel(0, FLDTCNTR(flctl)); /* set 0 size */
626 start_translation(flctl);
627 wait_completion(flctl);
628 break;
629
630 default:
631 break;
632 }
633 return;
634
635read_normal_exit:
636 writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
637 start_translation(flctl);
638 read_fiforeg(flctl, flctl->read_bytes, 0);
639 wait_completion(flctl);
640 return;
641}
642
643static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
644{
645 struct sh_flctl *flctl = mtd_to_flctl(mtd);
646 uint32_t flcmncr_val = readl(FLCMNCR(flctl));
647
648 switch (chipnr) {
649 case -1:
650 flcmncr_val &= ~CE0_ENABLE;
651 writel(flcmncr_val, FLCMNCR(flctl));
652 break;
653 case 0:
654 flcmncr_val |= CE0_ENABLE;
655 writel(flcmncr_val, FLCMNCR(flctl));
656 break;
657 default:
658 BUG();
659 }
660}
661
662static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
663{
664 struct sh_flctl *flctl = mtd_to_flctl(mtd);
665 int i, index = flctl->index;
666
667 for (i = 0; i < len; i++)
668 flctl->done_buff[index + i] = buf[i];
669 flctl->index += len;
670}
671
672static uint8_t flctl_read_byte(struct mtd_info *mtd)
673{
674 struct sh_flctl *flctl = mtd_to_flctl(mtd);
675 int index = flctl->index;
676 uint8_t data;
677
678 data = flctl->done_buff[index];
679 flctl->index++;
680 return data;
681}
682
683static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
684{
685 int i;
686
687 for (i = 0; i < len; i++)
688 buf[i] = flctl_read_byte(mtd);
689}
690
691static int flctl_verify_buf(struct mtd_info *mtd, const u_char *buf, int len)
692{
693 int i;
694
695 for (i = 0; i < len; i++)
696 if (buf[i] != flctl_read_byte(mtd))
697 return -EFAULT;
698 return 0;
699}
700
701static void flctl_register_init(struct sh_flctl *flctl, unsigned long val)
702{
703 writel(val, FLCMNCR(flctl));
704}
705
706static int flctl_chip_init_tail(struct mtd_info *mtd)
707{
708 struct sh_flctl *flctl = mtd_to_flctl(mtd);
709 struct nand_chip *chip = &flctl->chip;
710
711 if (mtd->writesize == 512) {
712 flctl->page_size = 0;
713 if (chip->chipsize > (32 << 20)) {
 714			/* larger than 32MB */
715 flctl->rw_ADRCNT = ADRCNT_4;
716 flctl->erase_ADRCNT = ADRCNT_3;
717 } else if (chip->chipsize > (2 << 16)) {
 718			/* larger than 128KB */
719 flctl->rw_ADRCNT = ADRCNT_3;
720 flctl->erase_ADRCNT = ADRCNT_2;
721 } else {
722 flctl->rw_ADRCNT = ADRCNT_2;
723 flctl->erase_ADRCNT = ADRCNT_1;
724 }
725 } else {
726 flctl->page_size = 1;
727 if (chip->chipsize > (128 << 20)) {
 728			/* larger than 128MB */
729 flctl->rw_ADRCNT = ADRCNT2_E;
730 flctl->erase_ADRCNT = ADRCNT_3;
731 } else if (chip->chipsize > (8 << 16)) {
 732			/* larger than 512KB */
733 flctl->rw_ADRCNT = ADRCNT_4;
734 flctl->erase_ADRCNT = ADRCNT_2;
735 } else {
736 flctl->rw_ADRCNT = ADRCNT_3;
737 flctl->erase_ADRCNT = ADRCNT_1;
738 }
739 }
740
741 if (flctl->hwecc) {
742 if (mtd->writesize == 512) {
743 chip->ecc.layout = &flctl_4secc_oob_16;
744 chip->badblock_pattern = &flctl_4secc_smallpage;
745 } else {
746 chip->ecc.layout = &flctl_4secc_oob_64;
747 chip->badblock_pattern = &flctl_4secc_largepage;
748 }
749
750 chip->ecc.size = 512;
751 chip->ecc.bytes = 10;
752 chip->ecc.read_page = flctl_read_page_hwecc;
753 chip->ecc.write_page = flctl_write_page_hwecc;
754 chip->ecc.mode = NAND_ECC_HW;
755
756 /* 4 symbols ECC enabled */
757 writel(readl(FLCMNCR(flctl)) | _4ECCEN | ECCPOS2 | ECCPOS_02,
758 FLCMNCR(flctl));
759 } else {
760 chip->ecc.mode = NAND_ECC_SOFT;
761 }
762
763 return 0;
764}
765
766static int __init flctl_probe(struct platform_device *pdev)
767{
768 struct resource *res;
769 struct sh_flctl *flctl;
770 struct mtd_info *flctl_mtd;
771 struct nand_chip *nand;
772 struct sh_flctl_platform_data *pdata;
773 int ret;
774
775 pdata = pdev->dev.platform_data;
776 if (pdata == NULL) {
777 printk(KERN_ERR "sh_flctl platform_data not found.\n");
778 return -ENODEV;
779 }
780
781 flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
782 if (!flctl) {
783 printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
784 return -ENOMEM;
785 }
786
787 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
788 if (!res) {
789 printk(KERN_ERR "%s: resource not found.\n", __func__);
790 ret = -ENODEV;
791 goto err;
792 }
793
794 flctl->reg = ioremap(res->start, res->end - res->start + 1);
795 if (flctl->reg == NULL) {
796 printk(KERN_ERR "%s: ioremap error.\n", __func__);
797 ret = -ENOMEM;
798 goto err;
799 }
800
801 platform_set_drvdata(pdev, flctl);
802 flctl_mtd = &flctl->mtd;
803 nand = &flctl->chip;
804 flctl_mtd->priv = nand;
805 flctl->hwecc = pdata->has_hwecc;
806
807 flctl_register_init(flctl, pdata->flcmncr_val);
808
809 nand->options = NAND_NO_AUTOINCR;
810
 811	/* Set up the hardware-specific access functions */
812 /* 20 us command delay time */
813 nand->chip_delay = 20;
814
815 nand->read_byte = flctl_read_byte;
816 nand->write_buf = flctl_write_buf;
817 nand->read_buf = flctl_read_buf;
818 nand->verify_buf = flctl_verify_buf;
819 nand->select_chip = flctl_select_chip;
820 nand->cmdfunc = flctl_cmdfunc;
821
822 ret = nand_scan_ident(flctl_mtd, 1);
823 if (ret)
824 goto err;
825
826 ret = flctl_chip_init_tail(flctl_mtd);
827 if (ret)
828 goto err;
829
830 ret = nand_scan_tail(flctl_mtd);
831 if (ret)
832 goto err;
833
834 add_mtd_partitions(flctl_mtd, pdata->parts, pdata->nr_parts);
835
836 return 0;
837
838err:
839 kfree(flctl);
840 return ret;
841}
842
843static int __exit flctl_remove(struct platform_device *pdev)
844{
845 struct sh_flctl *flctl = platform_get_drvdata(pdev);
846
847 nand_release(&flctl->mtd);
848 kfree(flctl);
849
850 return 0;
851}
852
853static struct platform_driver flctl_driver = {
854 .probe = flctl_probe,
855 .remove = flctl_remove,
856 .driver = {
857 .name = "sh_flctl",
858 .owner = THIS_MODULE,
859 },
860};
861
862static int __init flctl_nand_init(void)
863{
864 return platform_driver_register(&flctl_driver);
865}
866
867static void __exit flctl_nand_cleanup(void)
868{
869 platform_driver_unregister(&flctl_driver);
870}
871
872module_init(flctl_nand_init);
873module_exit(flctl_nand_cleanup);
874
875MODULE_LICENSE("GPL");
876MODULE_AUTHOR("Yoshihiro Shimoda");
877MODULE_DESCRIPTION("SuperH FLCTL driver");
878MODULE_ALIAS("platform:sh_flctl");
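For a board to use this driver, it registers a "sh_flctl" platform device carrying sh_flctl_platform_data; a minimal sketch, where the register window, FLCMNCR value and partition layout are placeholders (the real values are SoC- and board-specific):

	static struct mtd_partition flctl_parts[] = {
		{ .name = "boot",   .offset = 0, .size = 1024 * 1024 },
		{ .name = "rootfs", .offset = MTDPART_OFS_APPEND,
		  .size = MTDPART_SIZ_FULL },
	};

	static struct sh_flctl_platform_data flctl_pdata = {
		.parts		= flctl_parts,
		.nr_parts	= ARRAY_SIZE(flctl_parts),
		.flcmncr_val	= 0,	/* placeholder: SoC-specific FLCMNCR setup */
		.has_hwecc	= 1,	/* select the 4-symbol hardware ECC path */
	};

	static struct resource flctl_resources[] = {
		{
			.start	= 0xa4530000,	/* placeholder FLCTL base */
			.end	= 0xa45300ff,
			.flags	= IORESOURCE_MEM,
		},
	};

	static struct platform_device flctl_device = {
		.name		= "sh_flctl",	/* matches flctl_driver.driver.name */
		.id		= -1,
		.resource	= flctl_resources,
		.num_resources	= ARRAY_SIZE(flctl_resources),
		.dev		= { .platform_data = &flctl_pdata },
	};

The probe routine above then ioremaps the first memory resource, applies flcmncr_val via flctl_register_init(), and hands parts/nr_parts to add_mtd_partitions().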
diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c
deleted file mode 100644
index bbf492e6830d..000000000000
--- a/drivers/mtd/nand/toto.c
+++ /dev/null
@@ -1,206 +0,0 @@
1/*
2 * drivers/mtd/nand/toto.c
3 *
4 * Copyright (c) 2003 Texas Instruments
5 *
6 * Derived from drivers/mtd/autcpu12.c
7 *
8 * Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * Overview:
15 * This is a device driver for the NAND flash device found on the
16 * TI fido board. It supports 32MiB and 64MiB cards
17 */
18
19#include <linux/slab.h>
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/delay.h>
23#include <linux/mtd/mtd.h>
24#include <linux/mtd/nand.h>
25#include <linux/mtd/partitions.h>
26#include <asm/io.h>
27#include <asm/arch/hardware.h>
28#include <asm/sizes.h>
29#include <asm/arch/toto.h>
30#include <asm/arch-omap1510/hardware.h>
31#include <asm/arch/gpio.h>
32
33#define CONFIG_NAND_WORKAROUND 1
34
35/*
36 * MTD structure for TOTO board
37 */
38static struct mtd_info *toto_mtd = NULL;
39
40static unsigned long toto_io_base = OMAP_FLASH_1_BASE;
41
42/*
43 * Define partitions for flash devices
44 */
45
46static struct mtd_partition partition_info64M[] = {
47 { .name = "toto kernel partition 1",
48 .offset = 0,
49 .size = 2 * SZ_1M },
50 { .name = "toto file sys partition 2",
51 .offset = 2 * SZ_1M,
52 .size = 14 * SZ_1M },
53 { .name = "toto user partition 3",
54 .offset = 16 * SZ_1M,
55 .size = 16 * SZ_1M },
56 { .name = "toto devboard extra partition 4",
57 .offset = 32 * SZ_1M,
58 .size = 32 * SZ_1M },
59};
60
61static struct mtd_partition partition_info32M[] = {
62 { .name = "toto kernel partition 1",
63 .offset = 0,
64 .size = 2 * SZ_1M },
65 { .name = "toto file sys partition 2",
66 .offset = 2 * SZ_1M,
67 .size = 14 * SZ_1M },
68 { .name = "toto user partition 3",
69 .offset = 16 * SZ_1M,
70 .size = 16 * SZ_1M },
71};
72
73#define NUM_PARTITIONS32M 3
74#define NUM_PARTITIONS64M 4
75
76/*
77 * hardware specific access to control-lines
78 *
79 * ctrl:
80 * NAND_NCE: bit 0 -> bit 14 (0x4000)
81 * NAND_CLE: bit 1 -> bit 12 (0x1000)
82 * NAND_ALE: bit 2 -> bit 1 (0x0002)
83 */
84static void toto_hwcontrol(struct mtd_info *mtd, int cmd,
85 unsigned int ctrl)
86{
87 struct nand_chip *chip = mtd->priv;
88
89 if (ctrl & NAND_CTRL_CHANGE) {
90 unsigned long bits;
91
 92		/* hopefully enough time for the preceding write to clear */
93 udelay(1);
94
95 bits = (~ctrl & NAND_NCE) << 14;
96 bits |= (ctrl & NAND_CLE) << 12;
97 bits |= (ctrl & NAND_ALE) >> 1;
98
99#warning Wild guess as gpiosetout() is nowhere defined in the kernel source - tglx
100 gpiosetout(0x5002, bits);
101
102#ifdef CONFIG_NAND_WORKAROUND
103 /* "some" dev boards busted, blue wired to rts2 :( */
104 rts2setout(2, (ctrl & NAND_CLE) << 1);
105#endif
106 /* allow time to ensure gpio state to over take memory write */
107 udelay(1);
108 }
109
110 if (cmd != NAND_CMD_NONE)
111 writeb(cmd, chip->IO_ADDR_W);
112}
113
114/*
115 * Main initialization routine
116 */
117static int __init toto_init(void)
118{
119 struct nand_chip *this;
120 int err = 0;
121
122 /* Allocate memory for MTD device structure and private data */
123 toto_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
124 if (!toto_mtd) {
125 printk(KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n");
126 err = -ENOMEM;
127 goto out;
128 }
129
130 /* Get pointer to private data */
131 this = (struct nand_chip *)(&toto_mtd[1]);
132
133 /* Initialize structures */
134 memset(toto_mtd, 0, sizeof(struct mtd_info));
135 memset(this, 0, sizeof(struct nand_chip));
136
137 /* Link the private data with the MTD structure */
138 toto_mtd->priv = this;
139 toto_mtd->owner = THIS_MODULE;
140
141 /* Set address of NAND IO lines */
142 this->IO_ADDR_R = toto_io_base;
143 this->IO_ADDR_W = toto_io_base;
144 this->cmd_ctrl = toto_hwcontrol;
145 this->dev_ready = NULL;
 146	/* 30 us command delay time */
147 this->chip_delay = 30;
148 this->ecc.mode = NAND_ECC_SOFT;
149
 150	/* Scan to find existence of the device */
151 if (nand_scan(toto_mtd, 1)) {
152 err = -ENXIO;
153 goto out_mtd;
154 }
155
156 /* Register the partitions */
157 switch (toto_mtd->size) {
158 case SZ_64M:
159 add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M);
160 break;
161 case SZ_32M:
162 add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M);
163 break;
164 default:{
 165		printk(KERN_WARNING "Unsupported NAND device\n");
166 err = -ENXIO;
167 goto out_buf;
168 }
169 }
170
171 gpioreserve(NAND_MASK); /* claim our gpios */
172 archflashwp(0, 0); /* open up flash for writing */
173
174 goto out;
175
176 out_mtd:
177 kfree(toto_mtd);
178 out:
179 return err;
180}
181
182module_init(toto_init);
183
184/*
185 * Clean up routine
186 */
187static void __exit toto_cleanup(void)
188{
189 /* Release resources, unregister device */
190 nand_release(toto_mtd);
191
192 /* Free the MTD device structure */
193 kfree(toto_mtd);
194
195 /* stop flash writes */
196 archflashwp(0, 1);
197
198 /* release gpios to system */
199 gpiorelease(NAND_MASK);
200}
201
202module_exit(toto_cleanup);
203
204MODULE_LICENSE("GPL");
205MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>");
206MODULE_DESCRIPTION("Glue layer for NAND flash on toto board");
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 4f80c2fd89af..9e45b3f39c0e 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -20,7 +20,6 @@
20#include <linux/mtd/partitions.h> 20#include <linux/mtd/partitions.h>
21 21
22int __devinit of_mtd_parse_partitions(struct device *dev, 22int __devinit of_mtd_parse_partitions(struct device *dev,
23 struct mtd_info *mtd,
24 struct device_node *node, 23 struct device_node *node,
25 struct mtd_partition **pparts) 24 struct mtd_partition **pparts)
26{ 25{
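With the mtd argument gone, of_mtd_parse_partitions() derives the partition list from the device node alone, and the caller attaches it to the MTD device afterwards. A hedged call-site sketch (dev, node and mtd here are placeholders from a typical flash-probe path; dev is the struct device, node its device_node):

	struct mtd_partition *parts;
	int nr_parts;

	nr_parts = of_mtd_parse_partitions(dev, node, &parts);
	if (nr_parts > 0)
		add_mtd_partitions(mtd, parts, nr_parts);
	else
		add_mtd_device(mtd);	/* no partitions described in the DT */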
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index cb41cbca64f7..79fa79e8f8de 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -27,8 +27,16 @@ config MTD_ONENAND_GENERIC
27 help 27 help
28 Support for OneNAND flash via platform device driver. 28 Support for OneNAND flash via platform device driver.
29 29
30config MTD_ONENAND_OMAP2
31 tristate "OneNAND on OMAP2/OMAP3 support"
32 depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3)
33 help
34 Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU
35 via the GPMC memory controller.
36
30config MTD_ONENAND_OTP 37config MTD_ONENAND_OTP
31 bool "OneNAND OTP Support" 38 bool "OneNAND OTP Support"
39 select HAVE_MTD_OTP
32 help 40 help
33 One Block of the NAND Flash Array memory is reserved as 41 One Block of the NAND Flash Array memory is reserved as
34 a One-Time Programmable Block memory area. 42 a One-Time Programmable Block memory area.
diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile
index 4d2eacfd7e11..64b6cc61a520 100644
--- a/drivers/mtd/onenand/Makefile
+++ b/drivers/mtd/onenand/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND) += onenand.o
7 7
8# Board specific. 8# Board specific.
9obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o 9obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o
10obj-$(CONFIG_MTD_ONENAND_OMAP2) += omap2.o
10 11
11# Simulator 12# Simulator
12obj-$(CONFIG_MTD_ONENAND_SIM) += onenand_sim.o 13obj-$(CONFIG_MTD_ONENAND_SIM) += onenand_sim.o
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
new file mode 100644
index 000000000000..8387e05daae2
--- /dev/null
+++ b/drivers/mtd/onenand/omap2.c
@@ -0,0 +1,802 @@
1/*
2 * linux/drivers/mtd/onenand/omap2.c
3 *
4 * OneNAND driver for OMAP2 / OMAP3
5 *
6 * Copyright © 2005-2006 Nokia Corporation
7 *
8 * Author: Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha Yrjölä
9 * IRQ and DMA support written by Timo Teras
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License version 2 as published by
13 * the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; see the file COPYING. If not, write to the Free Software
22 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 *
24 */
25
26#include <linux/device.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/mtd/mtd.h>
30#include <linux/mtd/onenand.h>
31#include <linux/mtd/partitions.h>
32#include <linux/platform_device.h>
33#include <linux/interrupt.h>
34#include <linux/delay.h>
35
36#include <asm/io.h>
37#include <asm/mach/flash.h>
38#include <asm/arch/gpmc.h>
39#include <asm/arch/onenand.h>
40#include <asm/arch/gpio.h>
41#include <asm/arch/gpmc.h>
42#include <asm/arch/pm.h>
43
44#include <linux/dma-mapping.h>
45#include <asm/dma-mapping.h>
46#include <asm/arch/dma.h>
47
48#include <asm/arch/board.h>
49
50#define DRIVER_NAME "omap2-onenand"
51
52#define ONENAND_IO_SIZE SZ_128K
53#define ONENAND_BUFRAM_SIZE (1024 * 5)
54
55struct omap2_onenand {
56 struct platform_device *pdev;
57 int gpmc_cs;
58 unsigned long phys_base;
59 int gpio_irq;
60 struct mtd_info mtd;
61 struct mtd_partition *parts;
62 struct onenand_chip onenand;
63 struct completion irq_done;
64 struct completion dma_done;
65 int dma_channel;
66 int freq;
67 int (*setup)(void __iomem *base, int freq);
68};
69
70static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
71{
72 struct omap2_onenand *c = data;
73
74 complete(&c->dma_done);
75}
76
77static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id)
78{
79 struct omap2_onenand *c = dev_id;
80
81 complete(&c->irq_done);
82
83 return IRQ_HANDLED;
84}
85
86static inline unsigned short read_reg(struct omap2_onenand *c, int reg)
87{
88 return readw(c->onenand.base + reg);
89}
90
91static inline void write_reg(struct omap2_onenand *c, unsigned short value,
92 int reg)
93{
94 writew(value, c->onenand.base + reg);
95}
96
97static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr)
98{
99 printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n",
100 msg, state, ctrl, intr);
101}
102
103static void wait_warn(char *msg, int state, unsigned int ctrl,
104 unsigned int intr)
105{
106 printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x "
107 "intr 0x%04x\n", msg, state, ctrl, intr);
108}
109
110static int omap2_onenand_wait(struct mtd_info *mtd, int state)
111{
112 struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
113 unsigned int intr = 0;
114 unsigned int ctrl;
115 unsigned long timeout;
116 u32 syscfg;
117
118 if (state == FL_RESETING) {
119 int i;
120
121 for (i = 0; i < 20; i++) {
122 udelay(1);
123 intr = read_reg(c, ONENAND_REG_INTERRUPT);
124 if (intr & ONENAND_INT_MASTER)
125 break;
126 }
127 ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
128 if (ctrl & ONENAND_CTRL_ERROR) {
129 wait_err("controller error", state, ctrl, intr);
130 return -EIO;
131 }
132 if (!(intr & ONENAND_INT_RESET)) {
133 wait_err("timeout", state, ctrl, intr);
134 return -EIO;
135 }
136 return 0;
137 }
138
139 if (state != FL_READING) {
140 int result;
141
142 /* Turn interrupts on */
143 syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
144 if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) {
145 syscfg |= ONENAND_SYS_CFG1_IOBE;
146 write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
147 if (cpu_is_omap34xx())
148 /* Add a delay to let GPIO settle */
149 syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
150 }
151
152 INIT_COMPLETION(c->irq_done);
153 if (c->gpio_irq) {
154 result = omap_get_gpio_datain(c->gpio_irq);
155 if (result == -1) {
156 ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
157 intr = read_reg(c, ONENAND_REG_INTERRUPT);
158 wait_err("gpio error", state, ctrl, intr);
159 return -EIO;
160 }
161 } else
162 result = 0;
163 if (result == 0) {
164 int retry_cnt = 0;
165retry:
166 result = wait_for_completion_timeout(&c->irq_done,
167 msecs_to_jiffies(20));
168 if (result == 0) {
169 /* Timeout after 20ms */
170 ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
171 if (ctrl & ONENAND_CTRL_ONGO) {
172 /*
173 * The operation seems to be still going
174 * so give it some more time.
175 */
176 retry_cnt += 1;
177 if (retry_cnt < 3)
178 goto retry;
179 intr = read_reg(c,
180 ONENAND_REG_INTERRUPT);
181 wait_err("timeout", state, ctrl, intr);
182 return -EIO;
183 }
184 intr = read_reg(c, ONENAND_REG_INTERRUPT);
185 if ((intr & ONENAND_INT_MASTER) == 0)
186 wait_warn("timeout", state, ctrl, intr);
187 }
188 }
189 } else {
190 int retry_cnt = 0;
191
192 /* Turn interrupts off */
193 syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
194 syscfg &= ~ONENAND_SYS_CFG1_IOBE;
195 write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
196
197 timeout = jiffies + msecs_to_jiffies(20);
198 while (1) {
199 if (time_before(jiffies, timeout)) {
200 intr = read_reg(c, ONENAND_REG_INTERRUPT);
201 if (intr & ONENAND_INT_MASTER)
202 break;
203 } else {
204 /* Timeout after 20ms */
205 ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
206 if (ctrl & ONENAND_CTRL_ONGO) {
207 /*
208 * The operation seems to be still going
209 * so give it some more time.
210 */
211 retry_cnt += 1;
212 if (retry_cnt < 3) {
213 timeout = jiffies +
214 msecs_to_jiffies(20);
215 continue;
216 }
217 }
218 break;
219 }
220 }
221 }
222
223 intr = read_reg(c, ONENAND_REG_INTERRUPT);
224 ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
225
226 if (intr & ONENAND_INT_READ) {
227 int ecc = read_reg(c, ONENAND_REG_ECC_STATUS);
228
229 if (ecc) {
230 unsigned int addr1, addr8;
231
232 addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1);
233 addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8);
234 if (ecc & ONENAND_ECC_2BIT_ALL) {
235 printk(KERN_ERR "onenand_wait: ECC error = "
236 "0x%04x, addr1 %#x, addr8 %#x\n",
237 ecc, addr1, addr8);
238 mtd->ecc_stats.failed++;
239 return -EBADMSG;
240 } else if (ecc & ONENAND_ECC_1BIT_ALL) {
241 printk(KERN_NOTICE "onenand_wait: correctable "
242 "ECC error = 0x%04x, addr1 %#x, "
243 "addr8 %#x\n", ecc, addr1, addr8);
244 mtd->ecc_stats.corrected++;
245 }
246 }
247 } else if (state == FL_READING) {
248 wait_err("timeout", state, ctrl, intr);
249 return -EIO;
250 }
251
252 if (ctrl & ONENAND_CTRL_ERROR) {
253 wait_err("controller error", state, ctrl, intr);
254 if (ctrl & ONENAND_CTRL_LOCK)
255 printk(KERN_ERR "onenand_wait: "
256 "Device is write protected!!!\n");
257 return -EIO;
258 }
259
260 if (ctrl & 0xFE9F)
261 wait_warn("unexpected controller status", state, ctrl, intr);
262
263 return 0;
264}
265
266static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area)
267{
268 struct onenand_chip *this = mtd->priv;
269
270 if (ONENAND_CURRENT_BUFFERRAM(this)) {
271 if (area == ONENAND_DATARAM)
272 return mtd->writesize;
273 if (area == ONENAND_SPARERAM)
274 return mtd->oobsize;
275 }
276
277 return 0;
278}
279
280#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2)
281
282static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
283 unsigned char *buffer, int offset,
284 size_t count)
285{
286 struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
287 struct onenand_chip *this = mtd->priv;
288 dma_addr_t dma_src, dma_dst;
289 int bram_offset;
290 unsigned long timeout;
291 void *buf = (void *)buffer;
292 size_t xtra;
293 volatile unsigned *done;
294
295 bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
296 if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
297 goto out_copy;
298
299 if (buf >= high_memory) {
300 struct page *p1;
301
302 if (((size_t)buf & PAGE_MASK) !=
303 ((size_t)(buf + count - 1) & PAGE_MASK))
304 goto out_copy;
305 p1 = vmalloc_to_page(buf);
306 if (!p1)
307 goto out_copy;
308 buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
309 }
310
311 xtra = count & 3;
312 if (xtra) {
313 count -= xtra;
314 memcpy(buf + count, this->base + bram_offset + count, xtra);
315 }
316
317 dma_src = c->phys_base + bram_offset;
318 dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
319 if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
320 dev_err(&c->pdev->dev,
321 "Couldn't DMA map a %d byte buffer\n",
322 count);
323 goto out_copy;
324 }
325
326 omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
327 count >> 2, 1, 0, 0, 0);
328 omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
329 dma_src, 0, 0);
330 omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
331 dma_dst, 0, 0);
332
333 INIT_COMPLETION(c->dma_done);
334 omap_start_dma(c->dma_channel);
335
336 timeout = jiffies + msecs_to_jiffies(20);
337 done = &c->dma_done.done;
338 while (time_before(jiffies, timeout))
339 if (*done)
340 break;
341
342 dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
343
344 if (!*done) {
345 dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
346 goto out_copy;
347 }
348
349 return 0;
350
351out_copy:
352 memcpy(buf, this->base + bram_offset, count);
353 return 0;
354}
355
356static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
357 const unsigned char *buffer,
358 int offset, size_t count)
359{
360 struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
361 struct onenand_chip *this = mtd->priv;
362 dma_addr_t dma_src, dma_dst;
363 int bram_offset;
364 unsigned long timeout;
365 void *buf = (void *)buffer;
366 volatile unsigned *done;
367
368 bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
369 if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
370 goto out_copy;
371
372 /* panic_write() may be in an interrupt context */
373 if (in_interrupt())
374 goto out_copy;
375
376 if (buf >= high_memory) {
377 struct page *p1;
378
379 if (((size_t)buf & PAGE_MASK) !=
380 ((size_t)(buf + count - 1) & PAGE_MASK))
381 goto out_copy;
382 p1 = vmalloc_to_page(buf);
383 if (!p1)
384 goto out_copy;
385 buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
386 }
387
388 dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
389 dma_dst = c->phys_base + bram_offset;
390 if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
391 dev_err(&c->pdev->dev,
392 "Couldn't DMA map a %d byte buffer\n",
393 count);
394 return -1;
395 }
396
397 omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
398 count >> 2, 1, 0, 0, 0);
399 omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
400 dma_src, 0, 0);
401 omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
402 dma_dst, 0, 0);
403
404 INIT_COMPLETION(c->dma_done);
405 omap_start_dma(c->dma_channel);
406
407 timeout = jiffies + msecs_to_jiffies(20);
408 done = &c->dma_done.done;
409 while (time_before(jiffies, timeout))
410 if (*done)
411 break;
412
413 dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE);
414
415 if (!*done) {
416 dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
417 goto out_copy;
418 }
419
420 return 0;
421
422out_copy:
423 memcpy(this->base + bram_offset, buf, count);
424 return 0;
425}
426
427#else
428
429int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
430 unsigned char *buffer, int offset,
431 size_t count);
432
433int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
434 const unsigned char *buffer,
435 int offset, size_t count);
436
437#endif
438
439#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2)
440
441static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
442 unsigned char *buffer, int offset,
443 size_t count)
444{
445 struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
446 struct onenand_chip *this = mtd->priv;
447 dma_addr_t dma_src, dma_dst;
448 int bram_offset;
449
450 bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
451 /* DMA is not used. Revisit PM requirements before enabling it. */
452 if (1 || (c->dma_channel < 0) ||
453 ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
454 (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
455 memcpy(buffer, (__force void *)(this->base + bram_offset),
456 count);
457 return 0;
458 }
459
460 dma_src = c->phys_base + bram_offset;
461 dma_dst = dma_map_single(&c->pdev->dev, buffer, count,
462 DMA_FROM_DEVICE);
463 if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
464 dev_err(&c->pdev->dev,
465 "Couldn't DMA map a %d byte buffer\n",
466 count);
467 return -1;
468 }
469
470 omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
471 count / 4, 1, 0, 0, 0);
472 omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
473 dma_src, 0, 0);
474 omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
475 dma_dst, 0, 0);
476
477 INIT_COMPLETION(c->dma_done);
478 omap_start_dma(c->dma_channel);
479 wait_for_completion(&c->dma_done);
480
481 dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
482
483 return 0;
484}
485
486static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
487 const unsigned char *buffer,
488 int offset, size_t count)
489{
490 struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
491 struct onenand_chip *this = mtd->priv;
492 dma_addr_t dma_src, dma_dst;
493 int bram_offset;
494
495 bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
496 /* DMA is not used. Revisit PM requirements before enabling it. */
497 if (1 || (c->dma_channel < 0) ||
498 ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
499 (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
500 memcpy((__force void *)(this->base + bram_offset), buffer,
501 count);
502 return 0;
503 }
504
505 dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count,
506 DMA_TO_DEVICE);
507 dma_dst = c->phys_base + bram_offset;
508 if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
509 dev_err(&c->pdev->dev,
510 "Couldn't DMA map a %d byte buffer\n",
511 count);
512 return -1;
513 }
514
515 omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16,
516 count / 2, 1, 0, 0, 0);
517 omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
518 dma_src, 0, 0);
519 omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
520 dma_dst, 0, 0);
521
522 INIT_COMPLETION(c->dma_done);
523 omap_start_dma(c->dma_channel);
524 wait_for_completion(&c->dma_done);
525
526 dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE);
527
528 return 0;
529}
530
531#else
532
533int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
534 unsigned char *buffer, int offset,
535 size_t count);
536
537int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
538 const unsigned char *buffer,
539 int offset, size_t count);
540
541#endif
542
543static struct platform_driver omap2_onenand_driver;
544
545static int __adjust_timing(struct device *dev, void *data)
546{
547 int ret = 0;
548 struct omap2_onenand *c;
549
550 c = dev_get_drvdata(dev);
551
552 BUG_ON(c->setup == NULL);
553
554 /* DMA is not in use so this is all that is needed */
555 /* Revisit for OMAP3! */
556 ret = c->setup(c->onenand.base, c->freq);
557
558 return ret;
559}
560
561int omap2_onenand_rephase(void)
562{
563 return driver_for_each_device(&omap2_onenand_driver.driver, NULL,
564 NULL, __adjust_timing);
565}
566
567static void __devexit omap2_onenand_shutdown(struct platform_device *pdev)
568{
569 struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
570
571 /* With certain content in the buffer RAM, the OMAP boot ROM code
572 * can recognize the flash chip incorrectly. Zero it out before
573 * soft reset.
574 */
575 memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
576}
577
578static int __devinit omap2_onenand_probe(struct platform_device *pdev)
579{
580 struct omap_onenand_platform_data *pdata;
581 struct omap2_onenand *c;
582 int r;
583
584 pdata = pdev->dev.platform_data;
585 if (pdata == NULL) {
586 dev_err(&pdev->dev, "platform data missing\n");
587 return -ENODEV;
588 }
589
590 c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL);
591 if (!c)
592 return -ENOMEM;
593
594 init_completion(&c->irq_done);
595 init_completion(&c->dma_done);
596 c->gpmc_cs = pdata->cs;
597 c->gpio_irq = pdata->gpio_irq;
598 c->dma_channel = pdata->dma_channel;
599 if (c->dma_channel < 0) {
600 /* if -1, don't use DMA */
601 c->gpio_irq = 0;
602 }
603
604 r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base);
605 if (r < 0) {
606 dev_err(&pdev->dev, "Cannot request GPMC CS\n");
607 goto err_kfree;
608 }
609
610 if (request_mem_region(c->phys_base, ONENAND_IO_SIZE,
611 pdev->dev.driver->name) == NULL) {
612 dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, "
613 "size: 0x%x\n", c->phys_base, ONENAND_IO_SIZE);
614 r = -EBUSY;
615 goto err_free_cs;
616 }
617 c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE);
618 if (c->onenand.base == NULL) {
619 r = -ENOMEM;
620 goto err_release_mem_region;
621 }
622
623 if (pdata->onenand_setup != NULL) {
624 r = pdata->onenand_setup(c->onenand.base, c->freq);
625 if (r < 0) {
 626			dev_err(&pdev->dev, "OneNAND platform setup failed: "
627 "%d\n", r);
628 goto err_iounmap;
629 }
630 c->setup = pdata->onenand_setup;
631 }
632
633 if (c->gpio_irq) {
634 if ((r = omap_request_gpio(c->gpio_irq)) < 0) {
635 dev_err(&pdev->dev, "Failed to request GPIO%d for "
636 "OneNAND\n", c->gpio_irq);
637 goto err_iounmap;
638 }
639 omap_set_gpio_direction(c->gpio_irq, 1);
640
641 if ((r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq),
642 omap2_onenand_interrupt, IRQF_TRIGGER_RISING,
643 pdev->dev.driver->name, c)) < 0)
644 goto err_release_gpio;
645 }
646
647 if (c->dma_channel >= 0) {
648 r = omap_request_dma(0, pdev->dev.driver->name,
649 omap2_onenand_dma_cb, (void *) c,
650 &c->dma_channel);
651 if (r == 0) {
652 omap_set_dma_write_mode(c->dma_channel,
653 OMAP_DMA_WRITE_NON_POSTED);
654 omap_set_dma_src_data_pack(c->dma_channel, 1);
655 omap_set_dma_src_burst_mode(c->dma_channel,
656 OMAP_DMA_DATA_BURST_8);
657 omap_set_dma_dest_data_pack(c->dma_channel, 1);
658 omap_set_dma_dest_burst_mode(c->dma_channel,
659 OMAP_DMA_DATA_BURST_8);
660 } else {
661 dev_info(&pdev->dev,
662 "failed to allocate DMA for OneNAND, "
663 "using PIO instead\n");
664 c->dma_channel = -1;
665 }
666 }
667
668 dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual "
669 "base %p\n", c->gpmc_cs, c->phys_base,
670 c->onenand.base);
671
672 c->pdev = pdev;
673 c->mtd.name = pdev->dev.bus_id;
674 c->mtd.priv = &c->onenand;
675 c->mtd.owner = THIS_MODULE;
676
677 if (c->dma_channel >= 0) {
678 struct onenand_chip *this = &c->onenand;
679
680 this->wait = omap2_onenand_wait;
681 if (cpu_is_omap34xx()) {
682 this->read_bufferram = omap3_onenand_read_bufferram;
683 this->write_bufferram = omap3_onenand_write_bufferram;
684 } else {
685 this->read_bufferram = omap2_onenand_read_bufferram;
686 this->write_bufferram = omap2_onenand_write_bufferram;
687 }
688 }
689
690 if ((r = onenand_scan(&c->mtd, 1)) < 0)
691 goto err_release_dma;
692
693 switch ((c->onenand.version_id >> 4) & 0xf) {
694 case 0:
695 c->freq = 40;
696 break;
697 case 1:
698 c->freq = 54;
699 break;
700 case 2:
701 c->freq = 66;
702 break;
703 case 3:
704 c->freq = 83;
705 break;
706 }
707
708#ifdef CONFIG_MTD_PARTITIONS
709 if (pdata->parts != NULL)
710 r = add_mtd_partitions(&c->mtd, pdata->parts,
711 pdata->nr_parts);
712 else
713#endif
714 r = add_mtd_device(&c->mtd);
715 if (r < 0)
716 goto err_release_onenand;
717
718 platform_set_drvdata(pdev, c);
719
720 return 0;
721
722err_release_onenand:
723 onenand_release(&c->mtd);
724err_release_dma:
725 if (c->dma_channel != -1)
726 omap_free_dma(c->dma_channel);
727 if (c->gpio_irq)
728 free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
729err_release_gpio:
730 if (c->gpio_irq)
731 omap_free_gpio(c->gpio_irq);
732err_iounmap:
733 iounmap(c->onenand.base);
734err_release_mem_region:
735 release_mem_region(c->phys_base, ONENAND_IO_SIZE);
736err_free_cs:
737 gpmc_cs_free(c->gpmc_cs);
738err_kfree:
739 kfree(c);
740
741 return r;
742}
743
744static int __devexit omap2_onenand_remove(struct platform_device *pdev)
745{
746 struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
747
748 BUG_ON(c == NULL);
749
750#ifdef CONFIG_MTD_PARTITIONS
751 if (c->parts)
752 del_mtd_partitions(&c->mtd);
753 else
754 del_mtd_device(&c->mtd);
755#else
756 del_mtd_device(&c->mtd);
757#endif
758
759 onenand_release(&c->mtd);
760 if (c->dma_channel != -1)
761 omap_free_dma(c->dma_channel);
762 omap2_onenand_shutdown(pdev);
763 platform_set_drvdata(pdev, NULL);
764 if (c->gpio_irq) {
765 free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
766 omap_free_gpio(c->gpio_irq);
767 }
768 iounmap(c->onenand.base);
769 release_mem_region(c->phys_base, ONENAND_IO_SIZE);
770 kfree(c);
771
772 return 0;
773}
774
775static struct platform_driver omap2_onenand_driver = {
776 .probe = omap2_onenand_probe,
777 .remove = omap2_onenand_remove,
778 .shutdown = omap2_onenand_shutdown,
779 .driver = {
780 .name = DRIVER_NAME,
781 .owner = THIS_MODULE,
782 },
783};
784
785static int __init omap2_onenand_init(void)
786{
787 printk(KERN_INFO "OneNAND driver initializing\n");
788 return platform_driver_register(&omap2_onenand_driver);
789}
790
791static void __exit omap2_onenand_exit(void)
792{
793 platform_driver_unregister(&omap2_onenand_driver);
794}
795
796module_init(omap2_onenand_init);
797module_exit(omap2_onenand_exit);
798
799MODULE_ALIAS(DRIVER_NAME);
800MODULE_LICENSE("GPL");
801MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
802MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
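On the board side, this driver is fed through omap_onenand_platform_data; a minimal sketch with placeholder values (the GPMC chip-select, interrupt GPIO and DMA channel are board-specific):

	static struct mtd_partition board_onenand_parts[] = {
		{ .name = "bootloader", .offset = 0, .size = 256 * 1024 },
		{ .name = "filesystem", .offset = MTDPART_OFS_APPEND,
		  .size = MTDPART_SIZ_FULL },
	};

	static struct omap_onenand_platform_data board_onenand_data = {
		.cs		= 0,	/* placeholder GPMC chip-select */
		.gpio_irq	= 65,	/* placeholder INT GPIO; 0 = polled */
		.dma_channel	= -1,	/* -1 disables DMA (probe clears gpio_irq too) */
		.parts		= board_onenand_parts,
		.nr_parts	= ARRAY_SIZE(board_onenand_parts),
	};

	static struct platform_device board_onenand_device = {
		.name	= "omap2-onenand",	/* matches DRIVER_NAME */
		.id	= -1,
		.dev	= { .platform_data = &board_onenand_data },
	};

onenand_setup is left NULL here; when a board provides it, probe() calls it once and omap2_onenand_rephase() reuses it to retime the GPMC interface.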
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 926cf3a4135d..90ed319f26e6 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
1794 return -EINVAL; 1794 return -EINVAL;
1795 } 1795 }
1796 1796
1797 instr->fail_addr = 0xffffffff; 1797 instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
1798 1798
1799 /* Grab the lock and see if the device is available */ 1799 /* Grab the lock and see if the device is available */
1800 onenand_get_device(mtd, FL_ERASING); 1800 onenand_get_device(mtd, FL_ERASING);
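The named constant replaces the bare 0xffffffff that onenand_erase() used to preload before attempting the erase. A hedged sketch of how a caller's erase-completion callback can now test it:

	/* sketch: distinguish "failed at a known block" from "address unknown" */
	static void erase_done(struct erase_info *instr)
	{
		if (instr->state == MTD_ERASE_FAILED &&
		    instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
			printk(KERN_ERR "erase failed at 0x%llx\n",
			       (unsigned long long)instr->fail_addr);
	}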
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index a5f3d60047d4..33a5d6ed6f18 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -321,8 +321,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
321 DEBUG(MTD_DEBUG_LEVEL1, 321 DEBUG(MTD_DEBUG_LEVEL1,
322 "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n", 322 "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
323 ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len, 323 ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
324 (ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) / 324 DIV_ROUND_UP(ssfdc->map_len, MAX_PHYS_BLK_PER_ZONE));
325 MAX_PHYS_BLK_PER_ZONE);
326 325
327 /* Set geometry */ 326 /* Set geometry */
328 ssfdc->heads = 16; 327 ssfdc->heads = 16;
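DIV_ROUND_UP() from linux/kernel.h expands to ((n) + (d) - 1) / (d), so the replacement is the same computation with the intent spelled out:

	/* DIV_ROUND_UP(n, d) == ((n) + (d) - 1) / (d); for example
	 * DIV_ROUND_UP(1024, 1024) == 1 and DIV_ROUND_UP(1025, 1024) == 2,
	 * exactly the rounding the removed open-coded expression performed. */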
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 03c759b4eeb5..b30a0b83d7f1 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -104,12 +104,9 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
104 struct ubi_volume_desc *desc; 104 struct ubi_volume_desc *desc;
105 int vol_id = iminor(inode) - 1, mode, ubi_num; 105 int vol_id = iminor(inode) - 1, mode, ubi_num;
106 106
107 lock_kernel();
108 ubi_num = ubi_major2num(imajor(inode)); 107 ubi_num = ubi_major2num(imajor(inode));
109 if (ubi_num < 0) { 108 if (ubi_num < 0)
110 unlock_kernel();
111 return ubi_num; 109 return ubi_num;
112 }
113 110
114 if (file->f_mode & FMODE_WRITE) 111 if (file->f_mode & FMODE_WRITE)
115 mode = UBI_READWRITE; 112 mode = UBI_READWRITE;
@@ -119,7 +116,6 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
119 dbg_gen("open volume %d, mode %d", vol_id, mode); 116 dbg_gen("open volume %d, mode %d", vol_id, mode);
120 117
121 desc = ubi_open_volume(ubi_num, vol_id, mode); 118 desc = ubi_open_volume(ubi_num, vol_id, mode);
122 unlock_kernel();
123 if (IS_ERR(desc)) 119 if (IS_ERR(desc))
124 return PTR_ERR(desc); 120 return PTR_ERR(desc);
125 121
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 967bb4406df9..4f2daa5bbecf 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -387,7 +387,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
387 pnum, vol_id, lnum, ec, sqnum, bitflips); 387 pnum, vol_id, lnum, ec, sqnum, bitflips);
388 388
389 sv = add_volume(si, vol_id, pnum, vid_hdr); 389 sv = add_volume(si, vol_id, pnum, vid_hdr);
390 if (IS_ERR(sv) < 0) 390 if (IS_ERR(sv))
391 return PTR_ERR(sv); 391 return PTR_ERR(sv);
392 392
393 if (si->max_sqnum < sqnum) 393 if (si->max_sqnum < sqnum)
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 217d0e111b2a..333c8941552f 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -244,8 +244,8 @@ static int vtbl_check(const struct ubi_device *ubi,
244 } 244 }
245 245
246 if (reserved_pebs > ubi->good_peb_count) { 246 if (reserved_pebs > ubi->good_peb_count) {
247 dbg_err("too large reserved_pebs, good PEBs %d", 247 dbg_err("too large reserved_pebs %d, good PEBs %d",
248 ubi->good_peb_count); 248 reserved_pebs, ubi->good_peb_count);
249 err = 9; 249 err = 9;
250 goto bad; 250 goto bad;
251 } 251 }
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2dbf5323..8b51e10b7783 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -563,7 +563,7 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
563 563
564 spin_unlock_irqrestore(&iommu->register_lock, flag); 564 spin_unlock_irqrestore(&iommu->register_lock, flag);
565 565
566 /* flush context entry will implictly flush write buffer */ 566 /* flush context entry will implicitly flush write buffer */
567 return 0; 567 return 0;
568} 568}
569 569
@@ -656,7 +656,7 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
656 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 656 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
657 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 657 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
658 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); 658 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
659 /* flush context entry will implictly flush write buffer */ 659 /* flush iotlb entry will implicitly flush write buffer */
660 return 0; 660 return 0;
661} 661}
662 662
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9884bba22de..dbe9f39f4436 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1358,11 +1358,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
1358 return 0; 1358 return 0;
1359 1359
1360err_out: 1360err_out:
1361 dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n", 1361 dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n",
1362 bar, 1362 bar,
1363 pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", 1363 pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
1364 (unsigned long long)pci_resource_start(pdev, bar), 1364 &pdev->resource[bar]);
1365 (unsigned long long)pci_resource_end(pdev, bar));
1366 return -EBUSY; 1365 return -EBUSY;
1367} 1366}
1368 1367
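[note] The PCI conversions in this series all lean on the %pR printk extension,
which formats a struct resource * directly and replaces pairs of
(unsigned long long) casts. A minimal before/after sketch, assuming a valid
pdev and BAR index:

	/* before: hand-written range format plus two casts */
	dev_warn(&pdev->dev, "BAR %d: can't reserve [%#llx-%#llx]\n", bar,
		 (unsigned long long)pci_resource_start(pdev, bar),
		 (unsigned long long)pci_resource_end(pdev, bar));

	/* after: let vsprintf format the resource */
	dev_warn(&pdev->dev, "BAR %d: can't reserve %pR\n", bar,
		 &pdev->resource[bar]);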
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dd9161a054e1..d3db8b249729 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -304,9 +304,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
304 } else { 304 } else {
305 res->start = l64; 305 res->start = l64;
306 res->end = l64 + sz64; 306 res->end = l64 + sz64;
307 printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n", 307 printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
308 pci_name(dev), pos, (unsigned long long)res->start, 308 pci_name(dev), pos, res);
309 (unsigned long long)res->end);
310 } 309 }
311 } else { 310 } else {
312 sz = pci_size(l, sz, mask); 311 sz = pci_size(l, sz, mask);
@@ -316,9 +315,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
316 315
317 res->start = l; 316 res->start = l;
318 res->end = l + sz; 317 res->end = l + sz;
319 printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev), 318 printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
320 pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio", 319 pci_name(dev), pos,
321 (unsigned long long)res->start, (unsigned long long)res->end); 320 (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
321 res);
322 } 322 }
323 323
324 out: 324 out:
@@ -389,9 +389,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
389 res->start = base; 389 res->start = base;
390 if (!res->end) 390 if (!res->end)
391 res->end = limit + 0xfff; 391 res->end = limit + 0xfff;
392 printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n", 392 printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
393 pci_name(dev), (unsigned long long) res->start, 393 pci_name(dev), res);
394 (unsigned long long) res->end);
395 } 394 }
396 395
397 res = child->resource[1]; 396 res = child->resource[1];
@@ -403,9 +402,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
403 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; 402 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
404 res->start = base; 403 res->start = base;
405 res->end = limit + 0xfffff; 404 res->end = limit + 0xfffff;
406 printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n", 405 printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
407 pci_name(dev), (unsigned long long) res->start, 406 pci_name(dev), res);
408 (unsigned long long) res->end);
409 } 407 }
410 408
411 res = child->resource[2]; 409 res = child->resource[2];
@@ -441,9 +439,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
441 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; 439 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
442 res->start = base; 440 res->start = base;
443 res->end = limit + 0xfffff; 441 res->end = limit + 0xfffff;
444 printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n", 442 printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
445 pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32", 443 pci_name(dev),
446 (unsigned long long) res->start, (unsigned long long) res->end); 444 (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
447 } 445 }
448} 446}
449 447
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index bd5c0e031398..1f5f6143f35c 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -21,7 +21,7 @@
21 * between the ROM and other resources, so enabling it may disable access 21 * between the ROM and other resources, so enabling it may disable access
22 * to MMIO registers or other card memory. 22 * to MMIO registers or other card memory.
23 */ 23 */
24static int pci_enable_rom(struct pci_dev *pdev) 24int pci_enable_rom(struct pci_dev *pdev)
25{ 25{
26 struct resource *res = pdev->resource + PCI_ROM_RESOURCE; 26 struct resource *res = pdev->resource + PCI_ROM_RESOURCE;
27 struct pci_bus_region region; 27 struct pci_bus_region region;
@@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev)
45 * Disable ROM decoding on a PCI device by turning off the last bit in the 45 * Disable ROM decoding on a PCI device by turning off the last bit in the
46 * ROM BAR. 46 * ROM BAR.
47 */ 47 */
48static void pci_disable_rom(struct pci_dev *pdev) 48void pci_disable_rom(struct pci_dev *pdev)
49{ 49{
50 u32 rom_addr; 50 u32 rom_addr;
51 pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr); 51 pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
@@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev)
260 260
261EXPORT_SYMBOL(pci_map_rom); 261EXPORT_SYMBOL(pci_map_rom);
262EXPORT_SYMBOL(pci_unmap_rom); 262EXPORT_SYMBOL(pci_unmap_rom);
263EXPORT_SYMBOL_GPL(pci_enable_rom);
264EXPORT_SYMBOL_GPL(pci_disable_rom);
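[note] pci_enable_rom() and pci_disable_rom() lose their static qualifier and gain
GPL exports, so ROM decoding can now be toggled from driver modules. A hedged usage
sketch (return-value handling simplified):

	if (pci_enable_rom(pdev))
		return -EIO;		/* could not enable ROM decode */
	/* ... access the option ROM ... */
	pci_disable_rom(pdev);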
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d5e2106760f8..471a429d7a20 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -356,10 +356,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
356 order = __ffs(align) - 20; 356 order = __ffs(align) - 20;
357 if (order > 11) { 357 if (order > 11) {
358 dev_warn(&dev->dev, "BAR %d bad alignment %llx: " 358 dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
359 "%#016llx-%#016llx\n", i, 359 "%pR\n", i, (unsigned long long)align, r);
360 (unsigned long long)align,
361 (unsigned long long)r->start,
362 (unsigned long long)r->end);
363 r->flags = 0; 360 r->flags = 0;
364 continue; 361 continue;
365 } 362 }
@@ -539,11 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus)
539 if (!res) 536 if (!res)
540 continue; 537 continue;
541 538
542 printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n", 539 printk(KERN_INFO "bus: %02x index %x %s: %pR\n",
543 bus->number, i, 540 bus->number, i,
544 (res->flags & IORESOURCE_IO) ? "io port" : "mmio", 541 (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res);
545 (unsigned long long) res->start,
546 (unsigned long long) res->end);
547 } 542 }
548} 543}
549 544
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 1a5fc83c71b3..d4b5c690eaa7 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -49,10 +49,8 @@ void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
49 49
50 pcibios_resource_to_bus(dev, &region, res); 50 pcibios_resource_to_bus(dev, &region, res);
51 51
52 dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] " 52 dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] "
53 "flags %#lx\n", resno, 53 "flags %#lx\n", resno, res,
54 (unsigned long long)res->start,
55 (unsigned long long)res->end,
56 (unsigned long long)region.start, 54 (unsigned long long)region.start,
57 (unsigned long long)region.end, 55 (unsigned long long)region.end,
58 (unsigned long)res->flags); 56 (unsigned long)res->flags);
@@ -114,13 +112,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
114 err = insert_resource(root, res); 112 err = insert_resource(root, res);
115 113
116 if (err) { 114 if (err) {
117 dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n", 115 dev_err(&dev->dev, "BAR %d: %s of %s %pR\n",
118 resource, 116 resource,
119 root ? "address space collision on" : 117 root ? "address space collision on" :
120 "no parent found for", 118 "no parent found for",
121 dtype, 119 dtype, res);
122 (unsigned long long)res->start,
123 (unsigned long long)res->end);
124 } 120 }
125 121
126 return err; 122 return err;
@@ -139,9 +135,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
139 align = resource_alignment(res); 135 align = resource_alignment(res);
140 if (!align) { 136 if (!align) {
141 dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " 137 dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
142 "alignment) [%#llx-%#llx] flags %#lx\n", 138 "alignment) %pR flags %#lx\n",
143 resno, (unsigned long long)res->start, 139 resno, res, res->flags);
144 (unsigned long long)res->end, res->flags);
145 return -EINVAL; 140 return -EINVAL;
146 } 141 }
147 142
@@ -162,11 +157,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
162 } 157 }
163 158
164 if (ret) { 159 if (ret) {
165 dev_err(&dev->dev, "BAR %d: can't allocate %s resource " 160 dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
166 "[%#llx-%#llx]\n", resno, 161 resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
167 res->flags & IORESOURCE_IO ? "I/O" : "mem",
168 (unsigned long long)res->start,
169 (unsigned long long)res->end);
170 } else { 162 } else {
171 res->flags &= ~IORESOURCE_STARTALIGN; 163 res->flags &= ~IORESOURCE_STARTALIGN;
172 if (resno < PCI_BRIDGE_RESOURCES) 164 if (resno < PCI_BRIDGE_RESOURCES)
@@ -202,11 +194,8 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
202 } 194 }
203 195
204 if (ret) { 196 if (ret) {
205 dev_err(&dev->dev, "BAR %d: can't allocate %s resource " 197 dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
206 "[%#llx-%#llx\n]", resno, 198 resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
207 res->flags & IORESOURCE_IO ? "I/O" : "mem",
208 (unsigned long long)res->start,
209 (unsigned long long)res->end);
210 } else if (resno < PCI_BRIDGE_RESOURCES) { 199 } else if (resno < PCI_BRIDGE_RESOURCES) {
211 pci_update_resource(dev, res, resno); 200 pci_update_resource(dev, res, resno);
212 } 201 }
@@ -237,9 +226,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
237 r_align = resource_alignment(r); 226 r_align = resource_alignment(r);
238 if (!r_align) { 227 if (!r_align) {
239 dev_warn(&dev->dev, "BAR %d: bogus alignment " 228 dev_warn(&dev->dev, "BAR %d: bogus alignment "
240 "[%#llx-%#llx] flags %#lx\n", 229 "%pR flags %#lx\n",
241 i, (unsigned long long)r->start, 230 i, r, r->flags);
242 (unsigned long long)r->end, r->flags);
243 continue; 231 continue;
244 } 232 }
245 for (list = head; ; list = list->next) { 233 for (list = head; ; list = list->next) {
@@ -287,9 +275,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
287 275
288 if (!r->parent) { 276 if (!r->parent) {
289 dev_err(&dev->dev, "device not available because of " 277 dev_err(&dev->dev, "device not available because of "
290 "BAR %d [%#llx-%#llx] collisions\n", i, 278 "BAR %d %pR collisions\n", i, r);
291 (unsigned long long) r->start,
292 (unsigned long long) r->end);
293 return -EINVAL; 279 return -EINVAL;
294 } 280 }
295 281
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index fe2aeb11939b..23ae8460f5c1 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -30,7 +30,7 @@
30 30
31#define POWER_SUPPLY_ATTR(_name) \ 31#define POWER_SUPPLY_ATTR(_name) \
32{ \ 32{ \
33 .attr = { .name = #_name, .mode = 0444, .owner = THIS_MODULE }, \ 33 .attr = { .name = #_name, .mode = 0444 }, \
34 .show = power_supply_show_property, \ 34 .show = power_supply_show_property, \
35 .store = NULL, \ 35 .store = NULL, \
36} 36}
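[note] The dropped ".owner = THIS_MODULE" reflects that sysfs no longer reads the
owner field of struct attribute; module lifetime is handled via kobject/sysfs
reference counting instead. A static read-only attribute now needs only a name and
mode; a generic sketch (names hypothetical, not from this driver):

	static struct kobj_attribute foo_attr = {
		.attr = { .name = "foo", .mode = 0444 },
		.show = foo_show,
	};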
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 6f2f90ebb020..06848b254d57 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -915,6 +915,22 @@ int ps3av_video_mute(int mute)
915 915
916EXPORT_SYMBOL_GPL(ps3av_video_mute); 916EXPORT_SYMBOL_GPL(ps3av_video_mute);
917 917
918/* mute analog output only */
919int ps3av_audio_mute_analog(int mute)
920{
921 int i, res;
922
923 for (i = 0; i < ps3av->av_hw_conf.num_of_avmulti; i++) {
924 res = ps3av_cmd_av_audio_mute(1,
925 &ps3av->av_port[i + ps3av->av_hw_conf.num_of_hdmi],
926 mute);
927 if (res < 0)
928 return -1;
929 }
930 return 0;
931}
932EXPORT_SYMBOL_GPL(ps3av_audio_mute_analog);
933
918int ps3av_audio_mute(int mute) 934int ps3av_audio_mute(int mute)
919{ 935{
920 return ps3av_set_audio_mute(mute ? PS3AV_CMD_MUTE_ON 936 return ps3av_set_audio_mute(mute ? PS3AV_CMD_MUTE_ON
diff --git a/drivers/ps3/ps3av_cmd.c b/drivers/ps3/ps3av_cmd.c
index 7f880c26122f..11eb50318fec 100644
--- a/drivers/ps3/ps3av_cmd.c
+++ b/drivers/ps3/ps3av_cmd.c
@@ -660,9 +660,10 @@ u32 ps3av_cmd_set_av_audio_param(void *p, u32 port,
660} 660}
661 661
662/* default cs val */ 662/* default cs val */
663static const u8 ps3av_mode_cs_info[] = { 663u8 ps3av_mode_cs_info[] = {
664 0x00, 0x09, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00 664 0x00, 0x09, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00
665}; 665};
666EXPORT_SYMBOL_GPL(ps3av_mode_cs_info);
666 667
667#define CS_44 0x00 668#define CS_44 0x00
668#define CS_48 0x02 669#define CS_48 0x02
@@ -677,7 +678,7 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
677 u32 ch, u32 fs, u32 word_bits, u32 format, 678 u32 ch, u32 fs, u32 word_bits, u32 format,
678 u32 source) 679 u32 source)
679{ 680{
680 int spdif_through, spdif_bitstream; 681 int spdif_through;
681 int i; 682 int i;
682 683
683 if (!(ch | fs | format | word_bits | source)) { 684 if (!(ch | fs | format | word_bits | source)) {
@@ -687,7 +688,6 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
687 format = PS3AV_CMD_AUDIO_FORMAT_PCM; 688 format = PS3AV_CMD_AUDIO_FORMAT_PCM;
688 source = PS3AV_CMD_AUDIO_SOURCE_SERIAL; 689 source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
689 } 690 }
690 spdif_through = spdif_bitstream = 0; /* XXX not supported */
691 691
692 /* audio mode */ 692 /* audio mode */
693 memset(audio, 0, sizeof(*audio)); 693 memset(audio, 0, sizeof(*audio));
@@ -777,16 +777,17 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
777 break; 777 break;
778 } 778 }
779 779
780 /* non-audio bit */
781 spdif_through = audio->audio_cs_info[0] & 0x02;
782
780 /* pass through setting */ 783 /* pass through setting */
781 if (spdif_through && 784 if (spdif_through &&
782 (avport == PS3AV_CMD_AVPORT_SPDIF_0 || 785 (avport == PS3AV_CMD_AVPORT_SPDIF_0 ||
783 avport == PS3AV_CMD_AVPORT_SPDIF_1)) { 786 avport == PS3AV_CMD_AVPORT_SPDIF_1 ||
787 avport == PS3AV_CMD_AVPORT_HDMI_0 ||
788 avport == PS3AV_CMD_AVPORT_HDMI_1)) {
784 audio->audio_word_bits = PS3AV_CMD_AUDIO_WORD_BITS_16; 789 audio->audio_word_bits = PS3AV_CMD_AUDIO_WORD_BITS_16;
785 audio->audio_source = PS3AV_CMD_AUDIO_SOURCE_SPDIF; 790 audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
786 if (spdif_bitstream) {
787 audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
788 audio->audio_cs_info[0] |= CS_BIT;
789 }
790 } 791 }
791} 792}
792 793
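[note] The rewritten pass-through logic derives the mode from channel status
instead of the hard-coded "spdif_through = spdif_bitstream = 0". In IEC 60958
channel status, bit 1 of byte 0 (mask 0x02) is the non-audio flag, hence:

	/* non-audio bit set => compressed bitstream pass-through */
	spdif_through = audio->audio_cs_info[0] & 0x02;

and the HDMI avports are accepted alongside S/PDIF as pass-through targets.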
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 37082616482b..b5bf93706913 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -53,21 +53,21 @@ static void at91_rtc_decodetime(unsigned int timereg, unsigned int calreg,
53 } while ((time != at91_sys_read(timereg)) || 53 } while ((time != at91_sys_read(timereg)) ||
54 (date != at91_sys_read(calreg))); 54 (date != at91_sys_read(calreg)));
55 55
56 tm->tm_sec = BCD2BIN((time & AT91_RTC_SEC) >> 0); 56 tm->tm_sec = bcd2bin((time & AT91_RTC_SEC) >> 0);
57 tm->tm_min = BCD2BIN((time & AT91_RTC_MIN) >> 8); 57 tm->tm_min = bcd2bin((time & AT91_RTC_MIN) >> 8);
58 tm->tm_hour = BCD2BIN((time & AT91_RTC_HOUR) >> 16); 58 tm->tm_hour = bcd2bin((time & AT91_RTC_HOUR) >> 16);
59 59
60 /* 60 /*
61 * The Calendar Alarm register does not have a field for 61 * The Calendar Alarm register does not have a field for
62 * the year - so these will return an invalid value. When an 62 * the year - so these will return an invalid value. When an
63 * alarm is set, at91_alarm_year will store the current year. 63 * alarm is set, at91_alarm_year will store the current year.
64 */ 64 */
65 tm->tm_year = BCD2BIN(date & AT91_RTC_CENT) * 100; /* century */ 65 tm->tm_year = bcd2bin(date & AT91_RTC_CENT) * 100; /* century */
66 tm->tm_year += BCD2BIN((date & AT91_RTC_YEAR) >> 8); /* year */ 66 tm->tm_year += bcd2bin((date & AT91_RTC_YEAR) >> 8); /* year */
67 67
68 tm->tm_wday = BCD2BIN((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */ 68 tm->tm_wday = bcd2bin((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */
69 tm->tm_mon = BCD2BIN((date & AT91_RTC_MONTH) >> 16) - 1; 69 tm->tm_mon = bcd2bin((date & AT91_RTC_MONTH) >> 16) - 1;
70 tm->tm_mday = BCD2BIN((date & AT91_RTC_DATE) >> 24); 70 tm->tm_mday = bcd2bin((date & AT91_RTC_DATE) >> 24);
71} 71}
72 72
73/* 73/*
@@ -106,16 +106,16 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
106 at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD); 106 at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD);
107 107
108 at91_sys_write(AT91_RTC_TIMR, 108 at91_sys_write(AT91_RTC_TIMR,
109 BIN2BCD(tm->tm_sec) << 0 109 bin2bcd(tm->tm_sec) << 0
110 | BIN2BCD(tm->tm_min) << 8 110 | bin2bcd(tm->tm_min) << 8
111 | BIN2BCD(tm->tm_hour) << 16); 111 | bin2bcd(tm->tm_hour) << 16);
112 112
113 at91_sys_write(AT91_RTC_CALR, 113 at91_sys_write(AT91_RTC_CALR,
114 BIN2BCD((tm->tm_year + 1900) / 100) /* century */ 114 bin2bcd((tm->tm_year + 1900) / 100) /* century */
115 | BIN2BCD(tm->tm_year % 100) << 8 /* year */ 115 | bin2bcd(tm->tm_year % 100) << 8 /* year */
116 | BIN2BCD(tm->tm_mon + 1) << 16 /* tm_mon starts at zero */ 116 | bin2bcd(tm->tm_mon + 1) << 16 /* tm_mon starts at zero */
117 | BIN2BCD(tm->tm_wday + 1) << 21 /* day of the week [0-6], Sunday=0 */ 117 | bin2bcd(tm->tm_wday + 1) << 21 /* day of the week [0-6], Sunday=0 */
118 | BIN2BCD(tm->tm_mday) << 24); 118 | bin2bcd(tm->tm_mday) << 24);
119 119
120 /* Restart Time/Calendar */ 120 /* Restart Time/Calendar */
121 cr = at91_sys_read(AT91_RTC_CR); 121 cr = at91_sys_read(AT91_RTC_CR);
@@ -162,13 +162,13 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
162 162
163 at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM); 163 at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
164 at91_sys_write(AT91_RTC_TIMALR, 164 at91_sys_write(AT91_RTC_TIMALR,
165 BIN2BCD(tm.tm_sec) << 0 165 bin2bcd(tm.tm_sec) << 0
166 | BIN2BCD(tm.tm_min) << 8 166 | bin2bcd(tm.tm_min) << 8
167 | BIN2BCD(tm.tm_hour) << 16 167 | bin2bcd(tm.tm_hour) << 16
168 | AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN); 168 | AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN);
169 at91_sys_write(AT91_RTC_CALALR, 169 at91_sys_write(AT91_RTC_CALALR,
170 BIN2BCD(tm.tm_mon + 1) << 16 /* tm_mon starts at zero */ 170 bin2bcd(tm.tm_mon + 1) << 16 /* tm_mon starts at zero */
171 | BIN2BCD(tm.tm_mday) << 24 171 | bin2bcd(tm.tm_mday) << 24
172 | AT91_RTC_DATEEN | AT91_RTC_MTHEN); 172 | AT91_RTC_DATEEN | AT91_RTC_MTHEN);
173 173
174 if (alrm->enabled) { 174 if (alrm->enabled) {
diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c
index 189a018bdf34..d00a274df8fc 100644
--- a/drivers/rtc/rtc-bq4802.c
+++ b/drivers/rtc/rtc-bq4802.c
@@ -71,14 +71,14 @@ static int bq4802_read_time(struct device *dev, struct rtc_time *tm)
71 71
72 spin_unlock_irqrestore(&p->lock, flags); 72 spin_unlock_irqrestore(&p->lock, flags);
73 73
74 BCD_TO_BIN(tm->tm_sec); 74 tm->tm_sec = bcd2bin(tm->tm_sec);
75 BCD_TO_BIN(tm->tm_min); 75 tm->tm_min = bcd2bin(tm->tm_min);
76 BCD_TO_BIN(tm->tm_hour); 76 tm->tm_hour = bcd2bin(tm->tm_hour);
77 BCD_TO_BIN(tm->tm_mday); 77 tm->tm_mday = bcd2bin(tm->tm_mday);
78 BCD_TO_BIN(tm->tm_mon); 78 tm->tm_mon = bcd2bin(tm->tm_mon);
79 BCD_TO_BIN(tm->tm_year); 79 tm->tm_year = bcd2bin(tm->tm_year);
80 BCD_TO_BIN(tm->tm_wday); 80 tm->tm_wday = bcd2bin(tm->tm_wday);
81 BCD_TO_BIN(century); 81 century = bcd2bin(century);
82 82
83 tm->tm_year += (century * 100); 83 tm->tm_year += (century * 100);
84 tm->tm_year -= 1900; 84 tm->tm_year -= 1900;
@@ -106,13 +106,13 @@ static int bq4802_set_time(struct device *dev, struct rtc_time *tm)
106 min = tm->tm_min; 106 min = tm->tm_min;
107 sec = tm->tm_sec; 107 sec = tm->tm_sec;
108 108
109 BIN_TO_BCD(sec); 109 sec = bin2bcd(sec);
110 BIN_TO_BCD(min); 110 min = bin2bcd(min);
111 BIN_TO_BCD(hrs); 111 hrs = bin2bcd(hrs);
112 BIN_TO_BCD(day); 112 day = bin2bcd(day);
113 BIN_TO_BCD(mon); 113 mon = bin2bcd(mon);
114 BIN_TO_BCD(yrs); 114 yrs = bin2bcd(yrs);
115 BIN_TO_BCD(century); 115 century = bin2bcd(century);
116 116
117 spin_lock_irqsave(&p->lock, flags); 117 spin_lock_irqsave(&p->lock, flags);
118 118
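[note] These RTC conversions are mechanical: the old expression macros
BCD2BIN()/BIN2BCD() and the in-place BCD_TO_BIN()/BIN_TO_BCD() variants (which
modified their argument) give way to the <linux/bcd.h> helpers, which take and
return values:

	unsigned bcd2bin(unsigned char val)	/* e.g. 0x59 -> 59 */
	{
		return (val & 0x0f) + (val >> 4) * 10;
	}

	unsigned char bin2bcd(unsigned val)	/* e.g. 59 -> 0x59 */
	{
		return ((val / 10) << 4) + val % 10;
	}

Hence "BCD_TO_BIN(tm->tm_sec)" becomes the explicit assignment
"tm->tm_sec = bcd2bin(tm->tm_sec)".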
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 963ad0b6a4e9..5549231179a2 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -143,6 +143,43 @@ static inline int hpet_unregister_irq_handler(irq_handler_t handler)
143 143
144/*----------------------------------------------------------------*/ 144/*----------------------------------------------------------------*/
145 145
146#ifdef RTC_PORT
147
148/* Most newer x86 systems have two register banks, the first used
149 * for RTC and NVRAM and the second only for NVRAM. Caller must
150 * own rtc_lock ... and we won't worry about access during NMI.
151 */
152#define can_bank2 true
153
154static inline unsigned char cmos_read_bank2(unsigned char addr)
155{
156 outb(addr, RTC_PORT(2));
157 return inb(RTC_PORT(3));
158}
159
160static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
161{
162 outb(addr, RTC_PORT(2));
163 outb(val, RTC_PORT(2));
164}
165
166#else
167
168#define can_bank2 false
169
170static inline unsigned char cmos_read_bank2(unsigned char addr)
171{
172 return 0;
173}
174
175static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
176{
177}
178
179#endif
180
181/*----------------------------------------------------------------*/
182
146static int cmos_read_time(struct device *dev, struct rtc_time *t) 183static int cmos_read_time(struct device *dev, struct rtc_time *t)
147{ 184{
148 /* REVISIT: if the clock has a "century" register, use 185 /* REVISIT: if the clock has a "century" register, use
@@ -203,26 +240,26 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
203 /* REVISIT this assumes PC style usage: always BCD */ 240 /* REVISIT this assumes PC style usage: always BCD */
204 241
205 if (((unsigned)t->time.tm_sec) < 0x60) 242 if (((unsigned)t->time.tm_sec) < 0x60)
206 t->time.tm_sec = BCD2BIN(t->time.tm_sec); 243 t->time.tm_sec = bcd2bin(t->time.tm_sec);
207 else 244 else
208 t->time.tm_sec = -1; 245 t->time.tm_sec = -1;
209 if (((unsigned)t->time.tm_min) < 0x60) 246 if (((unsigned)t->time.tm_min) < 0x60)
210 t->time.tm_min = BCD2BIN(t->time.tm_min); 247 t->time.tm_min = bcd2bin(t->time.tm_min);
211 else 248 else
212 t->time.tm_min = -1; 249 t->time.tm_min = -1;
213 if (((unsigned)t->time.tm_hour) < 0x24) 250 if (((unsigned)t->time.tm_hour) < 0x24)
214 t->time.tm_hour = BCD2BIN(t->time.tm_hour); 251 t->time.tm_hour = bcd2bin(t->time.tm_hour);
215 else 252 else
216 t->time.tm_hour = -1; 253 t->time.tm_hour = -1;
217 254
218 if (cmos->day_alrm) { 255 if (cmos->day_alrm) {
219 if (((unsigned)t->time.tm_mday) <= 0x31) 256 if (((unsigned)t->time.tm_mday) <= 0x31)
220 t->time.tm_mday = BCD2BIN(t->time.tm_mday); 257 t->time.tm_mday = bcd2bin(t->time.tm_mday);
221 else 258 else
222 t->time.tm_mday = -1; 259 t->time.tm_mday = -1;
223 if (cmos->mon_alrm) { 260 if (cmos->mon_alrm) {
224 if (((unsigned)t->time.tm_mon) <= 0x12) 261 if (((unsigned)t->time.tm_mon) <= 0x12)
225 t->time.tm_mon = BCD2BIN(t->time.tm_mon) - 1; 262 t->time.tm_mon = bcd2bin(t->time.tm_mon) - 1;
226 else 263 else
227 t->time.tm_mon = -1; 264 t->time.tm_mon = -1;
228 } 265 }
@@ -294,19 +331,19 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
294 /* Writing 0xff means "don't care" or "match all". */ 331 /* Writing 0xff means "don't care" or "match all". */
295 332
296 mon = t->time.tm_mon + 1; 333 mon = t->time.tm_mon + 1;
297 mon = (mon <= 12) ? BIN2BCD(mon) : 0xff; 334 mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
298 335
299 mday = t->time.tm_mday; 336 mday = t->time.tm_mday;
300 mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff; 337 mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
301 338
302 hrs = t->time.tm_hour; 339 hrs = t->time.tm_hour;
303 hrs = (hrs < 24) ? BIN2BCD(hrs) : 0xff; 340 hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
304 341
305 min = t->time.tm_min; 342 min = t->time.tm_min;
306 min = (min < 60) ? BIN2BCD(min) : 0xff; 343 min = (min < 60) ? bin2bcd(min) : 0xff;
307 344
308 sec = t->time.tm_sec; 345 sec = t->time.tm_sec;
309 sec = (sec < 60) ? BIN2BCD(sec) : 0xff; 346 sec = (sec < 60) ? bin2bcd(sec) : 0xff;
310 347
311 spin_lock_irq(&rtc_lock); 348 spin_lock_irq(&rtc_lock);
312 349
@@ -491,12 +528,21 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
491 528
492 if (unlikely(off >= attr->size)) 529 if (unlikely(off >= attr->size))
493 return 0; 530 return 0;
531 if (unlikely(off < 0))
532 return -EINVAL;
494 if ((off + count) > attr->size) 533 if ((off + count) > attr->size)
495 count = attr->size - off; 534 count = attr->size - off;
496 535
536 off += NVRAM_OFFSET;
497 spin_lock_irq(&rtc_lock); 537 spin_lock_irq(&rtc_lock);
498 for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) 538 for (retval = 0; count; count--, off++, retval++) {
499 *buf++ = CMOS_READ(off); 539 if (off < 128)
540 *buf++ = CMOS_READ(off);
541 else if (can_bank2)
542 *buf++ = cmos_read_bank2(off);
543 else
544 break;
545 }
500 spin_unlock_irq(&rtc_lock); 546 spin_unlock_irq(&rtc_lock);
501 547
502 return retval; 548 return retval;
@@ -512,6 +558,8 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
512 cmos = dev_get_drvdata(container_of(kobj, struct device, kobj)); 558 cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
513 if (unlikely(off >= attr->size)) 559 if (unlikely(off >= attr->size))
514 return -EFBIG; 560 return -EFBIG;
561 if (unlikely(off < 0))
562 return -EINVAL;
515 if ((off + count) > attr->size) 563 if ((off + count) > attr->size)
516 count = attr->size - off; 564 count = attr->size - off;
517 565
@@ -520,15 +568,20 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
520 * here. If userspace is smart enough to know what fields of 568 * here. If userspace is smart enough to know what fields of
521 * NVRAM to update, updating checksums is also part of its job. 569 * NVRAM to update, updating checksums is also part of its job.
522 */ 570 */
571 off += NVRAM_OFFSET;
523 spin_lock_irq(&rtc_lock); 572 spin_lock_irq(&rtc_lock);
524 for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) { 573 for (retval = 0; count; count--, off++, retval++) {
525 /* don't trash RTC registers */ 574 /* don't trash RTC registers */
526 if (off == cmos->day_alrm 575 if (off == cmos->day_alrm
527 || off == cmos->mon_alrm 576 || off == cmos->mon_alrm
528 || off == cmos->century) 577 || off == cmos->century)
529 buf++; 578 buf++;
530 else 579 else if (off < 128)
531 CMOS_WRITE(*buf++, off); 580 CMOS_WRITE(*buf++, off);
581 else if (can_bank2)
582 cmos_write_bank2(*buf++, off);
583 else
584 break;
532 } 585 }
533 spin_unlock_irq(&rtc_lock); 586 spin_unlock_irq(&rtc_lock);
534 587
@@ -539,7 +592,6 @@ static struct bin_attribute nvram = {
539 .attr = { 592 .attr = {
540 .name = "nvram", 593 .name = "nvram",
541 .mode = S_IRUGO | S_IWUSR, 594 .mode = S_IRUGO | S_IWUSR,
542 .owner = THIS_MODULE,
543 }, 595 },
544 596
545 .read = cmos_nvram_read, 597 .read = cmos_nvram_read,
@@ -631,8 +683,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
631 683
632 /* Heuristic to deduce NVRAM size ... do what the legacy NVRAM 684 /* Heuristic to deduce NVRAM size ... do what the legacy NVRAM
633 * driver did, but don't reject unknown configs. Old hardware 685 * driver did, but don't reject unknown configs. Old hardware
634 * won't address 128 bytes, and for now we ignore the way newer 686 * won't address 128 bytes. Newer chips have multiple banks,
635 * chips can address 256 bytes (using two more i/o ports). 687 * though they may not be listed in one I/O resource.
636 */ 688 */
637#if defined(CONFIG_ATARI) 689#if defined(CONFIG_ATARI)
638 address_space = 64; 690 address_space = 64;
@@ -642,6 +694,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
642#warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes. 694#warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
643 address_space = 128; 695 address_space = 128;
644#endif 696#endif
697 if (can_bank2 && ports->end > (ports->start + 1))
698 address_space = 256;
645 699
646 /* For ACPI systems extension info comes from the FADT. On others, 700 /* For ACPI systems extension info comes from the FADT. On others,
647 * board specific setup provides it as appropriate. Systems where 701 * board specific setup provides it as appropriate. Systems where
@@ -740,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
740 goto cleanup2; 794 goto cleanup2;
741 } 795 }
742 796
743 pr_info("%s: alarms up to one %s%s%s\n", 797 pr_info("%s: alarms up to one %s%s, %zd bytes nvram, %s irqs\n",
744 cmos_rtc.rtc->dev.bus_id, 798 cmos_rtc.rtc->dev.bus_id,
745 is_valid_irq(rtc_irq) 799 is_valid_irq(rtc_irq)
746 ? (cmos_rtc.mon_alrm 800 ? (cmos_rtc.mon_alrm
@@ -749,6 +803,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
749 ? "month" : "day")) 803 ? "month" : "day"))
750 : "no", 804 : "no",
751 cmos_rtc.century ? ", y3k" : "", 805 cmos_rtc.century ? ", y3k" : "",
806 nvram.size,
752 is_hpet_enabled() ? ", hpet irqs" : ""); 807 is_hpet_enabled() ? ", hpet irqs" : "");
753 808
754 return 0; 809 return 0;
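[note] With the bank-2 helpers in place, rtc-cmos addresses a 256-byte NVRAM as two
banks: offsets below 128 go through the classic index/data port pair, higher
offsets through RTC_PORT(2)/RTC_PORT(3) when the platform provides them. A
condensed sketch of the dispatch the read/write loops now perform (helper name
hypothetical):

	static unsigned char nvram_read_any(unsigned char off)
	{
		if (off < 128)
			return CMOS_READ(off);		/* bank 1 */
		if (can_bank2)
			return cmos_read_bank2(off);	/* bank 2 */
		return 0;				/* not addressable */
	}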
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 0b17770b032b..9a234a4ec06d 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -86,19 +86,19 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
86 ds1216_switch_ds_to_clock(priv->ioaddr); 86 ds1216_switch_ds_to_clock(priv->ioaddr);
87 ds1216_read(priv->ioaddr, (u8 *)&regs); 87 ds1216_read(priv->ioaddr, (u8 *)&regs);
88 88
89 tm->tm_sec = BCD2BIN(regs.sec); 89 tm->tm_sec = bcd2bin(regs.sec);
90 tm->tm_min = BCD2BIN(regs.min); 90 tm->tm_min = bcd2bin(regs.min);
91 if (regs.hour & DS1216_HOUR_1224) { 91 if (regs.hour & DS1216_HOUR_1224) {
92 /* AM/PM mode */ 92 /* AM/PM mode */
93 tm->tm_hour = BCD2BIN(regs.hour & 0x1f); 93 tm->tm_hour = bcd2bin(regs.hour & 0x1f);
94 if (regs.hour & DS1216_HOUR_AMPM) 94 if (regs.hour & DS1216_HOUR_AMPM)
95 tm->tm_hour += 12; 95 tm->tm_hour += 12;
96 } else 96 } else
97 tm->tm_hour = BCD2BIN(regs.hour & 0x3f); 97 tm->tm_hour = bcd2bin(regs.hour & 0x3f);
98 tm->tm_wday = (regs.wday & 7) - 1; 98 tm->tm_wday = (regs.wday & 7) - 1;
99 tm->tm_mday = BCD2BIN(regs.mday & 0x3f); 99 tm->tm_mday = bcd2bin(regs.mday & 0x3f);
100 tm->tm_mon = BCD2BIN(regs.month & 0x1f); 100 tm->tm_mon = bcd2bin(regs.month & 0x1f);
101 tm->tm_year = BCD2BIN(regs.year); 101 tm->tm_year = bcd2bin(regs.year);
102 if (tm->tm_year < 70) 102 if (tm->tm_year < 70)
103 tm->tm_year += 100; 103 tm->tm_year += 100;
104 return 0; 104 return 0;
@@ -114,19 +114,19 @@ static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
114 ds1216_read(priv->ioaddr, (u8 *)&regs); 114 ds1216_read(priv->ioaddr, (u8 *)&regs);
115 115
116 regs.tsec = 0; /* clear 0.1 and 0.01 seconds */ 116 regs.tsec = 0; /* clear 0.1 and 0.01 seconds */
117 regs.sec = BIN2BCD(tm->tm_sec); 117 regs.sec = bin2bcd(tm->tm_sec);
118 regs.min = BIN2BCD(tm->tm_min); 118 regs.min = bin2bcd(tm->tm_min);
119 regs.hour &= DS1216_HOUR_1224; 119 regs.hour &= DS1216_HOUR_1224;
120 if (regs.hour && tm->tm_hour > 12) { 120 if (regs.hour && tm->tm_hour > 12) {
121 regs.hour |= DS1216_HOUR_AMPM; 121 regs.hour |= DS1216_HOUR_AMPM;
122 tm->tm_hour -= 12; 122 tm->tm_hour -= 12;
123 } 123 }
124 regs.hour |= BIN2BCD(tm->tm_hour); 124 regs.hour |= bin2bcd(tm->tm_hour);
125 regs.wday &= ~7; 125 regs.wday &= ~7;
126 regs.wday |= tm->tm_wday; 126 regs.wday |= tm->tm_wday;
127 regs.mday = BIN2BCD(tm->tm_mday); 127 regs.mday = bin2bcd(tm->tm_mday);
128 regs.month = BIN2BCD(tm->tm_mon); 128 regs.month = bin2bcd(tm->tm_mon);
129 regs.year = BIN2BCD(tm->tm_year % 100); 129 regs.year = bin2bcd(tm->tm_year % 100);
130 130
131 ds1216_switch_ds_to_clock(priv->ioaddr); 131 ds1216_switch_ds_to_clock(priv->ioaddr);
132 ds1216_write(priv->ioaddr, (u8 *)&regs); 132 ds1216_write(priv->ioaddr, (u8 *)&regs);
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 8f4e96bb229a..184556620778 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -107,13 +107,13 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm)
107 107
108 spin_lock_irq(&rtc->lock); 108 spin_lock_irq(&rtc->lock);
109 109
110 tm->tm_sec = BCD2BIN(ds1302_readbyte(RTC_ADDR_SEC)); 110 tm->tm_sec = bcd2bin(ds1302_readbyte(RTC_ADDR_SEC));
111 tm->tm_min = BCD2BIN(ds1302_readbyte(RTC_ADDR_MIN)); 111 tm->tm_min = bcd2bin(ds1302_readbyte(RTC_ADDR_MIN));
112 tm->tm_hour = BCD2BIN(ds1302_readbyte(RTC_ADDR_HOUR)); 112 tm->tm_hour = bcd2bin(ds1302_readbyte(RTC_ADDR_HOUR));
113 tm->tm_wday = BCD2BIN(ds1302_readbyte(RTC_ADDR_DAY)); 113 tm->tm_wday = bcd2bin(ds1302_readbyte(RTC_ADDR_DAY));
114 tm->tm_mday = BCD2BIN(ds1302_readbyte(RTC_ADDR_DATE)); 114 tm->tm_mday = bcd2bin(ds1302_readbyte(RTC_ADDR_DATE));
115 tm->tm_mon = BCD2BIN(ds1302_readbyte(RTC_ADDR_MON)) - 1; 115 tm->tm_mon = bcd2bin(ds1302_readbyte(RTC_ADDR_MON)) - 1;
116 tm->tm_year = BCD2BIN(ds1302_readbyte(RTC_ADDR_YEAR)); 116 tm->tm_year = bcd2bin(ds1302_readbyte(RTC_ADDR_YEAR));
117 117
118 if (tm->tm_year < 70) 118 if (tm->tm_year < 70)
119 tm->tm_year += 100; 119 tm->tm_year += 100;
@@ -141,13 +141,13 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm)
141 /* Stop RTC */ 141 /* Stop RTC */
142 ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80); 142 ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80);
143 143
144 ds1302_writebyte(RTC_ADDR_SEC, BIN2BCD(tm->tm_sec)); 144 ds1302_writebyte(RTC_ADDR_SEC, bin2bcd(tm->tm_sec));
145 ds1302_writebyte(RTC_ADDR_MIN, BIN2BCD(tm->tm_min)); 145 ds1302_writebyte(RTC_ADDR_MIN, bin2bcd(tm->tm_min));
146 ds1302_writebyte(RTC_ADDR_HOUR, BIN2BCD(tm->tm_hour)); 146 ds1302_writebyte(RTC_ADDR_HOUR, bin2bcd(tm->tm_hour));
147 ds1302_writebyte(RTC_ADDR_DAY, BIN2BCD(tm->tm_wday)); 147 ds1302_writebyte(RTC_ADDR_DAY, bin2bcd(tm->tm_wday));
148 ds1302_writebyte(RTC_ADDR_DATE, BIN2BCD(tm->tm_mday)); 148 ds1302_writebyte(RTC_ADDR_DATE, bin2bcd(tm->tm_mday));
149 ds1302_writebyte(RTC_ADDR_MON, BIN2BCD(tm->tm_mon + 1)); 149 ds1302_writebyte(RTC_ADDR_MON, bin2bcd(tm->tm_mon + 1));
150 ds1302_writebyte(RTC_ADDR_YEAR, BIN2BCD(tm->tm_year % 100)); 150 ds1302_writebyte(RTC_ADDR_YEAR, bin2bcd(tm->tm_year % 100));
151 151
152 /* Start RTC */ 152 /* Start RTC */
153 ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80); 153 ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80);
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index b91d02a3ace9..fc372df6534b 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -114,10 +114,10 @@ static unsigned bcd2hour(u8 bcd)
114 hour = 12; 114 hour = 12;
115 bcd &= ~DS1305_HR_PM; 115 bcd &= ~DS1305_HR_PM;
116 } 116 }
117 hour += BCD2BIN(bcd); 117 hour += bcd2bin(bcd);
118 return hour - 1; 118 return hour - 1;
119 } 119 }
120 return BCD2BIN(bcd); 120 return bcd2bin(bcd);
121} 121}
122 122
123static u8 hour2bcd(bool hr12, int hour) 123static u8 hour2bcd(bool hr12, int hour)
@@ -125,11 +125,11 @@ static u8 hour2bcd(bool hr12, int hour)
125 if (hr12) { 125 if (hr12) {
126 hour++; 126 hour++;
127 if (hour <= 12) 127 if (hour <= 12)
128 return DS1305_HR_12 | BIN2BCD(hour); 128 return DS1305_HR_12 | bin2bcd(hour);
129 hour -= 12; 129 hour -= 12;
130 return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour); 130 return DS1305_HR_12 | DS1305_HR_PM | bin2bcd(hour);
131 } 131 }
132 return BIN2BCD(hour); 132 return bin2bcd(hour);
133} 133}
134 134
135/*----------------------------------------------------------------------*/ 135/*----------------------------------------------------------------------*/
@@ -206,13 +206,13 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
206 buf[4], buf[5], buf[6]); 206 buf[4], buf[5], buf[6]);
207 207
208 /* Decode the registers */ 208 /* Decode the registers */
209 time->tm_sec = BCD2BIN(buf[DS1305_SEC]); 209 time->tm_sec = bcd2bin(buf[DS1305_SEC]);
210 time->tm_min = BCD2BIN(buf[DS1305_MIN]); 210 time->tm_min = bcd2bin(buf[DS1305_MIN]);
211 time->tm_hour = bcd2hour(buf[DS1305_HOUR]); 211 time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
212 time->tm_wday = buf[DS1305_WDAY] - 1; 212 time->tm_wday = buf[DS1305_WDAY] - 1;
213 time->tm_mday = BCD2BIN(buf[DS1305_MDAY]); 213 time->tm_mday = bcd2bin(buf[DS1305_MDAY]);
214 time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1; 214 time->tm_mon = bcd2bin(buf[DS1305_MON]) - 1;
215 time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100; 215 time->tm_year = bcd2bin(buf[DS1305_YEAR]) + 100;
216 216
217 dev_vdbg(dev, "%s secs=%d, mins=%d, " 217 dev_vdbg(dev, "%s secs=%d, mins=%d, "
218 "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n", 218 "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -239,13 +239,13 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time)
239 /* Write registers starting at the first time/date address. */ 239 /* Write registers starting at the first time/date address. */
240 *bp++ = DS1305_WRITE | DS1305_SEC; 240 *bp++ = DS1305_WRITE | DS1305_SEC;
241 241
242 *bp++ = BIN2BCD(time->tm_sec); 242 *bp++ = bin2bcd(time->tm_sec);
243 *bp++ = BIN2BCD(time->tm_min); 243 *bp++ = bin2bcd(time->tm_min);
244 *bp++ = hour2bcd(ds1305->hr12, time->tm_hour); 244 *bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
245 *bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1; 245 *bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
246 *bp++ = BIN2BCD(time->tm_mday); 246 *bp++ = bin2bcd(time->tm_mday);
247 *bp++ = BIN2BCD(time->tm_mon + 1); 247 *bp++ = bin2bcd(time->tm_mon + 1);
248 *bp++ = BIN2BCD(time->tm_year - 100); 248 *bp++ = bin2bcd(time->tm_year - 100);
249 249
250 dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n", 250 dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
251 "write", buf[1], buf[2], buf[3], 251 "write", buf[1], buf[2], buf[3],
@@ -329,8 +329,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
329 * fill in the rest ... and also handle rollover to tomorrow when 329 * fill in the rest ... and also handle rollover to tomorrow when
330 * that's needed. 330 * that's needed.
331 */ 331 */
332 alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]); 332 alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
333 alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]); 333 alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
334 alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]); 334 alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
335 alm->time.tm_mday = -1; 335 alm->time.tm_mday = -1;
336 alm->time.tm_mon = -1; 336 alm->time.tm_mon = -1;
@@ -387,8 +387,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
387 387
388 /* write alarm */ 388 /* write alarm */
389 buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC); 389 buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
390 buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec); 390 buf[1 + DS1305_SEC] = bin2bcd(alm->time.tm_sec);
391 buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min); 391 buf[1 + DS1305_MIN] = bin2bcd(alm->time.tm_min);
392 buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour); 392 buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
393 buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE; 393 buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
394 394
@@ -606,7 +606,6 @@ ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
606static struct bin_attribute nvram = { 606static struct bin_attribute nvram = {
607 .attr.name = "nvram", 607 .attr.name = "nvram",
608 .attr.mode = S_IRUGO | S_IWUSR, 608 .attr.mode = S_IRUGO | S_IWUSR,
609 .attr.owner = THIS_MODULE,
610 .read = ds1305_nvram_read, 609 .read = ds1305_nvram_read,
611 .write = ds1305_nvram_write, 610 .write = ds1305_nvram_write,
612 .size = DS1305_NVRAM_LEN, 611 .size = DS1305_NVRAM_LEN,
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 4fcf0734a6ef..162330b9d1dc 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -222,17 +222,17 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
222 ds1307->regs[4], ds1307->regs[5], 222 ds1307->regs[4], ds1307->regs[5],
223 ds1307->regs[6]); 223 ds1307->regs[6]);
224 224
225 t->tm_sec = BCD2BIN(ds1307->regs[DS1307_REG_SECS] & 0x7f); 225 t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
226 t->tm_min = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f); 226 t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
227 tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f; 227 tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
228 t->tm_hour = BCD2BIN(tmp); 228 t->tm_hour = bcd2bin(tmp);
229 t->tm_wday = BCD2BIN(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1; 229 t->tm_wday = bcd2bin(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
230 t->tm_mday = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f); 230 t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
231 tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f; 231 tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
232 t->tm_mon = BCD2BIN(tmp) - 1; 232 t->tm_mon = bcd2bin(tmp) - 1;
233 233
234 /* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */ 234 /* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
235 t->tm_year = BCD2BIN(ds1307->regs[DS1307_REG_YEAR]) + 100; 235 t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
236 236
237 dev_dbg(dev, "%s secs=%d, mins=%d, " 237 dev_dbg(dev, "%s secs=%d, mins=%d, "
238 "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n", 238 "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -258,16 +258,16 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
258 t->tm_mon, t->tm_year, t->tm_wday); 258 t->tm_mon, t->tm_year, t->tm_wday);
259 259
260 *buf++ = 0; /* first register addr */ 260 *buf++ = 0; /* first register addr */
261 buf[DS1307_REG_SECS] = BIN2BCD(t->tm_sec); 261 buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
262 buf[DS1307_REG_MIN] = BIN2BCD(t->tm_min); 262 buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
263 buf[DS1307_REG_HOUR] = BIN2BCD(t->tm_hour); 263 buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
264 buf[DS1307_REG_WDAY] = BIN2BCD(t->tm_wday + 1); 264 buf[DS1307_REG_WDAY] = bin2bcd(t->tm_wday + 1);
265 buf[DS1307_REG_MDAY] = BIN2BCD(t->tm_mday); 265 buf[DS1307_REG_MDAY] = bin2bcd(t->tm_mday);
266 buf[DS1307_REG_MONTH] = BIN2BCD(t->tm_mon + 1); 266 buf[DS1307_REG_MONTH] = bin2bcd(t->tm_mon + 1);
267 267
268 /* assume 20YY not 19YY */ 268 /* assume 20YY not 19YY */
269 tmp = t->tm_year - 100; 269 tmp = t->tm_year - 100;
270 buf[DS1307_REG_YEAR] = BIN2BCD(tmp); 270 buf[DS1307_REG_YEAR] = bin2bcd(tmp);
271 271
272 switch (ds1307->type) { 272 switch (ds1307->type) {
273 case ds_1337: 273 case ds_1337:
@@ -551,7 +551,6 @@ static struct bin_attribute nvram = {
551 .attr = { 551 .attr = {
552 .name = "nvram", 552 .name = "nvram",
553 .mode = S_IRUGO | S_IWUSR, 553 .mode = S_IRUGO | S_IWUSR,
554 .owner = THIS_MODULE,
555 }, 554 },
556 555
557 .read = ds1307_nvram_read, 556 .read = ds1307_nvram_read,
@@ -709,18 +708,18 @@ read_rtc:
709 } 708 }
710 709
711 tmp = ds1307->regs[DS1307_REG_SECS]; 710 tmp = ds1307->regs[DS1307_REG_SECS];
712 tmp = BCD2BIN(tmp & 0x7f); 711 tmp = bcd2bin(tmp & 0x7f);
713 if (tmp > 60) 712 if (tmp > 60)
714 goto exit_bad; 713 goto exit_bad;
715 tmp = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f); 714 tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
716 if (tmp > 60) 715 if (tmp > 60)
717 goto exit_bad; 716 goto exit_bad;
718 717
719 tmp = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f); 718 tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
720 if (tmp == 0 || tmp > 31) 719 if (tmp == 0 || tmp > 31)
721 goto exit_bad; 720 goto exit_bad;
722 721
723 tmp = BCD2BIN(ds1307->regs[DS1307_REG_MONTH] & 0x1f); 722 tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
724 if (tmp == 0 || tmp > 12) 723 if (tmp == 0 || tmp > 12)
725 goto exit_bad; 724 goto exit_bad;
726 725
@@ -739,14 +738,14 @@ read_rtc:
739 /* Be sure we're in 24 hour mode. Multi-master systems 738 /* Be sure we're in 24 hour mode. Multi-master systems
740 * take note... 739 * take note...
741 */ 740 */
742 tmp = BCD2BIN(tmp & 0x1f); 741 tmp = bcd2bin(tmp & 0x1f);
743 if (tmp == 12) 742 if (tmp == 12)
744 tmp = 0; 743 tmp = 0;
745 if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM) 744 if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
746 tmp += 12; 745 tmp += 12;
747 i2c_smbus_write_byte_data(client, 746 i2c_smbus_write_byte_data(client,
748 DS1307_REG_HOUR, 747 DS1307_REG_HOUR,
749 BIN2BCD(tmp)); 748 bin2bcd(tmp));
750 } 749 }
751 750
752 ds1307->rtc = rtc_device_register(client->name, &client->dev, 751 ds1307->rtc = rtc_device_register(client->name, &client->dev,
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 86981d34fbb6..25caada78398 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -153,8 +153,8 @@ ds1511_wdog_set(unsigned long deciseconds)
153 /* 153 /*
154 * set the wdog values in the wdog registers 154 * set the wdog values in the wdog registers
155 */ 155 */
156 rtc_write(BIN2BCD(deciseconds % 100), DS1511_WD_MSEC); 156 rtc_write(bin2bcd(deciseconds % 100), DS1511_WD_MSEC);
157 rtc_write(BIN2BCD(deciseconds / 100), DS1511_WD_SEC); 157 rtc_write(bin2bcd(deciseconds / 100), DS1511_WD_SEC);
158 /* 158 /*
159 * set wdog enable and wdog 'steering' bit to issue a reset 159 * set wdog enable and wdog 'steering' bit to issue a reset
160 */ 160 */
@@ -220,13 +220,13 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
220 /* 220 /*
221 * each register is a different number of valid bits 221 * each register is a different number of valid bits
222 */ 222 */
223 sec = BIN2BCD(sec) & 0x7f; 223 sec = bin2bcd(sec) & 0x7f;
224 min = BIN2BCD(min) & 0x7f; 224 min = bin2bcd(min) & 0x7f;
225 hrs = BIN2BCD(hrs) & 0x3f; 225 hrs = bin2bcd(hrs) & 0x3f;
226 day = BIN2BCD(day) & 0x3f; 226 day = bin2bcd(day) & 0x3f;
227 mon = BIN2BCD(mon) & 0x1f; 227 mon = bin2bcd(mon) & 0x1f;
228 yrs = BIN2BCD(yrs) & 0xff; 228 yrs = bin2bcd(yrs) & 0xff;
229 cen = BIN2BCD(cen) & 0xff; 229 cen = bin2bcd(cen) & 0xff;
230 230
231 spin_lock_irqsave(&ds1511_lock, flags); 231 spin_lock_irqsave(&ds1511_lock, flags);
232 rtc_disable_update(); 232 rtc_disable_update();
@@ -264,14 +264,14 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
264 rtc_enable_update(); 264 rtc_enable_update();
265 spin_unlock_irqrestore(&ds1511_lock, flags); 265 spin_unlock_irqrestore(&ds1511_lock, flags);
266 266
267 rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec); 267 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
268 rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min); 268 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
269 rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour); 269 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
270 rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday); 270 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
271 rtc_tm->tm_wday = BCD2BIN(rtc_tm->tm_wday); 271 rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
272 rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon); 272 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
273 rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year); 273 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
274 century = BCD2BIN(century) * 100; 274 century = bcd2bin(century) * 100;
275 275
276 /* 276 /*
277 * Account for differences between how the RTC uses the values 277 * Account for differences between how the RTC uses the values
@@ -304,16 +304,16 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata)
304 304
305 spin_lock_irqsave(&pdata->rtc->irq_lock, flags); 305 spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
306 rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ? 306 rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
307 0x80 : BIN2BCD(pdata->alrm_mday) & 0x3f, 307 0x80 : bin2bcd(pdata->alrm_mday) & 0x3f,
308 RTC_ALARM_DATE); 308 RTC_ALARM_DATE);
309 rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ? 309 rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
310 0x80 : BIN2BCD(pdata->alrm_hour) & 0x3f, 310 0x80 : bin2bcd(pdata->alrm_hour) & 0x3f,
311 RTC_ALARM_HOUR); 311 RTC_ALARM_HOUR);
312 rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ? 312 rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
313 0x80 : BIN2BCD(pdata->alrm_min) & 0x7f, 313 0x80 : bin2bcd(pdata->alrm_min) & 0x7f,
314 RTC_ALARM_MIN); 314 RTC_ALARM_MIN);
315 rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ? 315 rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
316 0x80 : BIN2BCD(pdata->alrm_sec) & 0x7f, 316 0x80 : bin2bcd(pdata->alrm_sec) & 0x7f,
317 RTC_ALARM_SEC); 317 RTC_ALARM_SEC);
318 rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD); 318 rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD);
319 rtc_read(RTC_CMD1); /* clear interrupts */ 319 rtc_read(RTC_CMD1); /* clear interrupts */
@@ -481,7 +481,6 @@ static struct bin_attribute ds1511_nvram_attr = {
481 .attr = { 481 .attr = {
482 .name = "nvram", 482 .name = "nvram",
483 .mode = S_IRUGO | S_IWUGO, 483 .mode = S_IRUGO | S_IWUGO,
484 .owner = THIS_MODULE,
485 }, 484 },
486 .size = DS1511_RAM_MAX, 485 .size = DS1511_RAM_MAX,
487 .read = ds1511_nvram_read, 486 .read = ds1511_nvram_read,
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 4ef59285b489..b9475cd20210 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -78,17 +78,17 @@ static int ds1553_rtc_set_time(struct device *dev, struct rtc_time *tm)
78 void __iomem *ioaddr = pdata->ioaddr; 78 void __iomem *ioaddr = pdata->ioaddr;
79 u8 century; 79 u8 century;
80 80
81 century = BIN2BCD((tm->tm_year + 1900) / 100); 81 century = bin2bcd((tm->tm_year + 1900) / 100);
82 82
83 writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL); 83 writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL);
84 84
85 writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR); 85 writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
86 writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH); 86 writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
87 writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY); 87 writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
88 writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE); 88 writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
89 writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS); 89 writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
90 writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES); 90 writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
91 writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS); 91 writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
92 92
93 /* RTC_CENTURY and RTC_CONTROL share same register */ 93 /* RTC_CENTURY and RTC_CONTROL share same register */
94 writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY); 94 writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -118,14 +118,14 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
118 year = readb(ioaddr + RTC_YEAR); 118 year = readb(ioaddr + RTC_YEAR);
119 century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK; 119 century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
120 writeb(0, ioaddr + RTC_CONTROL); 120 writeb(0, ioaddr + RTC_CONTROL);
121 tm->tm_sec = BCD2BIN(second); 121 tm->tm_sec = bcd2bin(second);
122 tm->tm_min = BCD2BIN(minute); 122 tm->tm_min = bcd2bin(minute);
123 tm->tm_hour = BCD2BIN(hour); 123 tm->tm_hour = bcd2bin(hour);
124 tm->tm_mday = BCD2BIN(day); 124 tm->tm_mday = bcd2bin(day);
125 tm->tm_wday = BCD2BIN(week); 125 tm->tm_wday = bcd2bin(week);
126 tm->tm_mon = BCD2BIN(month) - 1; 126 tm->tm_mon = bcd2bin(month) - 1;
127 /* year is 1900 + tm->tm_year */ 127 /* year is 1900 + tm->tm_year */
128 tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900; 128 tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
129 129
130 if (rtc_valid_tm(tm) < 0) { 130 if (rtc_valid_tm(tm) < 0) {
131 dev_err(dev, "retrieved date/time is not valid.\n"); 131 dev_err(dev, "retrieved date/time is not valid.\n");
@@ -141,16 +141,16 @@ static void ds1553_rtc_update_alarm(struct rtc_plat_data *pdata)
141 141
142 spin_lock_irqsave(&pdata->rtc->irq_lock, flags); 142 spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
143 writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ? 143 writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
144 0x80 : BIN2BCD(pdata->alrm_mday), 144 0x80 : bin2bcd(pdata->alrm_mday),
145 ioaddr + RTC_DATE_ALARM); 145 ioaddr + RTC_DATE_ALARM);
146 writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ? 146 writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
147 0x80 : BIN2BCD(pdata->alrm_hour), 147 0x80 : bin2bcd(pdata->alrm_hour),
148 ioaddr + RTC_HOURS_ALARM); 148 ioaddr + RTC_HOURS_ALARM);
149 writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ? 149 writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
150 0x80 : BIN2BCD(pdata->alrm_min), 150 0x80 : bin2bcd(pdata->alrm_min),
151 ioaddr + RTC_MINUTES_ALARM); 151 ioaddr + RTC_MINUTES_ALARM);
152 writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ? 152 writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
153 0x80 : BIN2BCD(pdata->alrm_sec), 153 0x80 : bin2bcd(pdata->alrm_sec),
154 ioaddr + RTC_SECONDS_ALARM); 154 ioaddr + RTC_SECONDS_ALARM);
155 writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS); 155 writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS);
156 readb(ioaddr + RTC_FLAGS); /* clear interrupts */ 156 readb(ioaddr + RTC_FLAGS); /* clear interrupts */
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 24d35ede2dbf..8bc8501bffc8 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -66,17 +66,17 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
66 void __iomem *ioaddr = pdata->ioaddr_rtc; 66 void __iomem *ioaddr = pdata->ioaddr_rtc;
67 u8 century; 67 u8 century;
68 68
69 century = BIN2BCD((tm->tm_year + 1900) / 100); 69 century = bin2bcd((tm->tm_year + 1900) / 100);
70 70
71 writeb(RTC_WRITE, ioaddr + RTC_CONTROL); 71 writeb(RTC_WRITE, ioaddr + RTC_CONTROL);
72 72
73 writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR); 73 writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
74 writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH); 74 writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
75 writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY); 75 writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
76 writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE); 76 writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
77 writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS); 77 writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
78 writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES); 78 writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
79 writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS); 79 writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
80 80
81 /* RTC_CENTURY and RTC_CONTROL share same register */ 81 /* RTC_CENTURY and RTC_CONTROL share same register */
82 writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY); 82 writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -106,14 +106,14 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	year = readb(ioaddr + RTC_YEAR);
 	century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
 	writeb(0, ioaddr + RTC_CONTROL);
-	tm->tm_sec = BCD2BIN(second);
-	tm->tm_min = BCD2BIN(minute);
-	tm->tm_hour = BCD2BIN(hour);
-	tm->tm_mday = BCD2BIN(day);
-	tm->tm_wday = BCD2BIN(week);
-	tm->tm_mon = BCD2BIN(month) - 1;
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(week);
+	tm->tm_mon = bcd2bin(month) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
 	if (rtc_valid_tm(tm) < 0) {
 		dev_err(dev, "retrieved date/time is not valid.\n");
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index abfdfcbaa059..3a7be11cc6b9 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -131,17 +131,17 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
 		fm3130->regs[0xc], fm3130->regs[0xd],
 		fm3130->regs[0xe]);
 
-	t->tm_sec = BCD2BIN(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
-	t->tm_min = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	t->tm_sec = bcd2bin(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
+	t->tm_min = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	tmp = fm3130->regs[FM3130_RTC_HOURS] & 0x3f;
-	t->tm_hour = BCD2BIN(tmp);
-	t->tm_wday = BCD2BIN(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
-	t->tm_mday = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	t->tm_hour = bcd2bin(tmp);
+	t->tm_wday = bcd2bin(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
+	t->tm_mday = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	tmp = fm3130->regs[FM3130_RTC_MONTHS] & 0x1f;
-	t->tm_mon = BCD2BIN(tmp) - 1;
+	t->tm_mon = bcd2bin(tmp) - 1;
 
 	/* assume 20YY not 19YY, and ignore CF bit */
-	t->tm_year = BCD2BIN(fm3130->regs[FM3130_RTC_YEARS]) + 100;
+	t->tm_year = bcd2bin(fm3130->regs[FM3130_RTC_YEARS]) + 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -167,16 +167,16 @@ static int fm3130_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_mon, t->tm_year, t->tm_wday);
 
 	/* first register addr */
-	buf[FM3130_RTC_SECONDS] = BIN2BCD(t->tm_sec);
-	buf[FM3130_RTC_MINUTES] = BIN2BCD(t->tm_min);
-	buf[FM3130_RTC_HOURS] = BIN2BCD(t->tm_hour);
-	buf[FM3130_RTC_DAY] = BIN2BCD(t->tm_wday + 1);
-	buf[FM3130_RTC_DATE] = BIN2BCD(t->tm_mday);
-	buf[FM3130_RTC_MONTHS] = BIN2BCD(t->tm_mon + 1);
+	buf[FM3130_RTC_SECONDS] = bin2bcd(t->tm_sec);
+	buf[FM3130_RTC_MINUTES] = bin2bcd(t->tm_min);
+	buf[FM3130_RTC_HOURS] = bin2bcd(t->tm_hour);
+	buf[FM3130_RTC_DAY] = bin2bcd(t->tm_wday + 1);
+	buf[FM3130_RTC_DATE] = bin2bcd(t->tm_mday);
+	buf[FM3130_RTC_MONTHS] = bin2bcd(t->tm_mon + 1);
 
 	/* assume 20YY not 19YY */
 	tmp = t->tm_year - 100;
-	buf[FM3130_RTC_YEARS] = BIN2BCD(tmp);
+	buf[FM3130_RTC_YEARS] = bin2bcd(tmp);
 
 	dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x"
 		"%02x %02x %02x %02x %02x %02x %02x %02x\n",
@@ -222,11 +222,11 @@ static int fm3130_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		fm3130->regs[FM3130_ALARM_MONTHS]);
 
 
-	tm->tm_sec = BCD2BIN(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
-	tm->tm_min = BCD2BIN(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
-	tm->tm_hour = BCD2BIN(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
-	tm->tm_mday = BCD2BIN(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
-	tm->tm_mon = BCD2BIN(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
+	tm->tm_sec = bcd2bin(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
+	tm->tm_min = bcd2bin(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
+	tm->tm_hour = bcd2bin(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
+	tm->tm_mday = bcd2bin(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
+	tm->tm_mon = bcd2bin(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
 	if (tm->tm_mon > 0)
 		tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
@@ -252,23 +252,23 @@ static int fm3130_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	if (tm->tm_sec != -1)
 		fm3130->regs[FM3130_ALARM_SECONDS] =
-			BIN2BCD(tm->tm_sec) | 0x80;
+			bin2bcd(tm->tm_sec) | 0x80;
 
 	if (tm->tm_min != -1)
 		fm3130->regs[FM3130_ALARM_MINUTES] =
-			BIN2BCD(tm->tm_min) | 0x80;
+			bin2bcd(tm->tm_min) | 0x80;
 
 	if (tm->tm_hour != -1)
 		fm3130->regs[FM3130_ALARM_HOURS] =
-			BIN2BCD(tm->tm_hour) | 0x80;
+			bin2bcd(tm->tm_hour) | 0x80;
 
 	if (tm->tm_mday != -1)
 		fm3130->regs[FM3130_ALARM_DATE] =
-			BIN2BCD(tm->tm_mday) | 0x80;
+			bin2bcd(tm->tm_mday) | 0x80;
 
 	if (tm->tm_mon != -1)
 		fm3130->regs[FM3130_ALARM_MONTHS] =
-			BIN2BCD(tm->tm_mon + 1) | 0x80;
+			bin2bcd(tm->tm_mon + 1) | 0x80;
 
 	dev_dbg(dev, "alarm write %02x %02x %02x %02x %02x\n",
 		fm3130->regs[FM3130_ALARM_SECONDS],
@@ -414,18 +414,18 @@ static int __devinit fm3130_probe(struct i2c_client *client,
 	/* TODO */
 	/* TODO need to sanity check alarm */
 	tmp = fm3130->regs[FM3130_RTC_SECONDS];
-	tmp = BCD2BIN(tmp & 0x7f);
+	tmp = bcd2bin(tmp & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
 	if (tmp > 60)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
 	if (tmp == 0 || tmp > 31)
 		goto exit_bad;
 
-	tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
+	tmp = bcd2bin(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
 	if (tmp == 0 || tmp > 12)
 		goto exit_bad;
 
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index a81adab6e515..2cd77ab8fc66 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -259,26 +259,26 @@ isl1208_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 		return sr;
 	}
 
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MN]);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SC]);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MN]);
 
 	/* HR field has a more complex interpretation */
 	{
 		const u8 _hr = regs[ISL1208_REG_HR];
 		if (_hr & ISL1208_REG_HR_MIL) /* 24h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x3f);
+			tm->tm_hour = bcd2bin(_hr & 0x3f);
 		else {
 			/* 12h format */
-			tm->tm_hour = BCD2BIN(_hr & 0x1f);
+			tm->tm_hour = bcd2bin(_hr & 0x1f);
 			if (_hr & ISL1208_REG_HR_PM) /* PM flag set */
 				tm->tm_hour += 12;
 		}
 	}
 
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
-	tm->tm_year = BCD2BIN(regs[ISL1208_REG_YR]) + 100;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DW]);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
+	tm->tm_year = bcd2bin(regs[ISL1208_REG_YR]) + 100;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DW]);
 
 	return 0;
 }
@@ -305,13 +305,13 @@ isl1208_i2c_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alarm)
 	}
 
 	/* MSB of each alarm register is an enable bit */
-	tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_min = BCD2BIN(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
-	tm->tm_hour = BCD2BIN(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_sec = bcd2bin(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_min = bcd2bin(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
+	tm->tm_hour = bcd2bin(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
 	tm->tm_mon =
-	    BCD2BIN(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
-	tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
+	    bcd2bin(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
+	tm->tm_wday = bcd2bin(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
 
 	return 0;
 }
@@ -328,15 +328,15 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	int sr;
 	u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
 
-	regs[ISL1208_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[ISL1208_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[ISL1208_REG_HR] = BIN2BCD(tm->tm_hour) | ISL1208_REG_HR_MIL;
+	regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
+	regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
 
-	regs[ISL1208_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[ISL1208_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[ISL1208_REG_YR] = BIN2BCD(tm->tm_year - 100);
+	regs[ISL1208_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[ISL1208_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[ISL1208_REG_YR] = bin2bcd(tm->tm_year - 100);
 
-	regs[ISL1208_REG_DW] = BIN2BCD(tm->tm_wday & 7);
+	regs[ISL1208_REG_DW] = bin2bcd(tm->tm_wday & 7);
 
 	sr = isl1208_i2c_get_sr(client);
 	if (sr < 0) {
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 470fb2d29545..893f7dece239 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -110,15 +110,15 @@ static int m41t80_get_datetime(struct i2c_client *client,
 		return -EIO;
 	}
 
-	tm->tm_sec = BCD2BIN(buf[M41T80_REG_SEC] & 0x7f);
-	tm->tm_min = BCD2BIN(buf[M41T80_REG_MIN] & 0x7f);
-	tm->tm_hour = BCD2BIN(buf[M41T80_REG_HOUR] & 0x3f);
-	tm->tm_mday = BCD2BIN(buf[M41T80_REG_DAY] & 0x3f);
+	tm->tm_sec = bcd2bin(buf[M41T80_REG_SEC] & 0x7f);
+	tm->tm_min = bcd2bin(buf[M41T80_REG_MIN] & 0x7f);
+	tm->tm_hour = bcd2bin(buf[M41T80_REG_HOUR] & 0x3f);
+	tm->tm_mday = bcd2bin(buf[M41T80_REG_DAY] & 0x3f);
 	tm->tm_wday = buf[M41T80_REG_WDAY] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[M41T80_REG_MON] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(buf[M41T80_REG_MON] & 0x1f) - 1;
 
 	/* assume 20YY not 19YY, and ignore the Century Bit */
-	tm->tm_year = BCD2BIN(buf[M41T80_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
 	return 0;
 }
 
@@ -161,19 +161,19 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	/* Merge time-data and register flags into buf[0..7] */
 	buf[M41T80_REG_SSEC] = 0;
 	buf[M41T80_REG_SEC] =
-		BIN2BCD(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
+		bin2bcd(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
 	buf[M41T80_REG_MIN] =
-		BIN2BCD(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
+		bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
 	buf[M41T80_REG_HOUR] =
-		BIN2BCD(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
+		bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
 	buf[M41T80_REG_WDAY] =
 		(tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07);
 	buf[M41T80_REG_DAY] =
-		BIN2BCD(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
+		bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
 	buf[M41T80_REG_MON] =
-		BIN2BCD(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+		bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
 	/* assume 20YY not 19YY */
-	buf[M41T80_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	if (i2c_transfer(client->adapter, msgs, 1) != 1) {
 		dev_err(&client->dev, "write error\n");
@@ -288,15 +288,15 @@ static int m41t80_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	wbuf[0] = M41T80_REG_ALARM_MON; /* offset into rtc's regs */
 	reg[M41T80_REG_ALARM_SEC] |= t->time.tm_sec >= 0 ?
-		BIN2BCD(t->time.tm_sec) : 0x80;
+		bin2bcd(t->time.tm_sec) : 0x80;
 	reg[M41T80_REG_ALARM_MIN] |= t->time.tm_min >= 0 ?
-		BIN2BCD(t->time.tm_min) : 0x80;
+		bin2bcd(t->time.tm_min) : 0x80;
 	reg[M41T80_REG_ALARM_HOUR] |= t->time.tm_hour >= 0 ?
-		BIN2BCD(t->time.tm_hour) : 0x80;
+		bin2bcd(t->time.tm_hour) : 0x80;
 	reg[M41T80_REG_ALARM_DAY] |= t->time.tm_mday >= 0 ?
-		BIN2BCD(t->time.tm_mday) : 0x80;
+		bin2bcd(t->time.tm_mday) : 0x80;
 	if (t->time.tm_mon >= 0)
-		reg[M41T80_REG_ALARM_MON] |= BIN2BCD(t->time.tm_mon + 1);
+		reg[M41T80_REG_ALARM_MON] |= bin2bcd(t->time.tm_mon + 1);
 	else
 		reg[M41T80_REG_ALARM_DAY] |= 0x40;
 
@@ -347,15 +347,15 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
 	if (!(reg[M41T80_REG_ALARM_SEC] & 0x80))
-		t->time.tm_sec = BCD2BIN(reg[M41T80_REG_ALARM_SEC] & 0x7f);
+		t->time.tm_sec = bcd2bin(reg[M41T80_REG_ALARM_SEC] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_MIN] & 0x80))
-		t->time.tm_min = BCD2BIN(reg[M41T80_REG_ALARM_MIN] & 0x7f);
+		t->time.tm_min = bcd2bin(reg[M41T80_REG_ALARM_MIN] & 0x7f);
 	if (!(reg[M41T80_REG_ALARM_HOUR] & 0x80))
-		t->time.tm_hour = BCD2BIN(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
+		t->time.tm_hour = bcd2bin(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x80))
-		t->time.tm_mday = BCD2BIN(reg[M41T80_REG_ALARM_DAY] & 0x3f);
+		t->time.tm_mday = bcd2bin(reg[M41T80_REG_ALARM_DAY] & 0x3f);
 	if (!(reg[M41T80_REG_ALARM_DAY] & 0x40))
-		t->time.tm_mon = BCD2BIN(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
+		t->time.tm_mon = bcd2bin(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
 	t->time.tm_year = -1;
 	t->time.tm_wday = -1;
 	t->time.tm_yday = -1;
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 9b19499c829e..c3a18c58daf6 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -41,17 +41,17 @@ static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
-	buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
-	buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
-	buf[M41T94_REG_HOURS] = BIN2BCD(tm->tm_hour);
-	buf[M41T94_REG_WDAY] = BIN2BCD(tm->tm_wday + 1);
-	buf[M41T94_REG_DAY] = BIN2BCD(tm->tm_mday);
-	buf[M41T94_REG_MONTH] = BIN2BCD(tm->tm_mon + 1);
+	buf[M41T94_REG_SECONDS] = bin2bcd(tm->tm_sec);
+	buf[M41T94_REG_MINUTES] = bin2bcd(tm->tm_min);
+	buf[M41T94_REG_HOURS] = bin2bcd(tm->tm_hour);
+	buf[M41T94_REG_WDAY] = bin2bcd(tm->tm_wday + 1);
+	buf[M41T94_REG_DAY] = bin2bcd(tm->tm_mday);
+	buf[M41T94_REG_MONTH] = bin2bcd(tm->tm_mon + 1);
 
 	buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
 	if (tm->tm_year >= 100)
 		buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
-	buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	buf[M41T94_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	return spi_write(spi, buf, 8);
 }
@@ -82,14 +82,14 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
 		spi_write(spi, buf, 2);
 	}
 
-	tm->tm_sec = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
-	tm->tm_min = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+	tm->tm_sec = bcd2bin(spi_w8r8(spi, M41T94_REG_SECONDS));
+	tm->tm_min = bcd2bin(spi_w8r8(spi, M41T94_REG_MINUTES));
 	hour = spi_w8r8(spi, M41T94_REG_HOURS);
-	tm->tm_hour = BCD2BIN(hour & 0x3f);
-	tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
-	tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
-	tm->tm_mon = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
-	tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+	tm->tm_hour = bcd2bin(hour & 0x3f);
+	tm->tm_wday = bcd2bin(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+	tm->tm_mday = bcd2bin(spi_w8r8(spi, M41T94_REG_DAY));
+	tm->tm_mon = bcd2bin(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+	tm->tm_year = bcd2bin(spi_w8r8(spi, M41T94_REG_YEAR));
 	if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
 		tm->tm_year += 100;
 
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index ce4eff6a8d51..04b63dab6932 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -76,10 +76,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
-	tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_MDAY));
+	tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mday = bcd2bin(M48T59_READ(M48T59_MDAY));
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((pdata->type == M48T59RTC_TYPE_M48T59) &&
@@ -88,10 +88,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year += 100;	/* one century */
 	}
 
-	tm->tm_wday = BCD2BIN(val & 0x07);
-	tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_HOUR) & 0x3F);
-	tm->tm_min = BCD2BIN(M48T59_READ(M48T59_MIN) & 0x7F);
-	tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_SEC) & 0x7F);
+	tm->tm_wday = bcd2bin(val & 0x07);
+	tm->tm_hour = bcd2bin(M48T59_READ(M48T59_HOUR) & 0x3F);
+	tm->tm_min = bcd2bin(M48T59_READ(M48T59_MIN) & 0x7F);
+	tm->tm_sec = bcd2bin(M48T59_READ(M48T59_SEC) & 0x7F);
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -119,17 +119,17 @@ static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	/* Issue the WRITE command */
 	M48T59_SET_BITS(M48T59_CNTL_WRITE, M48T59_CNTL);
 
-	M48T59_WRITE((BIN2BCD(tm->tm_sec) & 0x7F), M48T59_SEC);
-	M48T59_WRITE((BIN2BCD(tm->tm_min) & 0x7F), M48T59_MIN);
-	M48T59_WRITE((BIN2BCD(tm->tm_hour) & 0x3F), M48T59_HOUR);
-	M48T59_WRITE((BIN2BCD(tm->tm_mday) & 0x3F), M48T59_MDAY);
+	M48T59_WRITE((bin2bcd(tm->tm_sec) & 0x7F), M48T59_SEC);
+	M48T59_WRITE((bin2bcd(tm->tm_min) & 0x7F), M48T59_MIN);
+	M48T59_WRITE((bin2bcd(tm->tm_hour) & 0x3F), M48T59_HOUR);
+	M48T59_WRITE((bin2bcd(tm->tm_mday) & 0x3F), M48T59_MDAY);
 	/* tm_mon is 0-11 */
-	M48T59_WRITE((BIN2BCD(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
-	M48T59_WRITE(BIN2BCD(tm->tm_year % 100), M48T59_YEAR);
+	M48T59_WRITE((bin2bcd(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
+	M48T59_WRITE(bin2bcd(tm->tm_year % 100), M48T59_YEAR);
 
 	if (pdata->type == M48T59RTC_TYPE_M48T59 && (tm->tm_year / 100))
 		val = (M48T59_WDAY_CEB | M48T59_WDAY_CB);
-	val |= (BIN2BCD(tm->tm_wday) & 0x07);
+	val |= (bin2bcd(tm->tm_wday) & 0x07);
 	M48T59_WRITE(val, M48T59_WDAY);
 
 	/* Clear the WRITE bit */
@@ -158,18 +158,18 @@ static int m48t59_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	/* Issue the READ command */
 	M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-	tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+	tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
 	/* tm_mon is 0-11 */
-	tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
+	tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
 
 	val = M48T59_READ(M48T59_WDAY);
 	if ((val & M48T59_WDAY_CEB) && (val & M48T59_WDAY_CB))
 		tm->tm_year += 100;	/* one century */
 
-	tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_ALARM_DATE));
-	tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_ALARM_HOUR));
-	tm->tm_min = BCD2BIN(M48T59_READ(M48T59_ALARM_MIN));
-	tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_ALARM_SEC));
+	tm->tm_mday = bcd2bin(M48T59_READ(M48T59_ALARM_DATE));
+	tm->tm_hour = bcd2bin(M48T59_READ(M48T59_ALARM_HOUR));
+	tm->tm_min = bcd2bin(M48T59_READ(M48T59_ALARM_MIN));
+	tm->tm_sec = bcd2bin(M48T59_READ(M48T59_ALARM_SEC));
 
 	/* Clear the READ bit */
 	M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -201,18 +201,18 @@ static int m48t59_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	 * 0xff means "always match"
 	 */
 	mday = tm->tm_mday;
-	mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+	mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 	if (mday == 0xff)
 		mday = M48T59_READ(M48T59_MDAY);
 
 	hour = tm->tm_hour;
-	hour = (hour < 24) ? BIN2BCD(hour) : 0x00;
+	hour = (hour < 24) ? bin2bcd(hour) : 0x00;
 
 	min = tm->tm_min;
-	min = (min < 60) ? BIN2BCD(min) : 0x00;
+	min = (min < 60) ? bin2bcd(min) : 0x00;
 
 	sec = tm->tm_sec;
-	sec = (sec < 60) ? BIN2BCD(sec) : 0x00;
+	sec = (sec < 60) ? bin2bcd(sec) : 0x00;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 	/* Issue the WRITE command */
@@ -360,7 +360,6 @@ static struct bin_attribute m48t59_nvram_attr = {
 	.attr = {
 		.name = "nvram",
 		.mode = S_IRUGO | S_IWUSR,
-		.owner = THIS_MODULE,
 	},
 	.read = m48t59_nvram_read,
 	.write = m48t59_nvram_write,
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index 3f7f99a5d96a..7c045cffa9ff 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -62,14 +62,14 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_wday = ops->readbyte(M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		tm->tm_sec = BCD2BIN(ops->readbyte(M48T86_REG_SEC));
-		tm->tm_min = BCD2BIN(ops->readbyte(M48T86_REG_MIN));
-		tm->tm_hour = BCD2BIN(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
-		tm->tm_mday = BCD2BIN(ops->readbyte(M48T86_REG_DOM));
+		tm->tm_sec = bcd2bin(ops->readbyte(M48T86_REG_SEC));
+		tm->tm_min = bcd2bin(ops->readbyte(M48T86_REG_MIN));
+		tm->tm_hour = bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
+		tm->tm_mday = bcd2bin(ops->readbyte(M48T86_REG_DOM));
 		/* tm_mon is 0-11 */
-		tm->tm_mon = BCD2BIN(ops->readbyte(M48T86_REG_MONTH)) - 1;
-		tm->tm_year = BCD2BIN(ops->readbyte(M48T86_REG_YEAR)) + 100;
-		tm->tm_wday = BCD2BIN(ops->readbyte(M48T86_REG_DOW));
+		tm->tm_mon = bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
+		tm->tm_year = bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
+		tm->tm_wday = bcd2bin(ops->readbyte(M48T86_REG_DOW));
 	}
 
 	/* correct the hour if the clock is in 12h mode */
@@ -103,13 +103,13 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
 	} else {
 		/* bcd mode */
-		ops->writebyte(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
-		ops->writebyte(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
-		ops->writebyte(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
-		ops->writebyte(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
-		ops->writebyte(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
-		ops->writebyte(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
-		ops->writebyte(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+		ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
+		ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
+		ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
+		ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
+		ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
+		ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
+		ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
 	}
 
 	/* update ended */
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index 12c9cd25cad8..80782798763f 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -150,14 +150,14 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 	if (rc < 0)
 		return rc;
 
-	tm->tm_sec = BCD2BIN(regs[MAX6900_REG_SC]);
-	tm->tm_min = BCD2BIN(regs[MAX6900_REG_MN]);
-	tm->tm_hour = BCD2BIN(regs[MAX6900_REG_HR] & 0x3f);
-	tm->tm_mday = BCD2BIN(regs[MAX6900_REG_DT]);
-	tm->tm_mon = BCD2BIN(regs[MAX6900_REG_MO]) - 1;
-	tm->tm_year = BCD2BIN(regs[MAX6900_REG_YR]) +
-		      BCD2BIN(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
-	tm->tm_wday = BCD2BIN(regs[MAX6900_REG_DW]);
+	tm->tm_sec = bcd2bin(regs[MAX6900_REG_SC]);
+	tm->tm_min = bcd2bin(regs[MAX6900_REG_MN]);
+	tm->tm_hour = bcd2bin(regs[MAX6900_REG_HR] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[MAX6900_REG_DT]);
+	tm->tm_mon = bcd2bin(regs[MAX6900_REG_MO]) - 1;
+	tm->tm_year = bcd2bin(regs[MAX6900_REG_YR]) +
+		      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
+	tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
 	return 0;
 }
@@ -184,14 +184,14 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	if (rc < 0)
 		return rc;
 
-	regs[MAX6900_REG_SC] = BIN2BCD(tm->tm_sec);
-	regs[MAX6900_REG_MN] = BIN2BCD(tm->tm_min);
-	regs[MAX6900_REG_HR] = BIN2BCD(tm->tm_hour);
-	regs[MAX6900_REG_DT] = BIN2BCD(tm->tm_mday);
-	regs[MAX6900_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-	regs[MAX6900_REG_DW] = BIN2BCD(tm->tm_wday);
-	regs[MAX6900_REG_YR] = BIN2BCD(tm->tm_year % 100);
-	regs[MAX6900_REG_CENTURY] = BIN2BCD((tm->tm_year + 1900) / 100);
+	regs[MAX6900_REG_SC] = bin2bcd(tm->tm_sec);
+	regs[MAX6900_REG_MN] = bin2bcd(tm->tm_min);
+	regs[MAX6900_REG_HR] = bin2bcd(tm->tm_hour);
+	regs[MAX6900_REG_DT] = bin2bcd(tm->tm_mday);
+	regs[MAX6900_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	regs[MAX6900_REG_DW] = bin2bcd(tm->tm_wday);
+	regs[MAX6900_REG_YR] = bin2bcd(tm->tm_year % 100);
+	regs[MAX6900_REG_CENTURY] = bin2bcd((tm->tm_year + 1900) / 100);
 	/* set write protect */
 	regs[MAX6900_REG_CT] = MAX6900_REG_CT_WP;
 
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 78b2551fb19d..2f6507df7b49 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -124,15 +124,15 @@ static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	/* The chip sends data in this order:
 	 * Seconds, Minutes, Hours, Date, Month, Day, Year */
-	dt->tm_sec = BCD2BIN(chip->buf[1]);
-	dt->tm_min = BCD2BIN(chip->buf[2]);
-	dt->tm_hour = BCD2BIN(chip->buf[3]);
-	dt->tm_mday = BCD2BIN(chip->buf[4]);
-	dt->tm_mon = BCD2BIN(chip->buf[5]) - 1;
-	dt->tm_wday = BCD2BIN(chip->buf[6]);
-	dt->tm_year = BCD2BIN(chip->buf[7]);
+	dt->tm_sec = bcd2bin(chip->buf[1]);
+	dt->tm_min = bcd2bin(chip->buf[2]);
+	dt->tm_hour = bcd2bin(chip->buf[3]);
+	dt->tm_mday = bcd2bin(chip->buf[4]);
+	dt->tm_mon = bcd2bin(chip->buf[5]) - 1;
+	dt->tm_wday = bcd2bin(chip->buf[6]);
+	dt->tm_year = bcd2bin(chip->buf[7]);
 
-	century = BCD2BIN(tmp) * 100;
+	century = bcd2bin(tmp) * 100;
 
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
@@ -168,15 +168,15 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
 	/* Remove write protection */
 	max6902_set_reg(dev, 0xF, 0);
 
-	max6902_set_reg(dev, 0x01, BIN2BCD(dt->tm_sec));
-	max6902_set_reg(dev, 0x03, BIN2BCD(dt->tm_min));
-	max6902_set_reg(dev, 0x05, BIN2BCD(dt->tm_hour));
+	max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec));
+	max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min));
+	max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
 
-	max6902_set_reg(dev, 0x07, BIN2BCD(dt->tm_mday));
-	max6902_set_reg(dev, 0x09, BIN2BCD(dt->tm_mon+1));
-	max6902_set_reg(dev, 0x0B, BIN2BCD(dt->tm_wday));
-	max6902_set_reg(dev, 0x0D, BIN2BCD(dt->tm_year%100));
-	max6902_set_reg(dev, 0x13, BIN2BCD(dt->tm_year/100));
+	max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
+	max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+	max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
+	max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
+	max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
 
 	/* Compulab used a delay here. However, the datasheet
 	 * does not mention a delay being required anywhere... */
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 8876605d4d4b..2cbeb0794f14 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -186,30 +186,30 @@ static int tm2bcd(struct rtc_time *tm)
 	if (rtc_valid_tm(tm) != 0)
 		return -EINVAL;
 
-	tm->tm_sec = BIN2BCD(tm->tm_sec);
-	tm->tm_min = BIN2BCD(tm->tm_min);
-	tm->tm_hour = BIN2BCD(tm->tm_hour);
-	tm->tm_mday = BIN2BCD(tm->tm_mday);
+	tm->tm_sec = bin2bcd(tm->tm_sec);
+	tm->tm_min = bin2bcd(tm->tm_min);
+	tm->tm_hour = bin2bcd(tm->tm_hour);
+	tm->tm_mday = bin2bcd(tm->tm_mday);
 
-	tm->tm_mon = BIN2BCD(tm->tm_mon + 1);
+	tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 
 	/* epoch == 1900 */
 	if (tm->tm_year < 100 || tm->tm_year > 199)
 		return -EINVAL;
-	tm->tm_year = BIN2BCD(tm->tm_year - 100);
+	tm->tm_year = bin2bcd(tm->tm_year - 100);
 
 	return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
 {
-	tm->tm_sec = BCD2BIN(tm->tm_sec);
-	tm->tm_min = BCD2BIN(tm->tm_min);
-	tm->tm_hour = BCD2BIN(tm->tm_hour);
-	tm->tm_mday = BCD2BIN(tm->tm_mday);
-	tm->tm_mon = BCD2BIN(tm->tm_mon) - 1;
+	tm->tm_sec = bcd2bin(tm->tm_sec);
+	tm->tm_min = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
 	/* epoch == 1900 */
-	tm->tm_year = BCD2BIN(tm->tm_year) + 100;
+	tm->tm_year = bcd2bin(tm->tm_year) + 100;
 }
 
 
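As a quick sanity check of the omap tm2bcd()/bcd2tm() pair above (a worked example, not part of the patch): for the year 2008, tm_year is 108, which passes the 100..199 epoch test and is stored as bin2bcd(108 - 100) = 0x08; bcd2tm() then recovers bcd2bin(0x08) + 100 = 108. Years outside 2000-2099 fail the range test, so tm2bcd() returns -EINVAL and nothing is written to the chip.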
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index a829f20ad6d6..b725913ccbe8 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -97,13 +97,13 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		buf[8]);
 
 
-	tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
-	tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
-	tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
-	tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+	tm->tm_sec = bcd2bin(buf[PCF8563_REG_SC] & 0x7F);
+	tm->tm_min = bcd2bin(buf[PCF8563_REG_MN] & 0x7F);
+	tm->tm_hour = bcd2bin(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+	tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
 	tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
-	tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-	tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR]);
+	tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+	tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;	/* assume we are in 1970...2069 */
 	/* detect the polarity heuristically. see note above. */
@@ -138,17 +138,17 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	/* hours, minutes and seconds */
-	buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
-	buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
-	buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+	buf[PCF8563_REG_SC] = bin2bcd(tm->tm_sec);
+	buf[PCF8563_REG_MN] = bin2bcd(tm->tm_min);
+	buf[PCF8563_REG_HR] = bin2bcd(tm->tm_hour);
 
-	buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+	buf[PCF8563_REG_DM] = bin2bcd(tm->tm_mday);
 
 	/* month, 1 - 12 */
-	buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+	buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
 
 	/* year and century */
-	buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+	buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
 	if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
 		buf[PCF8563_REG_MO] |= PCF8563_MO_C;
 
diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index d388c662bf4b..7d33cda3f8f6 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -76,11 +76,11 @@ static int pcf8583_get_datetime(struct i2c_client *client, struct rtc_time *dt)
 		buf[4] &= 0x3f;
 		buf[5] &= 0x1f;
 
-		dt->tm_sec = BCD2BIN(buf[1]);
-		dt->tm_min = BCD2BIN(buf[2]);
-		dt->tm_hour = BCD2BIN(buf[3]);
-		dt->tm_mday = BCD2BIN(buf[4]);
-		dt->tm_mon = BCD2BIN(buf[5]) - 1;
+		dt->tm_sec = bcd2bin(buf[1]);
+		dt->tm_min = bcd2bin(buf[2]);
+		dt->tm_hour = bcd2bin(buf[3]);
+		dt->tm_mday = bcd2bin(buf[4]);
+		dt->tm_mon = bcd2bin(buf[5]) - 1;
 	}
 
 	return ret == 2 ? 0 : -EIO;
@@ -94,14 +94,14 @@ static int pcf8583_set_datetime(struct i2c_client *client, struct rtc_time *dt,
 	buf[0] = 0;
 	buf[1] = get_ctrl(client) | 0x80;
 	buf[2] = 0;
-	buf[3] = BIN2BCD(dt->tm_sec);
-	buf[4] = BIN2BCD(dt->tm_min);
-	buf[5] = BIN2BCD(dt->tm_hour);
+	buf[3] = bin2bcd(dt->tm_sec);
+	buf[4] = bin2bcd(dt->tm_min);
+	buf[5] = bin2bcd(dt->tm_hour);
 
 	if (datetoo) {
 		len = 8;
-		buf[6] = BIN2BCD(dt->tm_mday) | (dt->tm_year << 6);
-		buf[7] = BIN2BCD(dt->tm_mon + 1) | (dt->tm_wday << 5);
+		buf[6] = bin2bcd(dt->tm_mday) | (dt->tm_year << 6);
+		buf[7] = bin2bcd(dt->tm_mon + 1) | (dt->tm_wday << 5);
 	}
 
 	ret = i2c_master_send(client, (char *)buf, len);
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 395985b339c9..42028f233bef 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -80,13 +80,13 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 
 	memset(dt, 0, sizeof(*dt));
 
-	dt->tm_sec = BCD2BIN(buf[0]); /* RSECCNT */
-	dt->tm_min = BCD2BIN(buf[1]); /* RMINCNT */
-	dt->tm_hour = BCD2BIN(buf[2]); /* RHRCNT */
+	dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */
+	dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */
+	dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */
 
-	dt->tm_mday = BCD2BIN(buf[3]); /* RDAYCNT */
-	dt->tm_mon = BCD2BIN(buf[4]) - 1; /* RMONCNT */
-	dt->tm_year = BCD2BIN(buf[5]) + 100; /* RYRCNT */
+	dt->tm_mday = bcd2bin(buf[3]); /* RDAYCNT */
+	dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */
+	dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */
 
 	/* the rtc device may contain illegal values on power up
 	 * according to the data sheet. make sure they are valid.
@@ -103,12 +103,12 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
 	if (year >= 2100 || year < 2000)
 		return -EINVAL;
 
-	ret = write_reg(dev, RHRCNT, BIN2BCD(dt->tm_hour));
-	ret = ret ? ret : write_reg(dev, RMINCNT, BIN2BCD(dt->tm_min));
-	ret = ret ? ret : write_reg(dev, RSECCNT, BIN2BCD(dt->tm_sec));
-	ret = ret ? ret : write_reg(dev, RDAYCNT, BIN2BCD(dt->tm_mday));
-	ret = ret ? ret : write_reg(dev, RMONCNT, BIN2BCD(dt->tm_mon + 1));
-	ret = ret ? ret : write_reg(dev, RYRCNT, BIN2BCD(dt->tm_year - 100));
+	ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour));
+	ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min));
+	ret = ret ? ret : write_reg(dev, RSECCNT, bin2bcd(dt->tm_sec));
+	ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday));
+	ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1));
+	ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100));
 	ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday);
 
 	return ret;
diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index 1c14d4497c4d..e6ea3f5ee1eb 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -235,33 +235,33 @@ static int rs5c313_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	data = rs5c313_read_reg(RS5C313_ADDR_SEC);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_SEC10) << 4);
-	tm->tm_sec = BCD2BIN(data);
+	tm->tm_sec = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MIN);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MIN10) << 4);
-	tm->tm_min = BCD2BIN(data);
+	tm->tm_min = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_HOUR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_HOUR10) << 4);
-	tm->tm_hour = BCD2BIN(data);
+	tm->tm_hour = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_DAY);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_DAY10) << 4);
-	tm->tm_mday = BCD2BIN(data);
+	tm->tm_mday = bcd2bin(data);
 
 	data = rs5c313_read_reg(RS5C313_ADDR_MON);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_MON10) << 4);
-	tm->tm_mon = BCD2BIN(data) - 1;
+	tm->tm_mon = bcd2bin(data) - 1;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_YEAR);
 	data |= (rs5c313_read_reg(RS5C313_ADDR_YEAR10) << 4);
-	tm->tm_year = BCD2BIN(data);
+	tm->tm_year = bcd2bin(data);
 
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 
 	data = rs5c313_read_reg(RS5C313_ADDR_WEEK);
-	tm->tm_wday = BCD2BIN(data);
+	tm->tm_wday = bcd2bin(data);
 
 	RS5C313_CEDISABLE;
 	ndelay(700);	/* CE:L */
@@ -294,31 +294,31 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		}
 	}
 
-	data = BIN2BCD(tm->tm_sec);
+	data = bin2bcd(tm->tm_sec);
 	rs5c313_write_reg(RS5C313_ADDR_SEC, data);
 	rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_min);
+	data = bin2bcd(tm->tm_min);
 	rs5c313_write_reg(RS5C313_ADDR_MIN, data );
 	rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_hour);
+	data = bin2bcd(tm->tm_hour);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR, data);
 	rs5c313_write_reg(RS5C313_ADDR_HOUR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_mday);
+	data = bin2bcd(tm->tm_mday);
 	rs5c313_write_reg(RS5C313_ADDR_DAY, data);
 	rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4));
 
-	data = BIN2BCD(tm->tm_mon + 1);
+	data = bin2bcd(tm->tm_mon + 1);
 	rs5c313_write_reg(RS5C313_ADDR_MON, data);
 	rs5c313_write_reg(RS5C313_ADDR_MON10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_year % 100);
+	data = bin2bcd(tm->tm_year % 100);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR, data);
 	rs5c313_write_reg(RS5C313_ADDR_YEAR10, (data >> 4));
 
-	data = BIN2BCD(tm->tm_wday);
+	data = bin2bcd(tm->tm_wday);
 	rs5c313_write_reg(RS5C313_ADDR_WEEK, data);
 
 	RS5C313_CEDISABLE;	/* CE:H */
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 839462659afa..dd1e2bc7a472 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -74,20 +74,20 @@ rs5c348_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	txbuf[3] = 0;	/* dummy */
 	txbuf[4] = RS5C348_CMD_MW(RS5C348_REG_SECS); /* cmd, sec, ... */
 	txp = &txbuf[5];
-	txp[RS5C348_REG_SECS] = BIN2BCD(tm->tm_sec);
-	txp[RS5C348_REG_MINS] = BIN2BCD(tm->tm_min);
+	txp[RS5C348_REG_SECS] = bin2bcd(tm->tm_sec);
+	txp[RS5C348_REG_MINS] = bin2bcd(tm->tm_min);
 	if (pdata->rtc_24h) {
-		txp[RS5C348_REG_HOURS] = BIN2BCD(tm->tm_hour);
+		txp[RS5C348_REG_HOURS] = bin2bcd(tm->tm_hour);
 	} else {
 		/* hour 0 is AM12, noon is PM12 */
-		txp[RS5C348_REG_HOURS] = BIN2BCD((tm->tm_hour + 11) % 12 + 1) |
+		txp[RS5C348_REG_HOURS] = bin2bcd((tm->tm_hour + 11) % 12 + 1) |
 			(tm->tm_hour >= 12 ? RS5C348_BIT_PM : 0);
 	}
-	txp[RS5C348_REG_WDAY] = BIN2BCD(tm->tm_wday);
-	txp[RS5C348_REG_DAY] = BIN2BCD(tm->tm_mday);
-	txp[RS5C348_REG_MONTH] = BIN2BCD(tm->tm_mon + 1) |
+	txp[RS5C348_REG_WDAY] = bin2bcd(tm->tm_wday);
+	txp[RS5C348_REG_DAY] = bin2bcd(tm->tm_mday);
+	txp[RS5C348_REG_MONTH] = bin2bcd(tm->tm_mon + 1) |
 		(tm->tm_year >= 100 ? RS5C348_BIT_Y2K : 0);
-	txp[RS5C348_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+	txp[RS5C348_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 	/* write in one transfer to avoid data inconsistency */
 	ret = spi_write_then_read(spi, txbuf, sizeof(txbuf), NULL, 0);
 	udelay(62);	/* Tcsr 62us */
@@ -116,20 +116,20 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (ret < 0)
 		return ret;
 
-	tm->tm_sec = BCD2BIN(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
-	tm->tm_min = BCD2BIN(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
-	tm->tm_hour = BCD2BIN(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
+	tm->tm_sec = bcd2bin(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
+	tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
+	tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
 	if (!pdata->rtc_24h) {
 		tm->tm_hour %= 12;
 		if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
 			tm->tm_hour += 12;
 	}
-	tm->tm_wday = BCD2BIN(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
-	tm->tm_mday = BCD2BIN(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
+	tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
+	tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
 	tm->tm_mon =
-	    BCD2BIN(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
+	    bcd2bin(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rxbuf[RS5C348_REG_YEAR]) +
+	tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
 		((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
 	if (rtc_valid_tm(tm) < 0) {
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 8b561958fb1e..2f2c68d476da 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -148,9 +148,9 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 	unsigned hour;
 
 	if (rs5c->time24)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x1f);
+	hour = bcd2bin(reg & 0x1f);
 	if (hour == 12)
 		hour = 0;
 	if (reg & 0x20)
@@ -161,15 +161,15 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 {
 	if (rs5c->time24)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour > 12)
-		return 0x20 | BIN2BCD(hour - 12);
+		return 0x20 | bin2bcd(hour - 12);
 	if (hour == 12)
-		return 0x20 | BIN2BCD(12);
+		return 0x20 | bin2bcd(12);
 	if (hour == 0)
-		return BIN2BCD(12);
-	return BIN2BCD(hour);
+		return bin2bcd(12);
+	return bin2bcd(hour);
 }
 
 static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
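The 12-hour encoding in rs5c_hr2reg() above is worth a worked example (illustrative, derived from the code in this hunk and rs5c_reg2hr() earlier in the file): 0x20 is the PM flag. Hour 0 (12 AM) encodes as bin2bcd(12) = 0x12, hour 12 (12 PM) as 0x20 | 0x12 = 0x32, and hour 13 as 0x20 | bin2bcd(1) = 0x21. On the read side, rs5c_reg2hr() maps a decoded 12 back to 0 before adding 12 for the PM flag, so all three values round-trip correctly.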
@@ -180,18 +180,18 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	if (status < 0)
 		return status;
 
-	tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
-	tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+	tm->tm_sec = bcd2bin(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+	tm->tm_min = bcd2bin(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
 	tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
 
-	tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
-	tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
+	tm->tm_wday = bcd2bin(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+	tm->tm_mday = bcd2bin(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
 
 	/* tm->tm_mon is zero-based */
-	tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
+	tm->tm_mon = bcd2bin(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
 
 	/* year is 1900 + tm->tm_year */
-	tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
+	tm->tm_year = bcd2bin(rs5c->regs[RS5C372_REG_YEAR]) + 100;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
 		"mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -216,13 +216,13 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
 	addr = RS5C_ADDR(RS5C372_REG_SECS);
-	buf[0] = BIN2BCD(tm->tm_sec);
-	buf[1] = BIN2BCD(tm->tm_min);
+	buf[0] = bin2bcd(tm->tm_sec);
+	buf[1] = bin2bcd(tm->tm_min);
 	buf[2] = rs5c_hr2reg(rs5c, tm->tm_hour);
-	buf[3] = BIN2BCD(tm->tm_wday);
-	buf[4] = BIN2BCD(tm->tm_mday);
-	buf[5] = BIN2BCD(tm->tm_mon + 1);
-	buf[6] = BIN2BCD(tm->tm_year - 100);
+	buf[3] = bin2bcd(tm->tm_wday);
+	buf[4] = bin2bcd(tm->tm_mday);
+	buf[5] = bin2bcd(tm->tm_mon + 1);
+	buf[6] = bin2bcd(tm->tm_year - 100);
 
 	if (i2c_smbus_write_i2c_block_data(client, addr, sizeof(buf), buf) < 0) {
 		dev_err(&client->dev, "%s: write error\n", __func__);
@@ -367,7 +367,7 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 	/* report alarm time */
 	t->time.tm_sec = 0;
-	t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+	t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
 	t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
 	t->time.tm_mday = -1;
 	t->time.tm_mon = -1;
@@ -413,7 +413,7 @@ static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	}
 
 	/* set alarm */
-	buf[0] = BIN2BCD(t->time.tm_min);
+	buf[0] = bin2bcd(t->time.tm_min);
 	buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
 	buf[2] = 0x7f;	/* any/all days */
 
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index a6fa1f2f2ca6..def4d396d0b0 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -104,12 +104,12 @@ static int s35390a_disable_test_mode(struct s35390a *s35390a)
 static char s35390a_hr2reg(struct s35390a *s35390a, int hour)
 {
 	if (s35390a->twentyfourhour)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
 	if (hour < 12)
-		return BIN2BCD(hour);
+		return bin2bcd(hour);
 
-	return 0x40 | BIN2BCD(hour - 12);
+	return 0x40 | bin2bcd(hour - 12);
 }
 
 static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
@@ -117,9 +117,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
 	unsigned hour;
 
 	if (s35390a->twentyfourhour)
-		return BCD2BIN(reg & 0x3f);
+		return bcd2bin(reg & 0x3f);
 
-	hour = BCD2BIN(reg & 0x3f);
+	hour = bcd2bin(reg & 0x3f);
 	if (reg & 0x40)
 		hour += 12;
 
@@ -137,13 +137,13 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_wday);
 
-	buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100);
-	buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1);
-	buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday);
-	buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday);
+	buf[S35390A_BYTE_YEAR] = bin2bcd(tm->tm_year - 100);
+	buf[S35390A_BYTE_MONTH] = bin2bcd(tm->tm_mon + 1);
+	buf[S35390A_BYTE_DAY] = bin2bcd(tm->tm_mday);
+	buf[S35390A_BYTE_WDAY] = bin2bcd(tm->tm_wday);
 	buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour);
-	buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min);
-	buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec);
+	buf[S35390A_BYTE_MINS] = bin2bcd(tm->tm_min);
+	buf[S35390A_BYTE_SECS] = bin2bcd(tm->tm_sec);
 
 	/* This chip expects the bits of each byte to be in reverse order */
 	for (i = 0; i < 7; ++i)
@@ -168,13 +168,13 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
168 for (i = 0; i < 7; ++i) 168 for (i = 0; i < 7; ++i)
169 buf[i] = bitrev8(buf[i]); 169 buf[i] = bitrev8(buf[i]);
170 170
171 tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]); 171 tm->tm_sec = bcd2bin(buf[S35390A_BYTE_SECS]);
172 tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]); 172 tm->tm_min = bcd2bin(buf[S35390A_BYTE_MINS]);
173 tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]); 173 tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]);
174 tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]); 174 tm->tm_wday = bcd2bin(buf[S35390A_BYTE_WDAY]);
175 tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]); 175 tm->tm_mday = bcd2bin(buf[S35390A_BYTE_DAY]);
176 tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1; 176 tm->tm_mon = bcd2bin(buf[S35390A_BYTE_MONTH]) - 1;
177 tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100; 177 tm->tm_year = bcd2bin(buf[S35390A_BYTE_YEAR]) + 100;
178 178
179 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, " 179 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, "
180 "mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec, 180 "mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec,
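
The s35390a transfers each register byte least-significant bit first, which is why the driver runs every byte through bitrev8() from <linux/bitrev.h> on both the read and write paths above. The kernel helper is table-driven; this loop form is an equivalent sketch of its semantics:

static unsigned char bitrev8_sketch(unsigned char b)
{
	unsigned char r = 0;
	int i;

	for (i = 0; i < 8; i++)
		r = (r << 1) | ((b >> i) & 1);	/* bit i ends up at bit 7 - i */
	return r;
}
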
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index e7d19b6c265a..910bc704939c 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -134,12 +134,12 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
134 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday, 134 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
135 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec); 135 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
136 136
137 BCD_TO_BIN(rtc_tm->tm_sec); 137 rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
138 BCD_TO_BIN(rtc_tm->tm_min); 138 rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
139 BCD_TO_BIN(rtc_tm->tm_hour); 139 rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
140 BCD_TO_BIN(rtc_tm->tm_mday); 140 rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
141 BCD_TO_BIN(rtc_tm->tm_mon); 141 rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
142 BCD_TO_BIN(rtc_tm->tm_year); 142 rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
143 143
144 rtc_tm->tm_year += 100; 144 rtc_tm->tm_year += 100;
145 rtc_tm->tm_mon -= 1; 145 rtc_tm->tm_mon -= 1;
@@ -163,12 +163,12 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
163 return -EINVAL; 163 return -EINVAL;
164 } 164 }
165 165
166 writeb(BIN2BCD(tm->tm_sec), base + S3C2410_RTCSEC); 166 writeb(bin2bcd(tm->tm_sec), base + S3C2410_RTCSEC);
167 writeb(BIN2BCD(tm->tm_min), base + S3C2410_RTCMIN); 167 writeb(bin2bcd(tm->tm_min), base + S3C2410_RTCMIN);
168 writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR); 168 writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
169 writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE); 169 writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE);
170 writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON); 170 writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON);
171 writeb(BIN2BCD(year), base + S3C2410_RTCYEAR); 171 writeb(bin2bcd(year), base + S3C2410_RTCYEAR);
172 172
173 return 0; 173 return 0;
174} 174}
@@ -199,34 +199,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
199 /* decode the alarm enable field */ 199 /* decode the alarm enable field */
200 200
201 if (alm_en & S3C2410_RTCALM_SECEN) 201 if (alm_en & S3C2410_RTCALM_SECEN)
202 BCD_TO_BIN(alm_tm->tm_sec); 202 alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
203 else 203 else
204 alm_tm->tm_sec = 0xff; 204 alm_tm->tm_sec = 0xff;
205 205
206 if (alm_en & S3C2410_RTCALM_MINEN) 206 if (alm_en & S3C2410_RTCALM_MINEN)
207 BCD_TO_BIN(alm_tm->tm_min); 207 alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
208 else 208 else
209 alm_tm->tm_min = 0xff; 209 alm_tm->tm_min = 0xff;
210 210
211 if (alm_en & S3C2410_RTCALM_HOUREN) 211 if (alm_en & S3C2410_RTCALM_HOUREN)
212 BCD_TO_BIN(alm_tm->tm_hour); 212 alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
213 else 213 else
214 alm_tm->tm_hour = 0xff; 214 alm_tm->tm_hour = 0xff;
215 215
216 if (alm_en & S3C2410_RTCALM_DAYEN) 216 if (alm_en & S3C2410_RTCALM_DAYEN)
217 BCD_TO_BIN(alm_tm->tm_mday); 217 alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
218 else 218 else
219 alm_tm->tm_mday = 0xff; 219 alm_tm->tm_mday = 0xff;
220 220
221 if (alm_en & S3C2410_RTCALM_MONEN) { 221 if (alm_en & S3C2410_RTCALM_MONEN) {
222 BCD_TO_BIN(alm_tm->tm_mon); 222 alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
223 alm_tm->tm_mon -= 1; 223 alm_tm->tm_mon -= 1;
224 } else { 224 } else {
225 alm_tm->tm_mon = 0xff; 225 alm_tm->tm_mon = 0xff;
226 } 226 }
227 227
228 if (alm_en & S3C2410_RTCALM_YEAREN) 228 if (alm_en & S3C2410_RTCALM_YEAREN)
229 BCD_TO_BIN(alm_tm->tm_year); 229 alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
230 else 230 else
231 alm_tm->tm_year = 0xffff; 231 alm_tm->tm_year = 0xffff;
232 232
@@ -250,17 +250,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
250 250
251 if (tm->tm_sec < 60 && tm->tm_sec >= 0) { 251 if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
252 alrm_en |= S3C2410_RTCALM_SECEN; 252 alrm_en |= S3C2410_RTCALM_SECEN;
253 writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC); 253 writeb(bin2bcd(tm->tm_sec), base + S3C2410_ALMSEC);
254 } 254 }
255 255
256 if (tm->tm_min < 60 && tm->tm_min >= 0) { 256 if (tm->tm_min < 60 && tm->tm_min >= 0) {
257 alrm_en |= S3C2410_RTCALM_MINEN; 257 alrm_en |= S3C2410_RTCALM_MINEN;
258 writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN); 258 writeb(bin2bcd(tm->tm_min), base + S3C2410_ALMMIN);
259 } 259 }
260 260
261 if (tm->tm_hour < 24 && tm->tm_hour >= 0) { 261 if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
262 alrm_en |= S3C2410_RTCALM_HOUREN; 262 alrm_en |= S3C2410_RTCALM_HOUREN;
263 writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR); 263 writeb(bin2bcd(tm->tm_hour), base + S3C2410_ALMHOUR);
264 } 264 }
265 265
266 pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en); 266 pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
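
rtc-s3c used the in-place BCD_TO_BIN(x) form, which rewrote its argument, so each call site becomes an explicit assignment when switching to the pure bcd2bin() helper. A self-contained illustration of the difference (the macro body mirrors the old <linux/bcd.h> definition):

#include <stdio.h>

/* The old in-place macro mutated its argument: */
#define BCD_TO_BIN(val)	((val) = ((val) & 0x0f) + ((val) >> 4) * 10)

static unsigned bcd2bin(unsigned char val)	/* the pure replacement */
{
	return (val & 0x0f) + (val >> 4) * 10;
}

int main(void)
{
	unsigned sec = 0x59;

	BCD_TO_BIN(sec);		/* old style: hidden write to sec */
	printf("%u\n", sec);		/* prints 59 */

	sec = bcd2bin(0x59);		/* new style: explicit assignment */
	printf("%u\n", sec);		/* prints 59 */
	return 0;
}
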
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 3f393c82e32c..aaf9d6a337cc 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -324,23 +324,23 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
324 324
325 sec128 = readb(rtc->regbase + R64CNT); 325 sec128 = readb(rtc->regbase + R64CNT);
326 326
327 tm->tm_sec = BCD2BIN(readb(rtc->regbase + RSECCNT)); 327 tm->tm_sec = bcd2bin(readb(rtc->regbase + RSECCNT));
328 tm->tm_min = BCD2BIN(readb(rtc->regbase + RMINCNT)); 328 tm->tm_min = bcd2bin(readb(rtc->regbase + RMINCNT));
329 tm->tm_hour = BCD2BIN(readb(rtc->regbase + RHRCNT)); 329 tm->tm_hour = bcd2bin(readb(rtc->regbase + RHRCNT));
330 tm->tm_wday = BCD2BIN(readb(rtc->regbase + RWKCNT)); 330 tm->tm_wday = bcd2bin(readb(rtc->regbase + RWKCNT));
331 tm->tm_mday = BCD2BIN(readb(rtc->regbase + RDAYCNT)); 331 tm->tm_mday = bcd2bin(readb(rtc->regbase + RDAYCNT));
332 tm->tm_mon = BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1; 332 tm->tm_mon = bcd2bin(readb(rtc->regbase + RMONCNT)) - 1;
333 333
334 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) { 334 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
335 yr = readw(rtc->regbase + RYRCNT); 335 yr = readw(rtc->regbase + RYRCNT);
336 yr100 = BCD2BIN(yr >> 8); 336 yr100 = bcd2bin(yr >> 8);
337 yr &= 0xff; 337 yr &= 0xff;
338 } else { 338 } else {
339 yr = readb(rtc->regbase + RYRCNT); 339 yr = readb(rtc->regbase + RYRCNT);
340 yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20); 340 yr100 = bcd2bin((yr == 0x99) ? 0x19 : 0x20);
341 } 341 }
342 342
343 tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900; 343 tm->tm_year = (yr100 * 100 + bcd2bin(yr)) - 1900;
344 344
345 sec2 = readb(rtc->regbase + R64CNT); 345 sec2 = readb(rtc->regbase + R64CNT);
346 cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF; 346 cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF;
@@ -382,20 +382,20 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
382 tmp &= ~RCR2_START; 382 tmp &= ~RCR2_START;
383 writeb(tmp, rtc->regbase + RCR2); 383 writeb(tmp, rtc->regbase + RCR2);
384 384
385 writeb(BIN2BCD(tm->tm_sec), rtc->regbase + RSECCNT); 385 writeb(bin2bcd(tm->tm_sec), rtc->regbase + RSECCNT);
386 writeb(BIN2BCD(tm->tm_min), rtc->regbase + RMINCNT); 386 writeb(bin2bcd(tm->tm_min), rtc->regbase + RMINCNT);
387 writeb(BIN2BCD(tm->tm_hour), rtc->regbase + RHRCNT); 387 writeb(bin2bcd(tm->tm_hour), rtc->regbase + RHRCNT);
388 writeb(BIN2BCD(tm->tm_wday), rtc->regbase + RWKCNT); 388 writeb(bin2bcd(tm->tm_wday), rtc->regbase + RWKCNT);
389 writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT); 389 writeb(bin2bcd(tm->tm_mday), rtc->regbase + RDAYCNT);
390 writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT); 390 writeb(bin2bcd(tm->tm_mon + 1), rtc->regbase + RMONCNT);
391 391
392 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) { 392 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
393 year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) | 393 year = (bin2bcd((tm->tm_year + 1900) / 100) << 8) |
394 BIN2BCD(tm->tm_year % 100); 394 bin2bcd(tm->tm_year % 100);
395 writew(year, rtc->regbase + RYRCNT); 395 writew(year, rtc->regbase + RYRCNT);
396 } else { 396 } else {
397 year = tm->tm_year % 100; 397 year = tm->tm_year % 100;
398 writeb(BIN2BCD(year), rtc->regbase + RYRCNT); 398 writeb(bin2bcd(year), rtc->regbase + RYRCNT);
399 } 399 }
400 400
401 /* Start RTC */ 401 /* Start RTC */
@@ -417,7 +417,7 @@ static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
417 byte = readb(rtc->regbase + reg_off); 417 byte = readb(rtc->regbase + reg_off);
418 if (byte & AR_ENB) { 418 if (byte & AR_ENB) {
419 byte &= ~AR_ENB; /* strip the enable bit */ 419 byte &= ~AR_ENB; /* strip the enable bit */
420 value = BCD2BIN(byte); 420 value = bcd2bin(byte);
421 } 421 }
422 422
423 return value; 423 return value;
@@ -455,7 +455,7 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc,
455 if (value < 0) 455 if (value < 0)
456 writeb(0, rtc->regbase + reg_off); 456 writeb(0, rtc->regbase + reg_off);
457 else 457 else
458 writeb(BIN2BCD(value) | AR_ENB, rtc->regbase + reg_off); 458 writeb(bin2bcd(value) | AR_ENB, rtc->regbase + reg_off);
459} 459}
460 460
461static int sh_rtc_check_alarm(struct rtc_time *tm) 461static int sh_rtc_check_alarm(struct rtc_time *tm)
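
With RTC_CAP_4_DIGIT_YEAR the SH RTC packs the year into one 16-bit register, century as BCD in the high byte and year-in-century in the low byte. A worked round trip of the packing used above, with plain C stand-ins for the readw/writew register accesses:

#include <stdio.h>

static unsigned bcd2bin(unsigned char v) { return (v & 0x0f) + (v >> 4) * 10; }
static unsigned char bin2bcd(unsigned v) { return ((v / 10) << 4) | (v % 10); }

int main(void)
{
	int tm_year = 108;			/* years since 1900 -> 2008 */
	unsigned short ryrcnt;

	/* encode: century BCD in the high byte, year-in-century below it */
	ryrcnt = (bin2bcd((tm_year + 1900) / 100) << 8) |
		 bin2bcd(tm_year % 100);
	printf("RYRCNT = 0x%04x\n", (unsigned)ryrcnt);	/* 0x2008 */

	/* decode, as sh_rtc_read_time() does */
	tm_year = bcd2bin(ryrcnt >> 8) * 100 + bcd2bin(ryrcnt & 0xff) - 1900;
	printf("tm_year = %d\n", tm_year);		/* 108 */
	return 0;
}
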
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 9a7e920315fa..f4cd46e15af9 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -82,14 +82,14 @@ static int stk17ta8_rtc_set_time(struct device *dev, struct rtc_time *tm)
82 flags = readb(pdata->ioaddr + RTC_FLAGS); 82 flags = readb(pdata->ioaddr + RTC_FLAGS);
83 writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS); 83 writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
84 84
85 writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR); 85 writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
86 writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH); 86 writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
87 writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY); 87 writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
88 writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE); 88 writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
89 writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS); 89 writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
90 writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES); 90 writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
91 writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS); 91 writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
92 writeb(BIN2BCD((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY); 92 writeb(bin2bcd((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
93 93
94 writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS); 94 writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
95 return 0; 95 return 0;
@@ -120,14 +120,14 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
120 year = readb(ioaddr + RTC_YEAR); 120 year = readb(ioaddr + RTC_YEAR);
121 century = readb(ioaddr + RTC_CENTURY); 121 century = readb(ioaddr + RTC_CENTURY);
122 writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS); 122 writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS);
123 tm->tm_sec = BCD2BIN(second); 123 tm->tm_sec = bcd2bin(second);
124 tm->tm_min = BCD2BIN(minute); 124 tm->tm_min = bcd2bin(minute);
125 tm->tm_hour = BCD2BIN(hour); 125 tm->tm_hour = bcd2bin(hour);
126 tm->tm_mday = BCD2BIN(day); 126 tm->tm_mday = bcd2bin(day);
127 tm->tm_wday = BCD2BIN(week); 127 tm->tm_wday = bcd2bin(week);
128 tm->tm_mon = BCD2BIN(month) - 1; 128 tm->tm_mon = bcd2bin(month) - 1;
129 /* year is 1900 + tm->tm_year */ 129 /* year is 1900 + tm->tm_year */
130 tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900; 130 tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
131 131
132 if (rtc_valid_tm(tm) < 0) { 132 if (rtc_valid_tm(tm) < 0) {
133 dev_err(dev, "retrieved date/time is not valid.\n"); 133 dev_err(dev, "retrieved date/time is not valid.\n");
@@ -148,16 +148,16 @@ static void stk17ta8_rtc_update_alarm(struct rtc_plat_data *pdata)
148 writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS); 148 writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS);
149 149
150 writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ? 150 writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
151 0x80 : BIN2BCD(pdata->alrm_mday), 151 0x80 : bin2bcd(pdata->alrm_mday),
152 ioaddr + RTC_DATE_ALARM); 152 ioaddr + RTC_DATE_ALARM);
153 writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ? 153 writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
154 0x80 : BIN2BCD(pdata->alrm_hour), 154 0x80 : bin2bcd(pdata->alrm_hour),
155 ioaddr + RTC_HOURS_ALARM); 155 ioaddr + RTC_HOURS_ALARM);
156 writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ? 156 writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
157 0x80 : BIN2BCD(pdata->alrm_min), 157 0x80 : bin2bcd(pdata->alrm_min),
158 ioaddr + RTC_MINUTES_ALARM); 158 ioaddr + RTC_MINUTES_ALARM);
159 writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ? 159 writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
160 0x80 : BIN2BCD(pdata->alrm_sec), 160 0x80 : bin2bcd(pdata->alrm_sec),
161 ioaddr + RTC_SECONDS_ALARM); 161 ioaddr + RTC_SECONDS_ALARM);
162 writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS); 162 writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS);
163 readb(ioaddr + RTC_FLAGS); /* clear interrupts */ 163 readb(ioaddr + RTC_FLAGS); /* clear interrupts */
@@ -280,7 +280,6 @@ static struct bin_attribute stk17ta8_nvram_attr = {
280 .attr = { 280 .attr = {
281 .name = "nvram", 281 .name = "nvram",
282 .mode = S_IRUGO | S_IWUSR, 282 .mode = S_IRUGO | S_IWUSR,
283 .owner = THIS_MODULE,
284 }, 283 },
285 .size = RTC_OFFSET, 284 .size = RTC_OFFSET,
286 .read = stk17ta8_nvram_read, 285 .read = stk17ta8_nvram_read,
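
The hunk dropping .owner = THIS_MODULE (repeated below for arcmsr and w1_ds2760) tracks the removal of the owner field from struct attribute: sysfs now takes care of pinning the owning module itself, so the initializer is simply deleted. A minimal sketch of a binary sysfs attribute in the new style, using hypothetical demo_* names:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/sysfs.h>

static char demo_buf[16] = "hello";

static ssize_t demo_read(struct kobject *kobj, struct bin_attribute *attr,
			 char *buf, loff_t off, size_t count)
{
	if (off >= sizeof(demo_buf))
		return 0;
	count = min(count, sizeof(demo_buf) - (size_t)off);
	memcpy(buf, demo_buf + off, count);
	return count;
}

static struct bin_attribute demo_attr = {
	.attr	= { .name = "demo", .mode = S_IRUGO },	/* no .owner field any more */
	.size	= sizeof(demo_buf),
	.read	= demo_read,
};
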
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index 10025d840268..14d4f036a768 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -92,19 +92,19 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt)
92 92
93 /* ...and then read constant values. */ 93 /* ...and then read constant values. */
94 tmp = v3020_get_reg(chip, V3020_SECONDS); 94 tmp = v3020_get_reg(chip, V3020_SECONDS);
95 dt->tm_sec = BCD2BIN(tmp); 95 dt->tm_sec = bcd2bin(tmp);
96 tmp = v3020_get_reg(chip, V3020_MINUTES); 96 tmp = v3020_get_reg(chip, V3020_MINUTES);
97 dt->tm_min = BCD2BIN(tmp); 97 dt->tm_min = bcd2bin(tmp);
98 tmp = v3020_get_reg(chip, V3020_HOURS); 98 tmp = v3020_get_reg(chip, V3020_HOURS);
99 dt->tm_hour = BCD2BIN(tmp); 99 dt->tm_hour = bcd2bin(tmp);
100 tmp = v3020_get_reg(chip, V3020_MONTH_DAY); 100 tmp = v3020_get_reg(chip, V3020_MONTH_DAY);
101 dt->tm_mday = BCD2BIN(tmp); 101 dt->tm_mday = bcd2bin(tmp);
102 tmp = v3020_get_reg(chip, V3020_MONTH); 102 tmp = v3020_get_reg(chip, V3020_MONTH);
103 dt->tm_mon = BCD2BIN(tmp) - 1; 103 dt->tm_mon = bcd2bin(tmp) - 1;
104 tmp = v3020_get_reg(chip, V3020_WEEK_DAY); 104 tmp = v3020_get_reg(chip, V3020_WEEK_DAY);
105 dt->tm_wday = BCD2BIN(tmp); 105 dt->tm_wday = bcd2bin(tmp);
106 tmp = v3020_get_reg(chip, V3020_YEAR); 106 tmp = v3020_get_reg(chip, V3020_YEAR);
107 dt->tm_year = BCD2BIN(tmp)+100; 107 dt->tm_year = bcd2bin(tmp)+100;
108 108
109#ifdef DEBUG 109#ifdef DEBUG
110 printk("\n%s : Read RTC values\n",__func__); 110 printk("\n%s : Read RTC values\n",__func__);
@@ -136,13 +136,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt)
136#endif 136#endif
137 137
138 /* Write all the values to ram... */ 138 /* Write all the values to ram... */
139 v3020_set_reg(chip, V3020_SECONDS, BIN2BCD(dt->tm_sec)); 139 v3020_set_reg(chip, V3020_SECONDS, bin2bcd(dt->tm_sec));
140 v3020_set_reg(chip, V3020_MINUTES, BIN2BCD(dt->tm_min)); 140 v3020_set_reg(chip, V3020_MINUTES, bin2bcd(dt->tm_min));
141 v3020_set_reg(chip, V3020_HOURS, BIN2BCD(dt->tm_hour)); 141 v3020_set_reg(chip, V3020_HOURS, bin2bcd(dt->tm_hour));
142 v3020_set_reg(chip, V3020_MONTH_DAY, BIN2BCD(dt->tm_mday)); 142 v3020_set_reg(chip, V3020_MONTH_DAY, bin2bcd(dt->tm_mday));
143 v3020_set_reg(chip, V3020_MONTH, BIN2BCD(dt->tm_mon + 1)); 143 v3020_set_reg(chip, V3020_MONTH, bin2bcd(dt->tm_mon + 1));
144 v3020_set_reg(chip, V3020_WEEK_DAY, BIN2BCD(dt->tm_wday)); 144 v3020_set_reg(chip, V3020_WEEK_DAY, bin2bcd(dt->tm_wday));
145 v3020_set_reg(chip, V3020_YEAR, BIN2BCD(dt->tm_year % 100)); 145 v3020_set_reg(chip, V3020_YEAR, bin2bcd(dt->tm_year % 100));
146 146
147 /* ...and set the clock. */ 147 /* ...and set the clock. */
148 v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0); 148 v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0);
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
index 7dcfba1bbfe1..310c10795e9a 100644
--- a/drivers/rtc/rtc-x1205.c
+++ b/drivers/rtc/rtc-x1205.c
@@ -118,13 +118,13 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
118 for (i = 0; i <= 4; i++) 118 for (i = 0; i <= 4; i++)
119 buf[i] &= 0x7F; 119 buf[i] &= 0x7F;
120 120
121 tm->tm_sec = BCD2BIN(buf[CCR_SEC]); 121 tm->tm_sec = bcd2bin(buf[CCR_SEC]);
122 tm->tm_min = BCD2BIN(buf[CCR_MIN]); 122 tm->tm_min = bcd2bin(buf[CCR_MIN]);
123 tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */ 123 tm->tm_hour = bcd2bin(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
124 tm->tm_mday = BCD2BIN(buf[CCR_MDAY]); 124 tm->tm_mday = bcd2bin(buf[CCR_MDAY]);
125 tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */ 125 tm->tm_mon = bcd2bin(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
126 tm->tm_year = BCD2BIN(buf[CCR_YEAR]) 126 tm->tm_year = bcd2bin(buf[CCR_YEAR])
127 + (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900; 127 + (bcd2bin(buf[CCR_Y2K]) * 100) - 1900;
128 tm->tm_wday = buf[CCR_WDAY]; 128 tm->tm_wday = buf[CCR_WDAY];
129 129
130 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, " 130 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -174,11 +174,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
174 __func__, 174 __func__,
175 tm->tm_sec, tm->tm_min, tm->tm_hour); 175 tm->tm_sec, tm->tm_min, tm->tm_hour);
176 176
177 buf[CCR_SEC] = BIN2BCD(tm->tm_sec); 177 buf[CCR_SEC] = bin2bcd(tm->tm_sec);
178 buf[CCR_MIN] = BIN2BCD(tm->tm_min); 178 buf[CCR_MIN] = bin2bcd(tm->tm_min);
179 179
180 /* set hour and 24hr bit */ 180 /* set hour and 24hr bit */
181 buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL; 181 buf[CCR_HOUR] = bin2bcd(tm->tm_hour) | X1205_HR_MIL;
182 182
183 /* should we also set the date? */ 183 /* should we also set the date? */
184 if (datetoo) { 184 if (datetoo) {
@@ -187,15 +187,15 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
187 __func__, 187 __func__,
188 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); 188 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
189 189
190 buf[CCR_MDAY] = BIN2BCD(tm->tm_mday); 190 buf[CCR_MDAY] = bin2bcd(tm->tm_mday);
191 191
192 /* month, 1 - 12 */ 192 /* month, 1 - 12 */
193 buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1); 193 buf[CCR_MONTH] = bin2bcd(tm->tm_mon + 1);
194 194
195 /* year, since the rtc epoch*/ 195 /* year, since the rtc epoch*/
196 buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100); 196 buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100);
197 buf[CCR_WDAY] = tm->tm_wday & 0x07; 197 buf[CCR_WDAY] = tm->tm_wday & 0x07;
198 buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100); 198 buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100);
199 } 199 }
200 200
201 /* If writing alarm registers, set compare bits on registers 0-4 */ 201 /* If writing alarm registers, set compare bits on registers 0-4 */
@@ -437,7 +437,7 @@ static int x1205_validate_client(struct i2c_client *client)
437 return -EIO; 437 return -EIO;
438 } 438 }
439 439
440 value = BCD2BIN(reg & probe_limits_pattern[i].mask); 440 value = bcd2bin(reg & probe_limits_pattern[i].mask);
441 441
442 if (value > probe_limits_pattern[i].max || 442 if (value > probe_limits_pattern[i].max ||
443 value < probe_limits_pattern[i].min) { 443 value < probe_limits_pattern[i].min) {
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 69f8346aa288..5877f29a6005 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -189,7 +189,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
189 .attr = { 189 .attr = {
190 .name = "mu_read", 190 .name = "mu_read",
191 .mode = S_IRUSR , 191 .mode = S_IRUSR ,
192 .owner = THIS_MODULE,
193 }, 192 },
194 .size = 1032, 193 .size = 1032,
195 .read = arcmsr_sysfs_iop_message_read, 194 .read = arcmsr_sysfs_iop_message_read,
@@ -199,7 +198,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
199 .attr = { 198 .attr = {
200 .name = "mu_write", 199 .name = "mu_write",
201 .mode = S_IWUSR, 200 .mode = S_IWUSR,
202 .owner = THIS_MODULE,
203 }, 201 },
204 .size = 1032, 202 .size = 1032,
205 .write = arcmsr_sysfs_iop_message_write, 203 .write = arcmsr_sysfs_iop_message_write,
@@ -209,7 +207,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
209 .attr = { 207 .attr = {
210 .name = "mu_clear", 208 .name = "mu_clear",
211 .mode = S_IWUSR, 209 .mode = S_IWUSR,
212 .owner = THIS_MODULE,
213 }, 210 },
214 .size = 1, 211 .size = 1,
215 .write = arcmsr_sysfs_iop_message_clear, 212 .write = arcmsr_sysfs_iop_message_clear,
diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c
index 4eb3da996b36..4ad3e017213f 100644
--- a/drivers/scsi/sr_vendor.c
+++ b/drivers/scsi/sr_vendor.c
@@ -223,9 +223,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
223 no_multi = 1; 223 no_multi = 1;
224 break; 224 break;
225 } 225 }
226 min = BCD2BIN(buffer[15]); 226 min = bcd2bin(buffer[15]);
227 sec = BCD2BIN(buffer[16]); 227 sec = bcd2bin(buffer[16]);
228 frame = BCD2BIN(buffer[17]); 228 frame = bcd2bin(buffer[17]);
229 sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame; 229 sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
230 break; 230 break;
231 } 231 }
@@ -252,9 +252,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
252 } 252 }
253 if (rc != 0) 253 if (rc != 0)
254 break; 254 break;
255 min = BCD2BIN(buffer[1]); 255 min = bcd2bin(buffer[1]);
256 sec = BCD2BIN(buffer[2]); 256 sec = bcd2bin(buffer[2]);
257 frame = BCD2BIN(buffer[3]); 257 frame = bcd2bin(buffer[3]);
258 sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame; 258 sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
259 if (sector) 259 if (sector)
260 sector -= CD_MSF_OFFSET; 260 sector -= CD_MSF_OFFSET;
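
The buffer bytes decoded here are a BCD minute/second/frame (MSF) address of the session lead-out; the sector math uses 60 seconds per minute, 75 frames per second, and the standard 150-frame MSF offset. A worked example with the <linux/cdrom.h> constants inlined:

#include <stdio.h>

#define CD_SECS		60	/* seconds per minute */
#define CD_FRAMES	75	/* frames per second */
#define CD_MSF_OFFSET	150	/* MSF numbering starts 2 seconds in */

static unsigned bcd2bin(unsigned char v) { return (v & 0x0f) + (v >> 4) * 10; }

int main(void)
{
	/* BCD lead-out of 74:30:00, a typical 74-minute disc */
	int min = bcd2bin(0x74), sec = bcd2bin(0x30), frame = bcd2bin(0x00);
	long sector = (long)min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;

	if (sector)
		sector -= CD_MSF_OFFSET;
	printf("lead-out LBA = %ld\n", sector);	/* 335250 - 150 = 335100 */
	return 0;
}
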
diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 0416ad3bc127..418b4fe9a0a1 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -111,7 +111,7 @@ static struct parisc_driver serial_driver = {
111 .probe = serial_init_chip, 111 .probe = serial_init_chip,
112}; 112};
113 113
114int __init probe_serial_gsc(void) 114static int __init probe_serial_gsc(void)
115{ 115{
116 register_parisc_driver(&lasi_driver); 116 register_parisc_driver(&lasi_driver);
117 register_parisc_driver(&serial_driver); 117 register_parisc_driver(&serial_driver);
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 8fcb4c5b9a26..7313c2edcb83 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -1039,7 +1039,7 @@ static int __devinit serial_txx9_probe(struct platform_device *dev)
1039 ret = serial_txx9_register_port(&port); 1039 ret = serial_txx9_register_port(&port);
1040 if (ret < 0) { 1040 if (ret < 0) {
1041 dev_err(&dev->dev, "unable to register port at index %d " 1041 dev_err(&dev->dev, "unable to register port at index %d "
1042 "(IO%x MEM%llx IRQ%d): %d\n", i, 1042 "(IO%lx MEM%llx IRQ%d): %d\n", i,
1043 p->iobase, (unsigned long long)p->mapbase, 1043 p->iobase, (unsigned long long)p->mapbase,
1044 p->irq, ret); 1044 p->irq, ret);
1045 } 1045 }
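
The serial_txx9 fix corrects a format-string type mismatch: uart_port.iobase is an unsigned long, so it needs %lx rather than %x, which expects unsigned int and would print garbage past 32 bits. A tiny illustration:

#include <stdio.h>

int main(void)
{
	unsigned long iobase = 0x3f8UL;

	printf("IO%lx\n", iobase);		/* matches unsigned long */
	printf("IO%x\n", (unsigned)iobase);	/* %x needs an explicit cast */
	return 0;
}
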
diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c
index b73e3c0056cd..d5276c012f78 100644
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -61,7 +61,7 @@
61#define SN_SAL_BUFFER_SIZE (64 * (1 << 10)) 61#define SN_SAL_BUFFER_SIZE (64 * (1 << 10))
62 62
63#define SN_SAL_UART_FIFO_DEPTH 16 63#define SN_SAL_UART_FIFO_DEPTH 16
64#define SN_SAL_UART_FIFO_SPEED_CPS 9600/10 64#define SN_SAL_UART_FIFO_SPEED_CPS (9600/10)
65 65
66/* sn_transmit_chars() calling args */ 66/* sn_transmit_chars() calling args */
67#define TRANSMIT_BUFFERED 0 67#define TRANSMIT_BUFFERED 0
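
Parenthesizing 9600/10 is the classic macro-expansion fix: an unparenthesized body binds to neighbouring operators by precedence, not by the macro boundary. For example:

#include <stdio.h>

#define SPEED_BAD	9600/10		/* old form */
#define SPEED_GOOD	(9600/10)	/* fixed form */

int main(void)
{
	/* dividing something by the rate shows the difference */
	printf("%d\n", 4800 / SPEED_GOOD);	/* 4800 / 960        = 5 */
	printf("%d\n", 4800 / SPEED_BAD);	/* 4800 / 9600 / 10  = 0 */
	return 0;
}
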
diff --git a/drivers/staging/go7007/Kconfig b/drivers/staging/go7007/Kconfig
index 57a121c338c4..593fdb767aad 100644
--- a/drivers/staging/go7007/Kconfig
+++ b/drivers/staging/go7007/Kconfig
@@ -1,10 +1,12 @@
1config VIDEO_GO7007 1config VIDEO_GO7007
2 tristate "Go 7007 support" 2 tristate "Go 7007 support"
3 depends on VIDEO_DEV && PCI && I2C && INPUT 3 depends on VIDEO_DEV && PCI && I2C && INPUT
4 depends on SND
4 select VIDEOBUF_DMA_SG 5 select VIDEOBUF_DMA_SG
5 select VIDEO_IR 6 select VIDEO_IR
6 select VIDEO_TUNER 7 select VIDEO_TUNER
7 select VIDEO_TVEEPROM 8 select VIDEO_TVEEPROM
9 select SND_PCM
8 select CRC32 10 select CRC32
9 default N 11 default N
10 ---help--- 12 ---help---
diff --git a/drivers/staging/sxg/Kconfig b/drivers/staging/sxg/Kconfig
index 1ae350806600..6e6cf0b9ef99 100644
--- a/drivers/staging/sxg/Kconfig
+++ b/drivers/staging/sxg/Kconfig
@@ -1,6 +1,7 @@
1config SXG 1config SXG
2 tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support" 2 tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support"
3 depends on PCI && NETDEV_10000 3 depends on PCI && NETDEV_10000
4 depends on X86
4 default n 5 default n
5 help 6 help
6 This driver supports the Alacritech SLIC Technology Non-Accelerated 7 This driver supports the Alacritech SLIC Technology Non-Accelerated
diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c
index 4d74ba36c3a1..37caf4d69037 100644
--- a/drivers/telephony/phonedev.c
+++ b/drivers/telephony/phonedev.c
@@ -54,7 +54,6 @@ static int phone_open(struct inode *inode, struct file *file)
54 if (minor >= PHONE_NUM_DEVICES) 54 if (minor >= PHONE_NUM_DEVICES)
55 return -ENODEV; 55 return -ENODEV;
56 56
57 lock_kernel();
58 mutex_lock(&phone_lock); 57 mutex_lock(&phone_lock);
59 p = phone_device[minor]; 58 p = phone_device[minor];
60 if (p) 59 if (p)
@@ -81,7 +80,6 @@ static int phone_open(struct inode *inode, struct file *file)
81 fops_put(old_fops); 80 fops_put(old_fops);
82end: 81end:
83 mutex_unlock(&phone_lock); 82 mutex_unlock(&phone_lock);
84 unlock_kernel();
85 return err; 83 return err;
86} 84}
87 85
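
The phonedev hunks are part of the big-kernel-lock pushdown: phone_open() already serializes everything it touches under phone_lock, so the lock_kernel()/unlock_kernel() pair around it adds nothing and can go. A sketch of the surviving pattern, with hypothetical demo_* stand-ins for the phonedev table:

#include <linux/mutex.h>

#define DEMO_NUM_DEVICES 8

static DEFINE_MUTEX(demo_lock);
static void *demo_device[DEMO_NUM_DEVICES];

static void *demo_lookup(int minor)
{
	void *p;

	if (minor >= DEMO_NUM_DEVICES)
		return NULL;

	mutex_lock(&demo_lock);		/* the device table's own lock suffices */
	p = demo_device[minor];
	mutex_unlock(&demo_lock);
	return p;
}
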
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 217c5118ae9e..cd5f20da738a 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1002,101 +1002,132 @@ fb_blank(struct fb_info *info, int blank)
1002 return ret; 1002 return ret;
1003} 1003}
1004 1004
1005static int 1005static long
1006fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd, 1006fb_ioctl(struct file *file, unsigned int cmd,
1007 unsigned long arg) 1007 unsigned long arg)
1008{ 1008{
1009 struct inode *inode = file->f_path.dentry->d_inode;
1009 int fbidx = iminor(inode); 1010 int fbidx = iminor(inode);
1010 struct fb_info *info = registered_fb[fbidx]; 1011 struct fb_info *info;
1011 struct fb_ops *fb = info->fbops; 1012 struct fb_ops *fb;
1012 struct fb_var_screeninfo var; 1013 struct fb_var_screeninfo var;
1013 struct fb_fix_screeninfo fix; 1014 struct fb_fix_screeninfo fix;
1014 struct fb_con2fbmap con2fb; 1015 struct fb_con2fbmap con2fb;
1015 struct fb_cmap_user cmap; 1016 struct fb_cmap_user cmap;
1016 struct fb_event event; 1017 struct fb_event event;
1017 void __user *argp = (void __user *)arg; 1018 void __user *argp = (void __user *)arg;
1018 int i; 1019 long ret = 0;
1019 1020
1020 if (!fb) 1021 info = registered_fb[fbidx];
1022 mutex_lock(&info->lock);
1023 fb = info->fbops;
1024
1025 if (!fb) {
1026 mutex_unlock(&info->lock);
1021 return -ENODEV; 1027 return -ENODEV;
1028 }
1022 switch (cmd) { 1029 switch (cmd) {
1023 case FBIOGET_VSCREENINFO: 1030 case FBIOGET_VSCREENINFO:
1024 return copy_to_user(argp, &info->var, 1031 ret = copy_to_user(argp, &info->var,
1025 sizeof(var)) ? -EFAULT : 0; 1032 sizeof(var)) ? -EFAULT : 0;
1033 break;
1026 case FBIOPUT_VSCREENINFO: 1034 case FBIOPUT_VSCREENINFO:
1027 if (copy_from_user(&var, argp, sizeof(var))) 1035 if (copy_from_user(&var, argp, sizeof(var))) {
1028 return -EFAULT; 1036 ret = -EFAULT;
1037 break;
1038 }
1029 acquire_console_sem(); 1039 acquire_console_sem();
1030 info->flags |= FBINFO_MISC_USEREVENT; 1040 info->flags |= FBINFO_MISC_USEREVENT;
1031 i = fb_set_var(info, &var); 1041 ret = fb_set_var(info, &var);
1032 info->flags &= ~FBINFO_MISC_USEREVENT; 1042 info->flags &= ~FBINFO_MISC_USEREVENT;
1033 release_console_sem(); 1043 release_console_sem();
1034 if (i) return i; 1044 if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
1035 if (copy_to_user(argp, &var, sizeof(var))) 1045 ret = -EFAULT;
1036 return -EFAULT; 1046 break;
1037 return 0;
1038 case FBIOGET_FSCREENINFO: 1047 case FBIOGET_FSCREENINFO:
1039 return copy_to_user(argp, &info->fix, 1048 ret = copy_to_user(argp, &info->fix,
1040 sizeof(fix)) ? -EFAULT : 0; 1049 sizeof(fix)) ? -EFAULT : 0;
1050 break;
1041 case FBIOPUTCMAP: 1051 case FBIOPUTCMAP:
1042 if (copy_from_user(&cmap, argp, sizeof(cmap))) 1052 if (copy_from_user(&cmap, argp, sizeof(cmap)))
1043 return -EFAULT; 1053 ret = -EFAULT;
1044 return (fb_set_user_cmap(&cmap, info)); 1054 else
1055 ret = fb_set_user_cmap(&cmap, info);
1056 break;
1045 case FBIOGETCMAP: 1057 case FBIOGETCMAP:
1046 if (copy_from_user(&cmap, argp, sizeof(cmap))) 1058 if (copy_from_user(&cmap, argp, sizeof(cmap)))
1047 return -EFAULT; 1059 ret = -EFAULT;
1048 return fb_cmap_to_user(&info->cmap, &cmap); 1060 else
1061 ret = fb_cmap_to_user(&info->cmap, &cmap);
1062 break;
1049 case FBIOPAN_DISPLAY: 1063 case FBIOPAN_DISPLAY:
1050 if (copy_from_user(&var, argp, sizeof(var))) 1064 if (copy_from_user(&var, argp, sizeof(var))) {
1051 return -EFAULT; 1065 ret = -EFAULT;
1066 break;
1067 }
1052 acquire_console_sem(); 1068 acquire_console_sem();
1053 i = fb_pan_display(info, &var); 1069 ret = fb_pan_display(info, &var);
1054 release_console_sem(); 1070 release_console_sem();
1055 if (i) 1071 if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
1056 return i; 1072 ret = -EFAULT;
1057 if (copy_to_user(argp, &var, sizeof(var))) 1073 break;
1058 return -EFAULT;
1059 return 0;
1060 case FBIO_CURSOR: 1074 case FBIO_CURSOR:
1061 return -EINVAL; 1075 ret = -EINVAL;
1076 break;
1062 case FBIOGET_CON2FBMAP: 1077 case FBIOGET_CON2FBMAP:
1063 if (copy_from_user(&con2fb, argp, sizeof(con2fb))) 1078 if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
1064 return -EFAULT; 1079 ret = -EFAULT;
1065 if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) 1080 else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
1066 return -EINVAL; 1081 ret = -EINVAL;
1067 con2fb.framebuffer = -1; 1082 else {
1068 event.info = info; 1083 con2fb.framebuffer = -1;
1069 event.data = &con2fb; 1084 event.info = info;
1070 fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); 1085 event.data = &con2fb;
1071 return copy_to_user(argp, &con2fb, 1086 fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP,
1087 &event);
1088 ret = copy_to_user(argp, &con2fb,
1072 sizeof(con2fb)) ? -EFAULT : 0; 1089 sizeof(con2fb)) ? -EFAULT : 0;
1090 }
1091 break;
1073 case FBIOPUT_CON2FBMAP: 1092 case FBIOPUT_CON2FBMAP:
1074 if (copy_from_user(&con2fb, argp, sizeof(con2fb))) 1093 if (copy_from_user(&con2fb, argp, sizeof(con2fb))) {
1075 return - EFAULT; 1094 ret = -EFAULT;
1076 if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) 1095 break;
1077 return -EINVAL; 1096 }
1078 if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) 1097 if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) {
1079 return -EINVAL; 1098 ret = -EINVAL;
1080 if (!registered_fb[con2fb.framebuffer]) 1099 break;
1081 request_module("fb%d", con2fb.framebuffer); 1100 }
1101 if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) {
1102 ret = -EINVAL;
1103 break;
1104 }
1082 if (!registered_fb[con2fb.framebuffer]) 1105 if (!registered_fb[con2fb.framebuffer])
1083 return -EINVAL; 1106 request_module("fb%d", con2fb.framebuffer);
1107 if (!registered_fb[con2fb.framebuffer]) {
1108 ret = -EINVAL;
1109 break;
1110 }
1084 event.info = info; 1111 event.info = info;
1085 event.data = &con2fb; 1112 event.data = &con2fb;
1086 return fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, 1113 ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
1087 &event); 1114 &event);
1115 break;
1088 case FBIOBLANK: 1116 case FBIOBLANK:
1089 acquire_console_sem(); 1117 acquire_console_sem();
1090 info->flags |= FBINFO_MISC_USEREVENT; 1118 info->flags |= FBINFO_MISC_USEREVENT;
1091 i = fb_blank(info, arg); 1119 ret = fb_blank(info, arg);
1092 info->flags &= ~FBINFO_MISC_USEREVENT; 1120 info->flags &= ~FBINFO_MISC_USEREVENT;
1093 release_console_sem(); 1121 release_console_sem();
 1094 return i; 1122 break;
1095 default: 1123 default:
1096 if (fb->fb_ioctl == NULL) 1124 if (fb->fb_ioctl == NULL)
1097 return -EINVAL; 1125 ret = -ENOTTY;
1098 return fb->fb_ioctl(info, cmd, arg); 1126 else
1127 ret = fb->fb_ioctl(info, cmd, arg);
1099 } 1128 }
1129 mutex_unlock(&info->lock);
1130 return ret;
1100} 1131}
1101 1132
1102#ifdef CONFIG_COMPAT 1133#ifdef CONFIG_COMPAT
@@ -1150,7 +1181,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file,
1150 put_user(compat_ptr(data), &cmap->transp)) 1181 put_user(compat_ptr(data), &cmap->transp))
1151 return -EFAULT; 1182 return -EFAULT;
1152 1183
1153 err = fb_ioctl(inode, file, cmd, (unsigned long) cmap); 1184 err = fb_ioctl(file, cmd, (unsigned long) cmap);
1154 1185
1155 if (!err) { 1186 if (!err) {
1156 if (copy_in_user(&cmap32->start, 1187 if (copy_in_user(&cmap32->start,
@@ -1204,7 +1235,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file,
1204 1235
1205 old_fs = get_fs(); 1236 old_fs = get_fs();
1206 set_fs(KERNEL_DS); 1237 set_fs(KERNEL_DS);
1207 err = fb_ioctl(inode, file, cmd, (unsigned long) &fix); 1238 err = fb_ioctl(file, cmd, (unsigned long) &fix);
1208 set_fs(old_fs); 1239 set_fs(old_fs);
1209 1240
1210 if (!err) 1241 if (!err)
@@ -1222,7 +1253,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1222 struct fb_ops *fb = info->fbops; 1253 struct fb_ops *fb = info->fbops;
1223 long ret = -ENOIOCTLCMD; 1254 long ret = -ENOIOCTLCMD;
1224 1255
1225 lock_kernel(); 1256 mutex_lock(&info->lock);
1226 switch(cmd) { 1257 switch(cmd) {
1227 case FBIOGET_VSCREENINFO: 1258 case FBIOGET_VSCREENINFO:
1228 case FBIOPUT_VSCREENINFO: 1259 case FBIOPUT_VSCREENINFO:
@@ -1231,7 +1262,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1231 case FBIOPUT_CON2FBMAP: 1262 case FBIOPUT_CON2FBMAP:
1232 arg = (unsigned long) compat_ptr(arg); 1263 arg = (unsigned long) compat_ptr(arg);
1233 case FBIOBLANK: 1264 case FBIOBLANK:
1234 ret = fb_ioctl(inode, file, cmd, arg); 1265 ret = fb_ioctl(file, cmd, arg);
1235 break; 1266 break;
1236 1267
1237 case FBIOGET_FSCREENINFO: 1268 case FBIOGET_FSCREENINFO:
@@ -1248,7 +1279,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1248 ret = fb->fb_compat_ioctl(info, cmd, arg); 1279 ret = fb->fb_compat_ioctl(info, cmd, arg);
1249 break; 1280 break;
1250 } 1281 }
1251 unlock_kernel(); 1282 mutex_unlock(&info->lock);
1252 return ret; 1283 return ret;
1253} 1284}
1254#endif 1285#endif
@@ -1270,13 +1301,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
1270 return -ENODEV; 1301 return -ENODEV;
1271 if (fb->fb_mmap) { 1302 if (fb->fb_mmap) {
1272 int res; 1303 int res;
1273 lock_kernel(); 1304 mutex_lock(&info->lock);
1274 res = fb->fb_mmap(info, vma); 1305 res = fb->fb_mmap(info, vma);
1275 unlock_kernel(); 1306 mutex_unlock(&info->lock);
1276 return res; 1307 return res;
1277 } 1308 }
1278 1309
1279 lock_kernel(); 1310 mutex_lock(&info->lock);
1280 1311
1281 /* frame buffer memory */ 1312 /* frame buffer memory */
1282 start = info->fix.smem_start; 1313 start = info->fix.smem_start;
@@ -1285,13 +1316,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
1285 /* memory mapped io */ 1316 /* memory mapped io */
1286 off -= len; 1317 off -= len;
1287 if (info->var.accel_flags) { 1318 if (info->var.accel_flags) {
1288 unlock_kernel(); 1319 mutex_unlock(&info->lock);
1289 return -EINVAL; 1320 return -EINVAL;
1290 } 1321 }
1291 start = info->fix.mmio_start; 1322 start = info->fix.mmio_start;
1292 len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len); 1323 len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len);
1293 } 1324 }
1294 unlock_kernel(); 1325 mutex_unlock(&info->lock);
1295 start &= PAGE_MASK; 1326 start &= PAGE_MASK;
1296 if ((vma->vm_end - vma->vm_start + off) > len) 1327 if ((vma->vm_end - vma->vm_start + off) > len)
1297 return -EINVAL; 1328 return -EINVAL;
@@ -1315,13 +1346,13 @@ fb_open(struct inode *inode, struct file *file)
1315 1346
1316 if (fbidx >= FB_MAX) 1347 if (fbidx >= FB_MAX)
1317 return -ENODEV; 1348 return -ENODEV;
1318 lock_kernel(); 1349 info = registered_fb[fbidx];
1319 if (!(info = registered_fb[fbidx])) 1350 if (!info)
1320 request_module("fb%d", fbidx); 1351 request_module("fb%d", fbidx);
1321 if (!(info = registered_fb[fbidx])) { 1352 info = registered_fb[fbidx];
1322 res = -ENODEV; 1353 if (!info)
1323 goto out; 1354 return -ENODEV;
1324 } 1355 mutex_lock(&info->lock);
1325 if (!try_module_get(info->fbops->owner)) { 1356 if (!try_module_get(info->fbops->owner)) {
1326 res = -ENODEV; 1357 res = -ENODEV;
1327 goto out; 1358 goto out;
@@ -1337,7 +1368,7 @@ fb_open(struct inode *inode, struct file *file)
1337 fb_deferred_io_open(info, inode, file); 1368 fb_deferred_io_open(info, inode, file);
1338#endif 1369#endif
1339out: 1370out:
1340 unlock_kernel(); 1371 mutex_unlock(&info->lock);
1341 return res; 1372 return res;
1342} 1373}
1343 1374
@@ -1346,11 +1377,11 @@ fb_release(struct inode *inode, struct file *file)
1346{ 1377{
1347 struct fb_info * const info = file->private_data; 1378 struct fb_info * const info = file->private_data;
1348 1379
1349 lock_kernel(); 1380 mutex_lock(&info->lock);
1350 if (info->fbops->fb_release) 1381 if (info->fbops->fb_release)
1351 info->fbops->fb_release(info,1); 1382 info->fbops->fb_release(info,1);
1352 module_put(info->fbops->owner); 1383 module_put(info->fbops->owner);
1353 unlock_kernel(); 1384 mutex_unlock(&info->lock);
1354 return 0; 1385 return 0;
1355} 1386}
1356 1387
@@ -1358,7 +1389,7 @@ static const struct file_operations fb_fops = {
1358 .owner = THIS_MODULE, 1389 .owner = THIS_MODULE,
1359 .read = fb_read, 1390 .read = fb_read,
1360 .write = fb_write, 1391 .write = fb_write,
1361 .ioctl = fb_ioctl, 1392 .unlocked_ioctl = fb_ioctl,
1362#ifdef CONFIG_COMPAT 1393#ifdef CONFIG_COMPAT
1363 .compat_ioctl = fb_compat_ioctl, 1394 .compat_ioctl = fb_compat_ioctl,
1364#endif 1395#endif
@@ -1429,6 +1460,7 @@ register_framebuffer(struct fb_info *fb_info)
1429 if (!registered_fb[i]) 1460 if (!registered_fb[i])
1430 break; 1461 break;
1431 fb_info->node = i; 1462 fb_info->node = i;
1463 mutex_init(&fb_info->lock);
1432 1464
1433 fb_info->dev = device_create(fb_class, fb_info->device, 1465 fb_info->dev = device_create(fb_class, fb_info->device,
1434 MKDEV(FB_MAJOR, i), NULL, "fb%d", i); 1466 MKDEV(FB_MAJOR, i), NULL, "fb%d", i);
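
The fbmem rework is a standard .ioctl to .unlocked_ioctl conversion: the new entry point loses the inode argument, returns long, runs without the BKL, and therefore takes a driver lock itself (the new per-device info->lock, initialized in register_framebuffer() above); when the inode is still needed it is recovered from file->f_path.dentry->d_inode, and unknown commands now return -ENOTTY, the conventional errno for an unrecognized ioctl. The shape of such a conversion, as a hypothetical minimal driver:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_lock);

static long demo_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long ret;

	mutex_lock(&demo_lock);		/* driver-private lock replaces the BKL */
	switch (cmd) {
	default:
		ret = -ENOTTY;		/* unknown ioctl: -ENOTTY, not -EINVAL */
	}
	mutex_unlock(&demo_lock);
	return ret;
}

static const struct file_operations demo_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= demo_ioctl,	/* was .ioctl with (inode, file, ...) */
};
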
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index ed6b0576208c..1f09d4e4144c 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -80,7 +80,6 @@ static struct bin_attribute w1_ds2760_bin_attr = {
80 .attr = { 80 .attr = {
81 .name = "w1_slave", 81 .name = "w1_slave",
82 .mode = S_IRUGO, 82 .mode = S_IRUGO,
83 .owner = THIS_MODULE,
84 }, 83 },
85 .size = DS2760_DATA_SIZE, 84 .size = DS2760_DATA_SIZE,
86 .read = w1_ds2760_read_bin, 85 .read = w1_ds2760_read_bin,
diff --git a/fs/Kconfig b/fs/Kconfig
index d0a1174fb516..4eca61c201f0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1168,195 +1168,7 @@ config EFS_FS
1168 To compile the EFS file system support as a module, choose M here: the 1168 To compile the EFS file system support as a module, choose M here: the
1169 module will be called efs. 1169 module will be called efs.
1170 1170
1171config JFFS2_FS 1171source "fs/jffs2/Kconfig"
1172 tristate "Journalling Flash File System v2 (JFFS2) support"
1173 select CRC32
1174 depends on MTD
1175 help
1176 JFFS2 is the second generation of the Journalling Flash File System
1177 for use on diskless embedded devices. It provides improved wear
1178 levelling, compression and support for hard links. You cannot use
1179 this on normal block devices, only on 'MTD' devices.
1180
1181 Further information on the design and implementation of JFFS2 is
1182 available at <http://sources.redhat.com/jffs2/>.
1183
1184config JFFS2_FS_DEBUG
1185 int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
1186 depends on JFFS2_FS
1187 default "0"
1188 help
1189 This controls the amount of debugging messages produced by the JFFS2
1190 code. Set it to zero for use in production systems. For evaluation,
1191 testing and debugging, it's advisable to set it to one. This will
1192 enable a few assertions and will print debugging messages at the
1193 KERN_DEBUG loglevel, where they won't normally be visible. Level 2
1194 is unlikely to be useful - it enables extra debugging in certain
1195 areas which at one point needed debugging, but when the bugs were
1196 located and fixed, the detailed messages were relegated to level 2.
1197
1198 If reporting bugs, please try to have available a full dump of the
1199 messages at debug level 1 while the misbehaviour was occurring.
1200
1201config JFFS2_FS_WRITEBUFFER
1202 bool "JFFS2 write-buffering support"
1203 depends on JFFS2_FS
1204 default y
1205 help
1206 This enables the write-buffering support in JFFS2.
1207
1208 This functionality is required to support JFFS2 on the following
1209 types of flash devices:
1210 - NAND flash
1211 - NOR flash with transparent ECC
1212 - DataFlash
1213
1214config JFFS2_FS_WBUF_VERIFY
1215 bool "Verify JFFS2 write-buffer reads"
1216 depends on JFFS2_FS_WRITEBUFFER
1217 default n
1218 help
1219 This causes JFFS2 to read back every page written through the
1220 write-buffer, and check for errors.
1221
1222config JFFS2_SUMMARY
1223 bool "JFFS2 summary support (EXPERIMENTAL)"
1224 depends on JFFS2_FS && EXPERIMENTAL
1225 default n
1226 help
1227 This feature makes it possible to use summary information
1228 for faster filesystem mount.
1229
1230 The summary information can be inserted into a filesystem image
1231 by the utility 'sumtool'.
1232
1233 If unsure, say 'N'.
1234
1235config JFFS2_FS_XATTR
1236 bool "JFFS2 XATTR support (EXPERIMENTAL)"
1237 depends on JFFS2_FS && EXPERIMENTAL
1238 default n
1239 help
1240 Extended attributes are name:value pairs associated with inodes by
1241 the kernel or by users (see the attr(5) manual page, or visit
1242 <http://acl.bestbits.at/> for details).
1243
1244 If unsure, say N.
1245
1246config JFFS2_FS_POSIX_ACL
1247 bool "JFFS2 POSIX Access Control Lists"
1248 depends on JFFS2_FS_XATTR
1249 default y
1250 select FS_POSIX_ACL
1251 help
1252 Posix Access Control Lists (ACLs) support permissions for users and
1253 groups beyond the owner/group/world scheme.
1254
1255 To learn more about Access Control Lists, visit the Posix ACLs for
1256 Linux website <http://acl.bestbits.at/>.
1257
1258 If you don't know what Access Control Lists are, say N
1259
1260config JFFS2_FS_SECURITY
1261 bool "JFFS2 Security Labels"
1262 depends on JFFS2_FS_XATTR
1263 default y
1264 help
1265 Security labels support alternative access control models
1266 implemented by security modules like SELinux. This option
1267 enables an extended attribute handler for file security
1268 labels in the jffs2 filesystem.
1269
1270 If you are not using a security module that requires using
1271 extended attributes for file security labels, say N.
1272
1273config JFFS2_COMPRESSION_OPTIONS
1274 bool "Advanced compression options for JFFS2"
1275 depends on JFFS2_FS
1276 default n
1277 help
1278 Enabling this option allows you to explicitly choose which
1279 compression modules, if any, are enabled in JFFS2. Removing
1280 compressors can mean you cannot read existing file systems,
1281 and enabling experimental compressors can mean that you
1282 write a file system which cannot be read by a standard kernel.
1283
1284 If unsure, you should _definitely_ say 'N'.
1285
1286config JFFS2_ZLIB
1287 bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
1288 select ZLIB_INFLATE
1289 select ZLIB_DEFLATE
1290 depends on JFFS2_FS
1291 default y
1292 help
1293 Zlib is designed to be a free, general-purpose, legally unencumbered,
1294 lossless data-compression library for use on virtually any computer
1295 hardware and operating system. See <http://www.gzip.org/zlib/> for
1296 further information.
1297
1298 Say 'Y' if unsure.
1299
1300config JFFS2_LZO
1301 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
1302 select LZO_COMPRESS
1303 select LZO_DECOMPRESS
1304 depends on JFFS2_FS
1305 default n
1306 help
1307 minilzo-based compression. Generally works better than Zlib.
1308
1309 This feature was added in July, 2007. Say 'N' if you need
1310 compatibility with older bootloaders or kernels.
1311
1312config JFFS2_RTIME
1313 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
1314 depends on JFFS2_FS
1315 default y
1316 help
1317 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
1318
1319config JFFS2_RUBIN
1320 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
1321 depends on JFFS2_FS
1322 default n
1323 help
1324 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
1325
1326choice
1327 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
1328 default JFFS2_CMODE_PRIORITY
1329 depends on JFFS2_FS
1330 help
1331 You can set here the default compression mode of JFFS2 from
1332 the available compression modes. Don't touch if unsure.
1333
1334config JFFS2_CMODE_NONE
1335 bool "no compression"
1336 help
1337 Uses no compression.
1338
1339config JFFS2_CMODE_PRIORITY
1340 bool "priority"
1341 help
1342 Tries the compressors in a predefined order and chooses the first
1343 successful one.
1344
1345config JFFS2_CMODE_SIZE
1346 bool "size (EXPERIMENTAL)"
1347 help
1348 Tries all compressors and chooses the one which has the smallest
1349 result.
1350
1351config JFFS2_CMODE_FAVOURLZO
1352 bool "Favour LZO"
1353 help
1354 Tries all compressors and chooses the one which has the smallest
1355 result but gives some preference to LZO (which has faster
1356 decompression) at the expense of size.
1357
1358endchoice
1359
1360# UBIFS File system configuration 1172# UBIFS File system configuration
1361source "fs/ubifs/Kconfig" 1173source "fs/ubifs/Kconfig"
1362 1174
@@ -1913,148 +1725,7 @@ config SMB_NLS_REMOTE
1913 1725
1914 smbmount from samba 2.2.0 or later supports this. 1726 smbmount from samba 2.2.0 or later supports this.
1915 1727
1916config CIFS 1728source "fs/cifs/Kconfig"
1917 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
1918 depends on INET
1919 select NLS
1920 help
1921 This is the client VFS module for the Common Internet File System
1922 (CIFS) protocol which is the successor to the Server Message Block
1923 (SMB) protocol, the native file sharing mechanism for most early
1924 PC operating systems. The CIFS protocol is fully supported by
1925 file servers such as Windows 2000 (including Windows 2003, NT 4
1926 and Windows XP) as well by Samba (which provides excellent CIFS
1927 server support for Linux and many other operating systems). Limited
1928 support for OS/2 and Windows ME and similar servers is provided as
1929 well.
1930
1931 The cifs module provides an advanced network file system
1932 client for mounting to CIFS compliant servers. It includes
1933 support for DFS (hierarchical name space), secure per-user
1934 session establishment via Kerberos or NTLM or NTLMv2,
1935 safe distributed caching (oplock), optional packet
1936 signing, Unicode and other internationalization improvements.
1937 If you need to mount to Samba or Windows from this machine, say Y.
1938
1939config CIFS_STATS
1940 bool "CIFS statistics"
1941 depends on CIFS
1942 help
1943 Enabling this option will cause statistics for each server share
1944 mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
1945
1946config CIFS_STATS2
1947 bool "Extended statistics"
1948 depends on CIFS_STATS
1949 help
1950 Enabling this option will allow more detailed statistics on SMB
1951 request timing to be displayed in /proc/fs/cifs/DebugData and also
1952 allow optional logging of slow responses to dmesg (depending on the
1953 value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
1954 These additional statistics may have a minor effect on performance
1955 and memory utilization.
1956
1957 Unless you are a developer or are doing network performance analysis
1958 or tuning, say N.
1959
1960config CIFS_WEAK_PW_HASH
1961 bool "Support legacy servers which use weaker LANMAN security"
1962 depends on CIFS
1963 help
1964 Modern CIFS servers including Samba and most Windows versions
1965 (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
1966 security mechanisms. These hash the password more securely
1967 than the mechanisms used in the older LANMAN version of the
1968 SMB protocol but LANMAN based authentication is needed to
1969 establish sessions with some old SMB servers.
1970
1971 Enabling this option allows the cifs module to mount to older
1972 LANMAN based servers such as OS/2 and Windows 95, but such
1973 mounts may be less secure than mounts using NTLM or more recent
1974 security mechanisms if you are on a public network. Unless you
1975 have a need to access old SMB servers (and are on a private
1976 network) you probably want to say N. Even if this support
1977 is enabled in the kernel build, LANMAN authentication will not be
1978 used automatically. At runtime LANMAN mounts are disabled but
1979 can be set to required (or optional) either in
1980 /proc/fs/cifs (see fs/cifs/README for more detail) or via an
1981 option on the mount command. This support is disabled by
1982 default in order to reduce the possibility of a downgrade
1983 attack.
1984
1985 If unsure, say N.
1986
1987config CIFS_UPCALL
1988 bool "Kerberos/SPNEGO advanced session setup"
1989 depends on CIFS && KEYS
1990 help
1991 Enables an upcall mechanism for CIFS which accesses
1992 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
1993 Kerberos tickets which are needed to mount to certain secure servers
1994 (for which more secure Kerberos authentication is required). If
1995 unsure, say N.
1996
1997config CIFS_XATTR
1998 bool "CIFS extended attributes"
1999 depends on CIFS
2000 help
2001 Extended attributes are name:value pairs associated with inodes by
2002 the kernel or by users (see the attr(5) manual page, or visit
2003 <http://acl.bestbits.at/> for details). CIFS maps the name of
2004 extended attributes beginning with the user namespace prefix
2005 to SMB/CIFS EAs. EAs are stored on Windows servers without the
2006 user namespace prefix, but their names are seen by Linux cifs clients
2007 prefaced by the user namespace prefix. The system namespace
2008 (used by some filesystems to store ACLs) is not supported at
2009 this time.
2010
2011 If unsure, say N.
2012
2013config CIFS_POSIX
2014 bool "CIFS POSIX Extensions"
2015 depends on CIFS_XATTR
2016 help
2017 Enabling this option will cause the cifs client to attempt to
2018 negotiate a newer dialect with servers, such as Samba 3.0.5
2019 or later, that optionally can handle more POSIX like (rather
2020 than Windows like) file behavior. It also enables
2021 support for POSIX ACLs (getfacl and setfacl) to servers
2022 (such as Samba 3.10 and later) which can negotiate
2023 CIFS POSIX ACL support. If unsure, say N.
2024
2025config CIFS_DEBUG2
2026 bool "Enable additional CIFS debugging routines"
2027 depends on CIFS
2028 help
2029 Enabling this option adds a few more debugging routines
2030 to the cifs code which slightly increases the size of
2031 the cifs module and can cause additional logging of debug
2032 messages in some error paths, slowing performance. This
2033 option can be turned off unless you are debugging
2034 cifs problems. If unsure, say N.
2035
2036config CIFS_EXPERIMENTAL
2037 bool "CIFS Experimental Features (EXPERIMENTAL)"
2038 depends on CIFS && EXPERIMENTAL
2039 help
2040 Enables cifs features under testing. These features are
2041 experimental and currently include DFS support and directory
2042 change notification ie fcntl(F_DNOTIFY), as well as the upcall
2043 mechanism which will be used for Kerberos session negotiation
2044 and uid remapping. Some of these features also may depend on
2045 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
2046 (which is disabled by default). See the file fs/cifs/README
2047 for more details. If unsure, say N.
2048
2049config CIFS_DFS_UPCALL
2050 bool "DFS feature support (EXPERIMENTAL)"
2051 depends on CIFS_EXPERIMENTAL
2052 depends on KEYS
2053 help
2054 Enables an upcall mechanism for CIFS which contacts userspace
2055 helper utilities to provide server name resolution (host names to
2056 IP addresses) which is needed for implicit mounts of DFS junction
2057 points. If unsure, say N.
2058 1729
2059config NCP_FS 1730config NCP_FS
2060 tristate "NCP file system support (to mount NetWare volumes)" 1731 tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 801db1341811..ce9fb3fbfae4 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
40 40
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config CORE_DUMP_DEFAULT_ELF_HEADERS
44 bool "Write ELF core dumps with partial segments"
45 default n
46 depends on BINFMT_ELF
47 help
48 ELF core dump files describe each memory mapping of the crashed
49 process, and can contain or omit the memory contents of each one.
50 The contents of an unmodified text mapping are omitted by default.
51
52 For an unmodified text mapping of an ELF object, including just
53 the first page of the file in a core dump makes it possible to
54 identify the build ID bits in the file, without paying the i/o
55 cost and disk space to dump all the text. However, versions of
56 GDB before 6.7 are confused by ELF core dump files in this format.
57
58 The core dump behavior can be controlled per process using
59 the /proc/PID/coredump_filter pseudo-file; this setting is
60 inherited. See Documentation/filesystems/proc.txt for details.
61
62 This config option changes the default setting of coredump_filter
63 seen at boot time. If unsure, say N.
64
43config BINFMT_FLAT 65config BINFMT_FLAT
44 bool "Kernel support for flat binaries" 66 bool "Kernel support for flat binaries"
45 depends on !MMU && (!FRV || BROKEN) 67 depends on !MMU && (!FRV || BROKEN)
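
CORE_DUMP_DEFAULT_ELF_HEADERS only changes the boot-time default; at run time the same bits live in /proc/PID/coredump_filter and are inherited by children. A hedged userspace sketch that sets the ELF-headers bit (bit 4, per Documentation/filesystems/proc.txt) for the current process:

#include <stdio.h>

int main(void)
{
	unsigned long mask;
	FILE *f = fopen("/proc/self/coredump_filter", "r+");

	if (!f)
		return 1;
	if (fscanf(f, "%lx", &mask) != 1)
		mask = 0;
	rewind(f);				/* reposition before writing */
	fprintf(f, "%lx", mask | (1UL << 4));	/* include ELF headers in dumps */
	fclose(f);
	return 0;
}
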
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c76afa26edf7..e2159063198a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
 static unsigned long vma_dump_size(struct vm_area_struct *vma,
 				   unsigned long mm_flags)
 {
+#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
+
 	/* The vma can be set up to tell us the answer directly. */
 	if (vma->vm_flags & VM_ALWAYSDUMP)
 		goto whole;
 
+	/* Hugetlb memory check */
+	if (vma->vm_flags & VM_HUGETLB) {
+		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+			goto whole;
+		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+			goto whole;
+	}
+
 	/* Do not dump I/O mapped devices or special mappings */
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
-#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
-
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
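The hunk moves FILTER() above the new hugetlb test so both call sites share it. As a standalone illustration of that token-pasting idiom (the MMF_DUMP_* values below are stand-ins, not the kernel's):

/* Illustration of the MMF_DUMP_##type token-pasting test used above.
 * The bit positions are invented for this sketch. */
#include <stdio.h>

#define MMF_DUMP_HUGETLB_PRIVATE 7
#define MMF_DUMP_HUGETLB_SHARED  8

int main(void)
{
	unsigned long mm_flags = 1UL << MMF_DUMP_HUGETLB_SHARED;

#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
	printf("dump private hugetlb? %s\n",
	       FILTER(HUGETLB_PRIVATE) ? "yes" : "no");
	printf("dump shared hugetlb?  %s\n",
	       FILTER(HUGETLB_SHARED) ? "yes" : "no");
#undef FILTER
	return 0;
}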
diff --git a/fs/buffer.c b/fs/buffer.c
index ac78d4c19b3b..6569fda5cfed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
 
 void unlock_buffer(struct buffer_head *bh)
 {
-	smp_mb__before_clear_bit();
-	clear_buffer_locked(bh);
+	clear_bit_unlock(BH_Lock, &bh->b_state);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bh->b_state, BH_Lock);
 }
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644
index 000000000000..341a98965bd0
--- /dev/null
+++ b/fs/cifs/Kconfig
@@ -0,0 +1,142 @@
+config CIFS
+	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+	depends on INET
+	select NLS
+	help
+	  This is the client VFS module for the Common Internet File System
+	  (CIFS) protocol which is the successor to the Server Message Block
+	  (SMB) protocol, the native file sharing mechanism for most early
+	  PC operating systems. The CIFS protocol is fully supported by
+	  file servers such as Windows 2000 (including Windows 2003, NT 4
+	  and Windows XP) as well by Samba (which provides excellent CIFS
+	  server support for Linux and many other operating systems). Limited
+	  support for OS/2 and Windows ME and similar servers is provided as
+	  well.
+
+	  The cifs module provides an advanced network file system
+	  client for mounting to CIFS compliant servers. It includes
+	  support for DFS (hierarchical name space), secure per-user
+	  session establishment via Kerberos or NTLM or NTLMv2,
+	  safe distributed caching (oplock), optional packet
+	  signing, Unicode and other internationalization improvements.
+	  If you need to mount to Samba or Windows from this machine, say Y.
+
+config CIFS_STATS
+	bool "CIFS statistics"
+	depends on CIFS
+	help
+	  Enabling this option will cause statistics for each server share
+	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+
+config CIFS_STATS2
+	bool "Extended statistics"
+	depends on CIFS_STATS
+	help
+	  Enabling this option will allow more detailed statistics on SMB
+	  request timing to be displayed in /proc/fs/cifs/DebugData and also
+	  allow optional logging of slow responses to dmesg (depending on the
+	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+	  These additional statistics may have a minor effect on performance
+	  and memory utilization.
+
+	  Unless you are a developer or are doing network performance analysis
+	  or tuning, say N.
+
+config CIFS_WEAK_PW_HASH
+	bool "Support legacy servers which use weaker LANMAN security"
+	depends on CIFS
+	help
+	  Modern CIFS servers including Samba and most Windows versions
+	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+	  security mechanisms. These hash the password more securely
+	  than the mechanisms used in the older LANMAN version of the
+	  SMB protocol but LANMAN based authentication is needed to
+	  establish sessions with some old SMB servers.
+
+	  Enabling this option allows the cifs module to mount to older
+	  LANMAN based servers such as OS/2 and Windows 95, but such
+	  mounts may be less secure than mounts using NTLM or more recent
+	  security mechanisms if you are on a public network. Unless you
+	  have a need to access old SMB servers (and are on a private
+	  network) you probably want to say N. Even if this support
+	  is enabled in the kernel build, LANMAN authentication will not be
+	  used automatically. At runtime LANMAN mounts are disabled but
+	  can be set to required (or optional) either in
+	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+	  option on the mount command. This support is disabled by
+	  default in order to reduce the possibility of a downgrade
+	  attack.
+
+	  If unsure, say N.
+
+config CIFS_UPCALL
+	bool "Kerberos/SPNEGO advanced session setup"
+	depends on CIFS && KEYS
+	help
+	  Enables an upcall mechanism for CIFS which accesses
+	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	  Kerberos tickets which are needed to mount to certain secure servers
+	  (for which more secure Kerberos authentication is required). If
+	  unsure, say N.
+
+config CIFS_XATTR
+	bool "CIFS extended attributes"
+	depends on CIFS
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details). CIFS maps the name of
+	  extended attributes beginning with the user namespace prefix
+	  to SMB/CIFS EAs. EAs are stored on Windows servers without the
+	  user namespace prefix, but their names are seen by Linux cifs clients
+	  prefaced by the user namespace prefix. The system namespace
+	  (used by some filesystems to store ACLs) is not supported at
+	  this time.
+
+	  If unsure, say N.
+
+config CIFS_POSIX
+	bool "CIFS POSIX Extensions"
+	depends on CIFS_XATTR
+	help
+	  Enabling this option will cause the cifs client to attempt to
+	  negotiate a newer dialect with servers, such as Samba 3.0.5
+	  or later, that optionally can handle more POSIX like (rather
+	  than Windows like) file behavior. It also enables
+	  support for POSIX ACLs (getfacl and setfacl) to servers
+	  (such as Samba 3.10 and later) which can negotiate
+	  CIFS POSIX ACL support. If unsure, say N.
+
+config CIFS_DEBUG2
+	bool "Enable additional CIFS debugging routines"
+	depends on CIFS
+	help
+	  Enabling this option adds a few more debugging routines
+	  to the cifs code which slightly increases the size of
+	  the cifs module and can cause additional logging of debug
+	  messages in some error paths, slowing performance. This
+	  option can be turned off unless you are debugging
+	  cifs problems. If unsure, say N.
+
+config CIFS_EXPERIMENTAL
+	bool "CIFS Experimental Features (EXPERIMENTAL)"
+	depends on CIFS && EXPERIMENTAL
+	help
+	  Enables cifs features under testing. These features are
+	  experimental and currently include DFS support and directory
+	  change notification ie fcntl(F_DNOTIFY), as well as the upcall
+	  mechanism which will be used for Kerberos session negotiation
+	  and uid remapping. Some of these features also may depend on
+	  setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+	  (which is disabled by default). See the file fs/cifs/README
+	  for more details. If unsure, say N.
+
+config CIFS_DFS_UPCALL
+	bool "DFS feature support (EXPERIMENTAL)"
+	depends on CIFS_EXPERIMENTAL
+	depends on KEYS
+	help
+	  Enables an upcall mechanism for CIFS which contacts userspace
+	  helper utilities to provide server name resolution (host names to
+	  IP addresses) which is needed for implicit mounts of DFS junction
+	  points. If unsure, say N.
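As a usage note, a mount of the kind these options govern can be issued from C via mount(2); the server path, mount point and option string below are placeholders:

/* Sketch: mounting a CIFS share with mount(2); run as root.
 * "//server/share", "/mnt/cifs" and the options are placeholders. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("//server/share", "/mnt/cifs", "cifs", 0,
		  "user=guest,sec=ntlmv2") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}

In practice the mount.cifs(8) helper resolves the UNC name and prompts for credentials first; calling mount(2) directly like this only works when the option string is already complete.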
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a0605125..62d8bd8f14c0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
 		SetPageUptodate(page);
 		unlock_page(page);
 		if (!pagevec_add(plru_pvec, page))
-			__pagevec_lru_add(plru_pvec);
+			__pagevec_lru_add_file(plru_pvec);
 		data += PAGE_CACHE_SIZE;
 	}
 	return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		bytes_read = 0;
 	}
 
-	pagevec_lru_add(&lru_pvec);
+	pagevec_lru_add_file(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
 	if (smb_read_data) {
diff --git a/fs/exec.c b/fs/exec.c
index a41e7902ed0b..4e834f16d9da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, int nr_threads, long signr)
+static int format_corename(char *corename, long signr)
 {
 	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
 	 * If core_pattern does not include a %p (as is the default)
 	 * and core_uses_pid is set, then .%pid will be appended to
 	 * the filename. Do not do this for piped commands. */
-	if (!ispipe && !pid_in_pattern
-	    && (core_uses_pid || nr_threads)) {
+	if (!ispipe && !pid_in_pattern && core_uses_pid) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
 			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * uses lock_kernel()
 	 */
 	lock_kernel();
-	ispipe = format_corename(corename, retval, signr);
+	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 	/*
 	 * Don't bother to check the RLIMIT_CORE value if core_pattern points
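With nr_threads gone from format_corename(), only core_uses_pid (or an explicit %p in the pattern) appends the PID. A small sketch that reads the two knobs the function consults:

/* Sketch: show the two sysctls format_corename() reads. */
#include <stdio.h>

static void show(const char *path)
{
	char buf[128];
	FILE *f = fopen(path, "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("%s = %s", path, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	show("/proc/sys/kernel/core_pattern");
	show("/proc/sys/kernel/core_uses_pid");
	return 0;
}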
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 92fd0338a6eb..f5b57a2ca35a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1547,6 +1547,7 @@ retry_alloc:
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
+		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1585,7 +1586,7 @@ retry_alloc:
 	 * free blocks is less than half of the reservation
 	 * window size.
 	 */
-	if (free_blocks <= (windowsz/2))
+	if (my_rsv && (free_blocks <= (windowsz/2)))
 		continue;
 
 	brelse(bitmap_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 2eea96ec78ed..4c82531ea0a8 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
 	int err;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
+	int dir_has_error = 0;
 
 	sb = inode->i_sb;
 
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
 		 * of recovering data when there's a bad sector
 		 */
 		if (!bh) {
-			ext3_error (sb, "ext3_readdir",
-				"directory #%lu contains a hole at offset %lu",
-				inode->i_ino, (unsigned long)filp->f_pos);
+			if (!dir_has_error) {
+				ext3_error(sb, __func__, "directory #%lu "
+					"contains a hole at offset %lld",
+					inode->i_ino, filp->f_pos);
+				dir_has_error = 1;
+			}
 			/* corrupt size? Maybe no more blocks to read */
 			if (filp->f_pos > inode->i_blocks << 9)
 				break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
 			get_dtype(sb, fname->file_type));
 		if (error) {
 			filp->f_pos = curr_pos;
-			info->extra_fname = fname->next;
+			info->extra_fname = fname;
 			return error;
 		}
 		fname = fname->next;
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp,
 	 * If there are any leftover names on the hash collision
 	 * chain, return them first.
 	 */
-	if (info->extra_fname &&
-	    call_filldir(filp, dirent, filldir, info->extra_fname))
-		goto finished;
+	if (info->extra_fname) {
+		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+			goto finished;
 
-	if (!info->curr_node)
+		info->extra_fname = NULL;
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT3_HTREE_EOF;
+				goto finished;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	} else if (!info->curr_node)
 		info->curr_node = rb_first(&info->root);
 
 	while (1) {
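The call_filldir() change above saves the entry that failed rather than its successor, so the retry re-emits the same name instead of silently skipping it. A standalone sketch of that resume-from-failure idiom (the list and emit() here are invented for illustration):

/* Sketch: resume list iteration at the element whose emit failed. */
#include <stdio.h>

struct fname { const char *name; struct fname *next; };

static struct fname *extra;	/* restart point, like info->extra_fname */

static int emit(struct fname *f, int budget)
{
	return budget > 0 ? 0 : -1;	/* pretend the buffer filled up */
}

static int walk(struct fname *f, int budget)
{
	for (; f; f = f->next, budget--) {
		if (emit(f, budget)) {
			extra = f;	/* save f itself, not f->next */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct fname c = { "c", NULL }, b = { "b", &c }, a = { "a", &b };

	if (walk(&a, 2))
		printf("retry from %s\n", extra->name);	/* prints "c" */
	return 0;
}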
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ebfec4d0148e..f8424ad89971 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1186,6 +1186,13 @@ write_begin_failed:
 		ext3_journal_stop(handle);
 		unlock_page(page);
 		page_cache_release(page);
+		/*
+		 * block_write_begin may have instantiated a few blocks
+		 * outside i_size. Trim these off again. Don't need
+		 * i_size_read because we hold i_mutex.
+		 */
+		if (pos + len > inode->i_size)
+			vmtruncate(inode, inode->i_size);
 	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 77278e947e94..78fdf3836370 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 
 	if (reserved_gdb || gdb_off == 0) {
 		if (!EXT3_HAS_COMPAT_FEATURE(sb,
-					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+					     EXT3_FEATURE_COMPAT_RESIZE_INODE)
+		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
 			ext3_warning(sb, __func__,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 399a96a6c556..3a260af5544d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
+	if (test_opt(sb, DATA_ERR_ABORT))
+		seq_puts(seq, ",data_err=abort");
+
 	ext3_show_quota_options(seq, sb);
 
 	return 0;
@@ -754,6 +757,7 @@ enum {
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+	Opt_data_err_abort, Opt_data_err_ignore,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
 	{Opt_data_journal, "data=journal"},
 	{Opt_data_ordered, "data=ordered"},
 	{Opt_data_writeback, "data=writeback"},
+	{Opt_data_err_abort, "data_err=abort"},
+	{Opt_data_err_ignore, "data_err=ignore"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
 			sbi->s_mount_opt |= data_opt;
 		}
 		break;
+	case Opt_data_err_abort:
+		set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+		break;
+	case Opt_data_err_ignore:
+		clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+		break;
 #ifdef CONFIG_QUOTA
 	case Opt_usrjquota:
 		qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
 		journal->j_flags |= JFS_BARRIER;
 	else
 		journal->j_flags &= ~JFS_BARRIER;
+	if (test_opt(sb, DATA_ERR_ABORT))
+		journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
+	else
+		journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
 	spin_unlock(&journal->j_state_lock);
 }
 
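The new data_err= handling follows ext3's usual token-table pattern. A reduced strcmp-based sketch of the same set/clear logic (the kernel itself goes through match_token(); the names below are stand-ins):

/* Sketch of the mount-option switch added above, reduced to a
 * strcmp-based parser. */
#include <stdio.h>
#include <string.h>

#define OPT_DATA_ERR_ABORT 0x1

static unsigned long parse_opt(const char *tok, unsigned long opts)
{
	if (!strcmp(tok, "data_err=abort"))
		opts |= OPT_DATA_ERR_ABORT;	/* set_opt(..., DATA_ERR_ABORT) */
	else if (!strcmp(tok, "data_err=ignore"))
		opts &= ~OPT_DATA_ERR_ABORT;	/* clear_opt(...) */
	return opts;
}

int main(void)
{
	unsigned long opts = parse_opt("data_err=abort", 0);

	printf("abort on data error: %s\n",
	       (opts & OPT_DATA_ERR_ABORT) ? "yes" : "no");
	return 0;
}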
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index fec8f61227ff..0022eec63cda 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 		goto done;
 	}
 
+	if (inode->i_ino == HFSPLUS_EXT_CNID)
+		return -EIO;
+
 	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	res = hfsplus_ext_read_extent(inode, ablock);
 	if (!res) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b085d64a2b67..963be644297a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
+	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+		return -EOVERFLOW;
 	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e751..25719d902c51 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
 			printk(KERN_WARNING
 				"JBD: Detected IO errors while flushing file data "
 				"on %s\n", bdevname(journal->j_fs_dev, b));
+			if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+				journal_abort(journal, err);
 			err = 0;
 		}
 
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
 		jh = commit_transaction->t_buffers;
 
 		/* If we're in abort mode, we just un-journal the buffer and
-		   release it for background writing. */
+		   release it. */
 
 		if (is_journal_aborted(journal)) {
+			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
 			journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
 		/* AKPM: bforget here */
 	}
 
+	if (err)
+		journal_abort(journal, err);
+
 	jbd_debug(3, "JBD: commit phase 6\n");
 
 	if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
 		if (buffer_jbddirty(bh)) {
 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
 			__journal_insert_checkpoint(jh, commit_transaction);
+			if (is_journal_aborted(journal))
+				clear_buffer_jbddirty(bh);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 			__journal_refile_buffer(jh);
 			jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a446..d15cd6e7251e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
954 journal_t *journal = handle->h_transaction->t_journal; 954 journal_t *journal = handle->h_transaction->t_journal;
955 int need_brelse = 0; 955 int need_brelse = 0;
956 struct journal_head *jh; 956 struct journal_head *jh;
957 int ret = 0;
957 958
958 if (is_handle_aborted(handle)) 959 if (is_handle_aborted(handle))
959 return 0; 960 return ret;
960 961
961 jh = journal_add_journal_head(bh); 962 jh = journal_add_journal_head(bh);
962 JBUFFER_TRACE(jh, "entry"); 963 JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1067 time if it is redirtied */ 1068 time if it is redirtied */
1068 } 1069 }
1069 1070
1070 /* journal_clean_data_list() may have got there first */ 1071 /*
1072 * We cannot remove the buffer with io error from the
1073 * committing transaction, because otherwise it would
1074 * miss the error and the commit would not abort.
1075 */
1076 if (unlikely(!buffer_uptodate(bh))) {
1077 ret = -EIO;
1078 goto no_journal;
1079 }
1080
1071 if (jh->b_transaction != NULL) { 1081 if (jh->b_transaction != NULL) {
1072 JBUFFER_TRACE(jh, "unfile from commit"); 1082 JBUFFER_TRACE(jh, "unfile from commit");
1073 __journal_temp_unlink_buffer(jh); 1083 __journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
1108 } 1118 }
1109 JBUFFER_TRACE(jh, "exit"); 1119 JBUFFER_TRACE(jh, "exit");
1110 journal_put_journal_head(jh); 1120 journal_put_journal_head(jh);
1111 return 0; 1121 return ret;
1112} 1122}
1113 1123
1114/** 1124/**
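journal_dirty_data() now threads an error code through its single exit instead of returning 0 unconditionally. The shape of that change in isolation, as a hedged sketch:

/* Sketch of the ret + single-exit pattern adopted above. */
#include <stdio.h>
#include <errno.h>

static int buffer_ok;	/* stand-in for buffer_uptodate(bh) */

static int dirty_data(void)
{
	int ret = 0;

	if (!buffer_ok) {
		ret = -EIO;	/* record the error ... */
		goto out;	/* ... but still run the common cleanup */
	}
	/* normal processing would go here */
out:
	/* cleanup shared by success and failure paths */
	return ret;
}

int main(void)
{
	printf("dirty_data() = %d\n", dirty_data());
	return 0;
}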
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644
index 000000000000..6ae169cd8faa
--- /dev/null
+++ b/fs/jffs2/Kconfig
@@ -0,0 +1,188 @@
+config JFFS2_FS
+	tristate "Journalling Flash File System v2 (JFFS2) support"
+	select CRC32
+	depends on MTD
+	help
+	  JFFS2 is the second generation of the Journalling Flash File System
+	  for use on diskless embedded devices. It provides improved wear
+	  levelling, compression and support for hard links. You cannot use
+	  this on normal block devices, only on 'MTD' devices.
+
+	  Further information on the design and implementation of JFFS2 is
+	  available at <http://sources.redhat.com/jffs2/>.
+
+config JFFS2_FS_DEBUG
+	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
+	depends on JFFS2_FS
+	default "0"
+	help
+	  This controls the amount of debugging messages produced by the JFFS2
+	  code. Set it to zero for use in production systems. For evaluation,
+	  testing and debugging, it's advisable to set it to one. This will
+	  enable a few assertions and will print debugging messages at the
+	  KERN_DEBUG loglevel, where they won't normally be visible. Level 2
+	  is unlikely to be useful - it enables extra debugging in certain
+	  areas which at one point needed debugging, but when the bugs were
+	  located and fixed, the detailed messages were relegated to level 2.
+
+	  If reporting bugs, please try to have available a full dump of the
+	  messages at debug level 1 while the misbehaviour was occurring.
+
+config JFFS2_FS_WRITEBUFFER
+	bool "JFFS2 write-buffering support"
+	depends on JFFS2_FS
+	default y
+	help
+	  This enables the write-buffering support in JFFS2.
+
+	  This functionality is required to support JFFS2 on the following
+	  types of flash devices:
+	    - NAND flash
+	    - NOR flash with transparent ECC
+	    - DataFlash
+
+config JFFS2_FS_WBUF_VERIFY
+	bool "Verify JFFS2 write-buffer reads"
+	depends on JFFS2_FS_WRITEBUFFER
+	default n
+	help
+	  This causes JFFS2 to read back every page written through the
+	  write-buffer, and check for errors.
+
+config JFFS2_SUMMARY
+	bool "JFFS2 summary support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  This feature makes it possible to use summary information
+	  for faster filesystem mount.
+
+	  The summary information can be inserted into a filesystem image
+	  by the utility 'sumtool'.
+
+	  If unsure, say 'N'.
+
+config JFFS2_FS_XATTR
+	bool "JFFS2 XATTR support (EXPERIMENTAL)"
+	depends on JFFS2_FS && EXPERIMENTAL
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config JFFS2_FS_POSIX_ACL
+	bool "JFFS2 POSIX Access Control Lists"
+	depends on JFFS2_FS_XATTR
+	default y
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config JFFS2_FS_SECURITY
+	bool "JFFS2 Security Labels"
+	depends on JFFS2_FS_XATTR
+	default y
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux. This option
+	  enables an extended attribute handler for file security
+	  labels in the jffs2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config JFFS2_COMPRESSION_OPTIONS
+	bool "Advanced compression options for JFFS2"
+	depends on JFFS2_FS
+	default n
+	help
+	  Enabling this option allows you to explicitly choose which
+	  compression modules, if any, are enabled in JFFS2. Removing
+	  compressors can mean you cannot read existing file systems,
+	  and enabling experimental compressors can mean that you
+	  write a file system which cannot be read by a standard kernel.
+
+	  If unsure, you should _definitely_ say 'N'.
+
+config JFFS2_ZLIB
+	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
+	depends on JFFS2_FS
+	default y
+	help
+	  Zlib is designed to be a free, general-purpose, legally unencumbered,
+	  lossless data-compression library for use on virtually any computer
+	  hardware and operating system. See <http://www.gzip.org/zlib/> for
+	  further information.
+
+	  Say 'Y' if unsure.
+
+config JFFS2_LZO
+	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	depends on JFFS2_FS
+	default n
+	help
+	  minilzo-based compression. Generally works better than Zlib.
+
+	  This feature was added in July, 2007. Say 'N' if you need
+	  compatibility with older bootloaders or kernels.
+
+config JFFS2_RTIME
+	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default y
+	help
+	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
+
+config JFFS2_RUBIN
+	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
+	depends on JFFS2_FS
+	default n
+	help
+	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
+
+choice
+	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+	default JFFS2_CMODE_PRIORITY
+	depends on JFFS2_FS
+	help
+	  You can set here the default compression mode of JFFS2 from
+	  the available compression modes. Don't touch if unsure.
+
+config JFFS2_CMODE_NONE
+	bool "no compression"
+	help
+	  Uses no compression.
+
+config JFFS2_CMODE_PRIORITY
+	bool "priority"
+	help
+	  Tries the compressors in a predefined order and chooses the first
+	  successful one.
+
+config JFFS2_CMODE_SIZE
+	bool "size (EXPERIMENTAL)"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result.
+
+config JFFS2_CMODE_FAVOURLZO
+	bool "Favour LZO"
+	help
+	  Tries all compressors and chooses the one which has the smallest
+	  result but gives some preference to LZO (which has faster
+	  decompression) at the expense of size.
+
+endchoice
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 86739ee53b37..f25e70c1b51c 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
53} 53}
54 54
55/* jffs2_compress: 55/* jffs2_compress:
56 * @data: Pointer to uncompressed data 56 * @data_in: Pointer to uncompressed data
57 * @cdata: Pointer to returned pointer to buffer for compressed data 57 * @cpage_out: Pointer to returned pointer to buffer for compressed data
58 * @datalen: On entry, holds the amount of data available for compression. 58 * @datalen: On entry, holds the amount of data available for compression.
59 * On exit, expected to hold the amount of data actually compressed. 59 * On exit, expected to hold the amount of data actually compressed.
60 * @cdatalen: On entry, holds the amount of space available for compressed 60 * @cdatalen: On entry, holds the amount of space available for compressed
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index cd219ef55254..b1aaae823a52 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
311 /* FIXME: If you care. We'd need to use frags for the target 311 /* FIXME: If you care. We'd need to use frags for the target
312 if it grows much more than this */ 312 if it grows much more than this */
313 if (targetlen > 254) 313 if (targetlen > 254)
314 return -EINVAL; 314 return -ENAMETOOLONG;
315 315
316 ri = jffs2_alloc_raw_inode(); 316 ri = jffs2_alloc_raw_inode();
317 317
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index dddb2a6c9e2c..259461b910af 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
68 instr->len = c->sector_size; 68 instr->len = c->sector_size;
69 instr->callback = jffs2_erase_callback; 69 instr->callback = jffs2_erase_callback;
70 instr->priv = (unsigned long)(&instr[1]); 70 instr->priv = (unsigned long)(&instr[1]);
71 instr->fail_addr = 0xffffffff; 71 instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
72 72
73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb; 73 ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
74 ((struct erase_priv_struct *)instr->priv)->c = c; 74 ((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
175{ 175{
176 /* For NAND, if the failure did not occur at the device level for a 176 /* For NAND, if the failure did not occur at the device level for a
177 specific physical page, don't bother updating the bad block table. */ 177 specific physical page, don't bother updating the bad block table. */
178 if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { 178 if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
179 /* We had a device-level failure to erase. Let's see if we've 179 /* We had a device-level failure to erase. Let's see if we've
180 failed too many times. */ 180 failed too many times. */
181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 086c43830221..249305d65d5b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
207 buf->f_files = 0; 207 buf->f_files = 0;
208 buf->f_ffree = 0; 208 buf->f_ffree = 0;
209 buf->f_namelen = JFFS2_MAX_NAME_LEN; 209 buf->f_namelen = JFFS2_MAX_NAME_LEN;
210 buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
211 buf->f_fsid.val[1] = c->mtd->index;
210 212
211 spin_lock(&c->erase_completion_lock); 213 spin_lock(&c->erase_completion_lock);
212 avail = c->dirty_size + c->free_size; 214 avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
440 442
441 memset(ri, 0, sizeof(*ri)); 443 memset(ri, 0, sizeof(*ri));
442 /* Set OS-specific defaults for new inodes */ 444 /* Set OS-specific defaults for new inodes */
443 ri->uid = cpu_to_je16(current->fsuid); 445 ri->uid = cpu_to_je16(current_fsuid());
444 446
445 if (dir_i->i_mode & S_ISGID) { 447 if (dir_i->i_mode & S_ISGID) {
446 ri->gid = cpu_to_je16(dir_i->i_gid); 448 ri->gid = cpu_to_je16(dir_i->i_gid);
447 if (S_ISDIR(mode)) 449 if (S_ISDIR(mode))
448 mode |= S_ISGID; 450 mode |= S_ISGID;
449 } else { 451 } else {
450 ri->gid = cpu_to_je16(current->fsgid); 452 ri->gid = cpu_to_je16(current_fsgid());
451 } 453 }
452 454
453 /* POSIX ACLs have to be processed now, at least partly. 455 /* POSIX ACLs have to be processed now, at least partly.
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a9bf9603c1ba..0875b60b4bf7 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
 
 	jffs2_sum_reset_collected(c->summary); /* reset collected summary */
 
+	/* adjust write buffer offset, else we get a non contiguous write bug */
+	if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
+		c->wbuf_ofs = 0xffffffff;
+
 	D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
 
 	return 0;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 0e78b00035e4..d9a721e6db70 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 
 	memset(c->wbuf,0xff,c->wbuf_pagesize);
 	/* adjust write buffer offset, else we get a non contiguous write bug */
-	if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
-		c->wbuf_ofs += c->wbuf_pagesize;
-	else
-		c->wbuf_ofs = 0xffffffff;
+	c->wbuf_ofs += c->wbuf_pagesize;
 	c->wbuf_len = 0;
 	return 0;
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecbc..efdba2e802d7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, 1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1518 GFP_KERNEL)) { 1518 GFP_KERNEL)) {
1519 pagevec_add(&lru_pvec, page); 1519 pagevec_add(&lru_pvec, page);
1520 pagevec_lru_add(&lru_pvec); 1520 pagevec_lru_add_file(&lru_pvec);
1521 SetPageUptodate(page); 1521 SetPageUptodate(page);
1522 unlock_page(page); 1522 unlock_page(page);
1523 } else 1523 } else
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d4232..3140a4429af1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
439 pages[nr] = *cached_page; 439 pages[nr] = *cached_page;
440 page_cache_get(*cached_page); 440 page_cache_get(*cached_page);
441 if (unlikely(!pagevec_add(lru_pvec, *cached_page))) 441 if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
442 __pagevec_lru_add(lru_pvec); 442 __pagevec_lru_add_file(lru_pvec);
443 *cached_page = NULL; 443 *cached_page = NULL;
444 } 444 }
445 index++; 445 index++;
@@ -2084,7 +2084,7 @@ err_out:
2084 OSYNC_METADATA|OSYNC_DATA); 2084 OSYNC_METADATA|OSYNC_DATA);
2085 } 2085 }
2086 } 2086 }
2087 pagevec_lru_add(&lru_pvec); 2087 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2089 written ? "written" : "status", (unsigned long)written,
2090 (long)status); 2090 (long)status);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef03..61b25f4eabe6 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
136 unsigned long allowed; 136 unsigned long allowed;
137 struct vmalloc_info vmi; 137 struct vmalloc_info vmi;
138 long cached; 138 long cached;
139 unsigned long pages[NR_LRU_LISTS];
140 int lru;
139 141
140/* 142/*
141 * display in kilobytes. 143 * display in kilobytes.
@@ -154,51 +156,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
154 156
155 get_vmalloc_info(&vmi); 157 get_vmalloc_info(&vmi);
156 158
159 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
160 pages[lru] = global_page_state(NR_LRU_BASE + lru);
161
157 /* 162 /*
158 * Tagged format, for easy grepping and expansion. 163 * Tagged format, for easy grepping and expansion.
159 */ 164 */
160 len = sprintf(page, 165 len = sprintf(page,
161 "MemTotal: %8lu kB\n" 166 "MemTotal: %8lu kB\n"
162 "MemFree: %8lu kB\n" 167 "MemFree: %8lu kB\n"
163 "Buffers: %8lu kB\n" 168 "Buffers: %8lu kB\n"
164 "Cached: %8lu kB\n" 169 "Cached: %8lu kB\n"
165 "SwapCached: %8lu kB\n" 170 "SwapCached: %8lu kB\n"
166 "Active: %8lu kB\n" 171 "Active: %8lu kB\n"
167 "Inactive: %8lu kB\n" 172 "Inactive: %8lu kB\n"
173 "Active(anon): %8lu kB\n"
174 "Inactive(anon): %8lu kB\n"
175 "Active(file): %8lu kB\n"
176 "Inactive(file): %8lu kB\n"
177#ifdef CONFIG_UNEVICTABLE_LRU
178 "Unevictable: %8lu kB\n"
179 "Mlocked: %8lu kB\n"
180#endif
168#ifdef CONFIG_HIGHMEM 181#ifdef CONFIG_HIGHMEM
169 "HighTotal: %8lu kB\n" 182 "HighTotal: %8lu kB\n"
170 "HighFree: %8lu kB\n" 183 "HighFree: %8lu kB\n"
171 "LowTotal: %8lu kB\n" 184 "LowTotal: %8lu kB\n"
172 "LowFree: %8lu kB\n" 185 "LowFree: %8lu kB\n"
173#endif 186#endif
174 "SwapTotal: %8lu kB\n" 187 "SwapTotal: %8lu kB\n"
175 "SwapFree: %8lu kB\n" 188 "SwapFree: %8lu kB\n"
176 "Dirty: %8lu kB\n" 189 "Dirty: %8lu kB\n"
177 "Writeback: %8lu kB\n" 190 "Writeback: %8lu kB\n"
178 "AnonPages: %8lu kB\n" 191 "AnonPages: %8lu kB\n"
179 "Mapped: %8lu kB\n" 192 "Mapped: %8lu kB\n"
180 "Slab: %8lu kB\n" 193 "Slab: %8lu kB\n"
181 "SReclaimable: %8lu kB\n" 194 "SReclaimable: %8lu kB\n"
182 "SUnreclaim: %8lu kB\n" 195 "SUnreclaim: %8lu kB\n"
183 "PageTables: %8lu kB\n" 196 "PageTables: %8lu kB\n"
184#ifdef CONFIG_QUICKLIST 197#ifdef CONFIG_QUICKLIST
185 "Quicklists: %8lu kB\n" 198 "Quicklists: %8lu kB\n"
186#endif 199#endif
187 "NFS_Unstable: %8lu kB\n" 200 "NFS_Unstable: %8lu kB\n"
188 "Bounce: %8lu kB\n" 201 "Bounce: %8lu kB\n"
189 "WritebackTmp: %8lu kB\n" 202 "WritebackTmp: %8lu kB\n"
190 "CommitLimit: %8lu kB\n" 203 "CommitLimit: %8lu kB\n"
191 "Committed_AS: %8lu kB\n" 204 "Committed_AS: %8lu kB\n"
192 "VmallocTotal: %8lu kB\n" 205 "VmallocTotal: %8lu kB\n"
193 "VmallocUsed: %8lu kB\n" 206 "VmallocUsed: %8lu kB\n"
194 "VmallocChunk: %8lu kB\n", 207 "VmallocChunk: %8lu kB\n",
195 K(i.totalram), 208 K(i.totalram),
196 K(i.freeram), 209 K(i.freeram),
197 K(i.bufferram), 210 K(i.bufferram),
198 K(cached), 211 K(cached),
199 K(total_swapcache_pages), 212 K(total_swapcache_pages),
200 K(global_page_state(NR_ACTIVE)), 213 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
201 K(global_page_state(NR_INACTIVE)), 214 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
215 K(pages[LRU_ACTIVE_ANON]),
216 K(pages[LRU_INACTIVE_ANON]),
217 K(pages[LRU_ACTIVE_FILE]),
218 K(pages[LRU_INACTIVE_FILE]),
219#ifdef CONFIG_UNEVICTABLE_LRU
220 K(pages[LRU_UNEVICTABLE]),
221 K(global_page_state(NR_MLOCK)),
222#endif
202#ifdef CONFIG_HIGHMEM 223#ifdef CONFIG_HIGHMEM
203 K(i.totalhigh), 224 K(i.totalhigh),
204 K(i.freehigh), 225 K(i.freehigh),
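After this change /proc/meminfo carries per-type LRU rows; a small reader keyed on the field names added above (a sketch, not part of the patch):

/* Sketch: read the new per-type LRU rows from /proc/meminfo. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f) {
		perror("meminfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "Active(", 7) ||
		    !strncmp(line, "Inactive(", 9) ||
		    !strncmp(line, "Unevictable:", 12))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}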
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 841368b87a29..cd9ca67f841b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 struct proc_dir_entry *proc_vmcore = NULL;
 
 /* Reads a page from the oldmem device from given offset. */
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
 	int rc = 0;
 
 	/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
-	if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+	if (!(is_vmcore_usable()))
 		return rc;
 	rc = parse_crash_elf_headers();
 	if (rc) {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125af..76acdbc34611 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 goto add_error; 112 goto add_error;
113 113
114 if (!pagevec_add(&lru_pvec, page)) 114 if (!pagevec_add(&lru_pvec, page))
115 __pagevec_lru_add(&lru_pvec); 115 __pagevec_lru_add_file(&lru_pvec);
116 116
117 unlock_page(page); 117 unlock_page(page);
118 } 118 }
119 119
120 pagevec_lru_add(&lru_pvec); 120 pagevec_lru_add_file(&lru_pvec);
121 return 0; 121 return 0;
122 122
123 fsize_exceeded: 123 fsize_exceeded:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b13123424e49..f031d1c925f0 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
61 inode->i_mapping->a_ops = &ramfs_aops; 61 inode->i_mapping->a_ops = &ramfs_aops;
62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 63 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
64 mapping_set_unevictable(inode->i_mapping);
64 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 65 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
65 switch (mode & S_IFMT) { 66 switch (mode & S_IFMT) {
66 default: 67 default:
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bd20f7f5a933..eba2eabcd2b8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 
 int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
 {
-	size_t len = bitmap_scnprintf_len(nr_bits);
+	if (m->count < m->size) {
+		int len = bitmap_scnprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
+	}
+	m->count = m->size;
+	return -1;
+}
+EXPORT_SYMBOL(seq_bitmap);
 
-	if (m->count + len < m->size) {
-		bitmap_scnprintf(m->buf + m->count, m->size - m->count,
-				bits, nr_bits);
-		m->count += len;
-		return 0;
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+		unsigned int nr_bits)
+{
+	if (m->count < m->size) {
+		int len = bitmap_scnlistprintf(m->buf + m->count,
+				m->size - m->count, bits, nr_bits);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return 0;
+		}
 	}
 	m->count = m->size;
 	return -1;
 }
+EXPORT_SYMBOL(seq_bitmap_list);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
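seq_bitmap() now prints directly into the seq_file buffer and reports overflow itself. A hedged sketch of a caller, as a minimal procfs module (all names here are invented; only the seq_bitmap() call reflects the interface above):

/* Sketch: exposing a bitmap through procfs with seq_bitmap(). */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#define NBITS 64
static DECLARE_BITMAP(demo_bits, NBITS) = { 0xf0f0f0f0UL };

static int demo_show(struct seq_file *m, void *v)
{
	seq_bitmap(m, demo_bits, NBITS);	/* prints the bitmap in hex */
	seq_putc(m, '\n');
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init demo_init(void)
{
	proc_create("demo_bitmap", 0444, NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo_bitmap", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");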
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 7efe1000f99d..cee97f14af3b 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
88#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ 88#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
89#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 89#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
90#define TIF_MEMDIE 17 90#define TIF_MEMDIE 17
91#define TIF_FREEZE 18 /* is freezing for suspend */
91 92
92#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 93#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
93#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 94#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -95,6 +96,7 @@ struct thread_info {
95#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 96#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
96#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 97#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
97#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 98#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
99#define _TIF_FREEZE (1<<TIF_FREEZE)
98 100
99#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ 101#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
100#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ 102#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 71ef3f0b9685..89061c1a67d4 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -84,12 +84,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
84 84
85 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 85 if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
86 { 86 {
87 BCD_TO_BIN(time->tm_sec); 87 time->tm_sec = bcd2bin(time->tm_sec);
88 BCD_TO_BIN(time->tm_min); 88 time->tm_min = bcd2bin(time->tm_min);
89 BCD_TO_BIN(time->tm_hour); 89 time->tm_hour = bcd2bin(time->tm_hour);
90 BCD_TO_BIN(time->tm_mday); 90 time->tm_mday = bcd2bin(time->tm_mday);
91 BCD_TO_BIN(time->tm_mon); 91 time->tm_mon = bcd2bin(time->tm_mon);
92 BCD_TO_BIN(time->tm_year); 92 time->tm_year = bcd2bin(time->tm_year);
93 } 93 }
94 94
95#ifdef CONFIG_MACH_DECSTATION 95#ifdef CONFIG_MACH_DECSTATION
@@ -159,12 +159,12 @@ static inline int set_rtc_time(struct rtc_time *time)
159 159
160 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) 160 if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
161 || RTC_ALWAYS_BCD) { 161 || RTC_ALWAYS_BCD) {
162 BIN_TO_BCD(sec); 162 sec = bin2bcd(sec);
163 BIN_TO_BCD(min); 163 min = bin2bcd(min);
164 BIN_TO_BCD(hrs); 164 hrs = bin2bcd(hrs);
165 BIN_TO_BCD(day); 165 day = bin2bcd(day);
166 BIN_TO_BCD(mon); 166 mon = bin2bcd(mon);
167 BIN_TO_BCD(yrs); 167 yrs = bin2bcd(yrs);
168 } 168 }
169 169
170 save_control = CMOS_READ(RTC_CONTROL); 170 save_control = CMOS_READ(RTC_CONTROL);
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index abc002798a2b..af0fda46e94b 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -52,5 +52,6 @@ struct thread_info {
52#define TIF_DELAYED_TRACE 14 /* single step a syscall */ 52#define TIF_DELAYED_TRACE 14 /* single step a syscall */
53#define TIF_SYSCALL_TRACE 15 /* syscall trace active */ 53#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
54#define TIF_MEMDIE 16 54#define TIF_MEMDIE 16
55#define TIF_FREEZE 17 /* thread is freezing for suspend */
55 56
56#endif /* _ASM_M68K_THREAD_INFO_H */ 57#endif /* _ASM_M68K_THREAD_INFO_H */
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 9f812741c355..0407959da489 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -58,6 +58,7 @@ struct thread_info {
58#define TIF_32BIT 4 /* 32 bit binary */ 58#define TIF_32BIT 4 /* 32 bit binary */
59#define TIF_MEMDIE 5 59#define TIF_MEMDIE 5
60#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ 60#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */
61#define TIF_FREEZE 7 /* is freezing for suspend */
61 62
62#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 63#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
63#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 64#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -65,6 +66,7 @@ struct thread_info {
65#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) 66#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
66#define _TIF_32BIT (1 << TIF_32BIT) 67#define _TIF_32BIT (1 << TIF_32BIT)
67#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 68#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
69#define _TIF_FREEZE (1 << TIF_FREEZE)
68 70
69#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \ 71#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \
70 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) 72 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index e07e72846c7a..62274ab9471f 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void)
69#define TIF_MEMDIE 5 69#define TIF_MEMDIE 5
70#define TIF_SYSCALL_AUDIT 6 70#define TIF_SYSCALL_AUDIT 6
71#define TIF_RESTORE_SIGMASK 7 71#define TIF_RESTORE_SIGMASK 7
72#define TIF_FREEZE 16 /* is freezing for suspend */
72 73
73#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 74#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
74#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 75#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -77,5 +78,6 @@ static inline struct thread_info *current_thread_info(void)
77#define _TIF_MEMDIE (1 << TIF_MEMDIE) 78#define _TIF_MEMDIE (1 << TIF_MEMDIE)
78#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) 79#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
79#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 80#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
81#define _TIF_FREEZE (1 << TIF_FREEZE)
80 82
81#endif 83#endif
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index 7e4131dd546c..0f4fe1faf9ba 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
134#define TIF_MEMDIE 5 134#define TIF_MEMDIE 5
135#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ 135#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */
136#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 136#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
137#define TIF_FREEZE 17 /* is freezing for suspend */
137 138
138#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 139#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
139#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) 140#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -142,6 +143,7 @@ static inline struct thread_info *current_thread_info(void)
142#define _TIF_IRET (1<<TIF_IRET) 143#define _TIF_IRET (1<<TIF_IRET)
143#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 144#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
144#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 145#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
146#define _TIF_FREEZE (1<<TIF_FREEZE)
145 147
146#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ 148#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
147#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ 149#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index bf9aca548f14..e531783e5d78 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
 unifdef-y += blktrace_api.h
+unifdef-y += byteorder.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
@@ -340,6 +341,7 @@ unifdef-y += soundcard.h
 unifdef-y += stat.h
 unifdef-y += stddef.h
 unifdef-y += string.h
+unifdef-y += swab.h
 unifdef-y += synclink.h
 unifdef-y += sysctl.h
 unifdef-y += tcp.h
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb3..bee52abb8a4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
175 * BDI_CAP_READ_MAP: Can be mapped for reading 175 * BDI_CAP_READ_MAP: Can be mapped for reading
176 * BDI_CAP_WRITE_MAP: Can be mapped for writing 176 * BDI_CAP_WRITE_MAP: Can be mapped for writing
177 * BDI_CAP_EXEC_MAP: Can be mapped for execution 177 * BDI_CAP_EXEC_MAP: Can be mapped for execution
178 *
179 * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
178 */ 180 */
179#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 181#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
180#define BDI_CAP_NO_WRITEBACK 0x00000002 182#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
184#define BDI_CAP_WRITE_MAP 0x00000020 186#define BDI_CAP_WRITE_MAP 0x00000020
185#define BDI_CAP_EXEC_MAP 0x00000040 187#define BDI_CAP_EXEC_MAP 0x00000040
186#define BDI_CAP_NO_ACCT_WB 0x00000080 188#define BDI_CAP_NO_ACCT_WB 0x00000080
189#define BDI_CAP_SWAP_BACKED 0x00000100
187 190
188#define BDI_CAP_VMFLAGS \ 191#define BDI_CAP_VMFLAGS \
189 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) 192 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
248 BDI_CAP_NO_WRITEBACK)); 251 BDI_CAP_NO_WRITEBACK));
249} 252}
250 253
254static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
255{
256 return bdi->capabilities & BDI_CAP_SWAP_BACKED;
257}
258
251static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) 259static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
252{ 260{
253 return bdi_cap_writeback_dirty(mapping->backing_dev_info); 261 return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
258 return bdi_cap_account_dirty(mapping->backing_dev_info); 266 return bdi_cap_account_dirty(mapping->backing_dev_info);
259} 267}
260 268
269static inline bool mapping_cap_swap_backed(struct address_space *mapping)
270{
271 return bdi_cap_swap_backed(mapping->backing_dev_info);
272}
273
261#endif /* _LINUX_BACKING_DEV_H */ 274#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 7ac518e3c152..22ea563ba3eb 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -1,12 +1,3 @@
-/* Permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-/* macros to translate to/from binary and binary-coded decimal (frequently
- * found in RTC chips).
- */
-
 #ifndef _BCD_H
 #define _BCD_H
 
@@ -15,11 +6,4 @@
 unsigned bcd2bin(unsigned char val) __attribute_const__;
 unsigned char bin2bcd(unsigned val) __attribute_const__;
 
-#define BCD2BIN(val)	bcd2bin(val)
-#define BIN2BCD(val)	bin2bcd(val)
-
-/* backwards compat */
-#define BCD_TO_BIN(val)	((val)=BCD2BIN(val))
-#define BIN_TO_BCD(val)	((val)=BIN2BCD(val))
-
 #endif /* _BCD_H */
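For reference, the arithmetic behind the surviving bcd2bin()/bin2bcd() prototypes is the usual nibble-per-digit conversion; a minimal user-space sketch (illustrative only, the kernel's own definitions live outside this header):

#include <assert.h>

/* each nibble of a BCD byte holds one decimal digit */
static unsigned bcd2bin_sketch(unsigned char val)
{
	return (val & 0x0f) + (val >> 4) * 10;	/* 0x42 -> 42 */
}

static unsigned char bin2bcd_sketch(unsigned val)
{
	return ((val / 10) << 4) | (val % 10);	/* 42 -> 0x42 */
}

int main(void)
{
	assert(bcd2bin_sketch(0x42) == 42);
	assert(bin2bcd_sketch(59) == 0x59);	/* e.g. an RTC seconds field */
	return 0;
}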
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 89781fd48859..1abfe664c444 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int nr_bits);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
 			unsigned long *dst, int nbits);
 extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index eadaab44015f..3ce64b90118c 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 
 static inline int trylock_buffer(struct buffer_head *bh)
 {
-	return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+	return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
 }
 
 static inline void lock_buffer(struct buffer_head *bh)
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 1133d5f9d818..fbaa7f9cee32 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,3 +1,4 @@
 unifdef-y += big_endian.h
 unifdef-y += little_endian.h
 unifdef-y += swab.h
+unifdef-y += swabb.h
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 44f95b92393b..1cba3f3efe5f 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)(__u32)(x))
 #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 4cc170a31762..cedc1b5a289c 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
 #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4bfaab..8b00f6643e93 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,12 +9,12 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
+#include <linux/rwsem.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -137,6 +137,15 @@ struct cgroup {
 	 * release_list_lock
 	 */
 	struct list_head release_list;
+
+	/* pids_mutex protects the fields below */
+	struct rw_semaphore pids_mutex;
+	/* Array of process ids in the cgroup */
+	pid_t *tasks_pids;
+	/* How many files are using the current tasks_pids array */
+	int pids_use_count;
+	/* Length of the current tasks_pids array */
+	int pids_length;
 };
 
 /* A css_set is a structure holding pointers to a set of
@@ -149,7 +158,7 @@ struct cgroup {
 struct css_set {
 
 	/* Reference count */
-	struct kref ref;
+	atomic_t refcount;
 
 	/*
 	 * List running through all cgroup groups in the same hash
@@ -394,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
+void cgroup_mm_owner_callbacks(struct task_struct *old,
+			       struct task_struct *new);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -412,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 	return -EINVAL;
 }
 
+static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
+					     struct task_struct *new) {}
+
 #endif /* !CONFIG_CGROUPS */
 
-#ifdef CONFIG_MM_OWNER
-extern void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
-#else  /* !CONFIG_MM_OWNER */
-static inline void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-}
-#endif /* CONFIG_MM_OWNER */
 #endif /* _LINUX_CGROUP_H */
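The comment on pids_mutex states the locking contract for the new cached pid array; a minimal sketch of a conforming reader (hypothetical function, not taken from this patch set):

static int example_cgroup_read_pid(struct cgroup *cgrp, int idx, pid_t *out)
{
	int ret = -ENOENT;

	down_read(&cgrp->pids_mutex);	/* pids_mutex protects tasks_pids */
	if (idx < cgrp->pids_length) {
		*out = cgrp->tasks_pids[idx];
		ret = 0;
	}
	up_read(&cgrp->pids_mutex);
	return ret;
}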
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index e2877454ec82..9c22396e8b50 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -48,3 +48,9 @@ SUBSYS(devices)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_FREEZER
+SUBSYS(freezer)
+#endif
+
+/* */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 025e4f575103..0acf3b737e2e 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -8,12 +8,9 @@
 #include <linux/proc_fs.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
+#define ELFCORE_ADDR_ERR	(-2ULL)
 
-#ifdef CONFIG_PROC_VMCORE
 extern unsigned long long elfcorehdr_addr;
-#else
-static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-#endif
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
@@ -28,10 +25,43 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+/*
+ * is_kdump_kernel() checks whether this kernel is booting after a panic of
+ * the previous kernel or not.  This is determined by checking whether the
+ * previous kernel has passed the elf core header address on the command line.
+ *
+ * This is not just a test of whether CONFIG_CRASH_DUMP is enabled.  It
+ * returns 1 only if CONFIG_CRASH_DUMP=y and the kernel is booting after a
+ * panic of the previous kernel.
+ */
+
 static inline int is_kdump_kernel(void)
 {
 	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
 }
+
+/* is_vmcore_usable() checks if the kernel is booting after a panic and
+ * the vmcore region is usable.
+ *
+ * This makes use of the fact that due to alignment -2ULL is not
+ * a valid pointer, much in the vein of IS_ERR(), except it deals
+ * directly with an unsigned long long rather than a pointer.
+ */
+
+static inline int is_vmcore_usable(void)
+{
+	return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+}
+
+/* vmcore_unusable() marks the vmcore as unusable,
+ * without disturbing the logic of is_kdump_kernel().
+ */
+
+static inline void vmcore_unusable(void)
+{
+	if (is_kdump_kernel())
+		elfcorehdr_addr = ELFCORE_ADDR_ERR;
+}
 #else /* !CONFIG_CRASH_DUMP */
 static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
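A minimal sketch of how architecture setup code is expected to combine the two new helpers (the sanity check is a hypothetical stand-in for whatever validation an architecture performs on the passed-in header):

static int example_check_elfcorehdr(void)
{
	if (!is_vmcore_usable())
		return 0;	/* not a kdump boot, or header already marked bad */

	if (example_elfcorehdr_is_damaged()) {	/* hypothetical arch check */
		vmcore_unusable();	/* /proc/vmcore refuses; is_kdump_kernel() stays true */
		return -EINVAL;
	}
	return 0;
}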
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 159d9b476cd7..d14f02918483 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -380,6 +380,8 @@ struct ext3_inode {
 #define EXT3_MOUNT_QUOTA		0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT	0x400000 /* Abort on file data write
+						  * error in ordered mode */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 531ccd5f5960..75a81eaf3430 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -808,6 +808,7 @@ struct fb_tile_ops {
 struct fb_info {
 	int node;
 	int flags;
+	struct mutex lock;		/* Lock for open/release/ioctl funcs */
 	struct fb_var_screeninfo var;	/* Current var */
 	struct fb_fix_screeninfo fix;	/* Current fix */
 	struct fb_monspecs monspecs;	/* Current Monitor specs */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index deddeedf3257..8f225339eee9 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FREEZER
 /*
  * Check if a process has been frozen
  */
@@ -39,28 +39,18 @@ static inline void clear_freeze_flag(struct task_struct *p)
 	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
+static inline bool should_send_signal(struct task_struct *p)
+{
+	return !(p->flags & PF_FREEZER_NOSIG);
+}
+
 /*
  * Wake up a frozen process
- *
- * task_lock() is taken to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
  */
-static inline int thaw_process(struct task_struct *p)
-{
-	task_lock(p);
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
-	clear_freeze_flag(p);
-	task_unlock(p);
-	return 0;
-}
+extern int __thaw_process(struct task_struct *p);
+
+/* Takes and releases task alloc lock using task_lock() */
+extern int thaw_process(struct task_struct *p);
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
@@ -75,6 +65,15 @@ static inline int try_to_freeze(void)
 	return 0;
 }
 
+extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern void cancel_freezing(struct task_struct *p);
+
+#ifdef CONFIG_CGROUP_FREEZER
+extern int cgroup_frozen(struct task_struct *task);
+#else /* !CONFIG_CGROUP_FREEZER */
+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
+#endif /* !CONFIG_CGROUP_FREEZER */
+
 /*
  * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
  * calls wait_for_completion(&vfork) and reset right after it returns from this
@@ -166,7 +165,7 @@ static inline void set_freezable_with_signal(void)
 	} while (try_to_freeze());					\
 	__retval;							\
 })
-#else /* !CONFIG_PM_SLEEP */
+#else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
@@ -191,6 +190,6 @@ static inline void set_freezable_with_signal(void) {}
 #define wait_event_freezable_timeout(wq, condition, timeout)		\
 		wait_event_interruptible_timeout(wq, condition, timeout)
 
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !CONFIG_FREEZER */
 
 #endif /* FREEZER_H_INCLUDED */
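For context, the canonical consumer of this interface is a freezable kernel thread; a minimal sketch under the unchanged try_to_freeze() API (the loop body is illustrative):

static int example_freezable_thread(void *unused)
{
	set_freezable();	/* clear PF_NOFREEZE for this kthread */
	while (!kthread_should_stop()) {
		try_to_freeze();	/* parks in refrigerator() while the system freezes */
		/* ... perform one unit of work, then sleep ... */
	}
	return 0;
}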
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7ebbcb1c9ba4..35d4f6342fac 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -816,6 +816,9 @@ struct journal_s
 #define JFS_FLUSHED	0x008	/* The journal superblock has been flushed */
 #define JFS_LOADED	0x010	/* The journal superblock has been loaded */
 #define JFS_BARRIER	0x020	/* Use IDE barriers */
+#define JFS_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
+						 * data write error in ordered
+						 * mode */
 
 /*
  * Function declarations for the journaling transaction and buffer
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fdf3967e1397..1fbe14d39521 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,16 +27,13 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-#define page_reset_bad_cgroup(page)	((page)->page_cgroup = 0)
-
-extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
+extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -44,7 +41,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					unsigned long *scanned, int order,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
-					int active);
+					int active, int file);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
@@ -69,21 +66,11 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 
-extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
-extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-				struct zone *zone, int priority);
-
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline void page_reset_bad_cgroup(struct page *page)
-{
-}
+extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+					int priority, enum lru_list lru);
 
-static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-	return NULL;
-}
 
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline int mem_cgroup_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
@@ -159,14 +146,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
-{
-	return 0;
-}
-
-static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-					struct zone *zone, int priority)
+static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
+					struct zone *zone, int priority,
+					enum lru_list lru)
 {
 	return 0;
 }
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 03aea612d284..3f34005068d4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,7 +7,6 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
-					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c61ba10768ea..ffee2f743418 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -132,6 +132,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
 /*
+ * special vmas that are non-mergable, non-mlock()able
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
+/*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
  */
@@ -700,10 +705,10 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
 			     struct user_struct *user)
 {
 	return 0;
 }
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 895bc4e93039..c948350c378e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,40 +1,100 @@
-static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
-{
-	list_add(&page->lru, &zone->active_list);
-	__inc_zone_state(zone, NR_ACTIVE);
-}
-
-static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
-{
-	list_add(&page->lru, &zone->inactive_list);
-	__inc_zone_state(zone, NR_INACTIVE);
-}
-
-static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
-{
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_ACTIVE);
-}
-
-static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
-{
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_INACTIVE);
-}
-
-static inline void
-del_page_from_lru(struct zone *zone, struct page *page)
-{
-	list_del(&page->lru);
-	if (PageActive(page)) {
-		__ClearPageActive(page);
-		__dec_zone_state(zone, NR_ACTIVE);
-	} else {
-		__dec_zone_state(zone, NR_INACTIVE);
-	}
-}
-
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
+
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
+ * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
+{
+	if (PageSwapBacked(page))
+		return 0;
+
+	/* The page is page cache backed by a normal filesystem. */
+	return LRU_FILE;
+}
+
+static inline void
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_add(&page->lru, &zone->lru[l].list);
+	__inc_zone_state(zone, NR_LRU_BASE + l);
+}
+
+static inline void
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_del(&page->lru);
+	__dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
+static inline void
+del_page_from_lru(struct zone *zone, struct page *page)
+{
+	enum lru_list l = LRU_BASE;
+
+	list_del(&page->lru);
+	if (PageUnevictable(page)) {
+		__ClearPageUnevictable(page);
+		l = LRU_UNEVICTABLE;
+	} else {
+		if (PageActive(page)) {
+			__ClearPageActive(page);
+			l += LRU_ACTIVE;
+		}
+		l += page_is_file_cache(page);
+	}
+	__dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+	enum lru_list lru = LRU_BASE;
+
+	if (PageUnevictable(page))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageActive(page))
+			lru += LRU_ACTIVE;
+		lru += page_is_file_cache(page);
+	}
+
+	return lru;
+}
+
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
+#endif
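Taken together, the new helpers reduce "put this page on the right list" to one index computation plus one insert; a minimal sketch of how a caller composes them (illustrative function, not from this patch set):

static void example_add_to_proper_lru(struct zone *zone, struct page *page)
{
	enum lru_list l = page_lru(page);	/* anon/file, active/inactive,
						 * or LRU_UNEVICTABLE */

	add_page_to_lru_list(zone, page, l);	/* list_add + per-zone stat */
}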
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9d49fa36bbef..fe825471d5aa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -94,9 +94,6 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-	unsigned long page_cgroup;
-#endif
 };
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 428328a05fa1..35a7b5e19465 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -81,21 +81,31 @@ struct zone_padding {
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
-	NR_INACTIVE,
-	NR_ACTIVE,
+	NR_LRU_BASE,
+	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+	NR_ACTIVE_ANON,		/*  "     "     "   "       "           */
+	NR_INACTIVE_FILE,	/*  "     "     "   "       "           */
+	NR_ACTIVE_FILE,		/*  "     "     "   "       "           */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	NR_UNEVICTABLE,		/*  "     "     "   "       "           */
+	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
+#else
+	NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+	NR_MLOCK = NR_ACTIVE_FILE,
+#endif
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
-	/* Second 128 byte cacheline */
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
+	/* Second 128 byte cacheline */
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
@@ -107,6 +117,55 @@ enum zone_stat_item {
 #endif
 	NR_VM_ZONE_STAT_ITEMS };
 
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
+enum lru_list {
+	LRU_INACTIVE_ANON = LRU_BASE,
+	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+#ifdef CONFIG_UNEVICTABLE_LRU
+	LRU_UNEVICTABLE,
+#else
+	LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
+	NR_LRU_LISTS
+};
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
+static inline int is_file_lru(enum lru_list l)
+{
+	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_active_lru(enum lru_list l)
+{
+	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_unevictable_lru(enum lru_list l)
+{
+#ifdef CONFIG_UNEVICTABLE_LRU
+	return (l == LRU_UNEVICTABLE);
+#else
+	return 0;
+#endif
+}
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
@@ -251,10 +310,22 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;
-	struct list_head	active_list;
-	struct list_head	inactive_list;
-	unsigned long		nr_scan_active;
-	unsigned long		nr_scan_inactive;
+	struct {
+		struct list_head list;
+		unsigned long nr_scan;
+	} lru[NR_LRU_LISTS];
+
+	/*
+	 * The pageout code in vmscan.c keeps track of how many of the
+	 * mem/swap backed and file backed pages are referenced.
+	 * The higher the rotated/scanned ratio, the more valuable
+	 * that cache is.
+	 *
+	 * The anon LRU stats live in [0], file LRU stats in [1]
+	 */
+	unsigned long		recent_rotated[2];
+	unsigned long		recent_scanned[2];
+
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
 
@@ -276,6 +347,12 @@ struct zone {
 	 */
 	int prev_priority;
 
+	/*
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU.  Maintained by the pageout code.
+	 */
+	unsigned int inactive_ratio;
+
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
@@ -524,8 +601,11 @@ typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
 	struct zonelist node_zonelists[MAX_ZONELISTS];
 	int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	struct page_cgroup *node_page_cgroup;
+#endif
 #endif
 	struct bootmem_data *bdata;
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -854,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #endif
 
 struct page;
+struct page_cgroup;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -871,6 +952,14 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	/*
+	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
+	 * section. (see memcontrol.h/page_cgroup.h about this.)
+	 */
+	struct page_cgroup *page_cgroup;
+	unsigned long pad;
+#endif
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
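The index arithmetic described in the new comment makes LRU walks uniform; a minimal sketch (hypothetical helper) of how reclaim-style code can exploit it:

static unsigned long example_count_evictable_pages(struct zone *zone)
{
	enum lru_list l;
	unsigned long nr = 0;

	/* LRU_ACTIVE_FILE == LRU_BASE + LRU_FILE + LRU_ACTIVE == 3, so this
	 * loop covers both anon lists and both file lists, in enum order */
	for_each_evictable_lru(l)
		nr += zone_page_state(zone, NR_LRU_BASE + l);
	return nr;
}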
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d6fb115f5a07..ee5124ec319e 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -12,6 +12,7 @@
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/cfi_endian.h>
+#include <linux/mtd/xip.h>
 
 #ifdef CONFIG_MTD_CFI_I1
 #define cfi_interleave(cfi) 1
@@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t
 {
 	map_word val;
 	uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type);
-
 	val = cfi_build_cmd(cmd, map, cfi);
 
 	if (prev_val)
@@ -483,6 +483,13 @@ static inline void cfi_udelay(int us)
 	}
 }
 
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+			     struct cfi_private *cfi);
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+			     struct cfi_private *cfi);
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+			       struct cfi_private *cfi);
+
 struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
 			     const char* name);
 struct cfi_fixup {
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 08dd131301c1..d4f38c5fd44e 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -73,6 +73,10 @@ struct flchip {
 	int buffer_write_time;
 	int erase_time;
 
+	int word_write_time_max;
+	int buffer_write_time_max;
+	int erase_time_max;
+
 	void *priv;
 };
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 922636548558..eae26bb6430a 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -25,8 +25,10 @@
 #define MTD_ERASE_DONE          0x08
 #define MTD_ERASE_FAILED        0x10
 
+#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+
 /* If the erase fails, fail_addr might indicate exactly which block failed.  If
-   fail_addr = 0xffffffff, the failure was not at the device level or was not
+   fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
    specific to any particular block. */
 struct erase_info {
 	struct mtd_info *mtd;
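A minimal sketch of the check the new constant enables on the consumer side (the callback name is illustrative):

static void example_erase_done(struct erase_info *instr)
{
	if (instr->state == MTD_ERASE_FAILED &&
	    instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
		/* the failure is attributable to a specific block */
		pr_debug("erase failed at 0x%llx\n",
			 (unsigned long long)instr->fail_addr);
}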
diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h
new file mode 100644
index 000000000000..51534e50f7fc
--- /dev/null
+++ b/include/linux/mtd/nand-gpio.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_MTD_NAND_GPIO_H
+#define __LINUX_MTD_NAND_GPIO_H
+
+#include <linux/mtd/nand.h>
+
+struct gpio_nand_platdata {
+	int	gpio_nce;
+	int	gpio_nwp;
+	int	gpio_cle;
+	int	gpio_ale;
+	int	gpio_rdy;
+	void	(*adjust_parts)(struct gpio_nand_platdata *, size_t);
+	struct mtd_partition *parts;
+	unsigned int num_parts;
+	unsigned int options;
+	int	chip_delay;
+};
+
+#endif
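A board file would typically fill this structure in and attach it to the platform device for the new GPIO NAND driver; a hedged sketch with purely illustrative GPIO numbers:

static struct gpio_nand_platdata example_nand_pdata = {
	.gpio_nce	= 11,	/* board-specific GPIO lines (illustrative) */
	.gpio_nwp	= 12,
	.gpio_cle	= 13,
	.gpio_ale	= 14,
	.gpio_rdy	= 15,
	.chip_delay	= 25,	/* device tR, in microseconds */
};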
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 81774e5facf4..733d3f3b4eb8 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -248,6 +248,7 @@ struct nand_hw_control {
  * @read_page_raw:	function to read a raw page without ECC
  * @write_page_raw:	function to write a raw page without ECC
  * @read_page:	function to read a page according to the ecc generator requirements
+ * @read_subpage:	function to read parts of the page covered by ECC.
  * @write_page:	function to write a page according to the ecc generator requirements
  * @read_oob:	function to read chip OOB data
  * @write_oob:	function to write chip OOB data
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index d1b310c92eb4..0c6bbe28f38c 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -152,6 +152,8 @@
 #define ONENAND_SYS_CFG1_INT		(1 << 6)
 #define ONENAND_SYS_CFG1_IOBE		(1 << 5)
 #define ONENAND_SYS_CFG1_RDY_CONF	(1 << 4)
+#define ONENAND_SYS_CFG1_HF		(1 << 2)
+#define ONENAND_SYS_CFG1_SYNC_WRITE	(1 << 1)
 
 /*
  * Controller Status Register F240h (R)
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 5014f7a9f5df..c92b4d439609 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -73,7 +73,6 @@ struct device;
 struct device_node;
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-				      struct mtd_info *mtd,
 				      struct device_node *node,
 				      struct mtd_partition **pparts);
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
new file mode 100644
index 000000000000..e77c1cea404d
--- /dev/null
+++ b/include/linux/mtd/sh_flctl.h
@@ -0,0 +1,125 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __SH_FLCTL_H__
+#define __SH_FLCTL_H__
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+/* FLCTL registers */
+#define FLCMNCR(f)		(f->reg + 0x0)
+#define FLCMDCR(f)		(f->reg + 0x4)
+#define FLCMCDR(f)		(f->reg + 0x8)
+#define FLADR(f)		(f->reg + 0xC)
+#define FLADR2(f)		(f->reg + 0x3C)
+#define FLDATAR(f)		(f->reg + 0x10)
+#define FLDTCNTR(f)		(f->reg + 0x14)
+#define FLINTDMACR(f)		(f->reg + 0x18)
+#define FLBSYTMR(f)		(f->reg + 0x1C)
+#define FLBSYCNT(f)		(f->reg + 0x20)
+#define FLDTFIFO(f)		(f->reg + 0x24)
+#define FLECFIFO(f)		(f->reg + 0x28)
+#define FLTRCR(f)		(f->reg + 0x2C)
+#define FL4ECCRESULT0(f)	(f->reg + 0x80)
+#define FL4ECCRESULT1(f)	(f->reg + 0x84)
+#define FL4ECCRESULT2(f)	(f->reg + 0x88)
+#define FL4ECCRESULT3(f)	(f->reg + 0x8C)
+#define FL4ECCCR(f)		(f->reg + 0x90)
+#define FL4ECCCNT(f)		(f->reg + 0x94)
+#define FLERRADR(f)		(f->reg + 0x98)
+
+/* FLCMNCR control bits */
+#define ECCPOS2			(0x1 << 25)
+#define _4ECCCNTEN		(0x1 << 24)
+#define _4ECCEN			(0x1 << 23)
+#define _4ECCCORRECT		(0x1 << 22)
+#define SNAND_E			(0x1 << 18)	/* SNAND (0=512 1=2048)*/
+#define QTSEL_E			(0x1 << 17)
+#define ENDIAN			(0x1 << 16)	/* 1 = little endian */
+#define FCKSEL_E		(0x1 << 15)
+#define ECCPOS_00		(0x00 << 12)
+#define ECCPOS_01		(0x01 << 12)
+#define ECCPOS_02		(0x02 << 12)
+#define ACM_SACCES_MODE		(0x01 << 10)
+#define NANWF_E			(0x1 << 9)
+#define SE_D			(0x1 << 8)	/* Spare area disable */
+#define CE1_ENABLE		(0x1 << 4)	/* Chip Enable 1 */
+#define CE0_ENABLE		(0x1 << 3)	/* Chip Enable 0 */
+#define TYPESEL_SET		(0x1 << 0)
+
+/* FLCMDCR control bits */
+#define ADRCNT2_E		(0x1 << 31)	/* 5byte address enable */
+#define ADRMD_E			(0x1 << 26)	/* Sector address access */
+#define CDSRC_E			(0x1 << 25)	/* Data buffer selection */
+#define DOSR_E			(0x1 << 24)	/* Status read check */
+#define SELRW			(0x1 << 21)	/*  0:read 1:write */
+#define DOADR_E			(0x1 << 20)	/* Address stage execute */
+#define ADRCNT_1		(0x00 << 18)	/* Address data bytes: 1byte */
+#define ADRCNT_2		(0x01 << 18)	/* Address data bytes: 2byte */
+#define ADRCNT_3		(0x02 << 18)	/* Address data bytes: 3byte */
+#define ADRCNT_4		(0x03 << 18)	/* Address data bytes: 4byte */
+#define DOCMD2_E		(0x1 << 17)	/* 2nd cmd stage execute */
+#define DOCMD1_E		(0x1 << 16)	/* 1st cmd stage execute */
+
+/* FLTRCR control bits */
+#define TRSTRT			(0x1 << 0)	/* translation start */
+#define TREND			(0x1 << 1)	/* translation end */
+
+/* FL4ECCCR control bits */
+#define _4ECCFA			(0x1 << 2)	/* 4 symbols correct fault */
+#define _4ECCEND		(0x1 << 1)	/* 4 symbols end */
+#define _4ECCEXST		(0x1 << 0)	/* 4 symbols exist */
+
+#define INIT_FL4ECCRESULT_VAL	0x03FF03FF
+#define LOOP_TIMEOUT_MAX	0x00010000
+
+#define mtd_to_flctl(mtd)	container_of(mtd, struct sh_flctl, mtd)
+
+struct sh_flctl {
+	struct mtd_info		mtd;
+	struct nand_chip	chip;
+	void __iomem		*reg;
+
+	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
+	int	read_bytes;
+	int	index;
+	int	seqin_column;		/* column in SEQIN cmd */
+	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
+	uint32_t seqin_read_cmd;	/* read cmd in SEQIN cmd */
+	int	erase1_page_addr;	/* page_addr in ERASE1 cmd */
+	uint32_t erase_ADRCNT;		/* bits of FLCMDCR in ERASE1 cmd */
+	uint32_t rw_ADRCNT;		/* bits of FLCMDCR in READ WRITE cmd */
+
+	int	hwecc_cant_correct[4];
+
+	unsigned page_size:1;	/* NAND page size (0 = 512, 1 = 2048) */
+	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
+};
+
+struct sh_flctl_platform_data {
+	struct mtd_partition	*parts;
+	int			nr_parts;
+	unsigned long		flcmncr_val;
+
+	unsigned has_hwecc:1;
+};
+
+#endif	/* __SH_FLCTL_H__ */
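mtd_to_flctl() is the standard container_of() accessor; a hedged sketch (illustrative function) of how a driver method handed only the mtd_info can reach the controller registers through the macros above:

static void example_enable_ce0(struct mtd_info *mtd)
{
	struct sh_flctl *flctl = mtd_to_flctl(mtd);	/* container_of() */
	u32 flcmncr = readl(FLCMNCR(flctl));

	writel(flcmncr | CE0_ENABLE, FLCMNCR(flctl));	/* assert Chip Enable 0 */
}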
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c74d3e875314..b12f93a3c345 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -93,6 +93,11 @@ enum pageflags {
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_buddy,		/* Page is free, on buddy lists */
+	PG_swapbacked,		/* Page is backed by RAM/swap */
+#ifdef CONFIG_UNEVICTABLE_LRU
+	PG_unevictable,		/* Page is "unevictable"  */
+	PG_mlocked,		/* Page is vma mlocked */
+#endif
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
@@ -161,6 +166,18 @@ static inline int Page##uname(struct page *page) \
 #define TESTSCFLAG(uname, lname)					\
 	TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define SETPAGEFLAG_NOOP(uname)						\
+static inline void SetPage##uname(struct page *page) {  }
+
+#define CLEARPAGEFLAG_NOOP(uname)					\
+static inline void ClearPage##uname(struct page *page) {  }
+
+#define __CLEARPAGEFLAG_NOOP(uname)					\
+static inline void __ClearPage##uname(struct page *page) {  }
+
+#define TESTCLEARFLAG_FALSE(uname)					\
+static inline int TestClearPage##uname(struct page *page) { return 0; }
+
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -169,6 +186,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
+	TESTCLEARFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
@@ -176,6 +194,7 @@ PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
+PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
@@ -211,6 +230,25 @@ PAGEFLAG(SwapCache, swapcache)
 PAGEFLAG_FALSE(SwapCache)
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
+	TESTCLEARFLAG(Unevictable, unevictable)
+
+#define MLOCK_PAGES 1
+PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
+	TESTSCFLAG(Mlocked, mlocked)
+
+#else
+
+#define MLOCK_PAGES 0
+PAGEFLAG_FALSE(Mlocked)
+	SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+
+PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
+	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
+	__CLEARPAGEFLAG_NOOP(Unevictable)
+#endif
+
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 PAGEFLAG(Uncached, uncached)
 #else
@@ -326,15 +364,25 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define __PG_UNEVICTABLE	(1 << PG_unevictable)
+#define __PG_MLOCKED		(1 << PG_mlocked)
+#else
+#define __PG_UNEVICTABLE	0
+#define __PG_MLOCKED		0
+#endif
+
 #define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
 			 1 << PG_buddy | 1 << PG_writeback | \
-			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active)
+			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+			 __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked in bad_page().  Pages on the free list should not have
  * these flags set.  If they are, there is a problem.
  */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
+		1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
 
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
@@ -347,7 +395,8 @@ static inline void __ClearPageTail(struct page *page)
 * Pages being prepped should not have these flags set.  If they are, there
 * is a problem.
 */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
+		1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif /* PAGE_FLAGS_H */
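For clarity, the new no-op macros exist so callers can use the mlock/unevictable accessors unconditionally; per the definitions above, they expand to empty inlines the compiler discards, e.g.:

/* expansion of SETPAGEFLAG_NOOP(Mlocked): */
static inline void SetPageMlocked(struct page *page) {  }
/* expansion of TESTCLEARFLAG_FALSE(Mlocked): */
static inline int TestClearPageMlocked(struct page *page) { return 0; }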
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
new file mode 100644
index 000000000000..0fd39f2231ec
--- /dev/null
+++ b/include/linux/page_cgroup.h
@@ -0,0 +1,103 @@
+#ifndef __LINUX_PAGE_CGROUP_H
+#define __LINUX_PAGE_CGROUP_H
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#include <linux/bit_spinlock.h>
+/*
+ * Page Cgroup can be considered as an extended mem_map.
+ * A page_cgroup is associated with every page descriptor.  The
+ * page_cgroup helps us identify which cgroup a page belongs to.
+ * All page_cgroups are allocated at boot or at a memory hotplug event,
+ * so the page_cgroup for a pfn always exists.
+ */
+struct page_cgroup {
+	unsigned long flags;
+	struct mem_cgroup *mem_cgroup;
+	struct page *page;
+	struct list_head lru;		/* per cgroup LRU list */
+};
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
+void __init page_cgroup_init(void);
+struct page_cgroup *lookup_page_cgroup(struct page *page);
+
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* page cgroup is locked */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	/* flags for LRU placement */
+	PCG_ACTIVE, /* page is active in this cgroup */
+	PCG_FILE, /* page is file system backed */
+	PCG_UNEVICTABLE, /* page is unevictable */
+};
+
+#define TESTPCGFLAG(uname, lname)			\
+static inline int PageCgroup##uname(struct page_cgroup *pc)	\
+	{ return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname)			\
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+	{ set_bit(PCG_##lname, &pc->flags);  }
+
+#define CLEARPCGFLAG(uname, lname)			\
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
+	{ clear_bit(PCG_##lname, &pc->flags);  }
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+TESTPCGFLAG(Used, USED)
+CLEARPCGFLAG(Used, USED)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
+
+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+	return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+	return page_zonenum(pc->page);
+}
+
+static inline void lock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_lock(PCG_LOCK, &pc->flags);
+}
+
+static inline int trylock_page_cgroup(struct page_cgroup *pc)
+{
+	return bit_spin_trylock(PCG_LOCK, &pc->flags);
+}
+
+static inline void unlock_page_cgroup(struct page_cgroup *pc)
+{
+	bit_spin_unlock(PCG_LOCK, &pc->flags);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+struct page_cgroup;
+
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+	return NULL;
+}
+#endif
+#endif
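A hedged sketch of the locking pattern these helpers are designed for on the charge path (the function is illustrative and error handling is elided):

static void example_commit_charge(struct page *page, struct mem_cgroup *mem)
{
	struct page_cgroup *pc = lookup_page_cgroup(page);

	lock_page_cgroup(pc);		/* bit_spin_lock on PCG_LOCK */
	if (!PageCgroupUsed(pc))
		pc->mem_cgroup = mem;	/* publish under the bit lock */
	unlock_page_cgroup(pc);
}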
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101c..709742be02f0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -32,6 +32,34 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
32 } 32 }
33} 33}
34 34
35#ifdef CONFIG_UNEVICTABLE_LRU
36#define AS_UNEVICTABLE (__GFP_BITS_SHIFT + 2) /* e.g., ramdisk, SHM_LOCK */
37
38static inline void mapping_set_unevictable(struct address_space *mapping)
39{
40 set_bit(AS_UNEVICTABLE, &mapping->flags);
41}
42
43static inline void mapping_clear_unevictable(struct address_space *mapping)
44{
45 clear_bit(AS_UNEVICTABLE, &mapping->flags);
46}
47
48static inline int mapping_unevictable(struct address_space *mapping)
49{
50 if (likely(mapping))
51 return test_bit(AS_UNEVICTABLE, &mapping->flags);
52 return !!mapping;
53}
54#else
55static inline void mapping_set_unevictable(struct address_space *mapping) { }
56static inline void mapping_clear_unevictable(struct address_space *mapping) { }
57static inline int mapping_unevictable(struct address_space *mapping)
58{
59 return 0;
60}
61#endif
62
35static inline gfp_t mapping_gfp_mask(struct address_space * mapping) 63static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
36{ 64{
37 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; 65 return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
@@ -271,19 +299,19 @@ extern int __lock_page_killable(struct page *page);
271extern void __lock_page_nosync(struct page *page); 299extern void __lock_page_nosync(struct page *page);
272extern void unlock_page(struct page *page); 300extern void unlock_page(struct page *page);
273 301
274static inline void set_page_locked(struct page *page) 302static inline void __set_page_locked(struct page *page)
275{ 303{
276 set_bit(PG_locked, &page->flags); 304 __set_bit(PG_locked, &page->flags);
277} 305}
278 306
279static inline void clear_page_locked(struct page *page) 307static inline void __clear_page_locked(struct page *page)
280{ 308{
281 clear_bit(PG_locked, &page->flags); 309 __clear_bit(PG_locked, &page->flags);
282} 310}
283 311
284static inline int trylock_page(struct page *page) 312static inline int trylock_page(struct page *page)
285{ 313{
286 return !test_and_set_bit(PG_locked, &page->flags); 314 return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
287} 315}
288 316
289/* 317/*
@@ -410,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
410 438
411/* 439/*
412 * Like add_to_page_cache_locked, but used to add newly allocated pages: 440 * Like add_to_page_cache_locked, but used to add newly allocated pages:
413 * the page is new, so we can just run set_page_locked() against it. 441 * the page is new, so we can just run __set_page_locked() against it.
414 */ 442 */
415static inline int add_to_page_cache(struct page *page, 443static inline int add_to_page_cache(struct page *page,
416 struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) 444 struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
417{ 445{
418 int error; 446 int error;
419 447
420 set_page_locked(page); 448 __set_page_locked(page);
421 error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); 449 error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
422 if (unlikely(error)) 450 if (unlikely(error))
423 clear_page_locked(page); 451 __clear_page_locked(page);
424 return error; 452 return error;
425} 453}
426 454
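
Two distinct changes sit in the pagemap.h hunks above: add_to_page_cache() now locks a freshly allocated page with the non-atomic __set_bit() (safe only because no other CPU can see the page yet), and trylock_page() switches to test_and_set_bit_lock(), which has acquire semantics. A hedged userspace sketch of the trylock shape, using C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdio.h>

#define PG_LOCKED 0

/* Acquire-ordered trylock over one bit of a flags word, mirroring
 * test_and_set_bit_lock(); returns 1 on success. */
static int trylock_flag(atomic_ulong *flags)
{
        unsigned long old = atomic_fetch_or_explicit(flags,
                        1UL << PG_LOCKED, memory_order_acquire);
        return !(old & (1UL << PG_LOCKED));
}

/* Release-ordered unlock, mirroring clear_bit_unlock(). */
static void unlock_flag(atomic_ulong *flags)
{
        atomic_fetch_and_explicit(flags, ~(1UL << PG_LOCKED),
                        memory_order_release);
}

int main(void)
{
        atomic_ulong flags = 0;

        printf("first trylock: %d\n", trylock_flag(&flags));  /* 1 */
        printf("second trylock: %d\n", trylock_flag(&flags)); /* 0 */
        unlock_flag(&flags);
        return 0;
}
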
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 8eb7fa76c1d0..e90a2cb02915 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -23,9 +23,9 @@ struct pagevec {
23void __pagevec_release(struct pagevec *pvec); 23void __pagevec_release(struct pagevec *pvec);
24void __pagevec_release_nonlru(struct pagevec *pvec); 24void __pagevec_release_nonlru(struct pagevec *pvec);
25void __pagevec_free(struct pagevec *pvec); 25void __pagevec_free(struct pagevec *pvec);
26void __pagevec_lru_add(struct pagevec *pvec); 26void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
27void __pagevec_lru_add_active(struct pagevec *pvec);
28void pagevec_strip(struct pagevec *pvec); 27void pagevec_strip(struct pagevec *pvec);
28void pagevec_swap_free(struct pagevec *pvec);
29unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 29unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
30 pgoff_t start, unsigned nr_pages); 30 pgoff_t start, unsigned nr_pages);
31unsigned pagevec_lookup_tag(struct pagevec *pvec, 31unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -81,10 +81,36 @@ static inline void pagevec_free(struct pagevec *pvec)
81 __pagevec_free(pvec); 81 __pagevec_free(pvec);
82} 82}
83 83
84static inline void pagevec_lru_add(struct pagevec *pvec) 84static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
85{
86 ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
87}
88
89static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
90{
91 ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
92}
93
94static inline void __pagevec_lru_add_file(struct pagevec *pvec)
95{
96 ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
97}
98
99static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
100{
101 ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
102}
103
104static inline void pagevec_lru_add_file(struct pagevec *pvec)
105{
106 if (pagevec_count(pvec))
107 __pagevec_lru_add_file(pvec);
108}
109
110static inline void pagevec_lru_add_anon(struct pagevec *pvec)
85{ 111{
86 if (pagevec_count(pvec)) 112 if (pagevec_count(pvec))
87 __pagevec_lru_add(pvec); 113 __pagevec_lru_add_anon(pvec);
88} 114}
89 115
90#endif /* _LINUX_PAGEVEC_H */ 116#endif /* _LINUX_PAGEVEC_H */
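
The old __pagevec_lru_add()/__pagevec_lru_add_active() pair collapses into one worker, ____pagevec_lru_add(pvec, lru), parameterized by the split anon/file LRU list, with thin inline wrappers naming each target. A toy sketch of that shape (the counting worker is a stand-in, not what the kernel function does):

#include <stdio.h>

enum lru_list { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON,
                LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRU_LISTS };

static unsigned long nr_on_list[NR_LRU_LISTS];

/* Stand-in for the batched worker: just counts pages per list. */
static void ____batch_lru_add(unsigned int count, enum lru_list lru)
{
        nr_on_list[lru] += count;
}

/* Thin typed wrappers, mirroring __pagevec_lru_add_file() and friends. */
static void batch_lru_add_file(unsigned int count)
{
        ____batch_lru_add(count, LRU_INACTIVE_FILE);
}

static void batch_lru_add_active_anon(unsigned int count)
{
        ____batch_lru_add(count, LRU_ACTIVE_ANON);
}

int main(void)
{
        batch_lru_add_file(14);
        batch_lru_add_active_anon(3);
        printf("inactive file: %lu, active anon: %lu\n",
               nr_on_list[LRU_INACTIVE_FILE], nr_on_list[LRU_ACTIVE_ANON]);
        return 0;
}
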
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 98dc6243a706..acf8f24037cd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
631int pci_select_bars(struct pci_dev *dev, unsigned long flags); 631int pci_select_bars(struct pci_dev *dev, unsigned long flags);
632 632
633/* ROM control related routines */ 633/* ROM control related routines */
634int pci_enable_rom(struct pci_dev *pdev);
635void pci_disable_rom(struct pci_dev *pdev);
634void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); 636void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
635void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); 637void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
636size_t pci_get_rom_size(void __iomem *rom, size_t size); 638size_t pci_get_rom_size(void __iomem *rom, size_t size);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ea7416c901d1..22641d5d45df 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code);
94extern void __ptrace_link(struct task_struct *child, 94extern void __ptrace_link(struct task_struct *child,
95 struct task_struct *new_parent); 95 struct task_struct *new_parent);
96extern void __ptrace_unlink(struct task_struct *child); 96extern void __ptrace_unlink(struct task_struct *child);
97extern void ptrace_untrace(struct task_struct *child);
98#define PTRACE_MODE_READ 1 97#define PTRACE_MODE_READ 1
99#define PTRACE_MODE_ATTACH 2 98#define PTRACE_MODE_ATTACH 2
100/* Returns 0 on success, -errno on denial. */ 99/* Returns 0 on success, -errno on denial. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fed6f5e0b411..89f0564b10c8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -39,18 +39,6 @@ struct anon_vma {
39 39
40#ifdef CONFIG_MMU 40#ifdef CONFIG_MMU
41 41
42extern struct kmem_cache *anon_vma_cachep;
43
44static inline struct anon_vma *anon_vma_alloc(void)
45{
46 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
47}
48
49static inline void anon_vma_free(struct anon_vma *anon_vma)
50{
51 kmem_cache_free(anon_vma_cachep, anon_vma);
52}
53
54static inline void anon_vma_lock(struct vm_area_struct *vma) 42static inline void anon_vma_lock(struct vm_area_struct *vma)
55{ 43{
56 struct anon_vma *anon_vma = vma->anon_vma; 44 struct anon_vma *anon_vma = vma->anon_vma;
@@ -75,6 +63,9 @@ void anon_vma_unlink(struct vm_area_struct *);
75void anon_vma_link(struct vm_area_struct *); 63void anon_vma_link(struct vm_area_struct *);
76void __anon_vma_link(struct vm_area_struct *); 64void __anon_vma_link(struct vm_area_struct *);
77 65
66extern struct anon_vma *page_lock_anon_vma(struct page *page);
67extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
68
78/* 69/*
79 * rmap interfaces called when adding or removing pte of page 70 * rmap interfaces called when adding or removing pte of page
80 */ 71 */
@@ -117,6 +108,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
117 */ 108 */
118int page_mkclean(struct page *); 109int page_mkclean(struct page *);
119 110
111#ifdef CONFIG_UNEVICTABLE_LRU
112/*
113 * called in munlock()/munmap() path to check for other vmas holding
114 * the page mlocked.
115 */
116int try_to_munlock(struct page *);
117#else
118static inline int try_to_munlock(struct page *page)
119{
120 return 0; /* a.k.a. SWAP_SUCCESS */
121}
122#endif
123
120#else /* !CONFIG_MMU */ 124#else /* !CONFIG_MMU */
121 125
122#define anon_vma_init() do {} while (0) 126#define anon_vma_init() do {} while (0)
@@ -140,5 +144,6 @@ static inline int page_mkclean(struct page *page)
140#define SWAP_SUCCESS 0 144#define SWAP_SUCCESS 0
141#define SWAP_AGAIN 1 145#define SWAP_AGAIN 1
142#define SWAP_FAIL 2 146#define SWAP_FAIL 2
147#define SWAP_MLOCK 3
143 148
144#endif /* _LINUX_RMAP_H */ 149#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c226c7b82946..f52dbd3587a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -403,12 +403,21 @@ extern int get_dumpable(struct mm_struct *mm);
403#define MMF_DUMP_MAPPED_PRIVATE 4 403#define MMF_DUMP_MAPPED_PRIVATE 4
404#define MMF_DUMP_MAPPED_SHARED 5 404#define MMF_DUMP_MAPPED_SHARED 5
405#define MMF_DUMP_ELF_HEADERS 6 405#define MMF_DUMP_ELF_HEADERS 6
406#define MMF_DUMP_HUGETLB_PRIVATE 7
407#define MMF_DUMP_HUGETLB_SHARED 8
406#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS 408#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
407#define MMF_DUMP_FILTER_BITS 5 409#define MMF_DUMP_FILTER_BITS 7
408#define MMF_DUMP_FILTER_MASK \ 410#define MMF_DUMP_FILTER_MASK \
409 (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) 411 (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
410#define MMF_DUMP_FILTER_DEFAULT \ 412#define MMF_DUMP_FILTER_DEFAULT \
411 ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) 413 ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
414 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
415
416#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
417# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
418#else
419# define MMF_DUMP_MASK_DEFAULT_ELF 0
420#endif
412 421
413struct sighand_struct { 422struct sighand_struct {
414 atomic_t count; 423 atomic_t count;
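
Growing MMF_DUMP_FILTER_BITS from 5 to 7 makes room for the two new hugetlb dump flags while leaving the mask arithmetic untouched. A quick standalone check of the resulting constants, with the bit numbers copied from the hunk above:

#include <stdio.h>

#define MMF_DUMPABLE_BITS        2
#define MMF_DUMP_ANON_PRIVATE    2
#define MMF_DUMP_ANON_SHARED     3
#define MMF_DUMP_HUGETLB_PRIVATE 7
#define MMF_DUMP_FILTER_SHIFT    MMF_DUMPABLE_BITS
#define MMF_DUMP_FILTER_BITS     7
#define MMF_DUMP_FILTER_MASK \
        (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)

int main(void)
{
        unsigned int def = (1 << MMF_DUMP_ANON_PRIVATE) |
                           (1 << MMF_DUMP_ANON_SHARED)  |
                           (1 << MMF_DUMP_HUGETLB_PRIVATE);

        /* 7 filter bits starting at bit 2 -> mask 0x1fc */
        printf("filter mask: %#x, default (without ELF bit): %#x\n",
               (unsigned int)MMF_DUMP_FILTER_MASK, def);
        return 0;
}
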
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a1783b229ef4..dc50bcc282a8 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
60 return seq_bitmap(m, mask->bits, MAX_NUMNODES); 60 return seq_bitmap(m, mask->bits, MAX_NUMNODES);
61} 61}
62 62
63int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
64 unsigned int nr_bits);
65
66static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask)
67{
68 return seq_bitmap_list(m, mask->bits, NR_CPUS);
69}
70
71static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask)
72{
73 return seq_bitmap_list(m, mask->bits, MAX_NUMNODES);
74}
75
63int single_open(struct file *, int (*)(struct seq_file *, void *), void *); 76int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
64int single_release(struct inode *, struct file *); 77int single_release(struct inode *, struct file *);
65void *__seq_open_private(struct file *, const struct seq_operations *, int); 78void *__seq_open_private(struct file *, const struct seq_operations *, int);
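
The new seq_cpumask_list()/seq_nodemask_list() helpers render a mask in list format ("0-3,5,7") rather than the hex format of the existing seq_cpumask()/seq_nodemask(). A userspace sketch of that formatting, reduced to a single word for brevity:

#include <stdio.h>

/* Print the set bits of @bits in cpulist-style ranges, e.g. "0-3,5,7" --
 * the output shape seq_bitmap_list() produces. Sketch, not kernel code. */
static void print_bitmap_list(unsigned long bits, unsigned int nr_bits)
{
        const char *sep = "";
        unsigned int i = 0;

        while (i < nr_bits) {
                unsigned int start;

                if (!(bits & (1UL << i))) {
                        i++;
                        continue;
                }
                start = i;
                while (i < nr_bits && (bits & (1UL << i)))
                        i++;
                if (i - start > 1)
                        printf("%s%u-%u", sep, start, i - 1);
                else
                        printf("%s%u", sep, start);
                sep = ",";
        }
        putchar('\n');
}

int main(void)
{
        print_bitmap_list(0xAF, 32);    /* bits 0,1,2,3,5,7 -> "0-3,5,7" */
        return 0;
}
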
diff --git a/include/linux/swab.h b/include/linux/swab.h
index 270d5c208a89..bbed279f3b32 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val)
47{ 47{
48#ifdef __arch_swab16 48#ifdef __arch_swab16
49 return __arch_swab16(val); 49 return __arch_swab16(val);
50#elif defined(__arch_swab16p)
51 return __arch_swab16p(&val);
52#else 50#else
53 return __const_swab16(val); 51 return __const_swab16(val);
54#endif 52#endif
@@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val)
58{ 56{
59#ifdef __arch_swab32 57#ifdef __arch_swab32
60 return __arch_swab32(val); 58 return __arch_swab32(val);
61#elif defined(__arch_swab32p)
62 return __arch_swab32p(&val);
63#else 59#else
64 return __const_swab32(val); 60 return __const_swab32(val);
65#endif 61#endif
@@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
69{ 65{
70#ifdef __arch_swab64 66#ifdef __arch_swab64
71 return __arch_swab64(val); 67 return __arch_swab64(val);
72#elif defined(__arch_swab64p)
73 return __arch_swab64p(&val);
74#elif defined(__SWAB_64_THRU_32__) 68#elif defined(__SWAB_64_THRU_32__)
75 __u32 h = val >> 32; 69 __u32 h = val >> 32;
76 __u32 l = val & ((1ULL << 32) - 1); 70 __u32 l = val & ((1ULL << 32) - 1);
@@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val)
84{ 78{
85#ifdef __arch_swahw32 79#ifdef __arch_swahw32
86 return __arch_swahw32(val); 80 return __arch_swahw32(val);
87#elif defined(__arch_swahw32p)
88 return __arch_swahw32p(&val);
89#else 81#else
90 return __const_swahw32(val); 82 return __const_swahw32(val);
91#endif 83#endif
@@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
95{ 87{
96#ifdef __arch_swahb32 88#ifdef __arch_swahb32
97 return __arch_swahb32(val); 89 return __arch_swahb32(val);
98#elif defined(__arch_swahb32p)
99 return __arch_swahb32p(&val);
100#else 90#else
101 return __const_swahb32(val); 91 return __const_swahb32(val);
102#endif 92#endif
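
The deleted branches forwarded a by-value byte swap to the arch's swap-in-place helpers (__arch_swab16p() and friends); after this cleanup the by-value path uses either a dedicated __arch_swab*() or the constant-foldable mask-and-shift fallback. For reference, a standalone version of the 32-bit fallback shape:

#include <stdint.h>
#include <stdio.h>

/* Mask-and-shift 32-bit byte swap, the same shape as __const_swab32(). */
static inline uint32_t const_swab32(uint32_t x)
{
        return ((x & 0x000000ffUL) << 24) |
               ((x & 0x0000ff00UL) <<  8) |
               ((x & 0x00ff0000UL) >>  8) |
               ((x & 0xff000000UL) >> 24);
}

int main(void)
{
        printf("%#010x\n", (unsigned int)const_swab32(0x12345678u));
        /* prints 0x78563412 */
        return 0;
}
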
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de40f169a4e4..a3af95b2cb6d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
7#include <linux/list.h> 7#include <linux/list.h>
8#include <linux/memcontrol.h> 8#include <linux/memcontrol.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/node.h>
10 11
11#include <asm/atomic.h> 12#include <asm/atomic.h>
12#include <asm/page.h> 13#include <asm/page.h>
@@ -171,8 +172,10 @@ extern unsigned int nr_free_pagecache_pages(void);
171 172
172 173
173/* linux/mm/swap.c */ 174/* linux/mm/swap.c */
174extern void lru_cache_add(struct page *); 175extern void __lru_cache_add(struct page *, enum lru_list lru);
175extern void lru_cache_add_active(struct page *); 176extern void lru_cache_add_lru(struct page *, enum lru_list lru);
177extern void lru_cache_add_active_or_unevictable(struct page *,
178 struct vm_area_struct *);
176extern void activate_page(struct page *); 179extern void activate_page(struct page *);
177extern void mark_page_accessed(struct page *); 180extern void mark_page_accessed(struct page *);
178extern void lru_add_drain(void); 181extern void lru_add_drain(void);
@@ -180,12 +183,38 @@ extern int lru_add_drain_all(void);
180extern void rotate_reclaimable_page(struct page *page); 183extern void rotate_reclaimable_page(struct page *page);
181extern void swap_setup(void); 184extern void swap_setup(void);
182 185
186extern void add_page_to_unevictable_list(struct page *page);
187
188/**
189 * lru_cache_add_anon: add a page to the inactive anon page list
190 * @page: the page to add
191 */
192static inline void lru_cache_add_anon(struct page *page)
193{
194 __lru_cache_add(page, LRU_INACTIVE_ANON);
195}
196
197static inline void lru_cache_add_active_anon(struct page *page)
198{
199 __lru_cache_add(page, LRU_ACTIVE_ANON);
200}
201
202static inline void lru_cache_add_file(struct page *page)
203{
204 __lru_cache_add(page, LRU_INACTIVE_FILE);
205}
206
207static inline void lru_cache_add_active_file(struct page *page)
208{
209 __lru_cache_add(page, LRU_ACTIVE_FILE);
210}
211
183/* linux/mm/vmscan.c */ 212/* linux/mm/vmscan.c */
184extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 213extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
185 gfp_t gfp_mask); 214 gfp_t gfp_mask);
186extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 215extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
187 gfp_t gfp_mask); 216 gfp_t gfp_mask);
188extern int __isolate_lru_page(struct page *page, int mode); 217extern int __isolate_lru_page(struct page *page, int mode, int file);
189extern unsigned long shrink_all_memory(unsigned long nr_pages); 218extern unsigned long shrink_all_memory(unsigned long nr_pages);
190extern int vm_swappiness; 219extern int vm_swappiness;
191extern int remove_mapping(struct address_space *mapping, struct page *page); 220extern int remove_mapping(struct address_space *mapping, struct page *page);
@@ -204,6 +233,34 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
204} 233}
205#endif 234#endif
206 235
236#ifdef CONFIG_UNEVICTABLE_LRU
237extern int page_evictable(struct page *page, struct vm_area_struct *vma);
238extern void scan_mapping_unevictable_pages(struct address_space *);
239
240extern unsigned long scan_unevictable_pages;
241extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
242 void __user *, size_t *, loff_t *);
243extern int scan_unevictable_register_node(struct node *node);
244extern void scan_unevictable_unregister_node(struct node *node);
245#else
246static inline int page_evictable(struct page *page,
247 struct vm_area_struct *vma)
248{
249 return 1;
250}
251
252static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
253{
254}
255
256static inline int scan_unevictable_register_node(struct node *node)
257{
258 return 0;
259}
260
261static inline void scan_unevictable_unregister_node(struct node *node) { }
262#endif
263
207extern int kswapd_run(int nid); 264extern int kswapd_run(int nid);
208 265
209#ifdef CONFIG_MMU 266#ifdef CONFIG_MMU
@@ -251,6 +308,7 @@ extern sector_t swapdev_block(int, pgoff_t);
251extern struct swap_info_struct *get_swap_info_struct(unsigned); 308extern struct swap_info_struct *get_swap_info_struct(unsigned);
252extern int can_share_swap_page(struct page *); 309extern int can_share_swap_page(struct page *);
253extern int remove_exclusive_swap_page(struct page *); 310extern int remove_exclusive_swap_page(struct page *);
311extern int remove_exclusive_swap_page_ref(struct page *);
254struct backing_dev_info; 312struct backing_dev_info;
255 313
256/* linux/mm/thrash.c */ 314/* linux/mm/thrash.c */
@@ -339,6 +397,11 @@ static inline int remove_exclusive_swap_page(struct page *p)
339 return 0; 397 return 0;
340} 398}
341 399
400static inline int remove_exclusive_swap_page_ref(struct page *page)
401{
402 return 0;
403}
404
342static inline swp_entry_t get_swap_page(void) 405static inline swp_entry_t get_swap_page(void)
343{ 406{
344 swp_entry_t entry; 407 swp_entry_t entry;
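
Every new CONFIG_UNEVICTABLE_LRU entry point in swap.h is paired with an inline stub, so callers never need their own #ifdefs; the page_evictable() stub returns 1, i.e. "everything is evictable" when the feature is compiled out. The pattern, reduced to a toy with illustrative names:

#include <stdio.h>

#define CONFIG_FEATURE_X 1      /* flip to 0 to build the stubbed side */

#if CONFIG_FEATURE_X
static int feature_x_query(int arg)
{
        return arg > 0;         /* the real implementation lives here */
}
#else
static inline int feature_x_query(int arg)
{
        (void)arg;
        return 1;               /* permissive default, like page_evictable() */
}
#endif

int main(void)
{
        printf("query(-5) = %d\n", feature_x_query(-5));
        return 0;
}
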
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b330e289d71f..9d68fed50f11 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -21,8 +21,9 @@ struct kobject;
21struct module; 21struct module;
22 22
23/* FIXME 23/* FIXME
24 * The *owner field is no longer used, but leave around 24 * The *owner field is no longer used.
25 * until the tree gets cleaned up fully. 25 * x86 tree has been cleaned up. The owner
26 * attribute is still left for other arches.
26 */ 27 */
27struct attribute { 28struct attribute {
28 const char *name; 29 const char *name;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 328eb4022727..4c28c4d564e2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,7 @@
2#define _LINUX_VMALLOC_H 2#define _LINUX_VMALLOC_H
3 3
4#include <linux/spinlock.h> 4#include <linux/spinlock.h>
5#include <linux/init.h>
5#include <asm/page.h> /* pgprot_t */ 6#include <asm/page.h> /* pgprot_t */
6 7
7struct vm_area_struct; /* vma defining user mapping in mm_types.h */ 8struct vm_area_struct; /* vma defining user mapping in mm_types.h */
@@ -23,7 +24,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */
23#endif 24#endif
24 25
25struct vm_struct { 26struct vm_struct {
26 /* keep next,addr,size together to speedup lookups */
27 struct vm_struct *next; 27 struct vm_struct *next;
28 void *addr; 28 void *addr;
29 unsigned long size; 29 unsigned long size;
@@ -37,6 +37,19 @@ struct vm_struct {
37/* 37/*
38 * Highlevel APIs for driver use 38 * Highlevel APIs for driver use
39 */ 39 */
40extern void vm_unmap_ram(const void *mem, unsigned int count);
41extern void *vm_map_ram(struct page **pages, unsigned int count,
42 int node, pgprot_t prot);
43extern void vm_unmap_aliases(void);
44
45#ifdef CONFIG_MMU
46extern void __init vmalloc_init(void);
47#else
48static inline void vmalloc_init(void)
49{
50}
51#endif
52
40extern void *vmalloc(unsigned long size); 53extern void *vmalloc(unsigned long size);
41extern void *vmalloc_user(unsigned long size); 54extern void *vmalloc_user(unsigned long size);
42extern void *vmalloc_node(unsigned long size, int node); 55extern void *vmalloc_node(unsigned long size, int node);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d439516..9cd3ab0f554d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,6 +41,16 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
41#ifdef CONFIG_HUGETLB_PAGE 41#ifdef CONFIG_HUGETLB_PAGE
42 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, 42 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
43#endif 43#endif
44#ifdef CONFIG_UNEVICTABLE_LRU
45 UNEVICTABLE_PGCULLED, /* culled to noreclaim list */
46 UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */
47 UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */
48 UNEVICTABLE_PGMLOCKED,
49 UNEVICTABLE_PGMUNLOCKED,
50 UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
51 UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
52 UNEVICTABLE_MLOCKFREED,
53#endif
44 NR_VM_EVENT_ITEMS 54 NR_VM_EVENT_ITEMS
45}; 55};
46 56
@@ -159,6 +169,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
159 return x; 169 return x;
160} 170}
161 171
172extern unsigned long global_lru_pages(void);
173
174static inline unsigned long zone_lru_pages(struct zone *zone)
175{
176 return (zone_page_state(zone, NR_ACTIVE_ANON)
177 + zone_page_state(zone, NR_ACTIVE_FILE)
178 + zone_page_state(zone, NR_INACTIVE_ANON)
179 + zone_page_state(zone, NR_INACTIVE_FILE));
180}
181
162#ifdef CONFIG_NUMA 182#ifdef CONFIG_NUMA
163/* 183/*
164 * Determine the per node value of a stat item. This function 184 * Determine the per node value of a stat item. This function
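
zone_lru_pages() makes the cost of the anon/file split explicit: a zone's total LRU population is now the sum of four counters rather than one. As a standalone sketch with stand-in counters:

#include <stdio.h>

enum { NR_INACTIVE_ANON, NR_ACTIVE_ANON, NR_INACTIVE_FILE,
       NR_ACTIVE_FILE, NR_STAT_ITEMS };

/* Stand-in state, not the kernel's per-zone vm_stat array. */
static unsigned long vm_stat[NR_STAT_ITEMS];

static unsigned long zone_lru_pages_sketch(void)
{
        return vm_stat[NR_ACTIVE_ANON] + vm_stat[NR_ACTIVE_FILE] +
               vm_stat[NR_INACTIVE_ANON] + vm_stat[NR_INACTIVE_FILE];
}

int main(void)
{
        vm_stat[NR_ACTIVE_ANON] = 100;
        vm_stat[NR_INACTIVE_FILE] = 250;
        printf("lru pages: %lu\n", zone_lru_pages_sketch()); /* 350 */
        return 0;
}
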
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 0cb63ed2c1fc..b8093971ccb4 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -2,9 +2,9 @@
2#define __NETNS_X_TABLES_H 2#define __NETNS_X_TABLES_H
3 3
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/net.h> 5#include <linux/netfilter.h>
6 6
7struct netns_xt { 7struct netns_xt {
8 struct list_head tables[NPROTO]; 8 struct list_head tables[NFPROTO_NUMPROTO];
9}; 9};
10#endif 10#endif
diff --git a/init/Kconfig b/init/Kconfig
index 5ceff3249a2d..8828ed0b2051 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -299,6 +299,13 @@ config CGROUP_NS
299 for instance virtual servers and checkpoint/restart 299 for instance virtual servers and checkpoint/restart
300 jobs. 300 jobs.
301 301
302config CGROUP_FREEZER
303 bool "control group freezer subsystem"
304 depends on CGROUPS
305 help
306 Provides a way to freeze and unfreeze all tasks in a
307 cgroup.
308
302config CGROUP_DEVICE 309config CGROUP_DEVICE
303 bool "Device controller for cgroups" 310 bool "Device controller for cgroups"
304 depends on CGROUPS && EXPERIMENTAL 311 depends on CGROUPS && EXPERIMENTAL
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e9..4371d11721f6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -27,6 +27,7 @@
27#include <linux/gfp.h> 27#include <linux/gfp.h>
28#include <linux/percpu.h> 28#include <linux/percpu.h>
29#include <linux/kmod.h> 29#include <linux/kmod.h>
30#include <linux/vmalloc.h>
30#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
31#include <linux/start_kernel.h> 32#include <linux/start_kernel.h>
32#include <linux/security.h> 33#include <linux/security.h>
@@ -642,6 +643,7 @@ asmlinkage void __init start_kernel(void)
642 initrd_start = 0; 643 initrd_start = 0;
643 } 644 }
644#endif 645#endif
646 vmalloc_init();
645 vfs_caches_init_early(); 647 vfs_caches_init_early();
646 cpuset_init_early(); 648 cpuset_init_early();
647 mem_init(); 649 mem_init();
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 96fb36cd9874..68eb857cfdea 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -52,6 +52,14 @@
52#define HARD_MSGMAX (131072/sizeof(void*)) 52#define HARD_MSGMAX (131072/sizeof(void*))
53#define DFLT_MSGSIZEMAX 8192 /* max message size */ 53#define DFLT_MSGSIZEMAX 8192 /* max message size */
54 54
55/*
56 * Define the ranges various user-specified maximum values can
57 * be set to.
58 */
59#define MIN_MSGMAX 1 /* min value for msg_max */
60#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
61#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
62#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
55 63
56struct ext_wait_queue { /* queue of sleeping tasks */ 64struct ext_wait_queue { /* queue of sleeping tasks */
57 struct task_struct *task; 65 struct task_struct *task;
@@ -134,8 +142,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
134 info->qsize = 0; 142 info->qsize = 0;
135 info->user = NULL; /* set when all is ok */ 143 info->user = NULL; /* set when all is ok */
136 memset(&info->attr, 0, sizeof(info->attr)); 144 memset(&info->attr, 0, sizeof(info->attr));
137 info->attr.mq_maxmsg = DFLT_MSGMAX; 145 info->attr.mq_maxmsg = msg_max;
138 info->attr.mq_msgsize = DFLT_MSGSIZEMAX; 146 info->attr.mq_msgsize = msgsize_max;
139 if (attr) { 147 if (attr) {
140 info->attr.mq_maxmsg = attr->mq_maxmsg; 148 info->attr.mq_maxmsg = attr->mq_maxmsg;
141 info->attr.mq_msgsize = attr->mq_msgsize; 149 info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1191,11 +1199,11 @@ static struct file_system_type mqueue_fs_type = {
1191 .kill_sb = kill_litter_super, 1199 .kill_sb = kill_litter_super,
1192}; 1200};
1193 1201
1194static int msg_max_limit_min = DFLT_MSGMAX; 1202static int msg_max_limit_min = MIN_MSGMAX;
1195static int msg_max_limit_max = HARD_MSGMAX; 1203static int msg_max_limit_max = MAX_MSGMAX;
1196 1204
1197static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX; 1205static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
1198static int msg_maxsize_limit_max = INT_MAX; 1206static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
1199 1207
1200static ctl_table mq_sysctls[] = { 1208static ctl_table mq_sysctls[] = {
1201 { 1209 {
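
Before this change the sysctl bounds reused the defaults (DFLT_MSGMAX and DFLT_MSGSIZEMAX), so msg_max could never be set below its default; the new MIN_*/MAX_* constants give the tunables a real range. A sketch of what the bounds check amounts to (in the kernel it is enforced through the ctl_table min/max limit variables shown above):

#include <stdio.h>

#define MIN_MSGMAX      1
#define MAX_MSGMAX      (131072 / sizeof(void *))

/* Reject writes outside [MIN_MSGMAX, MAX_MSGMAX]; returns 0 on success. */
static int set_msg_max(long val, long *msg_max)
{
        if (val < MIN_MSGMAX || val > (long)MAX_MSGMAX)
                return -1;      /* -EINVAL in the kernel */
        *msg_max = val;
        return 0;
}

int main(void)
{
        long msg_max = 10;

        printf("set 0:  %d\n", set_msg_max(0, &msg_max));   /* rejected */
        printf("set 64: %d\n", set_msg_max(64, &msg_max));  /* accepted */
        printf("msg_max = %ld\n", msg_max);
        return 0;
}
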
diff --git a/ipc/shm.c b/ipc/shm.c
index e77ec698cf40..0add3fa5f547 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
737 case SHM_LOCK: 737 case SHM_LOCK:
738 case SHM_UNLOCK: 738 case SHM_UNLOCK:
739 { 739 {
740 struct file *uninitialized_var(shm_file);
741
742 lru_add_drain_all(); /* drain pagevecs to lru lists */
743
740 shp = shm_lock_check(ns, shmid); 744 shp = shm_lock_check(ns, shmid);
741 if (IS_ERR(shp)) { 745 if (IS_ERR(shp)) {
742 err = PTR_ERR(shp); 746 err = PTR_ERR(shp);
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 000000000000..a3bb4cb52539
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
1config FREEZER
2 def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..066550aa61c5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
24CFLAGS_REMOVE_sched.o = -mno-spe -pg 24CFLAGS_REMOVE_sched.o = -mno-spe -pg
25endif 25endif
26 26
27obj-$(CONFIG_FREEZER) += freezer.o
27obj-$(CONFIG_PROFILING) += profile.o 28obj-$(CONFIG_PROFILING) += profile.o
28obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 29obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
29obj-$(CONFIG_STACKTRACE) += stacktrace.o 30obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
55obj-$(CONFIG_COMPAT) += compat.o 56obj-$(CONFIG_COMPAT) += compat.o
56obj-$(CONFIG_CGROUPS) += cgroup.o 57obj-$(CONFIG_CGROUPS) += cgroup.o
57obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o 58obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
59obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
58obj-$(CONFIG_CPUSETS) += cpuset.o 60obj-$(CONFIG_CPUSETS) += cpuset.o
59obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o 61obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
60obj-$(CONFIG_UTS_NS) += utsname.o 62obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d3..046c1609606b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
241 struct cg_cgroup_link *link; 241 struct cg_cgroup_link *link;
242 struct cg_cgroup_link *saved_link; 242 struct cg_cgroup_link *saved_link;
243 243
244 write_lock(&css_set_lock);
245 hlist_del(&cg->hlist); 244 hlist_del(&cg->hlist);
246 css_set_count--; 245 css_set_count--;
247 246
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
251 list_del(&link->cgrp_link_list); 250 list_del(&link->cgrp_link_list);
252 kfree(link); 251 kfree(link);
253 } 252 }
254
255 write_unlock(&css_set_lock);
256} 253}
257 254
258static void __release_css_set(struct kref *k, int taskexit) 255static void __put_css_set(struct css_set *cg, int taskexit)
259{ 256{
260 int i; 257 int i;
261 struct css_set *cg = container_of(k, struct css_set, ref); 258 /*
262 259 * Ensure that the refcount doesn't hit zero while any readers
260 * can see it. Similar to atomic_dec_and_lock(), but for an
261 * rwlock
262 */
263 if (atomic_add_unless(&cg->refcount, -1, 1))
264 return;
265 write_lock(&css_set_lock);
266 if (!atomic_dec_and_test(&cg->refcount)) {
267 write_unlock(&css_set_lock);
268 return;
269 }
263 unlink_css_set(cg); 270 unlink_css_set(cg);
271 write_unlock(&css_set_lock);
264 272
265 rcu_read_lock(); 273 rcu_read_lock();
266 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 274 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
276 kfree(cg); 284 kfree(cg);
277} 285}
278 286
279static void release_css_set(struct kref *k)
280{
281 __release_css_set(k, 0);
282}
283
284static void release_css_set_taskexit(struct kref *k)
285{
286 __release_css_set(k, 1);
287}
288
289/* 287/*
290 * refcounted get/put for css_set objects 288 * refcounted get/put for css_set objects
291 */ 289 */
292static inline void get_css_set(struct css_set *cg) 290static inline void get_css_set(struct css_set *cg)
293{ 291{
294 kref_get(&cg->ref); 292 atomic_inc(&cg->refcount);
295} 293}
296 294
297static inline void put_css_set(struct css_set *cg) 295static inline void put_css_set(struct css_set *cg)
298{ 296{
299 kref_put(&cg->ref, release_css_set); 297 __put_css_set(cg, 0);
300} 298}
301 299
302static inline void put_css_set_taskexit(struct css_set *cg) 300static inline void put_css_set_taskexit(struct css_set *cg)
303{ 301{
304 kref_put(&cg->ref, release_css_set_taskexit); 302 __put_css_set(cg, 1);
305} 303}
306 304
307/* 305/*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
427 return NULL; 425 return NULL;
428 } 426 }
429 427
430 kref_init(&res->ref); 428 atomic_set(&res->refcount, 1);
431 INIT_LIST_HEAD(&res->cg_links); 429 INIT_LIST_HEAD(&res->cg_links);
432 INIT_LIST_HEAD(&res->tasks); 430 INIT_LIST_HEAD(&res->tasks);
433 INIT_HLIST_NODE(&res->hlist); 431 INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
870 .remount_fs = cgroup_remount, 868 .remount_fs = cgroup_remount,
871}; 869};
872 870
871static void init_cgroup_housekeeping(struct cgroup *cgrp)
872{
873 INIT_LIST_HEAD(&cgrp->sibling);
874 INIT_LIST_HEAD(&cgrp->children);
875 INIT_LIST_HEAD(&cgrp->css_sets);
876 INIT_LIST_HEAD(&cgrp->release_list);
877 init_rwsem(&cgrp->pids_mutex);
878}
873static void init_cgroup_root(struct cgroupfs_root *root) 879static void init_cgroup_root(struct cgroupfs_root *root)
874{ 880{
875 struct cgroup *cgrp = &root->top_cgroup; 881 struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
878 root->number_of_cgroups = 1; 884 root->number_of_cgroups = 1;
879 cgrp->root = root; 885 cgrp->root = root;
880 cgrp->top_cgroup = cgrp; 886 cgrp->top_cgroup = cgrp;
881 INIT_LIST_HEAD(&cgrp->sibling); 887 init_cgroup_housekeeping(cgrp);
882 INIT_LIST_HEAD(&cgrp->children);
883 INIT_LIST_HEAD(&cgrp->css_sets);
884 INIT_LIST_HEAD(&cgrp->release_list);
885} 888}
886 889
887static int cgroup_test_super(struct super_block *sb, void *data) 890static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
1728 1731
1729 read_lock(&css_set_lock); 1732 read_lock(&css_set_lock);
1730 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { 1733 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1731 count += atomic_read(&link->cg->ref.refcount); 1734 count += atomic_read(&link->cg->refcount);
1732 } 1735 }
1733 read_unlock(&css_set_lock); 1736 read_unlock(&css_set_lock);
1734 return count; 1737 return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
1997 * but we cannot guarantee that the information we produce is correct 2000 * but we cannot guarantee that the information we produce is correct
1998 * unless we produce it entirely atomically. 2001 * unless we produce it entirely atomically.
1999 * 2002 *
2000 * Upon tasks file open(), a struct ctr_struct is allocated, that
2001 * will have a pointer to an array (also allocated here). The struct
2002 * ctr_struct * is stored in file->private_data. Its resources will
2003 * be freed by release() when the file is closed. The array is used
2004 * to sprintf the PIDs and then used by read().
2005 */ 2003 */
2006struct ctr_struct {
2007 char *buf;
2008 int bufsz;
2009};
2010 2004
2011/* 2005/*
2012 * Load into 'pidarray' up to 'npids' of the tasks using cgroup 2006 * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
2088 return *(pid_t *)a - *(pid_t *)b; 2082 return *(pid_t *)a - *(pid_t *)b;
2089} 2083}
2090 2084
2085
2091/* 2086/*
2092 * Convert array 'a' of 'npids' pid_t's to a string of newline separated 2087 * seq_file methods for the "tasks" file. The seq_file position is the
2093 * decimal pids in 'buf'. Don't write more than 'sz' chars, but return 2088 * next pid to display; the seq_file iterator is a pointer to the pid
2094 * count 'cnt' of how many chars would be written if buf were large enough. 2089 * in the cgroup->tasks_pids array.
2095 */ 2090 */
2096static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) 2091
2092static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2097{ 2093{
2098 int cnt = 0; 2094 /*
2099 int i; 2095 * Initially we receive a position value that corresponds to
2096 * one more than the last pid shown (or 0 on the first call or
2097 * after a seek to the start). Use a binary-search to find the
2098 * next pid to display, if any
2099 */
2100 struct cgroup *cgrp = s->private;
2101 int index = 0, pid = *pos;
2102 int *iter;
2100 2103
2101 for (i = 0; i < npids; i++) 2104 down_read(&cgrp->pids_mutex);
2102 cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); 2105 if (pid) {
2103 return cnt; 2106 int end = cgrp->pids_length;
2107 int i;
2108 while (index < end) {
2109 int mid = (index + end) / 2;
2110 if (cgrp->tasks_pids[mid] == pid) {
2111 index = mid;
2112 break;
2113 } else if (cgrp->tasks_pids[mid] <= pid)
2114 index = mid + 1;
2115 else
2116 end = mid;
2117 }
2118 }
2119 /* If we're off the end of the array, we're done */
2120 if (index >= cgrp->pids_length)
2121 return NULL;
2122 /* Update the abstract position to be the actual pid that we found */
2123 iter = cgrp->tasks_pids + index;
2124 *pos = *iter;
2125 return iter;
2126}
2127
2128static void cgroup_tasks_stop(struct seq_file *s, void *v)
2129{
2130 struct cgroup *cgrp = s->private;
2131 up_read(&cgrp->pids_mutex);
2104} 2132}
2105 2133
2134static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2135{
2136 struct cgroup *cgrp = s->private;
2137 int *p = v;
2138 int *end = cgrp->tasks_pids + cgrp->pids_length;
2139
2140 /*
2141 * Advance to the next pid in the array. If this goes off the
2142 * end, we're done
2143 */
2144 p++;
2145 if (p >= end) {
2146 return NULL;
2147 } else {
2148 *pos = *p;
2149 return p;
2150 }
2151}
2152
2153static int cgroup_tasks_show(struct seq_file *s, void *v)
2154{
2155 return seq_printf(s, "%d\n", *(int *)v);
2156}
2157
2158static struct seq_operations cgroup_tasks_seq_operations = {
2159 .start = cgroup_tasks_start,
2160 .stop = cgroup_tasks_stop,
2161 .next = cgroup_tasks_next,
2162 .show = cgroup_tasks_show,
2163};
2164
2165static void release_cgroup_pid_array(struct cgroup *cgrp)
2166{
2167 down_write(&cgrp->pids_mutex);
2168 BUG_ON(!cgrp->pids_use_count);
2169 if (!--cgrp->pids_use_count) {
2170 kfree(cgrp->tasks_pids);
2171 cgrp->tasks_pids = NULL;
2172 cgrp->pids_length = 0;
2173 }
2174 up_write(&cgrp->pids_mutex);
2175}
2176
2177static int cgroup_tasks_release(struct inode *inode, struct file *file)
2178{
2179 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2180
2181 if (!(file->f_mode & FMODE_READ))
2182 return 0;
2183
2184 release_cgroup_pid_array(cgrp);
2185 return seq_release(inode, file);
2186}
2187
2188static struct file_operations cgroup_tasks_operations = {
2189 .read = seq_read,
2190 .llseek = seq_lseek,
2191 .write = cgroup_file_write,
2192 .release = cgroup_tasks_release,
2193};
2194
2106/* 2195/*
2107 * Handle an open on 'tasks' file. Prepare a buffer listing the 2196 * Handle an open on 'tasks' file. Prepare an array containing the
2108 * process id's of tasks currently attached to the cgroup being opened. 2197 * process id's of tasks currently attached to the cgroup being opened.
2109 *
2110 * Does not require any specific cgroup mutexes, and does not take any.
2111 */ 2198 */
2199
2112static int cgroup_tasks_open(struct inode *unused, struct file *file) 2200static int cgroup_tasks_open(struct inode *unused, struct file *file)
2113{ 2201{
2114 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2202 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2115 struct ctr_struct *ctr;
2116 pid_t *pidarray; 2203 pid_t *pidarray;
2117 int npids; 2204 int npids;
2118 char c; 2205 int retval;
2119 2206
2207 /* Nothing to do for write-only files */
2120 if (!(file->f_mode & FMODE_READ)) 2208 if (!(file->f_mode & FMODE_READ))
2121 return 0; 2209 return 0;
2122 2210
2123 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
2124 if (!ctr)
2125 goto err0;
2126
2127 /* 2211 /*
2128 * If cgroup gets more users after we read count, we won't have 2212 * If cgroup gets more users after we read count, we won't have
2129 * enough space - tough. This race is indistinguishable to the 2213 * enough space - tough. This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
2131 * show up until sometime later on. 2215 * show up until sometime later on.
2132 */ 2216 */
2133 npids = cgroup_task_count(cgrp); 2217 npids = cgroup_task_count(cgrp);
2134 if (npids) { 2218 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2135 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); 2219 if (!pidarray)
2136 if (!pidarray) 2220 return -ENOMEM;
2137 goto err1; 2221 npids = pid_array_load(pidarray, npids, cgrp);
2138 2222 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2139 npids = pid_array_load(pidarray, npids, cgrp);
2140 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2141
2142 /* Call pid_array_to_buf() twice, first just to get bufsz */
2143 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
2144 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
2145 if (!ctr->buf)
2146 goto err2;
2147 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
2148
2149 kfree(pidarray);
2150 } else {
2151 ctr->buf = NULL;
2152 ctr->bufsz = 0;
2153 }
2154 file->private_data = ctr;
2155 return 0;
2156
2157err2:
2158 kfree(pidarray);
2159err1:
2160 kfree(ctr);
2161err0:
2162 return -ENOMEM;
2163}
2164 2223
2165static ssize_t cgroup_tasks_read(struct cgroup *cgrp, 2224 /*
2166 struct cftype *cft, 2225 * Store the array in the cgroup, freeing the old
2167 struct file *file, char __user *buf, 2226 * array if necessary
2168 size_t nbytes, loff_t *ppos) 2227 */
2169{ 2228 down_write(&cgrp->pids_mutex);
2170 struct ctr_struct *ctr = file->private_data; 2229 kfree(cgrp->tasks_pids);
2230 cgrp->tasks_pids = pidarray;
2231 cgrp->pids_length = npids;
2232 cgrp->pids_use_count++;
2233 up_write(&cgrp->pids_mutex);
2171 2234
2172 return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); 2235 file->f_op = &cgroup_tasks_operations;
2173}
2174 2236
2175static int cgroup_tasks_release(struct inode *unused_inode, 2237 retval = seq_open(file, &cgroup_tasks_seq_operations);
2176 struct file *file) 2238 if (retval) {
2177{ 2239 release_cgroup_pid_array(cgrp);
2178 struct ctr_struct *ctr; 2240 return retval;
2179
2180 if (file->f_mode & FMODE_READ) {
2181 ctr = file->private_data;
2182 kfree(ctr->buf);
2183 kfree(ctr);
2184 } 2241 }
2242 ((struct seq_file *)file->private_data)->private = cgrp;
2185 return 0; 2243 return 0;
2186} 2244}
2187 2245
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
2210 { 2268 {
2211 .name = "tasks", 2269 .name = "tasks",
2212 .open = cgroup_tasks_open, 2270 .open = cgroup_tasks_open,
2213 .read = cgroup_tasks_read,
2214 .write_u64 = cgroup_tasks_write, 2271 .write_u64 = cgroup_tasks_write,
2215 .release = cgroup_tasks_release, 2272 .release = cgroup_tasks_release,
2216 .private = FILE_TASKLIST, 2273 .private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2300 2357
2301 mutex_lock(&cgroup_mutex); 2358 mutex_lock(&cgroup_mutex);
2302 2359
2303 INIT_LIST_HEAD(&cgrp->sibling); 2360 init_cgroup_housekeeping(cgrp);
2304 INIT_LIST_HEAD(&cgrp->children);
2305 INIT_LIST_HEAD(&cgrp->css_sets);
2306 INIT_LIST_HEAD(&cgrp->release_list);
2307 2361
2308 cgrp->parent = parent; 2362 cgrp->parent = parent;
2309 cgrp->root = parent->root; 2363 cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2495int __init cgroup_init_early(void) 2549int __init cgroup_init_early(void)
2496{ 2550{
2497 int i; 2551 int i;
2498 kref_init(&init_css_set.ref); 2552 atomic_set(&init_css_set.refcount, 1);
2499 kref_get(&init_css_set.ref);
2500 INIT_LIST_HEAD(&init_css_set.cg_links); 2553 INIT_LIST_HEAD(&init_css_set.cg_links);
2501 INIT_LIST_HEAD(&init_css_set.tasks); 2554 INIT_LIST_HEAD(&init_css_set.tasks);
2502 INIT_HLIST_NODE(&init_css_set.hlist); 2555 INIT_HLIST_NODE(&init_css_set.hlist);
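
Replacing the kref with a bare atomic_t lets __put_css_set() implement what its comment calls "atomic_dec_and_lock(), but for an rwlock": drop the reference lock-free unless it could hit zero, and retake css_set_lock for the final put so readers holding the read lock never observe a zero refcount. A userspace sketch of the same pattern with C11 atomics and a pthread rwlock:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t set_lock = PTHREAD_RWLOCK_INITIALIZER;

struct obj { atomic_int refcount; };

/* Fast path: decrement without the lock while the count cannot reach
 * zero (the atomic_add_unless(&cg->refcount, -1, 1) step). Slow path:
 * take the write lock, then do the final decrement. Returns true if
 * the caller should free the object. */
static bool put_obj(struct obj *o)
{
        int old = atomic_load(&o->refcount);

        while (old > 1) {
                if (atomic_compare_exchange_weak(&o->refcount, &old, old - 1))
                        return false;
        }
        pthread_rwlock_wrlock(&set_lock);
        if (atomic_fetch_sub(&o->refcount, 1) != 1) {
                pthread_rwlock_unlock(&set_lock);
                return false;
        }
        /* unlink from shared structures here, under the write lock */
        pthread_rwlock_unlock(&set_lock);
        return true;
}

int main(void)
{
        struct obj o = { .refcount = 2 };

        printf("first put frees:  %d\n", put_obj(&o));  /* 0 */
        printf("second put frees: %d\n", put_obj(&o));  /* 1 */
        return 0;
}
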
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c02..daca6209202d 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
57 u64 count; 57 u64 count;
58 58
59 rcu_read_lock(); 59 rcu_read_lock();
60 count = atomic_read(&current->cgroups->ref.refcount); 60 count = atomic_read(&current->cgroups->refcount);
61 rcu_read_unlock(); 61 rcu_read_unlock();
62 return count; 62 return count;
63} 63}
@@ -90,7 +90,7 @@ static struct cftype files[] = {
90 { 90 {
91 .name = "releasable", 91 .name = "releasable",
92 .read_u64 = releasable_read, 92 .read_u64 = releasable_read,
93 } 93 },
94}; 94};
95 95
96static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) 96static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 000000000000..e95056954498
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,379 @@
1/*
2 * cgroup_freezer.c - control group freezer subsystem
3 *
4 * Copyright IBM Corporation, 2007
5 *
6 * Author : Cedric Le Goater <clg@fr.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it would be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15 */
16
17#include <linux/module.h>
18#include <linux/cgroup.h>
19#include <linux/fs.h>
20#include <linux/uaccess.h>
21#include <linux/freezer.h>
22#include <linux/seq_file.h>
23
24enum freezer_state {
25 CGROUP_THAWED = 0,
26 CGROUP_FREEZING,
27 CGROUP_FROZEN,
28};
29
30struct freezer {
31 struct cgroup_subsys_state css;
32 enum freezer_state state;
33 spinlock_t lock; /* protects _writes_ to state */
34};
35
36static inline struct freezer *cgroup_freezer(
37 struct cgroup *cgroup)
38{
39 return container_of(
40 cgroup_subsys_state(cgroup, freezer_subsys_id),
41 struct freezer, css);
42}
43
44static inline struct freezer *task_freezer(struct task_struct *task)
45{
46 return container_of(task_subsys_state(task, freezer_subsys_id),
47 struct freezer, css);
48}
49
50int cgroup_frozen(struct task_struct *task)
51{
52 struct freezer *freezer;
53 enum freezer_state state;
54
55 task_lock(task);
56 freezer = task_freezer(task);
57 state = freezer->state;
58 task_unlock(task);
59
60 return state == CGROUP_FROZEN;
61}
62
63/*
64 * cgroups_write_string() limits the size of freezer state strings to
65 * CGROUP_LOCAL_BUFFER_SIZE
66 */
67static const char *freezer_state_strs[] = {
68 "THAWED",
69 "FREEZING",
70 "FROZEN",
71};
72
73/*
74 * State diagram
75 * Transitions are caused by userspace writes to the freezer.state file.
76 * The values in parentheses are state labels. The rest are edge labels.
77 *
78 * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
79 * ^ ^ | |
80 * | \_______THAWED_______/ |
81 * \__________________________THAWED____________/
82 */
83
84struct cgroup_subsys freezer_subsys;
85
86/* Locks taken and their ordering
87 * ------------------------------
88 * css_set_lock
89 * cgroup_mutex (AKA cgroup_lock)
90 * task->alloc_lock (AKA task_lock)
91 * freezer->lock
92 * task->sighand->siglock
93 *
94 * cgroup code forces css_set_lock to be taken before task->alloc_lock
95 *
96 * freezer_create(), freezer_destroy():
97 * cgroup_mutex [ by cgroup core ]
98 *
99 * can_attach():
100 * cgroup_mutex
101 *
102 * cgroup_frozen():
103 * task->alloc_lock (to get task's cgroup)
104 *
105 * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
106 * task->alloc_lock (to get task's cgroup)
107 * freezer->lock
108 * sighand->siglock (if the cgroup is freezing)
109 *
110 * freezer_read():
111 * cgroup_mutex
112 * freezer->lock
113 * read_lock css_set_lock (cgroup iterator start)
114 *
115 * freezer_write() (freeze):
116 * cgroup_mutex
117 * freezer->lock
118 * read_lock css_set_lock (cgroup iterator start)
119 * sighand->siglock
120 *
121 * freezer_write() (unfreeze):
122 * cgroup_mutex
123 * freezer->lock
124 * read_lock css_set_lock (cgroup iterator start)
125 * task->alloc_lock (to prevent races with freeze_task())
126 * sighand->siglock
127 */
128static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
129 struct cgroup *cgroup)
130{
131 struct freezer *freezer;
132
133 freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
134 if (!freezer)
135 return ERR_PTR(-ENOMEM);
136
137 spin_lock_init(&freezer->lock);
138 freezer->state = CGROUP_THAWED;
139 return &freezer->css;
140}
141
142static void freezer_destroy(struct cgroup_subsys *ss,
143 struct cgroup *cgroup)
144{
145 kfree(cgroup_freezer(cgroup));
146}
147
148/* Task is frozen or will freeze immediately when next it gets woken */
149static bool is_task_frozen_enough(struct task_struct *task)
150{
151 return frozen(task) ||
152 (task_is_stopped_or_traced(task) && freezing(task));
153}
154
155/*
156 * The call to cgroup_lock() in the freezer.state write method prevents
157 * a write to that file racing against an attach, and hence the
158 * can_attach() result will remain valid until the attach completes.
159 */
160static int freezer_can_attach(struct cgroup_subsys *ss,
161 struct cgroup *new_cgroup,
162 struct task_struct *task)
163{
164 struct freezer *freezer;
165 int retval;
166
167 /* Anything frozen can't move or be moved to/from */
168
169 if (is_task_frozen_enough(task))
170 return -EBUSY;
171
172 freezer = cgroup_freezer(new_cgroup);
173 if (freezer->state == CGROUP_FROZEN)
174 return -EBUSY;
175
176 retval = 0;
177 task_lock(task);
178 freezer = task_freezer(task);
179 if (freezer->state == CGROUP_FROZEN)
180 retval = -EBUSY;
181 task_unlock(task);
182 return retval;
183}
184
185static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
186{
187 struct freezer *freezer;
188
189 task_lock(task);
190 freezer = task_freezer(task);
191 task_unlock(task);
192
193 BUG_ON(freezer->state == CGROUP_FROZEN);
194 spin_lock_irq(&freezer->lock);
195 /* Locking avoids race with FREEZING -> THAWED transitions. */
196 if (freezer->state == CGROUP_FREEZING)
197 freeze_task(task, true);
198 spin_unlock_irq(&freezer->lock);
199}
200
201/*
202 * caller must hold freezer->lock
203 */
204static void update_freezer_state(struct cgroup *cgroup,
205 struct freezer *freezer)
206{
207 struct cgroup_iter it;
208 struct task_struct *task;
209 unsigned int nfrozen = 0, ntotal = 0;
210
211 cgroup_iter_start(cgroup, &it);
212 while ((task = cgroup_iter_next(cgroup, &it))) {
213 ntotal++;
214 if (is_task_frozen_enough(task))
215 nfrozen++;
216 }
217
218 /*
219 * Transitioning to FROZEN only when no new tasks can be added ensures
220 * that the cgroup is never in the FROZEN state while there are unfrozen
221 * tasks.
222 */
223 if (nfrozen == ntotal)
224 freezer->state = CGROUP_FROZEN;
225 else if (nfrozen > 0)
226 freezer->state = CGROUP_FREEZING;
227 else
228 freezer->state = CGROUP_THAWED;
229 cgroup_iter_end(cgroup, &it);
230}
231
232static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
233 struct seq_file *m)
234{
235 struct freezer *freezer;
236 enum freezer_state state;
237
238 if (!cgroup_lock_live_group(cgroup))
239 return -ENODEV;
240
241 freezer = cgroup_freezer(cgroup);
242 spin_lock_irq(&freezer->lock);
243 state = freezer->state;
244 if (state == CGROUP_FREEZING) {
245 /* We change from FREEZING to FROZEN lazily if the cgroup was
246 * only partially frozen when we exited the write. */
247 update_freezer_state(cgroup, freezer);
248 state = freezer->state;
249 }
250 spin_unlock_irq(&freezer->lock);
251 cgroup_unlock();
252
253 seq_puts(m, freezer_state_strs[state]);
254 seq_putc(m, '\n');
255 return 0;
256}
257
258static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
259{
260 struct cgroup_iter it;
261 struct task_struct *task;
262 unsigned int num_cant_freeze_now = 0;
263
264 freezer->state = CGROUP_FREEZING;
265 cgroup_iter_start(cgroup, &it);
266 while ((task = cgroup_iter_next(cgroup, &it))) {
267 if (!freeze_task(task, true))
268 continue;
269 if (is_task_frozen_enough(task))
270 continue;
271 if (!freezing(task) && !freezer_should_skip(task))
272 num_cant_freeze_now++;
273 }
274 cgroup_iter_end(cgroup, &it);
275
276 return num_cant_freeze_now ? -EBUSY : 0;
277}
278
279static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
280{
281 struct cgroup_iter it;
282 struct task_struct *task;
283
284 cgroup_iter_start(cgroup, &it);
285 while ((task = cgroup_iter_next(cgroup, &it))) {
286 int do_wake;
287
288 task_lock(task);
289 do_wake = __thaw_process(task);
290 task_unlock(task);
291 if (do_wake)
292 wake_up_process(task);
293 }
294 cgroup_iter_end(cgroup, &it);
295 freezer->state = CGROUP_THAWED;
296
297 return 0;
298}
299
300static int freezer_change_state(struct cgroup *cgroup,
301 enum freezer_state goal_state)
302{
303 struct freezer *freezer;
304 int retval = 0;
305
306 freezer = cgroup_freezer(cgroup);
307 spin_lock_irq(&freezer->lock);
308 update_freezer_state(cgroup, freezer);
309 if (goal_state == freezer->state)
310 goto out;
311 switch (freezer->state) {
312 case CGROUP_THAWED:
313 retval = try_to_freeze_cgroup(cgroup, freezer);
314 break;
315 case CGROUP_FREEZING:
316 if (goal_state == CGROUP_FROZEN) {
317 /* Userspace is retrying after
318 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
319 retval = try_to_freeze_cgroup(cgroup, freezer);
320 break;
321 }
322 /* state == FREEZING and goal_state == THAWED, so unfreeze */
323 case CGROUP_FROZEN:
324 retval = unfreeze_cgroup(cgroup, freezer);
325 break;
326 default:
327 break;
328 }
329out:
330 spin_unlock_irq(&freezer->lock);
331
332 return retval;
333}
334
335static int freezer_write(struct cgroup *cgroup,
336 struct cftype *cft,
337 const char *buffer)
338{
339 int retval;
340 enum freezer_state goal_state;
341
342 if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
343 goal_state = CGROUP_THAWED;
344 else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
345 goal_state = CGROUP_FROZEN;
346 else
347 return -EIO;
348
349 if (!cgroup_lock_live_group(cgroup))
350 return -ENODEV;
351 retval = freezer_change_state(cgroup, goal_state);
352 cgroup_unlock();
353 return retval;
354}
355
356static struct cftype files[] = {
357 {
358 .name = "state",
359 .read_seq_string = freezer_read,
360 .write_string = freezer_write,
361 },
362};
363
364static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
365{
366 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
367}
368
369struct cgroup_subsys freezer_subsys = {
370 .name = "freezer",
371 .create = freezer_create,
372 .destroy = freezer_destroy,
373 .populate = freezer_populate,
374 .subsys_id = freezer_subsys_id,
375 .can_attach = freezer_can_attach,
376 .attach = NULL,
377 .fork = freezer_fork,
378 .exit = NULL,
379};
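
The subsystem above is driven entirely by writes to freezer.state, with FREEZING as the intermediate "requested, but not every task frozen yet" state that update_freezer_state() resolves lazily. A compressed sketch of the transitions, matching the state diagram in the file header (the kernel derives all_tasks_frozen from per-task freeze results, not from the write alone):

#include <stdio.h>

enum freezer_state { THAWED, FREEZING, FROZEN };

/* Writes of "FROZEN" move THAWED/FREEZING toward FROZEN; writes of
 * "THAWED" unwind either freezing state. */
static enum freezer_state transition(enum freezer_state cur,
                                     int want_frozen, int all_tasks_frozen)
{
        (void)cur;              /* the result depends only on the inputs */
        if (!want_frozen)
                return THAWED;
        return all_tasks_frozen ? FROZEN : FREEZING;
}

int main(void)
{
        static const char *names[] = { "THAWED", "FREEZING", "FROZEN" };
        enum freezer_state s = THAWED;

        s = transition(s, 1, 0);        /* echo FROZEN, stragglers remain */
        printf("%s\n", names[s]);       /* FREEZING */
        s = transition(s, 1, 1);        /* retry once every task froze */
        printf("%s\n", names[s]);       /* FROZEN */
        s = transition(s, 0, 0);        /* echo THAWED */
        printf("%s\n", names[s]);       /* THAWED */
        return 0;
}
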
diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8c..abaee684ecbf 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
54 54
55#ifdef CONFIG_IKCONFIG_PROC 55#ifdef CONFIG_IKCONFIG_PROC
56 56
57/**************************************************/
58/* globals and useful constants */
59
60static ssize_t 57static ssize_t
61ikconfig_read_current(struct file *file, char __user *buf, 58ikconfig_read_current(struct file *file, char __user *buf,
62 size_t len, loff_t * offset) 59 size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
71 .read = ikconfig_read_current, 68 .read = ikconfig_read_current,
72}; 69};
73 70
74/***************************************************/
75/* ikconfig_init: start up everything we need to */
76
77static int __init ikconfig_init(void) 71static int __init ikconfig_init(void)
78{ 72{
79 struct proc_dir_entry *entry; 73 struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
89 return 0; 83 return 0;
90} 84}
91 85
92/***************************************************/
93/* ikconfig_cleanup: clean up our mess */
94
95static void __exit ikconfig_cleanup(void) 86static void __exit ikconfig_cleanup(void)
96{ 87{
97 remove_proc_entry("config.gz", NULL); 88 remove_proc_entry("config.gz", NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e0..3e00526f52ec 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1172{ 1172{
1173 struct cpuset trialcs; 1173 struct cpuset trialcs;
1174 int err; 1174 int err;
1175 int cpus_nonempty, balance_flag_changed; 1175 int balance_flag_changed;
1176 1176
1177 trialcs = *cs; 1177 trialcs = *cs;
1178 if (turning_on) 1178 if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1184 if (err < 0) 1184 if (err < 0)
1185 return err; 1185 return err;
1186 1186
1187 cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
1188 balance_flag_changed = (is_sched_load_balance(cs) != 1187 balance_flag_changed = (is_sched_load_balance(cs) !=
1189 is_sched_load_balance(&trialcs)); 1188 is_sched_load_balance(&trialcs));
1190 1189
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1192 cs->flags = trialcs.flags; 1191 cs->flags = trialcs.flags;
1193 mutex_unlock(&callback_mutex); 1192 mutex_unlock(&callback_mutex);
1194 1193
1195 if (cpus_nonempty && balance_flag_changed) 1194 if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
1196 async_rebuild_sched_domains(); 1195 async_rebuild_sched_domains();
1197 1196
1198 return 0; 1197 return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
2437void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) 2436void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2438{ 2437{
2439 seq_printf(m, "Cpus_allowed:\t"); 2438 seq_printf(m, "Cpus_allowed:\t");
2440 m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, 2439 seq_cpumask(m, &task->cpus_allowed);
2441 task->cpus_allowed);
2442 seq_printf(m, "\n"); 2440 seq_printf(m, "\n");
2443 seq_printf(m, "Cpus_allowed_list:\t"); 2441 seq_printf(m, "Cpus_allowed_list:\t");
2444 m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count, 2442 seq_cpumask_list(m, &task->cpus_allowed);
2445 task->cpus_allowed);
2446 seq_printf(m, "\n"); 2443 seq_printf(m, "\n");
2447 seq_printf(m, "Mems_allowed:\t"); 2444 seq_printf(m, "Mems_allowed:\t");
2448 m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, 2445 seq_nodemask(m, &task->mems_allowed);
2449 task->mems_allowed);
2450 seq_printf(m, "\n"); 2446 seq_printf(m, "\n");
2451 seq_printf(m, "Mems_allowed_list:\t"); 2447 seq_printf(m, "Mems_allowed_list:\t");
2452 m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count, 2448 seq_nodemask_list(m, &task->mems_allowed);
2453 task->mems_allowed);
2454 seq_printf(m, "\n"); 2449 seq_printf(m, "\n");
2455} 2450}
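
The cpuset hunk above replaces the open-coded `m->count += cpumask_scnprintf(...)` buffer bookkeeping with the new seq_file helpers, which track the output position internally. A minimal sketch of how such helpers can be layered on the seq_bitmap() primitive added in the same series (the real definitions live in include/linux/seq_file.h; this is a paraphrase, not the authoritative code):

static inline int seq_cpumask(struct seq_file *m, cpumask_t *mask)
{
	/* print the cpumask as hex words; seq_bitmap tracks m->count */
	return seq_bitmap(m, mask->bits, NR_CPUS);
}

static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
{
	return seq_bitmap(m, mask->bits, MAX_NUMNODES);
}
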
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 000000000000..ba6248b323ef
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,154 @@
1/*
2 * kernel/freezer.c - Function to freeze a process
3 *
4 * Originally from kernel/power/process.c
5 */
6
7#include <linux/interrupt.h>
8#include <linux/suspend.h>
9#include <linux/module.h>
10#include <linux/syscalls.h>
11#include <linux/freezer.h>
12
13/*
14 * freezing is complete, mark current process as frozen
15 */
16static inline void frozen_process(void)
17{
18 if (!unlikely(current->flags & PF_NOFREEZE)) {
19 current->flags |= PF_FROZEN;
20 wmb();
21 }
22 clear_freeze_flag(current);
23}
24
25/* Refrigerator is place where frozen processes are stored :-). */
26void refrigerator(void)
27{
28 /* Hmm, should we be allowed to suspend when there are realtime
29 processes around? */
30 long save;
31
32 task_lock(current);
33 if (freezing(current)) {
34 frozen_process();
35 task_unlock(current);
36 } else {
37 task_unlock(current);
38 return;
39 }
40 save = current->state;
41 pr_debug("%s entered refrigerator\n", current->comm);
42
43 spin_lock_irq(&current->sighand->siglock);
44 recalc_sigpending(); /* We sent fake signal, clean it up */
45 spin_unlock_irq(&current->sighand->siglock);
46
47 for (;;) {
48 set_current_state(TASK_UNINTERRUPTIBLE);
49 if (!frozen(current))
50 break;
51 schedule();
52 }
53 pr_debug("%s left refrigerator\n", current->comm);
54 __set_current_state(save);
55}
56EXPORT_SYMBOL(refrigerator);
57
58static void fake_signal_wake_up(struct task_struct *p)
59{
60 unsigned long flags;
61
62 spin_lock_irqsave(&p->sighand->siglock, flags);
63 signal_wake_up(p, 0);
64 spin_unlock_irqrestore(&p->sighand->siglock, flags);
65}
66
67/**
68 * freeze_task - send a freeze request to given task
69 * @p: task to send the request to
70 * @sig_only: if set, the request will only be sent if the task has the
71 * PF_FREEZER_NOSIG flag unset
72 * Return value: 'false', if @sig_only is set and the task has
73 * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
74 *
75 * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
76 * either sending a fake signal to it or waking it up, depending on whether
77 * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
78 * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
79 * TIF_FREEZE flag will not be set.
80 */
81bool freeze_task(struct task_struct *p, bool sig_only)
82{
83 /*
84 * We first check if the task is freezing and next if it has already
85 * been frozen to avoid the race with frozen_process() which first marks
86 * the task as frozen and next clears its TIF_FREEZE.
87 */
88 if (!freezing(p)) {
89 rmb();
90 if (frozen(p))
91 return false;
92
93 if (!sig_only || should_send_signal(p))
94 set_freeze_flag(p);
95 else
96 return false;
97 }
98
99 if (should_send_signal(p)) {
100 if (!signal_pending(p))
101 fake_signal_wake_up(p);
102 } else if (sig_only) {
103 return false;
104 } else {
105 wake_up_state(p, TASK_INTERRUPTIBLE);
106 }
107
108 return true;
109}
110
111void cancel_freezing(struct task_struct *p)
112{
113 unsigned long flags;
114
115 if (freezing(p)) {
116 pr_debug(" clean up: %s\n", p->comm);
117 clear_freeze_flag(p);
118 spin_lock_irqsave(&p->sighand->siglock, flags);
119 recalc_sigpending_and_wake(p);
120 spin_unlock_irqrestore(&p->sighand->siglock, flags);
121 }
122}
123
124/*
125 * Wake up a frozen process
126 *
127 * task_lock() is needed to prevent the race with refrigerator() which may
128 * occur if the freezing of tasks fails. Namely, without the lock, if the
129 * freezing of tasks failed, thaw_tasks() might have run before a task in
130 * refrigerator() could call frozen_process(), in which case the task would be
131 * frozen and no one would thaw it.
132 */
133int __thaw_process(struct task_struct *p)
134{
135 if (frozen(p)) {
136 p->flags &= ~PF_FROZEN;
137 return 1;
138 }
139 clear_freeze_flag(p);
140 return 0;
141}
142
143int thaw_process(struct task_struct *p)
144{
145 task_lock(p);
146 if (__thaw_process(p) == 1) {
147 task_unlock(p);
148 wake_up_process(p);
149 return 1;
150 }
151 task_unlock(p);
152 return 0;
153}
154EXPORT_SYMBOL(thaw_process);
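
kernel/freezer.c pulls the refrigerator out of the suspend-only path so the cgroup freezer can reuse it. For context, a minimal sketch of a freezable kernel thread that cooperates with this code, using the stock helpers from <linux/freezer.h> and <linux/kthread.h> (the worker body is hypothetical):

static int frosty_worker(void *unused)
{
	set_freezable();		/* clear PF_NOFREEZE for this kthread */
	while (!kthread_should_stop()) {
		try_to_freeze();	/* parks in refrigerator() when TIF_FREEZE is set */
		/* ... do one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}
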
diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd3..777ac458ac99 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
1371 VMCOREINFO_SYMBOL(node_online_map); 1371 VMCOREINFO_SYMBOL(node_online_map);
1372 VMCOREINFO_SYMBOL(swapper_pg_dir); 1372 VMCOREINFO_SYMBOL(swapper_pg_dir);
1373 VMCOREINFO_SYMBOL(_stext); 1373 VMCOREINFO_SYMBOL(_stext);
1374 VMCOREINFO_SYMBOL(vmlist);
1374 1375
1375#ifndef CONFIG_NEED_MULTIPLE_NODES 1376#ifndef CONFIG_NEED_MULTIPLE_NODES
1376 VMCOREINFO_SYMBOL(mem_map); 1377 VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
1406 VMCOREINFO_OFFSET(free_area, free_list); 1407 VMCOREINFO_OFFSET(free_area, free_list);
1407 VMCOREINFO_OFFSET(list_head, next); 1408 VMCOREINFO_OFFSET(list_head, next);
1408 VMCOREINFO_OFFSET(list_head, prev); 1409 VMCOREINFO_OFFSET(list_head, prev);
1410 VMCOREINFO_OFFSET(vm_struct, addr);
1409 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); 1411 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1410 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 1412 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1411 VMCOREINFO_NUMBER(NR_FREE_PAGES); 1413 VMCOREINFO_NUMBER(NR_FREE_PAGES);
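
Exporting vmlist and vm_struct.addr lets dump tools such as makedumpfile translate vmalloc addresses in a crash dump. As a sketch of what these macros append to the vmcoreinfo note, paraphrasing the definitions in include/linux/kexec.h:

#define VMCOREINFO_SYMBOL(name) \
	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)

#define VMCOREINFO_OFFSET(name, field) \
	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
			      (unsigned long)offsetof(struct name, field))
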
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..14ec64fe175a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
171 */ 171 */
172void kthread_bind(struct task_struct *k, unsigned int cpu) 172void kthread_bind(struct task_struct *k, unsigned int cpu)
173{ 173{
174 if (k->state != TASK_UNINTERRUPTIBLE) { 174 /* Must have done schedule() in kthread() before we set_task_cpu */
175 if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
175 WARN_ON(1); 176 WARN_ON(1);
176 return; 177 return;
177 } 178 }
178 /* Must have done schedule() in kthread() before we set_task_cpu */
179 wait_task_inactive(k, 0);
180 set_task_cpu(k, cpu); 179 set_task_cpu(k, cpu);
181 k->cpus_allowed = cpumask_of_cpu(cpu); 180 k->cpus_allowed = cpumask_of_cpu(cpu);
182 k->rt.nr_cpus_allowed = 1; 181 k->rt.nr_cpus_allowed = 1;
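
The kthread fix folds wait_task_inactive() into the state check, so a thread that never reached its initial schedule() is caught before set_task_cpu() runs. The caller pattern this serves is unchanged; a typical sequence (worker_fn is a placeholder):

struct task_struct *t;

t = kthread_create(worker_fn, NULL, "worker/%u", cpu);
if (!IS_ERR(t)) {
	kthread_bind(t, cpu);	/* task still sleeps TASK_UNINTERRUPTIBLE in kthread() */
	wake_up_process(t);
}
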
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf0..ca634019497a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
28 return 1; 28 return 1;
29} 29}
30 30
31/*
32 * freezing is complete, mark current process as frozen
33 */
34static inline void frozen_process(void)
35{
36 if (!unlikely(current->flags & PF_NOFREEZE)) {
37 current->flags |= PF_FROZEN;
38 wmb();
39 }
40 clear_freeze_flag(current);
41}
42
43/* Refrigerator is place where frozen processes are stored :-). */
44void refrigerator(void)
45{
46 /* Hmm, should we be allowed to suspend when there are realtime
47 processes around? */
48 long save;
49
50 task_lock(current);
51 if (freezing(current)) {
52 frozen_process();
53 task_unlock(current);
54 } else {
55 task_unlock(current);
56 return;
57 }
58 save = current->state;
59 pr_debug("%s entered refrigerator\n", current->comm);
60
61 spin_lock_irq(&current->sighand->siglock);
62 recalc_sigpending(); /* We sent fake signal, clean it up */
63 spin_unlock_irq(&current->sighand->siglock);
64
65 for (;;) {
66 set_current_state(TASK_UNINTERRUPTIBLE);
67 if (!frozen(current))
68 break;
69 schedule();
70 }
71 pr_debug("%s left refrigerator\n", current->comm);
72 __set_current_state(save);
73}
74
75static void fake_signal_wake_up(struct task_struct *p)
76{
77 unsigned long flags;
78
79 spin_lock_irqsave(&p->sighand->siglock, flags);
80 signal_wake_up(p, 0);
81 spin_unlock_irqrestore(&p->sighand->siglock, flags);
82}
83
84static inline bool should_send_signal(struct task_struct *p)
85{
86 return !(p->flags & PF_FREEZER_NOSIG);
87}
88
89/**
90 * freeze_task - send a freeze request to given task
91 * @p: task to send the request to
92 * @sig_only: if set, the request will only be sent if the task has the
93 * PF_FREEZER_NOSIG flag unset
94 * Return value: 'false', if @sig_only is set and the task has
95 * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
96 *
97 * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
98 * either sending a fake signal to it or waking it up, depending on whether
99 * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
100 * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
101 * TIF_FREEZE flag will not be set.
102 */
103static bool freeze_task(struct task_struct *p, bool sig_only)
104{
105 /*
106 * We first check if the task is freezing and next if it has already
107 * been frozen to avoid the race with frozen_process() which first marks
108 * the task as frozen and next clears its TIF_FREEZE.
109 */
110 if (!freezing(p)) {
111 rmb();
112 if (frozen(p))
113 return false;
114
115 if (!sig_only || should_send_signal(p))
116 set_freeze_flag(p);
117 else
118 return false;
119 }
120
121 if (should_send_signal(p)) {
122 if (!signal_pending(p))
123 fake_signal_wake_up(p);
124 } else if (sig_only) {
125 return false;
126 } else {
127 wake_up_state(p, TASK_INTERRUPTIBLE);
128 }
129
130 return true;
131}
132
133static void cancel_freezing(struct task_struct *p)
134{
135 unsigned long flags;
136
137 if (freezing(p)) {
138 pr_debug(" clean up: %s\n", p->comm);
139 clear_freeze_flag(p);
140 spin_lock_irqsave(&p->sighand->siglock, flags);
141 recalc_sigpending_and_wake(p);
142 spin_unlock_irqrestore(&p->sighand->siglock, flags);
143 }
144}
145
146static int try_to_freeze_tasks(bool sig_only) 31static int try_to_freeze_tasks(bool sig_only)
147{ 32{
148 struct task_struct *g, *p; 33 struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
250 if (nosig_only && should_send_signal(p)) 135 if (nosig_only && should_send_signal(p))
251 continue; 136 continue;
252 137
138 if (cgroup_frozen(p))
139 continue;
140
253 thaw_process(p); 141 thaw_process(p);
254 } while_each_thread(g, p); 142 } while_each_thread(g, p);
255 read_unlock(&tasklist_lock); 143 read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
264 printk("done.\n"); 152 printk("done.\n");
265} 153}
266 154
267EXPORT_SYMBOL(refrigerator);
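
With the cgroup_frozen() check, thaw_tasks() leaves tasks alone when their freezer cgroup is in the FROZEN state, so resume from suspend does not unfreeze cgroup-frozen groups. A rough sketch of that predicate, reconstructed from how the cgroup freezer tracks state (names taken from kernel/cgroup_freezer.c; treat this as illustrative, not the exact source):

int cgroup_frozen(struct task_struct *task)
{
	struct freezer *freezer;
	enum freezer_state state;

	task_lock(task);
	freezer = task_freezer(task);	/* freezer-subsystem state for this task */
	state = freezer->state;
	task_unlock(task);

	return state == CGROUP_FROZEN;
}
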
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d56..1e68e4c39e2c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
45 * TASK_TRACED, resume it now. 45 * TASK_TRACED, resume it now.
46 * Requires that irqs be disabled. 46 * Requires that irqs be disabled.
47 */ 47 */
48void ptrace_untrace(struct task_struct *child) 48static void ptrace_untrace(struct task_struct *child)
49{ 49{
50 spin_lock(&child->sighand->siglock); 50 spin_lock(&child->sighand->siglock);
51 if (task_is_traced(child)) { 51 if (task_is_traced(child)) {
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa4..59236e8b9daa 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
57#include <linux/byteorder/swabb.h>
58#include <linux/cpumask.h> 57#include <linux/cpumask.h>
59#include <linux/rcupreempt_trace.h> 58#include <linux/rcupreempt_trace.h>
59#include <asm/byteorder.h>
60 60
61/* 61/*
62 * PREEMPT_RCU data structures. 62 * PREEMPT_RCU data structures.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..b3cc73931d1f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
833 .proc_handler = &proc_dointvec, 833 .proc_handler = &proc_dointvec,
834 }, 834 },
835#endif 835#endif
836#ifdef CONFIG_UNEVICTABLE_LRU
837 {
838 .ctl_name = CTL_UNNUMBERED,
839 .procname = "scan_unevictable_pages",
840 .data = &scan_unevictable_pages,
841 .maxlen = sizeof(scan_unevictable_pages),
842 .mode = 0644,
843 .proc_handler = &scan_unevictable_handler,
844 },
845#endif
836/* 846/*
837 * NOTE: do not add new entries to this table unless you have read 847 * NOTE: do not add new entries to this table unless you have read
838 * Documentation/sysctl/ctl_unnumbered.txt 848 * Documentation/sysctl/ctl_unnumbered.txt
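
The new knob asks the VM to rescan the unevictable lists and move any pages that became evictable back onto the normal LRUs. A minimal user-space trigger, assuming the mainline placement of the file under /proc/sys/vm:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/scan_unevictable_pages", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* request a rescan of the unevictable lists */
	fclose(f);
	return 0;
}
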
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06fb57c86de0..482df94ea21e 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -316,17 +316,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
316EXPORT_SYMBOL(bitmap_scnprintf); 316EXPORT_SYMBOL(bitmap_scnprintf);
317 317
318/** 318/**
319 * bitmap_scnprintf_len - return buffer length needed to convert
320 * bitmap to an ASCII hex string
321 * @nr_bits: number of bits to be converted
322 */
323int bitmap_scnprintf_len(unsigned int nr_bits)
324{
325 unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
326 return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
327}
328
329/**
330 * __bitmap_parse - convert an ASCII hex string into a bitmap. 319 * __bitmap_parse - convert an ASCII hex string into a bitmap.
331 * @buf: pointer to buffer containing string. 320 * @buf: pointer to buffer containing string.
332 * @buflen: buffer size in bytes. If string is smaller than this 321 * @buflen: buffer size in bytes. If string is smaller than this
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index cceecb6a963d..a013bbc23717 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -24,6 +24,7 @@
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/kallsyms.h> 25#include <linux/kallsyms.h>
26#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/ioport.h>
27 28
28#include <asm/page.h> /* for PAGE_SIZE */ 29#include <asm/page.h> /* for PAGE_SIZE */
29#include <asm/div64.h> 30#include <asm/div64.h>
@@ -550,18 +551,51 @@ static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int
550#endif 551#endif
551} 552}
552 553
554static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
555{
556#ifndef IO_RSRC_PRINTK_SIZE
557#define IO_RSRC_PRINTK_SIZE 4
558#endif
559
560#ifndef MEM_RSRC_PRINTK_SIZE
561#define MEM_RSRC_PRINTK_SIZE 8
562#endif
563
564 /* room for the actual numbers, the two "0x", -, [, ] and the final zero */
565 char sym[4*sizeof(resource_size_t) + 8];
566 char *p = sym, *pend = sym + sizeof(sym);
567 int size = -1;
568
569 if (res->flags & IORESOURCE_IO)
570 size = IO_RSRC_PRINTK_SIZE;
571 else if (res->flags & IORESOURCE_MEM)
572 size = MEM_RSRC_PRINTK_SIZE;
573
574 *p++ = '[';
575 p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
576 *p++ = '-';
577 p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
578 *p++ = ']';
579 *p = 0;
580
581 return string(buf, end, sym, field_width, precision, flags);
582}
583
553/* 584/*
554 * Show a '%p' thing. A kernel extension is that the '%p' is followed 585 * Show a '%p' thing. A kernel extension is that the '%p' is followed
555 * by an extra set of alphanumeric characters that are extended format 586 * by an extra set of alphanumeric characters that are extended format
556 * specifiers. 587 * specifiers.
557 * 588 *
558 * Right now we just handle 'F' (for symbolic Function descriptor pointers) 589 * Right now we handle:
559 * and 'S' (for Symbolic direct pointers), but this can easily be 590 *
560 * extended in the future (network address types etc). 591 * - 'F' For symbolic function descriptor pointers
592 * - 'S' For symbolic direct pointers
593 * - 'R' For a struct resource pointer, it prints the range of
594 * addresses (not the name nor the flags)
561 * 595 *
562 * The difference between 'S' and 'F' is that on ia64 and ppc64 function 596 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
563 * pointers are really function descriptors, which contain a pointer the 597 * function pointers are really function descriptors, which contain a
564 * real address. 598 * pointer to the real address.
565 */ 599 */
566static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) 600static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
567{ 601{
@@ -571,6 +605,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
571 /* Fallthrough */ 605 /* Fallthrough */
572 case 'S': 606 case 'S':
573 return symbol_string(buf, end, ptr, field_width, precision, flags); 607 return symbol_string(buf, end, ptr, field_width, precision, flags);
608 case 'R':
609 return resource_string(buf, end, ptr, field_width, precision, flags);
574 } 610 }
575 flags |= SMALL; 611 flags |= SMALL;
576 if (field_width == -1) { 612 if (field_width == -1) {
@@ -590,6 +626,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
590 * This function follows C99 vsnprintf, but has some extensions: 626 * This function follows C99 vsnprintf, but has some extensions:
591 * %pS output the name of a text symbol 627 * %pS output the name of a text symbol
592 * %pF output the name of a function pointer 628 * %pF output the name of a function pointer
629 * %pR output the address range in a struct resource
593 * 630 *
594 * The return value is the number of characters which would 631 * The return value is the number of characters which would
595 * be generated for the given input, excluding the trailing 632 * be generated for the given input, excluding the trailing
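
With the %pR extension a driver can print a resource range without hand-formatting the start and end. A short usage sketch (the resource values are made up for illustration):

struct resource res = {
	.start	= 0xd0000000,
	.end	= 0xd0000fff,
	.flags	= IORESOURCE_MEM,
};

printk(KERN_INFO "BAR 0 %pR\n", &res);	/* -> "BAR 0 [0xd0000000-0xd0000fff]" */
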
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a501a4de95c..5b5790f8a816 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
209 def_bool y 209 def_bool y
210 depends on !ARCH_NO_VIRT_TO_BUS 210 depends on !ARCH_NO_VIRT_TO_BUS
211 211
212config UNEVICTABLE_LRU
213 bool "Add LRU list to track non-evictable pages"
214 default y
215 depends on MMU
216 help
217 Keeps unevictable pages off of the active and inactive pageout
218 lists, so kswapd will not waste CPU time or have its balancing
219 algorithms thrown off by scanning these pages. Selecting this
220 will use one page flag and increase the code size a little,
221 say Y unless you know what you are doing.
222
212config MMU_NOTIFIER 223config MMU_NOTIFIER
213 bool 224 bool
diff --git a/mm/Makefile b/mm/Makefile
index da4ccf015aea..c06b45a1ff5f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
33obj-$(CONFIG_MIGRATION) += migrate.o 33obj-$(CONFIG_MIGRATION) += migrate.o
34obj-$(CONFIG_SMP) += allocpercpu.o 34obj-$(CONFIG_SMP) += allocpercpu.o
35obj-$(CONFIG_QUICKLIST) += quicklist.o 35obj-$(CONFIG_QUICKLIST) += quicklist.o
36obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o 36obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
37
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912a..ab8553658af3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
33#include <linux/cpuset.h> 33#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
36#include <linux/mm_inline.h> /* for page_is_file_cache() */
36#include "internal.h" 37#include "internal.h"
37 38
38/* 39/*
@@ -115,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
115{ 116{
116 struct address_space *mapping = page->mapping; 117 struct address_space *mapping = page->mapping;
117 118
118 mem_cgroup_uncharge_cache_page(page);
119 radix_tree_delete(&mapping->page_tree, page->index); 119 radix_tree_delete(&mapping->page_tree, page->index);
120 page->mapping = NULL; 120 page->mapping = NULL;
121 mapping->nrpages--; 121 mapping->nrpages--;
122 __dec_zone_page_state(page, NR_FILE_PAGES); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
123 BUG_ON(page_mapped(page)); 123 BUG_ON(page_mapped(page));
124 mem_cgroup_uncharge_cache_page(page);
124 125
125 /* 126 /*
126 * Some filesystems seem to re-dirty the page even after 127 * Some filesystems seem to re-dirty the page even after
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
492int add_to_page_cache_lru(struct page *page, struct address_space *mapping, 493int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
493 pgoff_t offset, gfp_t gfp_mask) 494 pgoff_t offset, gfp_t gfp_mask)
494{ 495{
495 int ret = add_to_page_cache(page, mapping, offset, gfp_mask); 496 int ret;
496 if (ret == 0) 497
497 lru_cache_add(page); 498 /*
499 * Splice_read and readahead add shmem/tmpfs pages into the page cache
500 * before shmem_readpage has a chance to mark them as SwapBacked: they
501 * need to go on the active_anon lru below, and mem_cgroup_cache_charge
502 * (called in add_to_page_cache) needs to know where they're going too.
503 */
504 if (mapping_cap_swap_backed(mapping))
505 SetPageSwapBacked(page);
506
507 ret = add_to_page_cache(page, mapping, offset, gfp_mask);
508 if (ret == 0) {
509 if (page_is_file_cache(page))
510 lru_cache_add_file(page);
511 else
512 lru_cache_add_active_anon(page);
513 }
498 return ret; 514 return ret;
499} 515}
500 516
@@ -557,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
557 * mechananism between PageLocked pages and PageWriteback pages is shared. 573 * mechananism between PageLocked pages and PageWriteback pages is shared.
558 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. 574 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
559 * 575 *
560 * The first mb is necessary to safely close the critical section opened by the 576 * The mb is necessary to enforce ordering between the clear_bit and the read
561 * test_and_set_bit() to lock the page; the second mb is necessary to enforce 577 * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
562 * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
563 * races with a parallel wait_on_page_locked()).
564 */ 578 */
565void unlock_page(struct page *page) 579void unlock_page(struct page *page)
566{ 580{
567 smp_mb__before_clear_bit(); 581 VM_BUG_ON(!PageLocked(page));
568 if (!test_and_clear_bit(PG_locked, &page->flags)) 582 clear_bit_unlock(PG_locked, &page->flags);
569 BUG(); 583 smp_mb__after_clear_bit();
570 smp_mb__after_clear_bit();
571 wake_up_page(page, PG_locked); 584 wake_up_page(page, PG_locked);
572} 585}
573EXPORT_SYMBOL(unlock_page); 586EXPORT_SYMBOL(unlock_page);
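
clear_bit_unlock() carries release semantics, which is why the barrier before the clear can go away; the acquire side pairs with test_and_set_bit_lock(). A sketch of how the PG_locked bit lock pairs up after this change (lock_page()'s real slow path uses the page waitqueues; this is a simplified spin-and-wait version):

static inline void sketch_lock_page(struct page *page)
{
	while (test_and_set_bit_lock(PG_locked, &page->flags))
		wait_on_page_bit(page, PG_locked);	/* acquire barrier implied */
}

static inline void sketch_unlock_page(struct page *page)
{
	clear_bit_unlock(PG_locked, &page->flags);	/* release barrier implied */
	smp_mb__after_clear_bit();	/* order the clear vs. the waitqueue read */
	wake_up_page(page, PG_locked);
}
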
diff --git a/mm/fremap.c b/mm/fremap.c
index 7881638e4a12..7d12ca70ef7b 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -21,6 +21,8 @@
21#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23 23
24#include "internal.h"
25
24static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, 26static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
25 unsigned long addr, pte_t *ptep) 27 unsigned long addr, pte_t *ptep)
26{ 28{
@@ -215,15 +217,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
215 spin_unlock(&mapping->i_mmap_lock); 217 spin_unlock(&mapping->i_mmap_lock);
216 } 218 }
217 219
220 if (vma->vm_flags & VM_LOCKED) {
221 /*
222 * drop PG_Mlocked flag for over-mapped range
223 */
224 unsigned int saved_flags = vma->vm_flags;
225 munlock_vma_pages_range(vma, start, start + size);
226 vma->vm_flags = saved_flags;
227 }
228
218 mmu_notifier_invalidate_range_start(mm, start, start + size); 229 mmu_notifier_invalidate_range_start(mm, start, start + size);
219 err = populate_range(mm, vma, start, size, pgoff); 230 err = populate_range(mm, vma, start, size, pgoff);
220 mmu_notifier_invalidate_range_end(mm, start, start + size); 231 mmu_notifier_invalidate_range_end(mm, start, start + size);
221 if (!err && !(flags & MAP_NONBLOCK)) { 232 if (!err && !(flags & MAP_NONBLOCK)) {
222 if (unlikely(has_write_lock)) { 233 if (vma->vm_flags & VM_LOCKED) {
223 downgrade_write(&mm->mmap_sem); 234 /*
224 has_write_lock = 0; 235 * might be mapping previously unmapped range of file
236 */
237 mlock_vma_pages_range(vma, start, start + size);
238 } else {
239 if (unlikely(has_write_lock)) {
240 downgrade_write(&mm->mmap_sem);
241 has_write_lock = 0;
242 }
243 make_pages_present(start, start+size);
225 } 244 }
226 make_pages_present(start, start+size);
227 } 245 }
228 246
229 /* 247 /*
@@ -240,4 +258,3 @@ out:
240 258
241 return err; 259 return err;
242} 260}
243
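
This hunk teaches sys_remap_file_pages() to munlock the range being over-mapped and re-mlock it once the new nonlinear mapping is in place, so VM_LOCKED semantics survive the remap. For reference, the user-space call whose behaviour is being preserved (addr and page_size are assumed to come from a prior mmap() of the file):

#define _GNU_SOURCE
#include <sys/mman.h>

/* Remap the third file page onto the start of a MAP_SHARED mapping. */
static int remap_third_page(void *addr, size_t page_size)
{
	return remap_file_pages(addr, page_size, 0 /* prot: must be 0 */,
				2 /* pgoff */, 0 /* flags */);
}
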
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93e..ce8cbb29860b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -262,7 +262,7 @@ struct resv_map {
262 struct list_head regions; 262 struct list_head regions;
263}; 263};
264 264
265struct resv_map *resv_map_alloc(void) 265static struct resv_map *resv_map_alloc(void)
266{ 266{
267 struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL); 267 struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
268 if (!resv_map) 268 if (!resv_map)
@@ -274,7 +274,7 @@ struct resv_map *resv_map_alloc(void)
274 return resv_map; 274 return resv_map;
275} 275}
276 276
277void resv_map_release(struct kref *ref) 277static void resv_map_release(struct kref *ref)
278{ 278{
279 struct resv_map *resv_map = container_of(ref, struct resv_map, refs); 279 struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
280 280
@@ -289,7 +289,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
289 if (!(vma->vm_flags & VM_SHARED)) 289 if (!(vma->vm_flags & VM_SHARED))
290 return (struct resv_map *)(get_vma_private_data(vma) & 290 return (struct resv_map *)(get_vma_private_data(vma) &
291 ~HPAGE_RESV_MASK); 291 ~HPAGE_RESV_MASK);
292 return 0; 292 return NULL;
293} 293}
294 294
295static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) 295static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
1459{ 1459{
1460 struct hstate *h = &default_hstate; 1460 struct hstate *h = &default_hstate;
1461 return sprintf(buf, 1461 return sprintf(buf,
1462 "HugePages_Total: %5lu\n" 1462 "HugePages_Total: %5lu\n"
1463 "HugePages_Free: %5lu\n" 1463 "HugePages_Free: %5lu\n"
1464 "HugePages_Rsvd: %5lu\n" 1464 "HugePages_Rsvd: %5lu\n"
1465 "HugePages_Surp: %5lu\n" 1465 "HugePages_Surp: %5lu\n"
1466 "Hugepagesize: %5lu kB\n", 1466 "Hugepagesize: %8lu kB\n",
1467 h->nr_huge_pages, 1467 h->nr_huge_pages,
1468 h->free_huge_pages, 1468 h->free_huge_pages,
1469 h->resv_huge_pages, 1469 h->resv_huge_pages,
@@ -1747,10 +1747,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1747 * from other VMAs and let the children be SIGKILLed if they are faulting the 1747 * from other VMAs and let the children be SIGKILLed if they are faulting the
1748 * same region. 1748 * same region.
1749 */ 1749 */
1750int unmap_ref_private(struct mm_struct *mm, 1750static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1751 struct vm_area_struct *vma, 1751 struct page *page, unsigned long address)
1752 struct page *page,
1753 unsigned long address)
1754{ 1752{
1755 struct vm_area_struct *iter_vma; 1753 struct vm_area_struct *iter_vma;
1756 struct address_space *mapping; 1754 struct address_space *mapping;
@@ -2073,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
2073 return NULL; 2071 return NULL;
2074} 2072}
2075 2073
2074static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
2075{
2076 if (!ptep || write || shared)
2077 return 0;
2078 else
2079 return huge_pte_none(huge_ptep_get(ptep));
2080}
2081
2076int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, 2082int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2077 struct page **pages, struct vm_area_struct **vmas, 2083 struct page **pages, struct vm_area_struct **vmas,
2078 unsigned long *position, int *length, int i, 2084 unsigned long *position, int *length, int i,
@@ -2082,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2082 unsigned long vaddr = *position; 2088 unsigned long vaddr = *position;
2083 int remainder = *length; 2089 int remainder = *length;
2084 struct hstate *h = hstate_vma(vma); 2090 struct hstate *h = hstate_vma(vma);
2091 int zeropage_ok = 0;
2092 int shared = vma->vm_flags & VM_SHARED;
2085 2093
2086 spin_lock(&mm->page_table_lock); 2094 spin_lock(&mm->page_table_lock);
2087 while (vaddr < vma->vm_end && remainder) { 2095 while (vaddr < vma->vm_end && remainder) {
@@ -2094,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2094 * first, for the page indexing below to work. 2102 * first, for the page indexing below to work.
2095 */ 2103 */
2096 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); 2104 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
2105 if (huge_zeropage_ok(pte, write, shared))
2106 zeropage_ok = 1;
2097 2107
2098 if (!pte || huge_pte_none(huge_ptep_get(pte)) || 2108 if (!pte ||
2109 (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
2099 (write && !pte_write(huge_ptep_get(pte)))) { 2110 (write && !pte_write(huge_ptep_get(pte)))) {
2100 int ret; 2111 int ret;
2101 2112
@@ -2115,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2115 page = pte_page(huge_ptep_get(pte)); 2126 page = pte_page(huge_ptep_get(pte));
2116same_page: 2127same_page:
2117 if (pages) { 2128 if (pages) {
2118 get_page(page); 2129 if (zeropage_ok)
2119 pages[i] = page + pfn_offset; 2130 pages[i] = ZERO_PAGE(0);
2131 else
2132 pages[i] = page + pfn_offset;
2133 get_page(pages[i]);
2120 } 2134 }
2121 2135
2122 if (vmas) 2136 if (vmas)
diff --git a/mm/internal.h b/mm/internal.h
index 1f43f7416972..e4e728bdf324 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,6 +39,15 @@ static inline void __put_page(struct page *page)
39 atomic_dec(&page->_count); 39 atomic_dec(&page->_count);
40} 40}
41 41
42/*
43 * in mm/vmscan.c:
44 */
45extern int isolate_lru_page(struct page *page);
46extern void putback_lru_page(struct page *page);
47
48/*
49 * in mm/page_alloc.c
50 */
42extern void __free_pages_bootmem(struct page *page, unsigned int order); 51extern void __free_pages_bootmem(struct page *page, unsigned int order);
43 52
44/* 53/*
@@ -52,6 +61,120 @@ static inline unsigned long page_order(struct page *page)
52 return page_private(page); 61 return page_private(page);
53} 62}
54 63
64extern long mlock_vma_pages_range(struct vm_area_struct *vma,
65 unsigned long start, unsigned long end);
66extern void munlock_vma_pages_range(struct vm_area_struct *vma,
67 unsigned long start, unsigned long end);
68static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
69{
70 munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
71}
72
73#ifdef CONFIG_UNEVICTABLE_LRU
74/*
75 * unevictable_migrate_page() called only from migrate_page_copy() to
76 * migrate unevictable flag to new page.
77 * Note that the old page has been isolated from the LRU lists at this
78 * point so we don't need to worry about LRU statistics.
79 */
80static inline void unevictable_migrate_page(struct page *new, struct page *old)
81{
82 if (TestClearPageUnevictable(old))
83 SetPageUnevictable(new);
84}
85#else
86static inline void unevictable_migrate_page(struct page *new, struct page *old)
87{
88}
89#endif
90
91#ifdef CONFIG_UNEVICTABLE_LRU
92/*
93 * Called only in fault path via page_evictable() for a new page
94 * to determine if it's being mapped into a LOCKED vma.
95 * If so, mark page as mlocked.
96 */
97static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
98{
99 VM_BUG_ON(PageLRU(page));
100
101 if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
102 return 0;
103
104 if (!TestSetPageMlocked(page)) {
105 inc_zone_page_state(page, NR_MLOCK);
106 count_vm_event(UNEVICTABLE_PGMLOCKED);
107 }
108 return 1;
109}
110
111/*
112 * must be called with vma's mmap_sem held for read, and page locked.
113 */
114extern void mlock_vma_page(struct page *page);
115
116/*
117 * Clear the page's PageMlocked(). This can be useful in a situation where
118 * we want to unconditionally remove a page from the pagecache -- e.g.,
119 * on truncation or freeing.
120 *
121 * It is legal to call this function for any page, mlocked or not.
122 * If called for a page that is still mapped by mlocked vmas, all we do
123 * is revert to lazy LRU behaviour -- semantics are not broken.
124 */
125extern void __clear_page_mlock(struct page *page);
126static inline void clear_page_mlock(struct page *page)
127{
128 if (unlikely(TestClearPageMlocked(page)))
129 __clear_page_mlock(page);
130}
131
132/*
133 * mlock_migrate_page - called only from migrate_page_copy() to
134 * migrate the Mlocked page flag; update statistics.
135 */
136static inline void mlock_migrate_page(struct page *newpage, struct page *page)
137{
138 if (TestClearPageMlocked(page)) {
139 unsigned long flags;
140
141 local_irq_save(flags);
142 __dec_zone_page_state(page, NR_MLOCK);
143 SetPageMlocked(newpage);
144 __inc_zone_page_state(newpage, NR_MLOCK);
145 local_irq_restore(flags);
146 }
147}
148
149/*
150 * free_page_mlock() -- clean up attempts to free and mlocked() page.
151 * Page should not be on lru, so no need to fix that up.
152 * free_pages_check() will verify...
153 */
154static inline void free_page_mlock(struct page *page)
155{
156 if (unlikely(TestClearPageMlocked(page))) {
157 unsigned long flags;
158
159 local_irq_save(flags);
160 __dec_zone_page_state(page, NR_MLOCK);
161 __count_vm_event(UNEVICTABLE_MLOCKFREED);
162 local_irq_restore(flags);
163 }
164}
165
166#else /* CONFIG_UNEVICTABLE_LRU */
167static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
168{
169 return 0;
170}
171static inline void clear_page_mlock(struct page *page) { }
172static inline void mlock_vma_page(struct page *page) { }
173static inline void mlock_migrate_page(struct page *new, struct page *old) { }
174static inline void free_page_mlock(struct page *page) { }
175
176#endif /* CONFIG_UNEVICTABLE_LRU */
177
55/* 178/*
56 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, 179 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
57 * so all functions starting at paging_init should be marked __init 180 * so all functions starting at paging_init should be marked __init
@@ -120,4 +243,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
120} 243}
121#endif /* CONFIG_SPARSEMEM */ 244#endif /* CONFIG_SPARSEMEM */
122 245
246#define GUP_FLAGS_WRITE 0x1
247#define GUP_FLAGS_FORCE 0x2
248#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
249
250int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
251 unsigned long start, int len, int flags,
252 struct page **pages, struct vm_area_struct **vmas);
253
123#endif 254#endif
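
The new GUP_FLAGS_* bits collapse get_user_pages()'s separate write/force ints into one flags word for internal callers; the mlock code needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS to fault in pages of PROT_NONE-but-mlocked vmas. A sketch of how the public wrapper can forward to the internal helper declared above (signature per this header; the wrapper body is illustrative):

int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		   unsigned long start, int len, int write, int force,
		   struct page **pages, struct vm_area_struct **vmas)
{
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages(tsk, mm, start, len, flags, pages, vmas);
}
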
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36896f3eb7f5..d4a92b63e98e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,11 +32,12 @@
32#include <linux/fs.h> 32#include <linux/fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
35#include <linux/mm_inline.h>
36#include <linux/page_cgroup.h>
35 37
36#include <asm/uaccess.h> 38#include <asm/uaccess.h>
37 39
38struct cgroup_subsys mem_cgroup_subsys __read_mostly; 40struct cgroup_subsys mem_cgroup_subsys __read_mostly;
39static struct kmem_cache *page_cgroup_cache __read_mostly;
40#define MEM_CGROUP_RECLAIM_RETRIES 5 41#define MEM_CGROUP_RECLAIM_RETRIES 5
41 42
42/* 43/*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
65/* 66/*
66 * For accounting under irq disable, no need for increment preempt count. 67 * For accounting under irq disable, no need for increment preempt count.
67 */ 68 */
68static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat, 69static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
69 enum mem_cgroup_stat_index idx, int val) 70 enum mem_cgroup_stat_index idx, int val)
70{ 71{
71 int cpu = smp_processor_id(); 72 stat->count[idx] += val;
72 stat->cpustat[cpu].count[idx] += val;
73} 73}
74 74
75static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat, 75static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
85/* 85/*
86 * per-zone information in memory controller. 86 * per-zone information in memory controller.
87 */ 87 */
88
89enum mem_cgroup_zstat_index {
90 MEM_CGROUP_ZSTAT_ACTIVE,
91 MEM_CGROUP_ZSTAT_INACTIVE,
92
93 NR_MEM_CGROUP_ZSTAT,
94};
95
96struct mem_cgroup_per_zone { 88struct mem_cgroup_per_zone {
97 /* 89 /*
98 * spin_lock to protect the per cgroup LRU 90 * spin_lock to protect the per cgroup LRU
99 */ 91 */
100 spinlock_t lru_lock; 92 spinlock_t lru_lock;
101 struct list_head active_list; 93 struct list_head lists[NR_LRU_LISTS];
102 struct list_head inactive_list; 94 unsigned long count[NR_LRU_LISTS];
103 unsigned long count[NR_MEM_CGROUP_ZSTAT];
104}; 95};
105/* Macro for accessing counter */ 96/* Macro for accessing counter */
106#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) 97#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
144}; 135};
145static struct mem_cgroup init_mem_cgroup; 136static struct mem_cgroup init_mem_cgroup;
146 137
147/*
148 * We use the lower bit of the page->page_cgroup pointer as a bit spin
149 * lock. We need to ensure that page->page_cgroup is at least two
150 * byte aligned (based on comments from Nick Piggin). But since
151 * bit_spin_lock doesn't actually set that lock bit in a non-debug
152 * uniprocessor kernel, we should avoid setting it here too.
153 */
154#define PAGE_CGROUP_LOCK_BIT 0x0
155#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
156#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
157#else
158#define PAGE_CGROUP_LOCK 0x0
159#endif
160
161/*
162 * A page_cgroup page is associated with every page descriptor. The
163 * page_cgroup helps us identify information about the cgroup
164 */
165struct page_cgroup {
166 struct list_head lru; /* per cgroup LRU list */
167 struct page *page;
168 struct mem_cgroup *mem_cgroup;
169 int flags;
170};
171#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
172#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
173
174static int page_cgroup_nid(struct page_cgroup *pc)
175{
176 return page_to_nid(pc->page);
177}
178
179static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
180{
181 return page_zonenum(pc->page);
182}
183
184enum charge_type { 138enum charge_type {
185 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 139 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
186 MEM_CGROUP_CHARGE_TYPE_MAPPED, 140 MEM_CGROUP_CHARGE_TYPE_MAPPED,
141 MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
187 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ 142 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
143 NR_CHARGE_TYPE,
144};
145
146/* only for here (for easy reading.) */
147#define PCGF_CACHE (1UL << PCG_CACHE)
148#define PCGF_USED (1UL << PCG_USED)
149#define PCGF_ACTIVE (1UL << PCG_ACTIVE)
150#define PCGF_LOCK (1UL << PCG_LOCK)
151#define PCGF_FILE (1UL << PCG_FILE)
152static const unsigned long
153pcg_default_flags[NR_CHARGE_TYPE] = {
154 PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
155 PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
156 PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
157 0, /* FORCE */
188}; 158};
189 159
190/* 160/*
191 * Always modified under lru lock. Then, not necessary to preempt_disable() 161 * Always modified under lru lock. Then, not necessary to preempt_disable()
192 */ 162 */
193static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, 163static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
194 bool charge) 164 struct page_cgroup *pc,
165 bool charge)
195{ 166{
196 int val = (charge)? 1 : -1; 167 int val = (charge)? 1 : -1;
197 struct mem_cgroup_stat *stat = &mem->stat; 168 struct mem_cgroup_stat *stat = &mem->stat;
169 struct mem_cgroup_stat_cpu *cpustat;
198 170
199 VM_BUG_ON(!irqs_disabled()); 171 VM_BUG_ON(!irqs_disabled());
200 if (flags & PAGE_CGROUP_FLAG_CACHE) 172
201 __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); 173 cpustat = &stat->cpustat[smp_processor_id()];
174 if (PageCgroupCache(pc))
175 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
202 else 176 else
203 __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); 177 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
204 178
205 if (charge) 179 if (charge)
206 __mem_cgroup_stat_add_safe(stat, 180 __mem_cgroup_stat_add_safe(cpustat,
207 MEM_CGROUP_STAT_PGPGIN_COUNT, 1); 181 MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
208 else 182 else
209 __mem_cgroup_stat_add_safe(stat, 183 __mem_cgroup_stat_add_safe(cpustat,
210 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); 184 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
211} 185}
212 186
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
227} 201}
228 202
229static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem, 203static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
230 enum mem_cgroup_zstat_index idx) 204 enum lru_list idx)
231{ 205{
232 int nid, zid; 206 int nid, zid;
233 struct mem_cgroup_per_zone *mz; 207 struct mem_cgroup_per_zone *mz;
@@ -262,85 +236,77 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
262 struct mem_cgroup, css); 236 struct mem_cgroup, css);
263} 237}
264 238
265static inline int page_cgroup_locked(struct page *page)
266{
267 return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
268}
269
270static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
271{
272 VM_BUG_ON(!page_cgroup_locked(page));
273 page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
274}
275
276struct page_cgroup *page_get_page_cgroup(struct page *page)
277{
278 return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
279}
280
281static void lock_page_cgroup(struct page *page)
282{
283 bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
284}
285
286static int try_lock_page_cgroup(struct page *page)
287{
288 return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
289}
290
291static void unlock_page_cgroup(struct page *page)
292{
293 bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
294}
295
296static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, 239static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
297 struct page_cgroup *pc) 240 struct page_cgroup *pc)
298{ 241{
299 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 242 int lru = LRU_BASE;
243
244 if (PageCgroupUnevictable(pc))
245 lru = LRU_UNEVICTABLE;
246 else {
247 if (PageCgroupActive(pc))
248 lru += LRU_ACTIVE;
249 if (PageCgroupFile(pc))
250 lru += LRU_FILE;
251 }
300 252
301 if (from) 253 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
302 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
303 else
304 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
305 254
306 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); 255 mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
307 list_del(&pc->lru); 256 list_del(&pc->lru);
308} 257}
309 258
310static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, 259static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
311 struct page_cgroup *pc) 260 struct page_cgroup *pc)
312{ 261{
313 int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 262 int lru = LRU_BASE;
314 263
315 if (!to) { 264 if (PageCgroupUnevictable(pc))
316 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; 265 lru = LRU_UNEVICTABLE;
317 list_add(&pc->lru, &mz->inactive_list); 266 else {
318 } else { 267 if (PageCgroupActive(pc))
319 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; 268 lru += LRU_ACTIVE;
320 list_add(&pc->lru, &mz->active_list); 269 if (PageCgroupFile(pc))
270 lru += LRU_FILE;
321 } 271 }
322 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); 272
273 MEM_CGROUP_ZSTAT(mz, lru) += 1;
274 list_add(&pc->lru, &mz->lists[lru]);
275
276 mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
323} 277}
324 278
325static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) 279static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
326{ 280{
327 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
328 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); 281 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
282 int active = PageCgroupActive(pc);
283 int file = PageCgroupFile(pc);
284 int unevictable = PageCgroupUnevictable(pc);
285 enum lru_list from = unevictable ? LRU_UNEVICTABLE :
286 (LRU_FILE * !!file + !!active);
329 287
330 if (from) 288 if (lru == from)
331 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; 289 return;
332 else
333 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
334 290
335 if (active) { 291 MEM_CGROUP_ZSTAT(mz, from) -= 1;
336 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; 292 /*
337 pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; 293 * However this is done under mz->lru_lock, another flags, which
338 list_move(&pc->lru, &mz->active_list); 294 * are not related to LRU, will be modified from out-of-lock.
295 * We have to use atomic set/clear flags.
296 */
297 if (is_unevictable_lru(lru)) {
298 ClearPageCgroupActive(pc);
299 SetPageCgroupUnevictable(pc);
339 } else { 300 } else {
340 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; 301 if (is_active_lru(lru))
341 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; 302 SetPageCgroupActive(pc);
342 list_move(&pc->lru, &mz->inactive_list); 303 else
304 ClearPageCgroupActive(pc);
305 ClearPageCgroupUnevictable(pc);
343 } 306 }
307
308 MEM_CGROUP_ZSTAT(mz, lru) += 1;
309 list_move(&pc->lru, &mz->lists[lru]);
344} 310}
345 311
346int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) 312int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -356,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
356/* 322/*
357 * This routine assumes that the appropriate zone's lru lock is already held 323 * This routine assumes that the appropriate zone's lru lock is already held
358 */ 324 */
359void mem_cgroup_move_lists(struct page *page, bool active) 325void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
360{ 326{
361 struct page_cgroup *pc; 327 struct page_cgroup *pc;
362 struct mem_cgroup_per_zone *mz; 328 struct mem_cgroup_per_zone *mz;
@@ -372,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
372 * safely get to page_cgroup without it, so just try_lock it: 338 * safely get to page_cgroup without it, so just try_lock it:
373 * mem_cgroup_isolate_pages allows for page left on wrong list. 339 * mem_cgroup_isolate_pages allows for page left on wrong list.
374 */ 340 */
375 if (!try_lock_page_cgroup(page)) 341 pc = lookup_page_cgroup(page);
342 if (!trylock_page_cgroup(pc))
376 return; 343 return;
377 344 if (pc && PageCgroupUsed(pc)) {
378 pc = page_get_page_cgroup(page);
379 if (pc) {
380 mz = page_cgroup_zoneinfo(pc); 345 mz = page_cgroup_zoneinfo(pc);
381 spin_lock_irqsave(&mz->lru_lock, flags); 346 spin_lock_irqsave(&mz->lru_lock, flags);
382 __mem_cgroup_move_lists(pc, active); 347 __mem_cgroup_move_lists(pc, lru);
383 spin_unlock_irqrestore(&mz->lru_lock, flags); 348 spin_unlock_irqrestore(&mz->lru_lock, flags);
384 } 349 }
385 unlock_page_cgroup(page); 350 unlock_page_cgroup(pc);
386} 351}
387 352
388/* 353/*
@@ -403,21 +368,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
403} 368}
404 369
405/* 370/*
406 * This function is called from vmscan.c. In page reclaiming loop. balance
407 * between active and inactive list is calculated. For memory controller
408 * page reclaiming, we should use using mem_cgroup's imbalance rather than
409 * zone's global lru imbalance.
410 */
411long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
412{
413 unsigned long active, inactive;
414 /* active and inactive are the number of pages. 'long' is ok.*/
415 active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
416 inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
417 return (long) (active / (inactive + 1));
418}
419
420/*
421 * prev_priority control...this will be used in memory reclaim path. 371 * prev_priority control...this will be used in memory reclaim path.
422 */ 372 */
423int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) 373int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -444,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
444 * (see include/linux/mmzone.h) 394 * (see include/linux/mmzone.h)
445 */ 395 */
446 396
447long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, 397long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
448 struct zone *zone, int priority) 398 int priority, enum lru_list lru)
449{ 399{
450 long nr_active; 400 long nr_pages;
451 int nid = zone->zone_pgdat->node_id; 401 int nid = zone->zone_pgdat->node_id;
452 int zid = zone_idx(zone); 402 int zid = zone_idx(zone);
453 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); 403 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
454 404
455 nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE); 405 nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
456 return (nr_active >> priority);
457}
458 406
459long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, 407 return (nr_pages >> priority);
460 struct zone *zone, int priority)
461{
462 long nr_inactive;
463 int nid = zone->zone_pgdat->node_id;
464 int zid = zone_idx(zone);
465 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
466
467 nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
468 return (nr_inactive >> priority);
469} 408}
470 409
471unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 410unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -473,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
473 unsigned long *scanned, int order, 412 unsigned long *scanned, int order,
474 int mode, struct zone *z, 413 int mode, struct zone *z,
475 struct mem_cgroup *mem_cont, 414 struct mem_cgroup *mem_cont,
476 int active) 415 int active, int file)
477{ 416{
478 unsigned long nr_taken = 0; 417 unsigned long nr_taken = 0;
479 struct page *page; 418 struct page *page;
@@ -484,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
484 int nid = z->zone_pgdat->node_id; 423 int nid = z->zone_pgdat->node_id;
485 int zid = zone_idx(z); 424 int zid = zone_idx(z);
486 struct mem_cgroup_per_zone *mz; 425 struct mem_cgroup_per_zone *mz;
426 int lru = LRU_FILE * !!file + !!active;
487 427
488 BUG_ON(!mem_cont); 428 BUG_ON(!mem_cont);
489 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); 429 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
490 if (active) 430 src = &mz->lists[lru];
491 src = &mz->active_list;
492 else
493 src = &mz->inactive_list;
494
495 431
496 spin_lock(&mz->lru_lock); 432 spin_lock(&mz->lru_lock);
497 scan = 0; 433 scan = 0;
498 list_for_each_entry_safe_reverse(pc, tmp, src, lru) { 434 list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
499 if (scan >= nr_to_scan) 435 if (scan >= nr_to_scan)
500 break; 436 break;
437 if (unlikely(!PageCgroupUsed(pc)))
438 continue;
501 page = pc->page; 439 page = pc->page;
502 440
503 if (unlikely(!PageLRU(page))) 441 if (unlikely(!PageLRU(page)))
504 continue; 442 continue;
505 443
506 if (PageActive(page) && !active) { 444 /*
507 __mem_cgroup_move_lists(pc, true); 445 * TODO: play better with lumpy reclaim, grabbing anything.
508 continue; 446 */
509 } 447 if (PageUnevictable(page) ||
510 if (!PageActive(page) && active) { 448 (PageActive(page) && !active) ||
511 __mem_cgroup_move_lists(pc, false); 449 (!PageActive(page) && active)) {
450 __mem_cgroup_move_lists(pc, page_lru(page));
512 continue; 451 continue;
513 } 452 }
514 453
515 scan++; 454 scan++;
516 list_move(&pc->lru, &pc_list); 455 list_move(&pc->lru, &pc_list);
517 456
518 if (__isolate_lru_page(page, mode) == 0) { 457 if (__isolate_lru_page(page, mode, file) == 0) {
519 list_move(&page->lru, dst); 458 list_move(&page->lru, dst);
520 nr_taken++; 459 nr_taken++;
521 } 460 }
@@ -540,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
540{ 479{
541 struct mem_cgroup *mem; 480 struct mem_cgroup *mem;
542 struct page_cgroup *pc; 481 struct page_cgroup *pc;
543 unsigned long flags;
544 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 482 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
545 struct mem_cgroup_per_zone *mz; 483 struct mem_cgroup_per_zone *mz;
484 unsigned long flags;
546 485
547 pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask); 486 pc = lookup_page_cgroup(page);
548 if (unlikely(pc == NULL)) 487 /* can happen at boot */
549 goto err; 488 if (unlikely(!pc))
550 489 return 0;
490 prefetchw(pc);
551 /* 491 /*
552 * We always charge the cgroup the mm_struct belongs to. 492 * We always charge the cgroup the mm_struct belongs to.
553 * The mm_struct's mem_cgroup changes on task migration if the 493 * The mm_struct's mem_cgroup changes on task migration if the
554 * thread group leader migrates. It's possible that mm is not 494 * thread group leader migrates. It's possible that mm is not
555 * set, if so charge the init_mm (happens for pagecache usage). 495 * set, if so charge the init_mm (happens for pagecache usage).
556 */ 496 */
497
557 if (likely(!memcg)) { 498 if (likely(!memcg)) {
558 rcu_read_lock(); 499 rcu_read_lock();
559 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 500 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
560 if (unlikely(!mem)) { 501 if (unlikely(!mem)) {
561 rcu_read_unlock(); 502 rcu_read_unlock();
562 kmem_cache_free(page_cgroup_cache, pc);
563 return 0; 503 return 0;
564 } 504 }
565 /* 505 /*
@@ -572,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
572 css_get(&memcg->css); 512 css_get(&memcg->css);
573 } 513 }
574 514
575 while (res_counter_charge(&mem->res, PAGE_SIZE)) { 515 while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
576 if (!(gfp_mask & __GFP_WAIT)) 516 if (!(gfp_mask & __GFP_WAIT))
577 goto out; 517 goto out;
578 518
@@ -595,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
595 } 535 }
596 } 536 }
597 537
598 pc->mem_cgroup = mem;
599 pc->page = page;
600 /*
601 * If a page is accounted as a page cache, insert to inactive list.
602 * If anon, insert to active list.
603 */
604 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
605 pc->flags = PAGE_CGROUP_FLAG_CACHE;
606 else
607 pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
608 538
609 lock_page_cgroup(page); 539 lock_page_cgroup(pc);
610 if (unlikely(page_get_page_cgroup(page))) { 540 if (unlikely(PageCgroupUsed(pc))) {
611 unlock_page_cgroup(page); 541 unlock_page_cgroup(pc);
612 res_counter_uncharge(&mem->res, PAGE_SIZE); 542 res_counter_uncharge(&mem->res, PAGE_SIZE);
613 css_put(&mem->css); 543 css_put(&mem->css);
614 kmem_cache_free(page_cgroup_cache, pc); 544
615 goto done; 545 goto done;
616 } 546 }
617 page_assign_page_cgroup(page, pc); 547 pc->mem_cgroup = mem;
548 /*
549 * If a page is accounted as a page cache, insert to inactive list.
550 * If anon, insert to active list.
551 */
552 pc->flags = pcg_default_flags[ctype];
618 553
619 mz = page_cgroup_zoneinfo(pc); 554 mz = page_cgroup_zoneinfo(pc);
555
620 spin_lock_irqsave(&mz->lru_lock, flags); 556 spin_lock_irqsave(&mz->lru_lock, flags);
621 __mem_cgroup_add_list(mz, pc); 557 __mem_cgroup_add_list(mz, pc);
622 spin_unlock_irqrestore(&mz->lru_lock, flags); 558 spin_unlock_irqrestore(&mz->lru_lock, flags);
559 unlock_page_cgroup(pc);
623 560
624 unlock_page_cgroup(page);
625done: 561done:
626 return 0; 562 return 0;
627out: 563out:
628 css_put(&mem->css); 564 css_put(&mem->css);
629 kmem_cache_free(page_cgroup_cache, pc);
630err:
631 return -ENOMEM; 565 return -ENOMEM;
632} 566}
633 567
@@ -635,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
635{ 569{
636 if (mem_cgroup_subsys.disabled) 570 if (mem_cgroup_subsys.disabled)
637 return 0; 571 return 0;
638 572 if (PageCompound(page))
573 return 0;
639 /* 574 /*
640 * If already mapped, we don't have to account. 575 * If already mapped, we don't have to account.
641 * If page cache, page->mapping has address_space. 576 * If page cache, page->mapping has address_space.
@@ -656,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
656{ 591{
657 if (mem_cgroup_subsys.disabled) 592 if (mem_cgroup_subsys.disabled)
658 return 0; 593 return 0;
659 594 if (PageCompound(page))
595 return 0;
660 /* 596 /*
661 * Corner case handling. This is called from add_to_page_cache() 597 * Corner case handling. This is called from add_to_page_cache()
662 * usually. But some FS (shmem) precharges this page before calling it 598
@@ -669,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
669 if (!(gfp_mask & __GFP_WAIT)) { 605 if (!(gfp_mask & __GFP_WAIT)) {
670 struct page_cgroup *pc; 606 struct page_cgroup *pc;
671 607
672 lock_page_cgroup(page); 608
673 pc = page_get_page_cgroup(page); 609 pc = lookup_page_cgroup(page);
674 if (pc) { 610 if (!pc)
675 VM_BUG_ON(pc->page != page); 611 return 0;
676 VM_BUG_ON(!pc->mem_cgroup); 612 lock_page_cgroup(pc);
677 unlock_page_cgroup(page); 613 if (PageCgroupUsed(pc)) {
614 unlock_page_cgroup(pc);
678 return 0; 615 return 0;
679 } 616 }
680 unlock_page_cgroup(page); 617 unlock_page_cgroup(pc);
681 } 618 }
682 619
683 if (unlikely(!mm)) 620 if (unlikely(!mm))
684 mm = &init_mm; 621 mm = &init_mm;
685 622
686 return mem_cgroup_charge_common(page, mm, gfp_mask, 623 if (page_is_file_cache(page))
624 return mem_cgroup_charge_common(page, mm, gfp_mask,
687 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); 625 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
626 else
627 return mem_cgroup_charge_common(page, mm, gfp_mask,
628 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
688} 629}
689 630
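mem_cgroup_cache_charge() now distinguishes regular file cache from swap-backed (shmem) cache by charge type. A minimal sketch of the dispatch, assuming page_is_file_cache() reduces to "not swap-backed" (enum names shortened from the kernel's MEM_CGROUP_CHARGE_TYPE_*):

#include <stdbool.h>
#include <stdio.h>

enum charge_type { CHARGE_CACHE, CHARGE_SHMEM };

/* page_is_file_cache() modeled as "not swap-backed" */
static enum charge_type cache_charge_type(bool swap_backed)
{
	return swap_backed ? CHARGE_SHMEM : CHARGE_CACHE;
}

int main(void)
{
	printf("tmpfs page charged as %s\n",
	       cache_charge_type(true) == CHARGE_SHMEM ? "SHMEM" : "CACHE");
	return 0;
}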
690/* 631/*
@@ -704,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
704 /* 645 /*
705 * Check if our page_cgroup is valid 646 * Check if our page_cgroup is valid
706 */ 647 */
707 lock_page_cgroup(page); 648 pc = lookup_page_cgroup(page);
708 pc = page_get_page_cgroup(page); 649 if (unlikely(!pc || !PageCgroupUsed(pc)))
709 if (unlikely(!pc)) 650 return;
710 goto unlock;
711
712 VM_BUG_ON(pc->page != page);
713 651
714 if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) 652 lock_page_cgroup(pc);
715 && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) 653 if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
716 || page_mapped(page))) 654 || !PageCgroupUsed(pc)) {
717 goto unlock; 655 /* This can happen when racing with zap_pte_range() and do_swap_page() */
656 unlock_page_cgroup(pc);
657 return;
658 }
659 ClearPageCgroupUsed(pc);
660 mem = pc->mem_cgroup;
718 661
719 mz = page_cgroup_zoneinfo(pc); 662 mz = page_cgroup_zoneinfo(pc);
720 spin_lock_irqsave(&mz->lru_lock, flags); 663 spin_lock_irqsave(&mz->lru_lock, flags);
721 __mem_cgroup_remove_list(mz, pc); 664 __mem_cgroup_remove_list(mz, pc);
722 spin_unlock_irqrestore(&mz->lru_lock, flags); 665 spin_unlock_irqrestore(&mz->lru_lock, flags);
666 unlock_page_cgroup(pc);
723 667
724 page_assign_page_cgroup(page, NULL);
725 unlock_page_cgroup(page);
726
727 mem = pc->mem_cgroup;
728 res_counter_uncharge(&mem->res, PAGE_SIZE); 668 res_counter_uncharge(&mem->res, PAGE_SIZE);
729 css_put(&mem->css); 669 css_put(&mem->css);
730 670
731 kmem_cache_free(page_cgroup_cache, pc);
732 return; 671 return;
733unlock:
734 unlock_page_cgroup(page);
735} 672}
736 673
737void mem_cgroup_uncharge_page(struct page *page) 674void mem_cgroup_uncharge_page(struct page *page)
738{ 675{
676 /* early check. */
677 if (page_mapped(page))
678 return;
679 if (page->mapping && !PageAnon(page))
680 return;
739 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); 681 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
740} 682}
741 683
742void mem_cgroup_uncharge_cache_page(struct page *page) 684void mem_cgroup_uncharge_cache_page(struct page *page)
743{ 685{
744 VM_BUG_ON(page_mapped(page)); 686 VM_BUG_ON(page_mapped(page));
687 VM_BUG_ON(page->mapping);
745 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); 688 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
746} 689}
747 690
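The uncharge side gains cheap early exits: a page that is still mapped, or that is file cache (has a mapping but is not anonymous), is left alone. A boolean restatement of that predicate (userspace sketch; the helper name is invented here):

#include <stdbool.h>
#include <stdio.h>

static bool should_uncharge(bool mapped, bool has_mapping, bool anon)
{
	if (mapped)
		return false;		/* still in use somewhere */
	if (has_mapping && !anon)
		return false;		/* file cache: uncharged when removed */
	return true;
}

int main(void)
{
	/* unmapped anonymous page: eligible */
	printf("%d\n", should_uncharge(false, false, true));	/* 1 */
	/* mapped page cache: not yet */
	printf("%d\n", should_uncharge(true, true, false));	/* 0 */
	return 0;
}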
@@ -758,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
758 if (mem_cgroup_subsys.disabled) 701 if (mem_cgroup_subsys.disabled)
759 return 0; 702 return 0;
760 703
761 lock_page_cgroup(page); 704 pc = lookup_page_cgroup(page);
762 pc = page_get_page_cgroup(page); 705 lock_page_cgroup(pc);
763 if (pc) { 706 if (PageCgroupUsed(pc)) {
764 mem = pc->mem_cgroup; 707 mem = pc->mem_cgroup;
765 css_get(&mem->css); 708 css_get(&mem->css);
766 if (pc->flags & PAGE_CGROUP_FLAG_CACHE) 709 if (PageCgroupCache(pc)) {
767 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 710 if (page_is_file_cache(page))
711 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
712 else
713 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
714 }
768 } 715 }
769 unlock_page_cgroup(page); 716 unlock_page_cgroup(pc);
770 if (mem) { 717 if (mem) {
771 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, 718 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
772 ctype, mem); 719 ctype, mem);
@@ -791,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
791 */ 738 */
792 if (!newpage->mapping) 739 if (!newpage->mapping)
793 __mem_cgroup_uncharge_common(newpage, 740 __mem_cgroup_uncharge_common(newpage,
794 MEM_CGROUP_CHARGE_TYPE_FORCE); 741 MEM_CGROUP_CHARGE_TYPE_FORCE);
795 else if (PageAnon(newpage)) 742 else if (PageAnon(newpage))
796 mem_cgroup_uncharge_page(newpage); 743 mem_cgroup_uncharge_page(newpage);
797} 744}
@@ -863,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
863#define FORCE_UNCHARGE_BATCH (128) 810#define FORCE_UNCHARGE_BATCH (128)
864static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, 811static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
865 struct mem_cgroup_per_zone *mz, 812 struct mem_cgroup_per_zone *mz,
866 int active) 813 enum lru_list lru)
867{ 814{
868 struct page_cgroup *pc; 815 struct page_cgroup *pc;
869 struct page *page; 816 struct page *page;
@@ -871,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
871 unsigned long flags; 818 unsigned long flags;
872 struct list_head *list; 819 struct list_head *list;
873 820
874 if (active) 821 list = &mz->lists[lru];
875 list = &mz->active_list;
876 else
877 list = &mz->inactive_list;
878 822
879 spin_lock_irqsave(&mz->lru_lock, flags); 823 spin_lock_irqsave(&mz->lru_lock, flags);
880 while (!list_empty(list)) { 824 while (!list_empty(list)) {
881 pc = list_entry(list->prev, struct page_cgroup, lru); 825 pc = list_entry(list->prev, struct page_cgroup, lru);
882 page = pc->page; 826 page = pc->page;
827 if (!PageCgroupUsed(pc))
828 break;
883 get_page(page); 829 get_page(page);
884 spin_unlock_irqrestore(&mz->lru_lock, flags); 830 spin_unlock_irqrestore(&mz->lru_lock, flags);
885 /* 831 /*
@@ -894,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
894 count = FORCE_UNCHARGE_BATCH; 840 count = FORCE_UNCHARGE_BATCH;
895 cond_resched(); 841 cond_resched();
896 } 842 }
897 } else 843 } else {
898 cond_resched(); 844 spin_lock_irqsave(&mz->lru_lock, flags);
845 break;
846 }
899 spin_lock_irqsave(&mz->lru_lock, flags); 847 spin_lock_irqsave(&mz->lru_lock, flags);
900 } 848 }
901 spin_unlock_irqrestore(&mz->lru_lock, flags); 849 spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -919,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
919 while (mem->res.usage > 0) { 867 while (mem->res.usage > 0) {
920 if (atomic_read(&mem->css.cgroup->count) > 0) 868 if (atomic_read(&mem->css.cgroup->count) > 0)
921 goto out; 869 goto out;
870 /* Make sure all *used* pages are on an LRU. */
871 lru_add_drain_all();
922 for_each_node_state(node, N_POSSIBLE) 872 for_each_node_state(node, N_POSSIBLE)
923 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 873 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
924 struct mem_cgroup_per_zone *mz; 874 struct mem_cgroup_per_zone *mz;
875 enum lru_list l;
925 mz = mem_cgroup_zoneinfo(mem, node, zid); 876 mz = mem_cgroup_zoneinfo(mem, node, zid);
926 /* drop all page_cgroup in active_list */ 877 for_each_lru(l)
927 mem_cgroup_force_empty_list(mem, mz, 1); 878 mem_cgroup_force_empty_list(mem, mz, l);
928 /* drop all page_cgroup in inactive_list */
929 mem_cgroup_force_empty_list(mem, mz, 0);
930 } 879 }
880 cond_resched();
931 } 881 }
932 ret = 0; 882 ret = 0;
933out: 883out:
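force_empty now drains every LRU via for_each_lru() instead of hard-coding an active/inactive pair. A compilable model of the split-LRU bookkeeping this series introduces (the counts are made up; the enum mirrors the LRU_* names used above):

#include <stdio.h>

enum lru_list {
	LRU_INACTIVE_ANON,
	LRU_ACTIVE_ANON,
	LRU_INACTIVE_FILE,
	LRU_ACTIVE_FILE,
	LRU_UNEVICTABLE,
	NR_LRU_LISTS
};

#define for_each_lru(l) for ((l) = 0; (l) < NR_LRU_LISTS; (l)++)

struct zone_model {
	unsigned long nr[NR_LRU_LISTS];	/* one count per LRU list */
};

int main(void)
{
	struct zone_model z = { .nr = { 3, 1, 4, 1, 5 } };
	enum lru_list l;
	unsigned long total = 0;

	for_each_lru(l)		/* replaces the active/inactive pair */
		total += z.nr[l];
	printf("pages on all LRUs: %lu\n", total);	/* 14 */
	return 0;
}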
@@ -1012,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
1012 } 962 }
1013 /* showing # of active pages */ 963 /* showing # of active pages */
1014 { 964 {
1015 unsigned long active, inactive; 965 unsigned long active_anon, inactive_anon;
1016 966 unsigned long active_file, inactive_file;
1017 inactive = mem_cgroup_get_all_zonestat(mem_cont, 967 unsigned long unevictable;
1018 MEM_CGROUP_ZSTAT_INACTIVE); 968
1019 active = mem_cgroup_get_all_zonestat(mem_cont, 969 inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
1020 MEM_CGROUP_ZSTAT_ACTIVE); 970 LRU_INACTIVE_ANON);
1021 cb->fill(cb, "active", (active) * PAGE_SIZE); 971 active_anon = mem_cgroup_get_all_zonestat(mem_cont,
1022 cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); 972 LRU_ACTIVE_ANON);
973 inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
974 LRU_INACTIVE_FILE);
975 active_file = mem_cgroup_get_all_zonestat(mem_cont,
976 LRU_ACTIVE_FILE);
977 unevictable = mem_cgroup_get_all_zonestat(mem_cont,
978 LRU_UNEVICTABLE);
979
980 cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
981 cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
982 cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
983 cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
984 cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
985
1023 } 986 }
1024 return 0; 987 return 0;
1025} 988}
@@ -1062,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1062{ 1025{
1063 struct mem_cgroup_per_node *pn; 1026 struct mem_cgroup_per_node *pn;
1064 struct mem_cgroup_per_zone *mz; 1027 struct mem_cgroup_per_zone *mz;
1028 enum lru_list l;
1065 int zone, tmp = node; 1029 int zone, tmp = node;
1066 /* 1030 /*
1067 * This routine is called against possible nodes. 1031 * This routine is called against possible nodes.
@@ -1082,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1082 1046
1083 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 1047 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
1084 mz = &pn->zoneinfo[zone]; 1048 mz = &pn->zoneinfo[zone];
1085 INIT_LIST_HEAD(&mz->active_list);
1086 INIT_LIST_HEAD(&mz->inactive_list);
1087 spin_lock_init(&mz->lru_lock); 1049 spin_lock_init(&mz->lru_lock);
1050 for_each_lru(l)
1051 INIT_LIST_HEAD(&mz->lists[l]);
1088 } 1052 }
1089 return 0; 1053 return 0;
1090} 1054}
@@ -1124,8 +1088,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1124 int node; 1088 int node;
1125 1089
1126 if (unlikely((cont->parent) == NULL)) { 1090 if (unlikely((cont->parent) == NULL)) {
1091 page_cgroup_init();
1127 mem = &init_mem_cgroup; 1092 mem = &init_mem_cgroup;
1128 page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
1129 } else { 1093 } else {
1130 mem = mem_cgroup_alloc(); 1094 mem = mem_cgroup_alloc();
1131 if (!mem) 1095 if (!mem)
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f497..3a6c4a658325 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,6 +64,8 @@
64 64
65#include "internal.h" 65#include "internal.h"
66 66
67#include "internal.h"
68
67#ifndef CONFIG_NEED_MULTIPLE_NODES 69#ifndef CONFIG_NEED_MULTIPLE_NODES
68/* use the per-pgdat data instead for discontigmem - mbligh */ 70/* use the per-pgdat data instead for discontigmem - mbligh */
69unsigned long max_mapnr; 71unsigned long max_mapnr;
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
1129 return !vma->vm_ops || !vma->vm_ops->fault; 1131 return !vma->vm_ops || !vma->vm_ops->fault;
1130} 1132}
1131 1133
1132int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1134
1133 unsigned long start, int len, int write, int force, 1135
1136int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1137 unsigned long start, int len, int flags,
1134 struct page **pages, struct vm_area_struct **vmas) 1138 struct page **pages, struct vm_area_struct **vmas)
1135{ 1139{
1136 int i; 1140 int i;
1137 unsigned int vm_flags; 1141 unsigned int vm_flags = 0;
1142 int write = !!(flags & GUP_FLAGS_WRITE);
1143 int force = !!(flags & GUP_FLAGS_FORCE);
1144 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1138 1145
1139 if (len <= 0) 1146 if (len <= 0)
1140 return 0; 1147 return 0;
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1158 pud_t *pud; 1165 pud_t *pud;
1159 pmd_t *pmd; 1166 pmd_t *pmd;
1160 pte_t *pte; 1167 pte_t *pte;
1161 if (write) /* user gate pages are read-only */ 1168
1169 /* user gate pages are read-only */
1170 if (!ignore && write)
1162 return i ? : -EFAULT; 1171 return i ? : -EFAULT;
1163 if (pg > TASK_SIZE) 1172 if (pg > TASK_SIZE)
1164 pgd = pgd_offset_k(pg); 1173 pgd = pgd_offset_k(pg);
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1190 continue; 1199 continue;
1191 } 1200 }
1192 1201
1193 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP)) 1202 if (!vma ||
1194 || !(vm_flags & vma->vm_flags)) 1203 (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1204 (!ignore && !(vm_flags & vma->vm_flags)))
1195 return i ? : -EFAULT; 1205 return i ? : -EFAULT;
1196 1206
1197 if (is_vm_hugetlb_page(vma)) { 1207 if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1266 } while (len); 1276 } while (len);
1267 return i; 1277 return i;
1268} 1278}
1279
1280int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1281 unsigned long start, int len, int write, int force,
1282 struct page **pages, struct vm_area_struct **vmas)
1283{
1284 int flags = 0;
1285
1286 if (write)
1287 flags |= GUP_FLAGS_WRITE;
1288 if (force)
1289 flags |= GUP_FLAGS_FORCE;
1290
1291 return __get_user_pages(tsk, mm,
1292 start, len, flags,
1293 pages, vmas);
1294}
1295
1269EXPORT_SYMBOL(get_user_pages); 1296EXPORT_SYMBOL(get_user_pages);
1270 1297
1271pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, 1298pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
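get_user_pages() is reduced above to a wrapper that folds its write/force ints into a flags word for __get_user_pages(), so callers such as munlock can also pass GUP_FLAGS_IGNORE_VMA_PERMISSIONS. A userspace model of the wrapper; the numeric flag values are illustrative assumptions (the real definitions live in mm/internal.h in this series):

#include <stdio.h>

#define GUP_FLAGS_WRITE				0x1	/* assumed value */
#define GUP_FLAGS_FORCE				0x2	/* assumed value */
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4	/* assumed value */

static int gup_flags(int write, int force)
{
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
	return flags;
}

int main(void)
{
	printf("write+force -> 0x%x\n", gup_flags(1, 1));	/* 0x3 */
	return 0;
}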
@@ -1296,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1296 pte_t *pte; 1323 pte_t *pte;
1297 spinlock_t *ptl; 1324 spinlock_t *ptl;
1298 1325
1299 retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
1300 if (retval)
1301 goto out;
1302
1303 retval = -EINVAL; 1326 retval = -EINVAL;
1304 if (PageAnon(page)) 1327 if (PageAnon(page))
1305 goto out_uncharge; 1328 goto out;
1306 retval = -ENOMEM; 1329 retval = -ENOMEM;
1307 flush_dcache_page(page); 1330 flush_dcache_page(page);
1308 pte = get_locked_pte(mm, addr, &ptl); 1331 pte = get_locked_pte(mm, addr, &ptl);
1309 if (!pte) 1332 if (!pte)
1310 goto out_uncharge; 1333 goto out;
1311 retval = -EBUSY; 1334 retval = -EBUSY;
1312 if (!pte_none(*pte)) 1335 if (!pte_none(*pte))
1313 goto out_unlock; 1336 goto out_unlock;
@@ -1323,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
1323 return retval; 1346 return retval;
1324out_unlock: 1347out_unlock:
1325 pte_unmap_unlock(pte, ptl); 1348 pte_unmap_unlock(pte, ptl);
1326out_uncharge:
1327 mem_cgroup_uncharge_page(page);
1328out: 1349out:
1329 return retval; 1350 return retval;
1330} 1351}
@@ -1858,6 +1879,15 @@ gotten:
1858 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 1879 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1859 if (!new_page) 1880 if (!new_page)
1860 goto oom; 1881 goto oom;
1882 /*
1883 * Don't let another task, with possibly unlocked vma,
1884 * keep the mlocked page.
1885 */
1886 if (vma->vm_flags & VM_LOCKED) {
1887 lock_page(old_page); /* for LRU manipulation */
1888 clear_page_mlock(old_page);
1889 unlock_page(old_page);
1890 }
1861 cow_user_page(new_page, old_page, address, vma); 1891 cow_user_page(new_page, old_page, address, vma);
1862 __SetPageUptodate(new_page); 1892 __SetPageUptodate(new_page);
1863 1893
@@ -1886,11 +1916,13 @@ gotten:
1886 * thread doing COW. 1916 * thread doing COW.
1887 */ 1917 */
1888 ptep_clear_flush_notify(vma, address, page_table); 1918 ptep_clear_flush_notify(vma, address, page_table);
1889 set_pte_at(mm, address, page_table, entry); 1919 SetPageSwapBacked(new_page);
1890 update_mmu_cache(vma, address, entry); 1920 lru_cache_add_active_or_unevictable(new_page, vma);
1891 lru_cache_add_active(new_page);
1892 page_add_new_anon_rmap(new_page, vma, address); 1921 page_add_new_anon_rmap(new_page, vma, address);
1893 1922
1923//TODO: is this safe? do_anonymous_page() does it this way.
1924 set_pte_at(mm, address, page_table, entry);
1925 update_mmu_cache(vma, address, entry);
1894 if (old_page) { 1926 if (old_page) {
1895 /* 1927 /*
1896 * Only after switching the pte to the new page may 1928 * Only after switching the pte to the new page may
@@ -2288,16 +2320,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2288 count_vm_event(PGMAJFAULT); 2320 count_vm_event(PGMAJFAULT);
2289 } 2321 }
2290 2322
2323 mark_page_accessed(page);
2324
2325 lock_page(page);
2326 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2327
2291 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2328 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2292 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2293 ret = VM_FAULT_OOM; 2329 ret = VM_FAULT_OOM;
2330 unlock_page(page);
2294 goto out; 2331 goto out;
2295 } 2332 }
2296 2333
2297 mark_page_accessed(page);
2298 lock_page(page);
2299 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2300
2301 /* 2334 /*
2302 * Back out if somebody else already faulted in this pte. 2335 * Back out if somebody else already faulted in this pte.
2303 */ 2336 */
@@ -2324,7 +2357,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2324 page_add_anon_rmap(page, vma, address); 2357 page_add_anon_rmap(page, vma, address);
2325 2358
2326 swap_free(entry); 2359 swap_free(entry);
2327 if (vm_swap_full()) 2360 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
2328 remove_exclusive_swap_page(page); 2361 remove_exclusive_swap_page(page);
2329 unlock_page(page); 2362 unlock_page(page);
2330 2363
@@ -2382,7 +2415,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2382 if (!pte_none(*page_table)) 2415 if (!pte_none(*page_table))
2383 goto release; 2416 goto release;
2384 inc_mm_counter(mm, anon_rss); 2417 inc_mm_counter(mm, anon_rss);
2385 lru_cache_add_active(page); 2418 SetPageSwapBacked(page);
2419 lru_cache_add_active_or_unevictable(page, vma);
2386 page_add_new_anon_rmap(page, vma, address); 2420 page_add_new_anon_rmap(page, vma, address);
2387 set_pte_at(mm, address, page_table, entry); 2421 set_pte_at(mm, address, page_table, entry);
2388 2422
@@ -2423,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2423 struct page *page; 2457 struct page *page;
2424 pte_t entry; 2458 pte_t entry;
2425 int anon = 0; 2459 int anon = 0;
2460 int charged = 0;
2426 struct page *dirty_page = NULL; 2461 struct page *dirty_page = NULL;
2427 struct vm_fault vmf; 2462 struct vm_fault vmf;
2428 int ret; 2463 int ret;
@@ -2463,6 +2498,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2463 ret = VM_FAULT_OOM; 2498 ret = VM_FAULT_OOM;
2464 goto out; 2499 goto out;
2465 } 2500 }
2501 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2502 ret = VM_FAULT_OOM;
2503 page_cache_release(page);
2504 goto out;
2505 }
2506 charged = 1;
2507 /*
2508 * Don't let another task, with possibly unlocked vma,
2509 * keep the mlocked page.
2510 */
2511 if (vma->vm_flags & VM_LOCKED)
2512 clear_page_mlock(vmf.page);
2466 copy_user_highpage(page, vmf.page, address, vma); 2513 copy_user_highpage(page, vmf.page, address, vma);
2467 __SetPageUptodate(page); 2514 __SetPageUptodate(page);
2468 } else { 2515 } else {
@@ -2497,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2497 2544
2498 } 2545 }
2499 2546
2500 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2501 ret = VM_FAULT_OOM;
2502 goto out;
2503 }
2504
2505 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2547 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2506 2548
2507 /* 2549 /*
@@ -2520,11 +2562,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2520 entry = mk_pte(page, vma->vm_page_prot); 2562 entry = mk_pte(page, vma->vm_page_prot);
2521 if (flags & FAULT_FLAG_WRITE) 2563 if (flags & FAULT_FLAG_WRITE)
2522 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2564 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2523 set_pte_at(mm, address, page_table, entry);
2524 if (anon) { 2565 if (anon) {
2525 inc_mm_counter(mm, anon_rss); 2566 inc_mm_counter(mm, anon_rss);
2526 lru_cache_add_active(page); 2567 SetPageSwapBacked(page);
2527 page_add_new_anon_rmap(page, vma, address); 2568 lru_cache_add_active_or_unevictable(page, vma);
2569 page_add_new_anon_rmap(page, vma, address);
2528 } else { 2570 } else {
2529 inc_mm_counter(mm, file_rss); 2571 inc_mm_counter(mm, file_rss);
2530 page_add_file_rmap(page); 2572 page_add_file_rmap(page);
@@ -2533,11 +2575,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2533 get_page(dirty_page); 2575 get_page(dirty_page);
2534 } 2576 }
2535 } 2577 }
2578//TODO: is this safe? do_anonymous_page() does it this way.
2579 set_pte_at(mm, address, page_table, entry);
2536 2580
2537 /* no need to invalidate: a not-present page won't be cached */ 2581 /* no need to invalidate: a not-present page won't be cached */
2538 update_mmu_cache(vma, address, entry); 2582 update_mmu_cache(vma, address, entry);
2539 } else { 2583 } else {
2540 mem_cgroup_uncharge_page(page); 2584 if (charged)
2585 mem_cgroup_uncharge_page(page);
2541 if (anon) 2586 if (anon)
2542 page_cache_release(page); 2587 page_cache_release(page);
2543 else 2588 else
@@ -2772,19 +2817,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
2772 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; 2817 len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
2773 ret = get_user_pages(current, current->mm, addr, 2818 ret = get_user_pages(current, current->mm, addr,
2774 len, write, 0, NULL, NULL); 2819 len, write, 0, NULL, NULL);
2775 if (ret < 0) { 2820 if (ret < 0)
2776 /*
2777 SUS require strange return value to mlock
2778 - invalid addr generate to ENOMEM.
2779 - out of memory should generate EAGAIN.
2780 */
2781 if (ret == -EFAULT)
2782 ret = -ENOMEM;
2783 else if (ret == -ENOMEM)
2784 ret = -EAGAIN;
2785 return ret; 2821 return ret;
2786 } 2822 return ret == len ? 0 : -EFAULT;
2787 return ret == len ? 0 : -ENOMEM;
2788} 2823}
2789 2824
2790#if !defined(__HAVE_ARCH_GATE_AREA) 2825#if !defined(__HAVE_ARCH_GATE_AREA)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 89fee2dcb039..6837a1014372 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
26#include <linux/delay.h> 26#include <linux/delay.h>
27#include <linux/migrate.h> 27#include <linux/migrate.h>
28#include <linux/page-isolation.h> 28#include <linux/page-isolation.h>
29#include <linux/pfn.h>
29 30
30#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
31 32
@@ -323,11 +324,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
323 BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); 324 BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
324 BUG_ON(nr_pages % PAGES_PER_SECTION); 325 BUG_ON(nr_pages % PAGES_PER_SECTION);
325 326
326 release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
327
328 sections_to_remove = nr_pages / PAGES_PER_SECTION; 327 sections_to_remove = nr_pages / PAGES_PER_SECTION;
329 for (i = 0; i < sections_to_remove; i++) { 328 for (i = 0; i < sections_to_remove; i++) {
330 unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; 329 unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
330 release_mem_region(pfn << PAGE_SHIFT,
331 PAGES_PER_SECTION << PAGE_SHIFT);
331 ret = __remove_section(zone, __pfn_to_section(pfn)); 332 ret = __remove_section(zone, __pfn_to_section(pfn));
332 if (ret) 333 if (ret)
333 break; 334 break;
@@ -657,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
657 * We can skip free pages. And we can only deal with pages on 658 * We can skip free pages. And we can only deal with pages on
658 * LRU. 659 * LRU.
659 */ 660 */
660 ret = isolate_lru_page(page, &source); 661 ret = isolate_lru_page(page);
661 if (!ret) { /* Success */ 662 if (!ret) { /* Success */
663 list_add_tail(&page->lru, &source);
662 move_pages--; 664 move_pages--;
663 } else { 665 } else {
664 /* Because we don't have big zone->lock, we should 666
@@ -849,10 +851,19 @@ failed_removal:
849 851
850 return ret; 852 return ret;
851} 853}
854
855int remove_memory(u64 start, u64 size)
856{
857 unsigned long start_pfn, end_pfn;
858
859 start_pfn = PFN_DOWN(start);
860 end_pfn = start_pfn + PFN_DOWN(size);
861 return offline_pages(start_pfn, end_pfn, 120 * HZ);
862}
852#else 863#else
853int remove_memory(u64 start, u64 size) 864int remove_memory(u64 start, u64 size)
854{ 865{
855 return -EINVAL; 866 return -EINVAL;
856} 867}
857EXPORT_SYMBOL_GPL(remove_memory);
858#endif /* CONFIG_MEMORY_HOTREMOVE */ 868#endif /* CONFIG_MEMORY_HOTREMOVE */
869EXPORT_SYMBOL_GPL(remove_memory);
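remove_memory() now has a real CONFIG_MEMORY_HOTREMOVE implementation: the byte range is converted to PFNs and handed to offline_pages() with a 120*HZ timeout. A small model of the PFN arithmetic, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */
#define PFN_DOWN(x) ((unsigned long)((x) >> PAGE_SHIFT))

int main(void)
{
	unsigned long long start = 0x40000000ULL;	/* 1 GiB */
	unsigned long long size  = 0x08000000ULL;	/* 128 MiB */
	unsigned long start_pfn = PFN_DOWN(start);
	unsigned long end_pfn = start_pfn + PFN_DOWN(size);

	/* the kernel then calls offline_pages(start_pfn, end_pfn, 120 * HZ) */
	printf("offline pfns [%lu, %lu)\n", start_pfn, end_pfn);
	return 0;
}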
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83369058ec13..36f42573a335 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,6 +93,8 @@
93#include <asm/tlbflush.h> 93#include <asm/tlbflush.h>
94#include <asm/uaccess.h> 94#include <asm/uaccess.h>
95 95
96#include "internal.h"
97
96/* Internal flags */ 98/* Internal flags */
97#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ 99#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */
98#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ 100#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
762 /* 764 /*
763 * Avoid migrating a page that is shared with others. 765 * Avoid migrating a page that is shared with others.
764 */ 766 */
765 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) 767 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
766 isolate_lru_page(page, pagelist); 768 if (!isolate_lru_page(page)) {
769 list_add_tail(&page->lru, pagelist);
770 }
771 }
767} 772}
768 773
769static struct page *new_node_page(struct page *page, unsigned long node, int **x) 774static struct page *new_node_page(struct page *page, unsigned long node, int **x)
@@ -2197,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
2197 if (PageSwapCache(page)) 2202 if (PageSwapCache(page))
2198 md->swapcache++; 2203 md->swapcache++;
2199 2204
2200 if (PageActive(page)) 2205 if (PageActive(page) || PageUnevictable(page))
2201 md->active++; 2206 md->active++;
2202 2207
2203 if (PageWriteback(page)) 2208 if (PageWriteback(page))
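Across this series isolate_lru_page() loses its pagelist argument: it only detaches the page, and each caller (memory_hotplug, mempolicy, migrate) queues the page itself. A userspace sketch of the new caller pattern, with a crude one-pointer list standing in for list_add_tail() (all _model names are invented here):

#include <stdio.h>

struct page_model {
	int on_lru;
	struct page_model *next;	/* crude list linkage */
};

/* models the narrowed isolate_lru_page(): detach only */
static int isolate_lru_page_model(struct page_model *p)
{
	if (!p->on_lru)
		return -1;		/* -EBUSY in the kernel */
	p->on_lru = 0;
	return 0;
}

int main(void)
{
	struct page_model page = { .on_lru = 1, .next = NULL };
	struct page_model *pagelist = NULL;

	/* the caller, not the helper, now does the queueing */
	if (!isolate_lru_page_model(&page)) {
		page.next = pagelist;
		pagelist = &page;
	}
	printf("queued: %d\n", pagelist == &page);	/* 1 */
	return 0;
}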
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23bb..6602941bfab0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
37#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 37#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
38 38
39/* 39/*
40 * Isolate one page from the LRU lists. If successful put it onto
41 * the indicated list with elevated page count.
42 *
43 * Result:
44 * -EBUSY: page not on LRU list
45 * 0: page removed from LRU list and added to the specified list.
46 */
47int isolate_lru_page(struct page *page, struct list_head *pagelist)
48{
49 int ret = -EBUSY;
50
51 if (PageLRU(page)) {
52 struct zone *zone = page_zone(page);
53
54 spin_lock_irq(&zone->lru_lock);
55 if (PageLRU(page) && get_page_unless_zero(page)) {
56 ret = 0;
57 ClearPageLRU(page);
58 if (PageActive(page))
59 del_page_from_active_list(zone, page);
60 else
61 del_page_from_inactive_list(zone, page);
62 list_add_tail(&page->lru, pagelist);
63 }
64 spin_unlock_irq(&zone->lru_lock);
65 }
66 return ret;
67}
68
69/*
70 * migrate_prep() needs to be called before we start compiling a list of pages 40 * migrate_prep() needs to be called before we start compiling a list of pages
71 * to be migrated using isolate_lru_page(). 41 * to be migrated using isolate_lru_page().
72 */ 42 */
@@ -83,23 +53,9 @@ int migrate_prep(void)
83 return 0; 53 return 0;
84} 54}
85 55
86static inline void move_to_lru(struct page *page)
87{
88 if (PageActive(page)) {
89 /*
90 * lru_cache_add_active checks that
91 * the PG_active bit is off.
92 */
93 ClearPageActive(page);
94 lru_cache_add_active(page);
95 } else {
96 lru_cache_add(page);
97 }
98 put_page(page);
99}
100
101/* 56/*
102 * Add isolated pages on the list back to the LRU. 57 * Add isolated pages on the list back to the LRU under page lock
58 * to avoid leaking evictable pages back onto unevictable list.
103 * 59 *
104 * returns the number of pages put back. 60 * returns the number of pages put back.
105 */ 61 */
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
111 67
112 list_for_each_entry_safe(page, page2, l, lru) { 68 list_for_each_entry_safe(page, page2, l, lru) {
113 list_del(&page->lru); 69 list_del(&page->lru);
114 move_to_lru(page); 70 putback_lru_page(page);
115 count++; 71 count++;
116 } 72 }
117 return count; 73 return count;
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
374 __inc_zone_page_state(newpage, NR_FILE_PAGES); 330 __inc_zone_page_state(newpage, NR_FILE_PAGES);
375 331
376 spin_unlock_irq(&mapping->tree_lock); 332 spin_unlock_irq(&mapping->tree_lock);
377 if (!PageSwapCache(newpage))
378 mem_cgroup_uncharge_cache_page(page);
379 333
380 return 0; 334 return 0;
381} 335}
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
385 */ 339 */
386static void migrate_page_copy(struct page *newpage, struct page *page) 340static void migrate_page_copy(struct page *newpage, struct page *page)
387{ 341{
342 int anon;
343
388 copy_highpage(newpage, page); 344 copy_highpage(newpage, page);
389 345
390 if (PageError(page)) 346 if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
393 SetPageReferenced(newpage); 349 SetPageReferenced(newpage);
394 if (PageUptodate(page)) 350 if (PageUptodate(page))
395 SetPageUptodate(newpage); 351 SetPageUptodate(newpage);
396 if (PageActive(page)) 352 if (TestClearPageActive(page)) {
353 VM_BUG_ON(PageUnevictable(page));
397 SetPageActive(newpage); 354 SetPageActive(newpage);
355 } else
356 unevictable_migrate_page(newpage, page);
398 if (PageChecked(page)) 357 if (PageChecked(page))
399 SetPageChecked(newpage); 358 SetPageChecked(newpage);
400 if (PageMappedToDisk(page)) 359 if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
412 __set_page_dirty_nobuffers(newpage); 371 __set_page_dirty_nobuffers(newpage);
413 } 372 }
414 373
374 mlock_migrate_page(newpage, page);
375
415#ifdef CONFIG_SWAP 376#ifdef CONFIG_SWAP
416 ClearPageSwapCache(page); 377 ClearPageSwapCache(page);
417#endif 378#endif
418 ClearPageActive(page);
419 ClearPagePrivate(page); 379 ClearPagePrivate(page);
420 set_page_private(page, 0); 380 set_page_private(page, 0);
381 /* page->mapping contains a flag for PageAnon() */
382 anon = PageAnon(page);
421 page->mapping = NULL; 383 page->mapping = NULL;
422 384
385 if (!anon) /* This page was removed from radix-tree. */
386 mem_cgroup_uncharge_cache_page(page);
387
423 /* 388 /*
424 * If any waiters have accumulated on the new page then 389 * If any waiters have accumulated on the new page then
425 * wake them up. 390 * wake them up.
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping,
594 * 559 *
595 * The new page will have replaced the old page if this function 560 * The new page will have replaced the old page if this function
596 * is successful. 561 * is successful.
562 *
563 * Return value:
564 * < 0 - error code
565 * == 0 - success
597 */ 566 */
598static int move_to_new_page(struct page *newpage, struct page *page) 567static int move_to_new_page(struct page *newpage, struct page *page)
599{ 568{
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
611 /* Prepare mapping for the new page.*/ 580 /* Prepare mapping for the new page.*/
612 newpage->index = page->index; 581 newpage->index = page->index;
613 newpage->mapping = page->mapping; 582 newpage->mapping = page->mapping;
583 if (PageSwapBacked(page))
584 SetPageSwapBacked(newpage);
614 585
615 mapping = page_mapping(page); 586 mapping = page_mapping(page);
616 if (!mapping) 587 if (!mapping)
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
654 if (!newpage) 625 if (!newpage)
655 return -ENOMEM; 626 return -ENOMEM;
656 627
657 if (page_count(page) == 1) 628 if (page_count(page) == 1) {
658 /* page was freed from under us. So we are done. */ 629 /* page was freed from under us. So we are done. */
659 goto move_newpage; 630 goto move_newpage;
631 }
660 632
661 charge = mem_cgroup_prepare_migration(page, newpage); 633 charge = mem_cgroup_prepare_migration(page, newpage);
662 if (charge == -ENOMEM) { 634 if (charge == -ENOMEM) {
@@ -730,7 +702,6 @@ rcu_unlock:
730 rcu_read_unlock(); 702 rcu_read_unlock();
731 703
732unlock: 704unlock:
733
734 unlock_page(page); 705 unlock_page(page);
735 706
736 if (rc != -EAGAIN) { 707 if (rc != -EAGAIN) {
@@ -741,17 +712,19 @@ unlock:
741 * restored. 712 * restored.
742 */ 713 */
743 list_del(&page->lru); 714 list_del(&page->lru);
744 move_to_lru(page); 715 putback_lru_page(page);
745 } 716 }
746 717
747move_newpage: 718move_newpage:
748 if (!charge) 719 if (!charge)
749 mem_cgroup_end_migration(newpage); 720 mem_cgroup_end_migration(newpage);
721
750 /* 722 /*
751 * Move the new page to the LRU. If migration was not successful 723 * Move the new page to the LRU. If migration was not successful
752 * then this will free the page. 724 * then this will free the page.
753 */ 725 */
754 move_to_lru(newpage); 726 putback_lru_page(newpage);
727
755 if (result) { 728 if (result) {
756 if (rc) 729 if (rc)
757 *result = rc; 730 *result = rc;
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
858 * Move a set of pages as indicated in the pm array. The addr 831 * Move a set of pages as indicated in the pm array. The addr
859 * field must be set to the virtual address of the page to be moved 832 * field must be set to the virtual address of the page to be moved
860 * and the node number must contain a valid target node. 833 * and the node number must contain a valid target node.
834 * The pm array ends with node = MAX_NUMNODES.
861 */ 835 */
862static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, 836static int do_move_page_to_node_array(struct mm_struct *mm,
863 int migrate_all) 837 struct page_to_node *pm,
838 int migrate_all)
864{ 839{
865 int err; 840 int err;
866 struct page_to_node *pp; 841 struct page_to_node *pp;
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
914 !migrate_all) 889 !migrate_all)
915 goto put_and_set; 890 goto put_and_set;
916 891
917 err = isolate_lru_page(page, &pagelist); 892 err = isolate_lru_page(page);
893 if (!err)
894 list_add_tail(&page->lru, &pagelist);
918put_and_set: 895put_and_set:
919 /* 896 /*
920 * Either remove the duplicate refcount from 897 * Either remove the duplicate refcount from
@@ -926,36 +903,118 @@ set_status:
926 pp->status = err; 903 pp->status = err;
927 } 904 }
928 905
906 err = 0;
929 if (!list_empty(&pagelist)) 907 if (!list_empty(&pagelist))
930 err = migrate_pages(&pagelist, new_page_node, 908 err = migrate_pages(&pagelist, new_page_node,
931 (unsigned long)pm); 909 (unsigned long)pm);
932 else
933 err = -ENOENT;
934 910
935 up_read(&mm->mmap_sem); 911 up_read(&mm->mmap_sem);
936 return err; 912 return err;
937} 913}
938 914
939/* 915/*
940 * Determine the nodes of a list of pages. The addr in the pm array 916 * Migrate an array of page addresses onto an array of nodes and fill
941 * must have been set to the virtual address of which we want to determine 917 * the corresponding status array.
942 * the node number.
943 */ 918 */
944static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) 919static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
920 unsigned long nr_pages,
921 const void __user * __user *pages,
922 const int __user *nodes,
923 int __user *status, int flags)
945{ 924{
925 struct page_to_node *pm = NULL;
926 nodemask_t task_nodes;
927 int err = 0;
928 int i;
929
930 task_nodes = cpuset_mems_allowed(task);
931
932 /* Limit nr_pages so that the multiplication may not overflow */
933 if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
934 err = -E2BIG;
935 goto out;
936 }
937
938 pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
939 if (!pm) {
940 err = -ENOMEM;
941 goto out;
942 }
943
944 /*
945 * Get parameters from user space and initialize the pm
946 * array. Return various errors if the user did something wrong.
947 */
948 for (i = 0; i < nr_pages; i++) {
949 const void __user *p;
950
951 err = -EFAULT;
952 if (get_user(p, pages + i))
953 goto out_pm;
954
955 pm[i].addr = (unsigned long)p;
956 if (nodes) {
957 int node;
958
959 if (get_user(node, nodes + i))
960 goto out_pm;
961
962 err = -ENODEV;
963 if (!node_state(node, N_HIGH_MEMORY))
964 goto out_pm;
965
966 err = -EACCES;
967 if (!node_isset(node, task_nodes))
968 goto out_pm;
969
970 pm[i].node = node;
971 } else
972 pm[i].node = 0; /* anything to not match MAX_NUMNODES */
973 }
974 /* End marker */
975 pm[nr_pages].node = MAX_NUMNODES;
976
977 err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
978 if (err >= 0)
979 /* Return status information */
980 for (i = 0; i < nr_pages; i++)
981 if (put_user(pm[i].status, status + i))
982 err = -EFAULT;
983
984out_pm:
985 vfree(pm);
986out:
987 return err;
988}
989
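do_pages_move() keeps sys_move_pages()'s marshalling conventions: the page_to_node array is one slot longer than nr_pages, terminated by node == MAX_NUMNODES, and the allocation size is guarded against multiplication overflow. A compilable model (the MAX_NUMNODES value is an arbitrary stand-in):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_NUMNODES 64		/* arbitrary stand-in */

struct page_to_node {
	unsigned long addr;
	int node;
	int status;
};

static struct page_to_node *alloc_pm(unsigned long nr_pages)
{
	struct page_to_node *pm;

	/* guard the (nr_pages + 1) * sizeof() multiplication */
	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1)
		return NULL;			/* -E2BIG in the kernel */
	pm = calloc(nr_pages + 1, sizeof(*pm));
	if (pm)
		pm[nr_pages].node = MAX_NUMNODES;	/* end marker */
	return pm;
}

int main(void)
{
	struct page_to_node *pm = alloc_pm(4);

	printf("end marker set: %d\n", pm && pm[4].node == MAX_NUMNODES);
	free(pm);
	return 0;
}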
990/*
991 * Determine the nodes of an array of pages and store it in an array of status.
992 */
993static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
994 const void __user * __user *pages,
995 int __user *status)
996{
997 unsigned long i;
998 int err;
999
946 down_read(&mm->mmap_sem); 1000 down_read(&mm->mmap_sem);
947 1001
948 for ( ; pm->node != MAX_NUMNODES; pm++) { 1002 for (i = 0; i < nr_pages; i++) {
1003 const void __user *p;
1004 unsigned long addr;
949 struct vm_area_struct *vma; 1005 struct vm_area_struct *vma;
950 struct page *page; 1006 struct page *page;
951 int err;
952 1007
953 err = -EFAULT; 1008 err = -EFAULT;
954 vma = find_vma(mm, pm->addr); 1009 if (get_user(p, pages+i))
1010 goto out;
1011 addr = (unsigned long) p;
1012
1013 vma = find_vma(mm, addr);
955 if (!vma) 1014 if (!vma)
956 goto set_status; 1015 goto set_status;
957 1016
958 page = follow_page(vma, pm->addr, 0); 1017 page = follow_page(vma, addr, 0);
959 1018
960 err = PTR_ERR(page); 1019 err = PTR_ERR(page);
961 if (IS_ERR(page)) 1020 if (IS_ERR(page))
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
968 1027
969 err = page_to_nid(page); 1028 err = page_to_nid(page);
970set_status: 1029set_status:
971 pm->status = err; 1030 put_user(err, status+i);
972 } 1031 }
1032 err = 0;
973 1033
1034out:
974 up_read(&mm->mmap_sem); 1035 up_read(&mm->mmap_sem);
975 return 0; 1036 return err;
976} 1037}
977 1038
978/* 1039/*
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
984 const int __user *nodes, 1045 const int __user *nodes,
985 int __user *status, int flags) 1046 int __user *status, int flags)
986{ 1047{
987 int err = 0;
988 int i;
989 struct task_struct *task; 1048 struct task_struct *task;
990 nodemask_t task_nodes;
991 struct mm_struct *mm; 1049 struct mm_struct *mm;
992 struct page_to_node *pm = NULL; 1050 int err;
993 1051
994 /* Check flags */ 1052 /* Check flags */
995 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) 1053 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
1021 (current->uid != task->suid) && (current->uid != task->uid) && 1079 (current->uid != task->suid) && (current->uid != task->uid) &&
1022 !capable(CAP_SYS_NICE)) { 1080 !capable(CAP_SYS_NICE)) {
1023 err = -EPERM; 1081 err = -EPERM;
1024 goto out2; 1082 goto out;
1025 } 1083 }
1026 1084
1027 err = security_task_movememory(task); 1085 err = security_task_movememory(task);
1028 if (err) 1086 if (err)
1029 goto out2; 1087 goto out;
1030
1031
1032 task_nodes = cpuset_mems_allowed(task);
1033
1034 /* Limit nr_pages so that the multiplication may not overflow */
1035 if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
1036 err = -E2BIG;
1037 goto out2;
1038 }
1039
1040 pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
1041 if (!pm) {
1042 err = -ENOMEM;
1043 goto out2;
1044 }
1045
1046 /*
1047 * Get parameters from user space and initialize the pm
1048 * array. Return various errors if the user did something wrong.
1049 */
1050 for (i = 0; i < nr_pages; i++) {
1051 const void __user *p;
1052
1053 err = -EFAULT;
1054 if (get_user(p, pages + i))
1055 goto out;
1056
1057 pm[i].addr = (unsigned long)p;
1058 if (nodes) {
1059 int node;
1060
1061 if (get_user(node, nodes + i))
1062 goto out;
1063
1064 err = -ENODEV;
1065 if (!node_state(node, N_HIGH_MEMORY))
1066 goto out;
1067
1068 err = -EACCES;
1069 if (!node_isset(node, task_nodes))
1070 goto out;
1071 1088
1072 pm[i].node = node; 1089 if (nodes) {
1073 } else 1090 err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
1074 pm[i].node = 0; /* anything to not match MAX_NUMNODES */ 1091 flags);
1092 } else {
1093 err = do_pages_stat(mm, nr_pages, pages, status);
1075 } 1094 }
1076 /* End marker */
1077 pm[nr_pages].node = MAX_NUMNODES;
1078
1079 if (nodes)
1080 err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
1081 else
1082 err = do_pages_stat(mm, pm);
1083
1084 if (err >= 0)
1085 /* Return status information */
1086 for (i = 0; i < nr_pages; i++)
1087 if (put_user(pm[i].status, status + i))
1088 err = -EFAULT;
1089 1095
1090out: 1096out:
1091 vfree(pm);
1092out2:
1093 mmput(mm); 1097 mmput(mm);
1094 return err; 1098 return err;
1095} 1099}
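migrate_page_copy() now hands page flags over with unevictability in mind: Active moves test-and-clear style, and a page that was not active propagates Unevictable instead (unevictable_migrate_page()). A bit-level restatement; the PG_* values are invented for the sketch:

#include <stdio.h>

#define PG_ACTIVE	0x1u	/* invented bit values */
#define PG_UNEVICTABLE	0x2u

static unsigned int migrate_flags(unsigned int *old)
{
	unsigned int new = 0;

	if (*old & PG_ACTIVE) {
		*old &= ~PG_ACTIVE;	/* TestClearPageActive() */
		new |= PG_ACTIVE;
	} else if (*old & PG_UNEVICTABLE) {
		new |= PG_UNEVICTABLE;	/* unevictable_migrate_page() */
	}
	return new;
}

int main(void)
{
	unsigned int old = PG_UNEVICTABLE;

	printf("new flags: 0x%x\n", migrate_flags(&old));	/* 0x2 */
	return 0;
}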
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93eff5c..008ea70b7afa 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -8,10 +8,18 @@
8#include <linux/capability.h> 8#include <linux/capability.h>
9#include <linux/mman.h> 9#include <linux/mman.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/swap.h>
12#include <linux/swapops.h>
13#include <linux/pagemap.h>
11#include <linux/mempolicy.h> 14#include <linux/mempolicy.h>
12#include <linux/syscalls.h> 15#include <linux/syscalls.h>
13#include <linux/sched.h> 16#include <linux/sched.h>
14#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/rmap.h>
19#include <linux/mmzone.h>
20#include <linux/hugetlb.h>
21
22#include "internal.h"
15 23
16int can_do_mlock(void) 24int can_do_mlock(void)
17{ 25{
@@ -23,17 +31,381 @@ int can_do_mlock(void)
23} 31}
24EXPORT_SYMBOL(can_do_mlock); 32EXPORT_SYMBOL(can_do_mlock);
25 33
34#ifdef CONFIG_UNEVICTABLE_LRU
35/*
36 * Mlocked pages are marked with PageMlocked() flag for efficient testing
37 * in vmscan and, possibly, the fault path; and to support semi-accurate
38 * statistics.
39 *
40 * An mlocked page [PageMlocked(page)] is unevictable. As such, it will
41 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
42 * The unevictable list is an LRU sibling list to the [in]active lists.
43 * PageUnevictable is set to indicate the unevictable state.
44 *
45 * When lazy mlocking via vmscan, it is important to ensure that the
46 * vma's VM_LOCKED status is not concurrently being modified, otherwise we
47 * may have mlocked a page that is being munlocked. So lazy mlock must take
48 * the mmap_sem for read, and verify that the vma really is locked
49 * (see mm/rmap.c).
50 */
51
52/*
53 * LRU accounting for clear_page_mlock()
54 */
55void __clear_page_mlock(struct page *page)
56{
57 VM_BUG_ON(!PageLocked(page));
58
59 if (!page->mapping) { /* truncated ? */
60 return;
61 }
62
63 dec_zone_page_state(page, NR_MLOCK);
64 count_vm_event(UNEVICTABLE_PGCLEARED);
65 if (!isolate_lru_page(page)) {
66 putback_lru_page(page);
67 } else {
68 /*
69 * Page not on the LRU yet. Flush all pagevecs and retry.
70 */
71 lru_add_drain_all();
72 if (!isolate_lru_page(page))
73 putback_lru_page(page);
74 else if (PageUnevictable(page))
75 count_vm_event(UNEVICTABLE_PGSTRANDED);
76
77 }
78}
79
80/*
81 * Mark page as mlocked if not already.
82 * If page on LRU, isolate and putback to move to unevictable list.
83 */
84void mlock_vma_page(struct page *page)
85{
86 BUG_ON(!PageLocked(page));
87
88 if (!TestSetPageMlocked(page)) {
89 inc_zone_page_state(page, NR_MLOCK);
90 count_vm_event(UNEVICTABLE_PGMLOCKED);
91 if (!isolate_lru_page(page))
92 putback_lru_page(page);
93 }
94}
95
96/*
97 * called from munlock()/munmap() path with page supposedly on the LRU.
98 *
99 * Note: unlike mlock_vma_page(), we can't just clear the PageMlocked
100 * [in try_to_munlock()] and then attempt to isolate the page. We must
101 * isolate the page to keep others from messing with its unevictable
102 * and mlocked state while trying to munlock. However, we pre-clear the
103 * mlocked state anyway as we might lose the isolation race and we might
104 * not get another chance to clear PageMlocked. If we successfully
105 * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
106 * mapping the page, it will restore the PageMlocked state, unless the page
107 * is mapped in a non-linear vma. So, we go ahead and SetPageMlocked(),
108 * perhaps redundantly.
109 * If we lose the isolation race, and the page is mapped by other VM_LOCKED
110 * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
111 * either of which will restore the PageMlocked state by calling
112 * mlock_vma_page() above, if it can grab the vma's mmap sem.
113 */
114static void munlock_vma_page(struct page *page)
115{
116 BUG_ON(!PageLocked(page));
117
118 if (TestClearPageMlocked(page)) {
119 dec_zone_page_state(page, NR_MLOCK);
120 if (!isolate_lru_page(page)) {
121 int ret = try_to_munlock(page);
122 /*
123 * did try_to_munlock() succeed or punt?
124 */
125 if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
126 count_vm_event(UNEVICTABLE_PGMUNLOCKED);
127
128 putback_lru_page(page);
129 } else {
130 /*
131 * We lost the race. Let try_to_unmap() deal
132 * with it. At least we get the page state and
133 * mlock stats right. However, the page is still on
134 * the unevictable list. We'll fix that up when
135 * the page is eventually freed or when we scan the
136 * unevictable list.
137 */
138 if (PageUnevictable(page))
139 count_vm_event(UNEVICTABLE_PGSTRANDED);
140 else
141 count_vm_event(UNEVICTABLE_PGMUNLOCKED);
142 }
143 }
144}
145
146/**
147 * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.
148 * @vma: target vma
149 * @start: start address
150 * @end: end address
151 * @mlock: 0 indicates munlock, otherwise mlock.
152 *
153 * If @mlock == 0, unlock an mlocked range;
154 * else mlock the range of pages. This takes care of making the pages present,
155 * too.
156 *
157 * return 0 on success, negative error code on error.
158 *
159 * vma->vm_mm->mmap_sem must be held for at least read.
160 */
161static long __mlock_vma_pages_range(struct vm_area_struct *vma,
162 unsigned long start, unsigned long end,
163 int mlock)
164{
165 struct mm_struct *mm = vma->vm_mm;
166 unsigned long addr = start;
167 struct page *pages[16]; /* 16 gives a reasonable batch */
168 int nr_pages = (end - start) / PAGE_SIZE;
169 int ret;
170 int gup_flags = 0;
171
172 VM_BUG_ON(start & ~PAGE_MASK);
173 VM_BUG_ON(end & ~PAGE_MASK);
174 VM_BUG_ON(start < vma->vm_start);
175 VM_BUG_ON(end > vma->vm_end);
176 VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
177 (atomic_read(&mm->mm_users) != 0));
178
179 /*
180 * mlock: don't populate pages if the page has PROT_NONE permission.
181 * munlock: always munlock the pages, even if they have
182 * PROT_NONE permission.
183 */
184 if (!mlock)
185 gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
186
187 if (vma->vm_flags & VM_WRITE)
188 gup_flags |= GUP_FLAGS_WRITE;
189
190 lru_add_drain_all(); /* push cached pages to LRU */
191
192 while (nr_pages > 0) {
193 int i;
194
195 cond_resched();
196
197 /*
198 * get_user_pages makes pages present if we are
199 * setting mlock. and this extra reference count will
200 * disable migration of this page. However, page may
201 * still be truncated out from under us.
202 */
203 ret = __get_user_pages(current, mm, addr,
204 min_t(int, nr_pages, ARRAY_SIZE(pages)),
205 gup_flags, pages, NULL);
206 /*
207 * This can happen for, e.g., VM_NONLINEAR regions before
208 * a page has been allocated and mapped at a given offset,
209 * or for addresses that map beyond the end of a file.
210 * We'll mlock the pages if/when they get faulted in.
211 */
212 if (ret < 0)
213 break;
214 if (ret == 0) {
215 /*
216 * We know the vma is there, so the only time
217 * we cannot get a single page should be an
218 * error (ret < 0) case.
219 */
220 WARN_ON(1);
221 break;
222 }
223
224 lru_add_drain(); /* push cached pages to LRU */
225
226 for (i = 0; i < ret; i++) {
227 struct page *page = pages[i];
228
229 lock_page(page);
230 /*
231 * Because we lock page here and migration is blocked
232 * by the elevated reference, we need only check for
233 * page truncation (file-cache only).
234 */
235 if (page->mapping) {
236 if (mlock)
237 mlock_vma_page(page);
238 else
239 munlock_vma_page(page);
240 }
241 unlock_page(page);
242 put_page(page); /* ref from get_user_pages() */
243
244 /*
245 * here we assume that get_user_pages() has given us
246 * a list of virtually contiguous pages.
247 */
248 addr += PAGE_SIZE; /* for next get_user_pages() */
249 nr_pages--;
250 }
251 ret = 0;
252 }
253
254 lru_add_drain_all(); /* to update stats */
255
256 return ret; /* count entire vma as locked_vm */
257}
258
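__mlock_vma_pages_range() above works in batches of 16 pages per __get_user_pages() call, advancing addr and nr_pages until the range is covered or GUP fails. The loop shape, modeled in userspace with a fake gup() that always succeeds:

#include <stdio.h>

#define BATCH 16
#define PAGE_SIZE 4096UL

/* pretend GUP: returns how many pages it "pinned", up to want */
static int fake_gup(unsigned long addr, int want)
{
	(void)addr;
	return want < BATCH ? want : BATCH;
}

int main(void)
{
	unsigned long addr = 0x1000, end = 0x1000 + 40 * PAGE_SIZE;
	int nr_pages = (end - addr) / PAGE_SIZE;

	while (nr_pages > 0) {
		int want = nr_pages < BATCH ? nr_pages : BATCH;
		int got = fake_gup(addr, want);

		if (got <= 0)
			break;		/* error or nothing mapped */
		/* mlock/munlock each returned page here */
		addr += (unsigned long)got * PAGE_SIZE;
		nr_pages -= got;
	}
	printf("remaining: %d\n", nr_pages);	/* 0 */
	return 0;
}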
259/*
260 * convert get_user_pages() return value to posix mlock() error
261 */
262static int __mlock_posix_error_return(long retval)
263{
264 if (retval == -EFAULT)
265 retval = -ENOMEM;
266 else if (retval == -ENOMEM)
267 retval = -EAGAIN;
268 return retval;
269}
270
271#else /* CONFIG_UNEVICTABLE_LRU */
272
273/*
274 * Just make pages present if VM_LOCKED. No-op if unlocking.
275 */
276static long __mlock_vma_pages_range(struct vm_area_struct *vma,
277 unsigned long start, unsigned long end,
278 int mlock)
279{
280 if (mlock && (vma->vm_flags & VM_LOCKED))
281 return make_pages_present(start, end);
282 return 0;
283}
284
285static inline int __mlock_posix_error_return(long retval)
286{
287 return 0;
288}
289
290#endif /* CONFIG_UNEVICTABLE_LRU */
291
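__mlock_posix_error_return() preserves the SUS quirk that used to live in make_pages_present(): mlock() must report ENOMEM for an invalid address and EAGAIN for memory pressure, the opposite of what get_user_pages() hands back. Restated as a standalone function:

#include <errno.h>
#include <stdio.h>

static long mlock_posix_error(long gup_ret)
{
	if (gup_ret == -EFAULT)
		return -ENOMEM;		/* invalid address */
	if (gup_ret == -ENOMEM)
		return -EAGAIN;		/* transient memory pressure */
	return gup_ret;
}

int main(void)
{
	printf("%ld %ld\n", mlock_posix_error(-EFAULT),
	       mlock_posix_error(-ENOMEM));
	return 0;
}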
292/**
293 * mlock_vma_pages_range() - mlock pages in specified vma range.
294 * @vma - the vma containing the specified address range
295 * @start - starting address in @vma to mlock
296 * @end - end address [+1] in @vma to mlock
297 *
298 * For mmap()/mremap()/expansion of mlocked vma.
299 *
300 * return 0 on success for "normal" vmas.
301 *
302 * return number of pages [> 0] to be removed from locked_vm on success
303 * of "special" vmas.
304 *
305 * return negative error if the vma spanning @start-@end disappears while
306 * mmap semaphore is dropped. Unlikely?
307 */
308long mlock_vma_pages_range(struct vm_area_struct *vma,
309 unsigned long start, unsigned long end)
310{
311 struct mm_struct *mm = vma->vm_mm;
312 int nr_pages = (end - start) / PAGE_SIZE;
313 BUG_ON(!(vma->vm_flags & VM_LOCKED));
314
315 /*
316 * filter unlockable vmas
317 */
318 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
319 goto no_mlock;
320
321 if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
322 is_vm_hugetlb_page(vma) ||
323 vma == get_gate_vma(current))) {
324 long error;
325 downgrade_write(&mm->mmap_sem);
326
327 error = __mlock_vma_pages_range(vma, start, end, 1);
328
329 up_read(&mm->mmap_sem);
330 /* vma can change or disappear */
331 down_write(&mm->mmap_sem);
332 vma = find_vma(mm, start);
333 /* non-NULL vma must contain @start, but need to check @end */
334 if (!vma || end > vma->vm_end)
335 return -ENOMEM;
336
337 return 0; /* hide other errors from mmap(), et al */
338 }
339
340 /*
341 * User mapped kernel pages or huge pages:
342 * make these pages present to populate the ptes, but
343 * fall thru' to reset VM_LOCKED--no need to unlock, and
344 * return nr_pages so these don't get counted against task's
345 * locked limit. huge pages are already counted against
346 * locked vm limit.
347 */
348 make_pages_present(start, end);
349
350no_mlock:
351 vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */
352 return nr_pages; /* error or pages NOT mlocked */
353}
354
355
356/*
357 * munlock_vma_pages_range() - munlock all pages in the vma range.
358 * @vma - vma containing range to be munlock()ed.
359 * @start - start address in @vma of the range
360 * @end - end of range in @vma.
361 *
362 * For mremap(), munmap() and exit().
363 *
364 * Called with @vma VM_LOCKED.
365 *
366 * Returns with VM_LOCKED cleared. Callers must be prepared to
367 * deal with this.
368 *
369 * We don't save and restore VM_LOCKED here because pages are
370 * still on lru. In unmap path, pages might be scanned by reclaim
371 * and re-mlocked by try_to_{munlock|unmap} before we unmap and
372 * free them. This will result in freeing mlocked pages.
373 */
374void munlock_vma_pages_range(struct vm_area_struct *vma,
375 unsigned long start, unsigned long end)
376{
377 vma->vm_flags &= ~VM_LOCKED;
378 __mlock_vma_pages_range(vma, start, end, 0);
379}
380
381/*
382 * mlock_fixup - handle mlock[all]/munlock[all] requests.
383 *
384 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
385 * munlock is a no-op. However, for some special vmas, we go ahead and
386 * populate the ptes via make_pages_present().
387 *
388 * For vmas that pass the filters, merge/split as appropriate.
389 */
26static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, 390static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
27 unsigned long start, unsigned long end, unsigned int newflags) 391 unsigned long start, unsigned long end, unsigned int newflags)
28{ 392{
29 struct mm_struct * mm = vma->vm_mm; 393 struct mm_struct *mm = vma->vm_mm;
30 pgoff_t pgoff; 394 pgoff_t pgoff;
31 int pages; 395 int nr_pages;
32 int ret = 0; 396 int ret = 0;
33 397 int lock = newflags & VM_LOCKED;
34 if (newflags == vma->vm_flags) { 398
35 *prev = vma; 399 if (newflags == vma->vm_flags ||
36 goto out; 400 (vma->vm_flags & (VM_IO | VM_PFNMAP)))
401 goto out; /* don't set VM_LOCKED, don't count */
402
403 if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
404 is_vm_hugetlb_page(vma) ||
405 vma == get_gate_vma(current)) {
406 if (lock)
407 make_pages_present(start, end);
408 goto out; /* don't set VM_LOCKED, don't count */
37 } 409 }
38 410
39 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 411 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -44,8 +416,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
44 goto success; 416 goto success;
45 } 417 }
46 418
47 *prev = vma;
48
49 if (start != vma->vm_start) { 419 if (start != vma->vm_start) {
50 ret = split_vma(mm, vma, start, 1); 420 ret = split_vma(mm, vma, start, 1);
51 if (ret) 421 if (ret)
@@ -60,24 +430,61 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
60 430
61success: 431success:
62 /* 432 /*
433 * Keep track of amount of locked VM.
434 */
435 nr_pages = (end - start) >> PAGE_SHIFT;
436 if (!lock)
437 nr_pages = -nr_pages;
438 mm->locked_vm += nr_pages;
439
440 /*
63 * vm_flags is protected by the mmap_sem held in write mode. 441 * vm_flags is protected by the mmap_sem held in write mode.
64 * It's okay if try_to_unmap_one unmaps a page just after we 442 * It's okay if try_to_unmap_one unmaps a page just after we
65 * set VM_LOCKED, make_pages_present below will bring it back. 443 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
66 */ 444 */
67 vma->vm_flags = newflags; 445 vma->vm_flags = newflags;
68 446
69 /* 447 if (lock) {
70 * Keep track of amount of locked VM. 448 /*
71 */ 449 * mmap_sem is currently held for write. Downgrade the write
72 pages = (end - start) >> PAGE_SHIFT; 450 * lock to a read lock so that other faults, mmap scans, ...
73 if (newflags & VM_LOCKED) { 451 * while we fault in all pages.
74 pages = -pages; 452 */
75 if (!(newflags & VM_IO)) 453 downgrade_write(&mm->mmap_sem);
76 ret = make_pages_present(start, end); 454
455 ret = __mlock_vma_pages_range(vma, start, end, 1);
456
457 /*
458 * Need to reacquire mmap sem in write mode, as our callers
459 * expect this. We have no support for atomically upgrading
460 * a sem to write, so we need to check for ranges while sem
461 * is unlocked.
462 */
463 up_read(&mm->mmap_sem);
464 /* vma can change or disappear */
465 down_write(&mm->mmap_sem);
466 *prev = find_vma(mm, start);
467 /* non-NULL *prev must contain @start, but need to check @end */
468 if (!(*prev) || end > (*prev)->vm_end)
469 ret = -ENOMEM;
470 else if (ret > 0) {
471 mm->locked_vm -= ret;
472 ret = 0;
473 } else
474 ret = __mlock_posix_error_return(ret); /* translate if needed */
475 } else {
476 /*
477 * TODO: for unlocking, pages will already be resident, so
478 * we don't need to wait for allocations/reclaim/pagein, ...
479 * However, unlocking a very large region can still take a
480 * while. Should we downgrade the semaphore for both lock
481 * AND unlock ?
482 */
483 __mlock_vma_pages_range(vma, start, end, 0);
77 } 484 }
78 485
79 mm->locked_vm -= pages;
80out: 486out:
487 *prev = vma;
81 return ret; 488 return ret;
82} 489}
83 490
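The user-visible contract of mlock_fixup() can be checked directly: locking a sub-range splits the vma, and only the locked pages are charged to mm->locked_vm, which shows up as VmLck in /proc/<pid>/status. A minimal user-space sketch (ordinary C, not part of the patch; mlock() may fail under a tight RLIMIT_MEMLOCK):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void show_vmlck(const char *when)
{
    char line[256];
    FILE *f = fopen("/proc/self/status", "r");

    if (!f)
        return;
    while (fgets(line, sizeof(line), f))
        if (!strncmp(line, "VmLck:", 6))
            printf("%-14s %s", when, line);
    fclose(f);
}

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    size_t len = 16 * page;
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED)
        return 1;
    show_vmlck("before mlock:");
    /* lock 4 pages in the middle: mlock_fixup() splits the vma */
    if (mlock(p + 4 * page, 4 * page))
        perror("mlock");              /* RLIMIT_MEMLOCK too small? */
    show_vmlck("after mlock:");
    munlock(p + 4 * page, 4 * page);  /* clears VM_LOCKED again */
    show_vmlck("after munlock:");
    munmap(p, len);
    return 0;
}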
diff --git a/mm/mmap.c b/mm/mmap.c
index e7a5a68a9c2e..74f4d158022e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,7 +410,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
410 rb_insert_color(&vma->vm_rb, &mm->mm_rb); 410 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
411} 411}
412 412
413static inline void __vma_link_file(struct vm_area_struct *vma) 413static void __vma_link_file(struct vm_area_struct *vma)
414{ 414{
415 struct file * file; 415 struct file * file;
416 416
@@ -662,8 +662,6 @@ again: remove_next = 1 + (end > next->vm_end);
662 * If the vma has a ->close operation then the driver probably needs to release 662 * If the vma has a ->close operation then the driver probably needs to release
663 * per-vma resources, so we don't attempt to merge those. 663 * per-vma resources, so we don't attempt to merge those.
664 */ 664 */
665#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
666
667static inline int is_mergeable_vma(struct vm_area_struct *vma, 665static inline int is_mergeable_vma(struct vm_area_struct *vma,
668 struct file *file, unsigned long vm_flags) 666 struct file *file, unsigned long vm_flags)
669{ 667{
@@ -972,6 +970,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
972 return -EPERM; 970 return -EPERM;
973 vm_flags |= VM_LOCKED; 971 vm_flags |= VM_LOCKED;
974 } 972 }
973
975 /* mlock MCL_FUTURE? */ 974 /* mlock MCL_FUTURE? */
976 if (vm_flags & VM_LOCKED) { 975 if (vm_flags & VM_LOCKED) {
977 unsigned long locked, lock_limit; 976 unsigned long locked, lock_limit;
@@ -1139,10 +1138,12 @@ munmap_back:
1139 * The VM_SHARED test is necessary because shmem_zero_setup 1138 * The VM_SHARED test is necessary because shmem_zero_setup
1140 * will create the file object for a shared anonymous map below. 1139 * will create the file object for a shared anonymous map below.
1141 */ 1140 */
1142 if (!file && !(vm_flags & VM_SHARED) && 1141 if (!file && !(vm_flags & VM_SHARED)) {
1143 vma_merge(mm, prev, addr, addr + len, vm_flags, 1142 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1144 NULL, NULL, pgoff, NULL)) 1143 NULL, NULL, pgoff, NULL);
1145 goto out; 1144 if (vma)
1145 goto out;
1146 }
1146 1147
1147 /* 1148 /*
1148 * Determine the object being mapped and call the appropriate 1149 * Determine the object being mapped and call the appropriate
@@ -1224,10 +1225,14 @@ out:
1224 mm->total_vm += len >> PAGE_SHIFT; 1225 mm->total_vm += len >> PAGE_SHIFT;
1225 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); 1226 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1226 if (vm_flags & VM_LOCKED) { 1227 if (vm_flags & VM_LOCKED) {
1227 mm->locked_vm += len >> PAGE_SHIFT; 1228 /*
1228 make_pages_present(addr, addr + len); 1229 * makes pages present; downgrades, drops, reacquires mmap_sem
1229 } 1230 */
1230 if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) 1231 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
1232 if (nr_pages < 0)
1233 return nr_pages; /* vma gone! */
1234 mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
1235 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1231 make_pages_present(addr, addr + len); 1236 make_pages_present(addr, addr + len);
1232 return addr; 1237 return addr;
1233 1238
@@ -1586,7 +1591,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
1586 * vma is the last one with address > vma->vm_end. Have to extend vma. 1591 * vma is the last one with address > vma->vm_end. Have to extend vma.
1587 */ 1592 */
1588#ifndef CONFIG_IA64 1593#ifndef CONFIG_IA64
1589static inline 1594static
1590#endif 1595#endif
1591int expand_upwards(struct vm_area_struct *vma, unsigned long address) 1596int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1592{ 1597{
@@ -1636,7 +1641,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1636/* 1641/*
1637 * vma is the first one with address < vma->vm_start. Have to extend vma. 1642 * vma is the first one with address < vma->vm_start. Have to extend vma.
1638 */ 1643 */
1639static inline int expand_downwards(struct vm_area_struct *vma, 1644static int expand_downwards(struct vm_area_struct *vma,
1640 unsigned long address) 1645 unsigned long address)
1641{ 1646{
1642 int error; 1647 int error;
@@ -1698,10 +1703,12 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
1698 vma = find_vma_prev(mm, addr, &prev); 1703 vma = find_vma_prev(mm, addr, &prev);
1699 if (vma && (vma->vm_start <= addr)) 1704 if (vma && (vma->vm_start <= addr))
1700 return vma; 1705 return vma;
1701 if (!prev || expand_stack(prev, addr)) 1706 if (expand_stack(prev, addr))
1702 return NULL; 1707 return NULL;
1703 if (prev->vm_flags & VM_LOCKED) 1708 if (prev->vm_flags & VM_LOCKED) {
1704 make_pages_present(addr, prev->vm_end); 1709 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
1710 return NULL; /* vma gone! */
1711 }
1705 return prev; 1712 return prev;
1706} 1713}
1707#else 1714#else
@@ -1727,8 +1734,10 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
1727 start = vma->vm_start; 1734 start = vma->vm_start;
1728 if (expand_stack(vma, addr)) 1735 if (expand_stack(vma, addr))
1729 return NULL; 1736 return NULL;
1730 if (vma->vm_flags & VM_LOCKED) 1737 if (vma->vm_flags & VM_LOCKED) {
1731 make_pages_present(addr, start); 1738 if (mlock_vma_pages_range(vma, addr, start) < 0)
1739 return NULL; /* vma gone! */
1740 }
1732 return vma; 1741 return vma;
1733} 1742}
1734#endif 1743#endif
@@ -1747,8 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1747 long nrpages = vma_pages(vma); 1756 long nrpages = vma_pages(vma);
1748 1757
1749 mm->total_vm -= nrpages; 1758 mm->total_vm -= nrpages;
1750 if (vma->vm_flags & VM_LOCKED)
1751 mm->locked_vm -= nrpages;
1752 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); 1759 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1753 vma = remove_vma(vma); 1760 vma = remove_vma(vma);
1754 } while (vma); 1761 } while (vma);
@@ -1914,6 +1921,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1914 vma = prev? prev->vm_next: mm->mmap; 1921 vma = prev? prev->vm_next: mm->mmap;
1915 1922
1916 /* 1923 /*
1924 * unlock any mlock()ed ranges before detaching vmas
1925 */
1926 if (mm->locked_vm) {
1927 struct vm_area_struct *tmp = vma;
1928 while (tmp && tmp->vm_start < end) {
1929 if (tmp->vm_flags & VM_LOCKED) {
1930 mm->locked_vm -= vma_pages(tmp);
1931 munlock_vma_pages_all(tmp);
1932 }
1933 tmp = tmp->vm_next;
1934 }
1935 }
1936
1937 /*
1917 * Remove the vma's, and unmap the actual pages 1938 * Remove the vma's, and unmap the actual pages
1918 */ 1939 */
1919 detach_vmas_to_be_unmapped(mm, vma, prev, end); 1940 detach_vmas_to_be_unmapped(mm, vma, prev, end);
@@ -2025,8 +2046,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2025 return -ENOMEM; 2046 return -ENOMEM;
2026 2047
2027 /* Can we just expand an old private anonymous mapping? */ 2048 /* Can we just expand an old private anonymous mapping? */
2028 if (vma_merge(mm, prev, addr, addr + len, flags, 2049 vma = vma_merge(mm, prev, addr, addr + len, flags,
2029 NULL, NULL, pgoff, NULL)) 2050 NULL, NULL, pgoff, NULL);
2051 if (vma)
2030 goto out; 2052 goto out;
2031 2053
2032 /* 2054 /*
@@ -2048,8 +2070,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2048out: 2070out:
2049 mm->total_vm += len >> PAGE_SHIFT; 2071 mm->total_vm += len >> PAGE_SHIFT;
2050 if (flags & VM_LOCKED) { 2072 if (flags & VM_LOCKED) {
2051 mm->locked_vm += len >> PAGE_SHIFT; 2073 if (!mlock_vma_pages_range(vma, addr, addr + len))
2052 make_pages_present(addr, addr + len); 2074 mm->locked_vm += (len >> PAGE_SHIFT);
2053 } 2075 }
2054 return addr; 2076 return addr;
2055} 2077}
@@ -2060,7 +2082,7 @@ EXPORT_SYMBOL(do_brk);
2060void exit_mmap(struct mm_struct *mm) 2082void exit_mmap(struct mm_struct *mm)
2061{ 2083{
2062 struct mmu_gather *tlb; 2084 struct mmu_gather *tlb;
2063 struct vm_area_struct *vma = mm->mmap; 2085 struct vm_area_struct *vma;
2064 unsigned long nr_accounted = 0; 2086 unsigned long nr_accounted = 0;
2065 unsigned long end; 2087 unsigned long end;
2066 2088
@@ -2068,6 +2090,15 @@ void exit_mmap(struct mm_struct *mm)
2068 arch_exit_mmap(mm); 2090 arch_exit_mmap(mm);
2069 mmu_notifier_release(mm); 2091 mmu_notifier_release(mm);
2070 2092
2093 if (mm->locked_vm) {
2094 vma = mm->mmap;
2095 while (vma) {
2096 if (vma->vm_flags & VM_LOCKED)
2097 munlock_vma_pages_all(vma);
2098 vma = vma->vm_next;
2099 }
2100 }
2101 vma = mm->mmap;
2071 lru_add_drain(); 2102 lru_add_drain();
2072 flush_cache_mm(mm); 2103 flush_cache_mm(mm);
2073 tlb = tlb_gather_mmu(mm, 1); 2104 tlb = tlb_gather_mmu(mm, 1);
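For the mmap.c side, the user-visible effect: MAP_LOCKED mappings are now populated through mlock_vma_pages_range() before mmap() returns, and do_munmap() munlocks a range before detaching it. A short usage sketch (user-space C; the failure modes depend on RLIMIT_MEMLOCK and CAP_IPC_LOCK):

#include <stdio.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;
    size_t len = 1 << 20;       /* 1 MB */
    void *p;

    getrlimit(RLIMIT_MEMLOCK, &rl);
    printf("RLIMIT_MEMLOCK: %llu bytes\n",
           (unsigned long long)rl.rlim_cur);

    /* faulted in and mlocked via mlock_vma_pages_range() before return */
    p = mmap(NULL, len, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap(MAP_LOCKED)");  /* EAGAIN/EPERM if over the limit */
        return 1;
    }
    /* munmap() munlocks the range before detaching it (do_munmap above) */
    munmap(p, len);
    return 0;
}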
diff --git a/mm/mremap.c b/mm/mremap.c
index 1a7743923c8c..58a2908f42f5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -24,6 +24,8 @@
24#include <asm/cacheflush.h> 24#include <asm/cacheflush.h>
25#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
26 26
27#include "internal.h"
28
27static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) 29static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
28{ 30{
29 pgd_t *pgd; 31 pgd_t *pgd;
@@ -238,8 +240,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
238 if (vm_flags & VM_LOCKED) { 240 if (vm_flags & VM_LOCKED) {
239 mm->locked_vm += new_len >> PAGE_SHIFT; 241 mm->locked_vm += new_len >> PAGE_SHIFT;
240 if (new_len > old_len) 242 if (new_len > old_len)
241 make_pages_present(new_addr + old_len, 243 mlock_vma_pages_range(new_vma, new_addr + old_len,
242 new_addr + new_len); 244 new_addr + new_len);
243 } 245 }
244 246
245 return new_addr; 247 return new_addr;
@@ -379,7 +381,7 @@ unsigned long do_mremap(unsigned long addr,
379 vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages); 381 vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
380 if (vma->vm_flags & VM_LOCKED) { 382 if (vma->vm_flags & VM_LOCKED) {
381 mm->locked_vm += pages; 383 mm->locked_vm += pages;
382 make_pages_present(addr + old_len, 384 mlock_vma_pages_range(vma, addr + old_len,
383 addr + new_len); 385 addr + new_len);
384 } 386 }
385 ret = addr; 387 ret = addr;
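Since move_vma() and do_mremap() now call mlock_vma_pages_range() on the grown tail, an mlock()ed mapping stays fully resident and locked across mremap() with no further mlock() call. A usage sketch (user-space C, Linux-specific mremap()):

#define _GNU_SOURCE             /* for mremap() */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    size_t old_len = 4 * page, new_len = 8 * page;
    char *p = mmap(NULL, old_len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED || mlock(p, old_len)) {
        perror("mmap/mlock");
        return 1;
    }
    /*
     * The kernel mlocks the grown tail (old_len..new_len) itself,
     * so the whole new range is resident and locked on return.
     */
    p = mremap(p, old_len, new_len, MREMAP_MAYMOVE);
    if (p == MAP_FAILED) {
        perror("mremap");
        return 1;
    }
    p[new_len - 1] = 1;         /* touches an already-locked page */
    munmap(p, new_len);
    return 0;
}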
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fbe..2696b24f2bb3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@
34#include <asm/tlb.h> 34#include <asm/tlb.h>
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36 36
37#include "internal.h"
38
37void *high_memory; 39void *high_memory;
38struct page *mem_map; 40struct page *mem_map;
39unsigned long max_mapnr; 41unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
128 return PAGE_SIZE << compound_order(page); 130 return PAGE_SIZE << compound_order(page);
129} 131}
130 132
131/* 133int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
132 * get a list of pages in an address range belonging to the specified process 134 unsigned long start, int len, int flags,
133 * and indicate the VMA that covers each page 135 struct page **pages, struct vm_area_struct **vmas)
134 * - this is potentially dodgy as we may end up incrementing the page count of a
135 * slab page or a secondary page from a compound page
136 * - don't permit access to VMAs that don't support it, such as I/O mappings
137 */
138int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
139 unsigned long start, int len, int write, int force,
140 struct page **pages, struct vm_area_struct **vmas)
141{ 136{
142 struct vm_area_struct *vma; 137 struct vm_area_struct *vma;
143 unsigned long vm_flags; 138 unsigned long vm_flags;
144 int i; 139 int i;
140 int write = !!(flags & GUP_FLAGS_WRITE);
141 int force = !!(flags & GUP_FLAGS_FORCE);
142 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
145 143
146 /* calculate required read or write permissions. 144 /* calculate required read or write permissions.
147 * - if 'force' is set, we only require the "MAY" flags. 145 * - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
156 154
157 /* protect what we can, including chardevs */ 155 /* protect what we can, including chardevs */
158 if (vma->vm_flags & (VM_IO | VM_PFNMAP) || 156 if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
159 !(vm_flags & vma->vm_flags)) 157 (!ignore && !(vm_flags & vma->vm_flags)))
160 goto finish_or_fault; 158 goto finish_or_fault;
161 159
162 if (pages) { 160 if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
174finish_or_fault: 172finish_or_fault:
175 return i ? : -EFAULT; 173 return i ? : -EFAULT;
176} 174}
175
176
177/*
178 * get a list of pages in an address range belonging to the specified process
179 * and indicate the VMA that covers each page
180 * - this is potentially dodgy as we may end up incrementing the page count of a
181 * slab page or a secondary page from a compound page
182 * - don't permit access to VMAs that don't support it, such as I/O mappings
183 */
184int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
185 unsigned long start, int len, int write, int force,
186 struct page **pages, struct vm_area_struct **vmas)
187{
188 int flags = 0;
189
190 if (write)
191 flags |= GUP_FLAGS_WRITE;
192 if (force)
193 flags |= GUP_FLAGS_FORCE;
194
195 return __get_user_pages(tsk, mm,
196 start, len, flags,
197 pages, vmas);
198}
177EXPORT_SYMBOL(get_user_pages); 199EXPORT_SYMBOL(get_user_pages);
178 200
179DEFINE_RWLOCK(vmlist_lock); 201DEFINE_RWLOCK(vmlist_lock);
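The write/force booleans survive the round trip through the new flags word, since __get_user_pages() recovers them with !!(flags & BIT). A stand-alone check of that translation; the flag values here are assumptions, the real definitions live in mm/internal.h:

#include <assert.h>

#define GUP_FLAGS_WRITE 0x1     /* assumed values; see mm/internal.h */
#define GUP_FLAGS_FORCE 0x2

static int gup_flags(int write, int force)
{
    int flags = 0;

    if (write)
        flags |= GUP_FLAGS_WRITE;
    if (force)
        flags |= GUP_FLAGS_FORCE;
    return flags;
}

int main(void)
{
    int write, force;

    /* !!(flags & BIT) in __get_user_pages() recovers both booleans */
    for (write = 0; write <= 1; write++)
        for (force = 0; force <= 1; force++) {
            int flags = gup_flags(write, force);

            assert(!!(flags & GUP_FLAGS_WRITE) == write);
            assert(!!(flags & GUP_FLAGS_FORCE) == force);
        }
    return 0;
}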
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b40f6d5f8fe9..2970e35fd03f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
329 struct zone *z = 329 struct zone *z =
330 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; 330 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
331 331
332 x += zone_page_state(z, NR_FREE_PAGES) 332 x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
333 + zone_page_state(z, NR_INACTIVE)
334 + zone_page_state(z, NR_ACTIVE);
335 } 333 }
336 /* 334 /*
337 * Make sure that the number of highmem pages is never larger 335 * Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
355{ 353{
356 unsigned long x; 354 unsigned long x;
357 355
358 x = global_page_state(NR_FREE_PAGES) 356 x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
359 + global_page_state(NR_INACTIVE)
360 + global_page_state(NR_ACTIVE);
361 357
362 if (!vm_highmem_is_dirtyable) 358 if (!vm_highmem_is_dirtyable)
363 x -= highmem_dirtyable_memory(x); 359 x -= highmem_dirtyable_memory(x);
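global_lru_pages() and zone_lru_pages(), added elsewhere in this series, sum the split LRU counters, so determine_dirtyable_memory() still starts from the same page population as the old NR_ACTIVE + NR_INACTIVE sum. The arithmetic can be sketched from user space on a kernel with this series; the /proc/vmstat field names below assume the split counters are exported under their enum names, and only the four evictable lists are summed:

#include <stdio.h>
#include <string.h>

int main(void)
{
    static const char *lru[] = {
        "nr_active_anon", "nr_inactive_anon",
        "nr_active_file", "nr_inactive_file",
    };
    char name[64];
    unsigned long val, free_pages = 0, lru_pages = 0;
    int i;
    FILE *f = fopen("/proc/vmstat", "r");

    if (!f)
        return 1;
    while (fscanf(f, "%63s %lu", name, &val) == 2) {
        if (!strcmp(name, "nr_free_pages"))
            free_pages = val;
        for (i = 0; i < 4; i++)
            if (!strcmp(name, lru[i]))
                lru_pages += val;
    }
    fclose(f);
    /* the baseline determine_dirtyable_memory() starts from, in pages */
    printf("free + lru = %lu pages\n", free_pages + lru_pages);
    return 0;
}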
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9eb9eb928285..d0a240fbb8bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/fault-inject.h> 45#include <linux/fault-inject.h>
46#include <linux/page-isolation.h> 46#include <linux/page-isolation.h>
47#include <linux/memcontrol.h> 47#include <linux/page_cgroup.h>
48#include <linux/debugobjects.h> 48#include <linux/debugobjects.h>
49 49
50#include <asm/tlbflush.h> 50#include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
223 223
224static void bad_page(struct page *page) 224static void bad_page(struct page *page)
225{ 225{
226 void *pc = page_get_page_cgroup(page);
227
228 printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG 226 printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
229 "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", 227 "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
230 current->comm, page, (int)(2*sizeof(unsigned long)), 228 current->comm, page, (int)(2*sizeof(unsigned long)),
231 (unsigned long)page->flags, page->mapping, 229 (unsigned long)page->flags, page->mapping,
232 page_mapcount(page), page_count(page)); 230 page_mapcount(page), page_count(page));
233 if (pc) { 231
234 printk(KERN_EMERG "cgroup:%p\n", pc);
235 page_reset_bad_cgroup(page);
236 }
237 printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" 232 printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
238 KERN_EMERG "Backtrace:\n"); 233 KERN_EMERG "Backtrace:\n");
239 dump_stack(); 234 dump_stack();
@@ -454,14 +449,16 @@ static inline void __free_one_page(struct page *page,
454 449
455static inline int free_pages_check(struct page *page) 450static inline int free_pages_check(struct page *page)
456{ 451{
452 free_page_mlock(page);
457 if (unlikely(page_mapcount(page) | 453 if (unlikely(page_mapcount(page) |
458 (page->mapping != NULL) | 454 (page->mapping != NULL) |
459 (page_get_page_cgroup(page) != NULL) |
460 (page_count(page) != 0) | 455 (page_count(page) != 0) |
461 (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) 456 (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
462 bad_page(page); 457 bad_page(page);
463 if (PageDirty(page)) 458 if (PageDirty(page))
464 __ClearPageDirty(page); 459 __ClearPageDirty(page);
460 if (PageSwapBacked(page))
461 __ClearPageSwapBacked(page);
465 /* 462 /*
466 * For now, we report if PG_reserved was found set, but do not 463 * For now, we report if PG_reserved was found set, but do not
467 * clear it, and do not free the page. But we shall soon need 464 * clear it, and do not free the page. But we shall soon need
@@ -600,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
600{ 597{
601 if (unlikely(page_mapcount(page) | 598 if (unlikely(page_mapcount(page) |
602 (page->mapping != NULL) | 599 (page->mapping != NULL) |
603 (page_get_page_cgroup(page) != NULL) |
604 (page_count(page) != 0) | 600 (page_count(page) != 0) |
605 (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) 601 (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
606 bad_page(page); 602 bad_page(page);
@@ -614,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
614 610
615 page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim | 611 page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
616 1 << PG_referenced | 1 << PG_arch_1 | 612 1 << PG_referenced | 1 << PG_arch_1 |
617 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk); 613 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
614#ifdef CONFIG_UNEVICTABLE_LRU
615 | 1 << PG_mlocked
616#endif
617 );
618 set_page_private(page, 0); 618 set_page_private(page, 0);
619 set_page_refcounted(page); 619 set_page_refcounted(page);
620 620
@@ -1862,10 +1862,21 @@ void show_free_areas(void)
1862 } 1862 }
1863 } 1863 }
1864 1864
1865 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1865 printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
1866 " inactive_file:%lu"
1867//TODO: check/adjust line lengths
1868#ifdef CONFIG_UNEVICTABLE_LRU
1869 " unevictable:%lu"
1870#endif
1871 " dirty:%lu writeback:%lu unstable:%lu\n"
1866 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 1872 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
1867 global_page_state(NR_ACTIVE), 1873 global_page_state(NR_ACTIVE_ANON),
1868 global_page_state(NR_INACTIVE), 1874 global_page_state(NR_ACTIVE_FILE),
1875 global_page_state(NR_INACTIVE_ANON),
1876 global_page_state(NR_INACTIVE_FILE),
1877#ifdef CONFIG_UNEVICTABLE_LRU
1878 global_page_state(NR_UNEVICTABLE),
1879#endif
1869 global_page_state(NR_FILE_DIRTY), 1880 global_page_state(NR_FILE_DIRTY),
1870 global_page_state(NR_WRITEBACK), 1881 global_page_state(NR_WRITEBACK),
1871 global_page_state(NR_UNSTABLE_NFS), 1882 global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1899,13 @@ void show_free_areas(void)
1888 " min:%lukB" 1899 " min:%lukB"
1889 " low:%lukB" 1900 " low:%lukB"
1890 " high:%lukB" 1901 " high:%lukB"
1891 " active:%lukB" 1902 " active_anon:%lukB"
1892 " inactive:%lukB" 1903 " inactive_anon:%lukB"
1904 " active_file:%lukB"
1905 " inactive_file:%lukB"
1906#ifdef CONFIG_UNEVICTABLE_LRU
1907 " unevictable:%lukB"
1908#endif
1893 " present:%lukB" 1909 " present:%lukB"
1894 " pages_scanned:%lu" 1910 " pages_scanned:%lu"
1895 " all_unreclaimable? %s" 1911 " all_unreclaimable? %s"
@@ -1899,8 +1915,13 @@ void show_free_areas(void)
1899 K(zone->pages_min), 1915 K(zone->pages_min),
1900 K(zone->pages_low), 1916 K(zone->pages_low),
1901 K(zone->pages_high), 1917 K(zone->pages_high),
1902 K(zone_page_state(zone, NR_ACTIVE)), 1918 K(zone_page_state(zone, NR_ACTIVE_ANON)),
1903 K(zone_page_state(zone, NR_INACTIVE)), 1919 K(zone_page_state(zone, NR_INACTIVE_ANON)),
1920 K(zone_page_state(zone, NR_ACTIVE_FILE)),
1921 K(zone_page_state(zone, NR_INACTIVE_FILE)),
1922#ifdef CONFIG_UNEVICTABLE_LRU
1923 K(zone_page_state(zone, NR_UNEVICTABLE)),
1924#endif
1904 K(zone->present_pages), 1925 K(zone->present_pages),
1905 zone->pages_scanned, 1926 zone->pages_scanned,
1906 (zone_is_all_unreclaimable(zone) ? "yes" : "no") 1927 (zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3410,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3410 pgdat->nr_zones = 0; 3431 pgdat->nr_zones = 0;
3411 init_waitqueue_head(&pgdat->kswapd_wait); 3432 init_waitqueue_head(&pgdat->kswapd_wait);
3412 pgdat->kswapd_max_order = 0; 3433 pgdat->kswapd_max_order = 0;
3434 pgdat_page_cgroup_init(pgdat);
3413 3435
3414 for (j = 0; j < MAX_NR_ZONES; j++) { 3436 for (j = 0; j < MAX_NR_ZONES; j++) {
3415 struct zone *zone = pgdat->node_zones + j; 3437 struct zone *zone = pgdat->node_zones + j;
3416 unsigned long size, realsize, memmap_pages; 3438 unsigned long size, realsize, memmap_pages;
3439 enum lru_list l;
3417 3440
3418 size = zone_spanned_pages_in_node(nid, j, zones_size); 3441 size = zone_spanned_pages_in_node(nid, j, zones_size);
3419 realsize = size - zone_absent_pages_in_node(nid, j, 3442 realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3428 PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; 3451 PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
3429 if (realsize >= memmap_pages) { 3452 if (realsize >= memmap_pages) {
3430 realsize -= memmap_pages; 3453 realsize -= memmap_pages;
3431 mminit_dprintk(MMINIT_TRACE, "memmap_init", 3454 printk(KERN_DEBUG
3432 "%s zone: %lu pages used for memmap\n", 3455 " %s zone: %lu pages used for memmap\n",
3433 zone_names[j], memmap_pages); 3456 zone_names[j], memmap_pages);
3434 } else 3457 } else
3435 printk(KERN_WARNING 3458 printk(KERN_WARNING
@@ -3439,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3439 /* Account for reserved pages */ 3462 /* Account for reserved pages */
3440 if (j == 0 && realsize > dma_reserve) { 3463 if (j == 0 && realsize > dma_reserve) {
3441 realsize -= dma_reserve; 3464 realsize -= dma_reserve;
3442 mminit_dprintk(MMINIT_TRACE, "memmap_init", 3465 printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
3443 "%s zone: %lu pages reserved\n",
3444 zone_names[0], dma_reserve); 3466 zone_names[0], dma_reserve);
3445 } 3467 }
3446 3468
@@ -3465,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3465 zone->prev_priority = DEF_PRIORITY; 3487 zone->prev_priority = DEF_PRIORITY;
3466 3488
3467 zone_pcp_init(zone); 3489 zone_pcp_init(zone);
3468 INIT_LIST_HEAD(&zone->active_list); 3490 for_each_lru(l) {
3469 INIT_LIST_HEAD(&zone->inactive_list); 3491 INIT_LIST_HEAD(&zone->lru[l].list);
3470 zone->nr_scan_active = 0; 3492 zone->lru[l].nr_scan = 0;
3471 zone->nr_scan_inactive = 0; 3493 }
3494 zone->recent_rotated[0] = 0;
3495 zone->recent_rotated[1] = 0;
3496 zone->recent_scanned[0] = 0;
3497 zone->recent_scanned[1] = 0;
3472 zap_zone_vm_stats(zone); 3498 zap_zone_vm_stats(zone);
3473 zone->flags = 0; 3499 zone->flags = 0;
3474 if (!size) 3500 if (!size)
@@ -4210,7 +4236,7 @@ void setup_per_zone_pages_min(void)
4210 for_each_zone(zone) { 4236 for_each_zone(zone) {
4211 u64 tmp; 4237 u64 tmp;
4212 4238
4213 spin_lock_irqsave(&zone->lru_lock, flags); 4239 spin_lock_irqsave(&zone->lock, flags);
4214 tmp = (u64)pages_min * zone->present_pages; 4240 tmp = (u64)pages_min * zone->present_pages;
4215 do_div(tmp, lowmem_pages); 4241 do_div(tmp, lowmem_pages);
4216 if (is_highmem(zone)) { 4242 if (is_highmem(zone)) {
@@ -4242,13 +4268,53 @@ void setup_per_zone_pages_min(void)
4242 zone->pages_low = zone->pages_min + (tmp >> 2); 4268 zone->pages_low = zone->pages_min + (tmp >> 2);
4243 zone->pages_high = zone->pages_min + (tmp >> 1); 4269 zone->pages_high = zone->pages_min + (tmp >> 1);
4244 setup_zone_migrate_reserve(zone); 4270 setup_zone_migrate_reserve(zone);
4245 spin_unlock_irqrestore(&zone->lru_lock, flags); 4271 spin_unlock_irqrestore(&zone->lock, flags);
4246 } 4272 }
4247 4273
4248 /* update totalreserve_pages */ 4274 /* update totalreserve_pages */
4249 calculate_totalreserve_pages(); 4275 calculate_totalreserve_pages();
4250} 4276}
4251 4277
4278/**
4279 * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
4280 *
4281 * The inactive anon list should be small enough that the VM never has to
4282 * do too much work, but large enough that each inactive page has a chance
4283 * to be referenced again before it is swapped out.
4284 *
4285 * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
4286 * INACTIVE_ANON pages on this zone's LRU, maintained by the
4287 * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
4288 * the anonymous pages are kept on the inactive list.
4289 *
4290 * total target max
4291 * memory ratio inactive anon
4292 * -------------------------------------
4293 * 10MB 1 5MB
4294 * 100MB 1 50MB
4295 * 1GB 3 250MB
4296 * 10GB 10 0.9GB
4297 * 100GB 31 3GB
4298 * 1TB 101 10GB
4299 * 10TB 320 32GB
4300 */
4301void setup_per_zone_inactive_ratio(void)
4302{
4303 struct zone *zone;
4304
4305 for_each_zone(zone) {
4306 unsigned int gb, ratio;
4307
4308 /* Zone size in gigabytes */
4309 gb = zone->present_pages >> (30 - PAGE_SHIFT);
4310 ratio = int_sqrt(10 * gb);
4311 if (!ratio)
4312 ratio = 1;
4313
4314 zone->inactive_ratio = ratio;
4315 }
4316}
4317
4252/* 4318/*
4253 * Initialise min_free_kbytes. 4319 * Initialise min_free_kbytes.
4254 * 4320 *
@@ -4286,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
4286 min_free_kbytes = 65536; 4352 min_free_kbytes = 65536;
4287 setup_per_zone_pages_min(); 4353 setup_per_zone_pages_min();
4288 setup_per_zone_lowmem_reserve(); 4354 setup_per_zone_lowmem_reserve();
4355 setup_per_zone_inactive_ratio();
4289 return 0; 4356 return 0;
4290} 4357}
4291module_init(init_per_zone_pages_min) 4358module_init(init_per_zone_pages_min)
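The table in the setup_per_zone_inactive_ratio() comment follows directly from ratio = int_sqrt(10 * gb), where gb = present_pages >> (30 - PAGE_SHIFT) truncates sub-gigabyte zones to 0 so the ratio clamps to 1. A stand-alone check reproducing the last five rows (int_sqrt() re-implemented naively here):

#include <stdio.h>

/* naive integer square root; the kernel's int_sqrt() agrees on these */
static unsigned long int_sqrt(unsigned long x)
{
    unsigned long r = 0;

    while ((r + 1) * (r + 1) <= x)
        r++;
    return r;
}

int main(void)
{
    /* zone sizes in whole gigabytes: 1GB, 10GB, 100GB, 1TB, 10TB */
    static const unsigned long gbs[] = { 1, 10, 100, 1024, 10240 };
    int i;

    for (i = 0; i < 5; i++) {
        unsigned long ratio = int_sqrt(10 * gbs[i]);

        if (!ratio)
            ratio = 1;      /* sub-GB zones end up here, gb == 0 */
        printf("%6luGB -> inactive_ratio %lu\n", gbs[i], ratio);
    }
    return 0;               /* prints 3, 10, 31, 101, 320 */
}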
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
new file mode 100644
index 000000000000..5d86550701f2
--- /dev/null
+++ b/mm/page_cgroup.c
@@ -0,0 +1,237 @@
1#include <linux/mm.h>
2#include <linux/mmzone.h>
3#include <linux/bootmem.h>
4#include <linux/bit_spinlock.h>
5#include <linux/page_cgroup.h>
6#include <linux/hash.h>
7#include <linux/memory.h>
8
9static void __meminit
10__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
11{
12 pc->flags = 0;
13 pc->mem_cgroup = NULL;
14 pc->page = pfn_to_page(pfn);
15}
16static unsigned long total_usage;
17
18#if !defined(CONFIG_SPARSEMEM)
19
20
21void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
22{
23 pgdat->node_page_cgroup = NULL;
24}
25
26struct page_cgroup *lookup_page_cgroup(struct page *page)
27{
28 unsigned long pfn = page_to_pfn(page);
29 unsigned long offset;
30 struct page_cgroup *base;
31
32 base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
33 if (unlikely(!base))
34 return NULL;
35
36 offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
37 return base + offset;
38}
39
40static int __init alloc_node_page_cgroup(int nid)
41{
42 struct page_cgroup *base, *pc;
43 unsigned long table_size;
44 unsigned long start_pfn, nr_pages, index;
45
46 start_pfn = NODE_DATA(nid)->node_start_pfn;
47 nr_pages = NODE_DATA(nid)->node_spanned_pages;
48
49 table_size = sizeof(struct page_cgroup) * nr_pages;
50
51 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
52 table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
53 if (!base)
54 return -ENOMEM;
55 for (index = 0; index < nr_pages; index++) {
56 pc = base + index;
57 __init_page_cgroup(pc, start_pfn + index);
58 }
59 NODE_DATA(nid)->node_page_cgroup = base;
60 total_usage += table_size;
61 return 0;
62}
63
64void __init page_cgroup_init(void)
65{
66
67 int nid, fail;
68
69 for_each_online_node(nid) {
70 fail = alloc_node_page_cgroup(nid);
71 if (fail)
72 goto fail;
73 }
74 printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
75 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
76 " don't want memory cgroups\n");
77 return;
78fail:
79 printk(KERN_CRIT "allocation of page_cgroup failed.\n");
80 printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
81 panic("Out of memory");
82}
83
84#else /* CONFIG_SPARSEMEM */
85
86struct page_cgroup *lookup_page_cgroup(struct page *page)
87{
88 unsigned long pfn = page_to_pfn(page);
89 struct mem_section *section = __pfn_to_section(pfn);
90
91 return section->page_cgroup + pfn;
92}
93
94int __meminit init_section_page_cgroup(unsigned long pfn)
95{
96 struct mem_section *section;
97 struct page_cgroup *base, *pc;
98 unsigned long table_size;
99 int nid, index;
100
101 section = __pfn_to_section(pfn);
102
103 if (section->page_cgroup)
104 return 0;
105
106 nid = page_to_nid(pfn_to_page(pfn));
107
108 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
109 base = kmalloc_node(table_size, GFP_KERNEL, nid);
110 if (!base)
111 base = vmalloc_node(table_size, nid);
112
113 if (!base) {
114 printk(KERN_ERR "page cgroup allocation failure\n");
115 return -ENOMEM;
116 }
117
118 for (index = 0; index < PAGES_PER_SECTION; index++) {
119 pc = base + index;
120 __init_page_cgroup(pc, pfn + index);
121 }
122
123 section = __pfn_to_section(pfn);
124 section->page_cgroup = base - pfn;
125 total_usage += table_size;
126 return 0;
127}
128#ifdef CONFIG_MEMORY_HOTPLUG
129void __free_page_cgroup(unsigned long pfn)
130{
131 struct mem_section *ms;
132 struct page_cgroup *base;
133
134 ms = __pfn_to_section(pfn);
135 if (!ms || !ms->page_cgroup)
136 return;
137 base = ms->page_cgroup + pfn;
138 ms->page_cgroup = NULL;
139 if (is_vmalloc_addr(base))
140 vfree(base);
141 else
142 kfree(base);
143}
144
145int online_page_cgroup(unsigned long start_pfn,
146 unsigned long nr_pages,
147 int nid)
148{
149 unsigned long start, end, pfn;
150 int fail = 0;
151
152 start = start_pfn & ~(PAGES_PER_SECTION - 1);
153 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
154
155 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
156 if (!pfn_present(pfn))
157 continue;
158 fail = init_section_page_cgroup(pfn);
159 }
160 if (!fail)
161 return 0;
162
163 /* rollback */
164 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
165 __free_page_cgroup(pfn);
166
167 return -ENOMEM;
168}
169
170int offline_page_cgroup(unsigned long start_pfn,
171 unsigned long nr_pages, int nid)
172{
173 unsigned long start, end, pfn;
174
175 start = start_pfn & ~(PAGES_PER_SECTION - 1);
176 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
177
178 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
179 __free_page_cgroup(pfn);
180 return 0;
181
182}
183
184static int page_cgroup_callback(struct notifier_block *self,
185 unsigned long action, void *arg)
186{
187 struct memory_notify *mn = arg;
188 int ret = 0;
189 switch (action) {
190 case MEM_GOING_ONLINE:
191 ret = online_page_cgroup(mn->start_pfn,
192 mn->nr_pages, mn->status_change_nid);
193 break;
194 case MEM_CANCEL_ONLINE:
195 case MEM_OFFLINE:
196 offline_page_cgroup(mn->start_pfn,
197 mn->nr_pages, mn->status_change_nid);
198 break;
199 case MEM_GOING_OFFLINE:
200 break;
201 case MEM_ONLINE:
202 case MEM_CANCEL_OFFLINE:
203 break;
204 }
205 ret = notifier_from_errno(ret);
206 return ret;
207}
208
209#endif
210
211void __init page_cgroup_init(void)
212{
213 unsigned long pfn;
214 int fail = 0;
215
216 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
217 if (!pfn_present(pfn))
218 continue;
219 fail = init_section_page_cgroup(pfn);
220 }
221 if (fail) {
222 printk(KERN_CRIT "try cgroup_disable=memory boot option\n");
223 panic("Out of memory");
224 } else {
225 hotplug_memory_notifier(page_cgroup_callback, 0);
226 }
227 printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
228 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
229 " don't want memory cgroups\n");
230}
231
232void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
233{
234 return;
235}
236
237#endif
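Worth noting in init_section_page_cgroup(): the stored pointer is biased, section->page_cgroup = base - pfn, so lookup_page_cgroup() can index with the raw pfn instead of subtracting the section's start pfn on every lookup. A stand-alone model of the trick (the PAGES_PER_SECTION value is an assumption; the out-of-range intermediate pointer is undefined in ISO C but fine on the kernel's flat address space):

#include <assert.h>
#include <stdlib.h>

struct page_cgroup { unsigned long pfn; };

#define PAGES_PER_SECTION 4096UL        /* assumed; arch-dependent */

int main(void)
{
    unsigned long start_pfn = 7 * PAGES_PER_SECTION;
    unsigned long pfn;
    struct page_cgroup *base, *section_page_cgroup;

    base = calloc(PAGES_PER_SECTION, sizeof(*base));
    if (!base)
        return 1;
    for (pfn = 0; pfn < PAGES_PER_SECTION; pfn++)
        base[pfn].pfn = start_pfn + pfn;

    /*
     * Bias the stored pointer by the section's first pfn, as
     * init_section_page_cgroup() does with "base - pfn".
     */
    section_page_cgroup = base - start_pfn;

    /* lookup_page_cgroup() is then just "+ pfn", no subtraction */
    for (pfn = start_pfn; pfn < start_pfn + PAGES_PER_SECTION; pfn++)
        assert(section_page_cgroup[pfn].pfn == pfn);

    free(base);
    return 0;
}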
diff --git a/mm/readahead.c b/mm/readahead.c
index 6cbd9a72fde2..bec83c15a78f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
229 */ 229 */
230unsigned long max_sane_readahead(unsigned long nr) 230unsigned long max_sane_readahead(unsigned long nr)
231{ 231{
232 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) 232 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
233 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); 233 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
234} 234}
235 235
diff --git a/mm/rmap.c b/mm/rmap.c
index 0383acfcb068..10993942d6c9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,9 +53,47 @@
53 53
54#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
55 55
56struct kmem_cache *anon_vma_cachep; 56#include "internal.h"
57 57
58/* This must be called under the mmap_sem. */ 58static struct kmem_cache *anon_vma_cachep;
59
60static inline struct anon_vma *anon_vma_alloc(void)
61{
62 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
63}
64
65static inline void anon_vma_free(struct anon_vma *anon_vma)
66{
67 kmem_cache_free(anon_vma_cachep, anon_vma);
68}
69
70/**
71 * anon_vma_prepare - attach an anon_vma to a memory region
72 * @vma: the memory region in question
73 *
74 * This makes sure the memory mapping described by 'vma' has
75 * an 'anon_vma' attached to it, so that we can associate the
76 * anonymous pages mapped into it with that anon_vma.
77 *
78 * The common case will be that we already have one, but if
79 * not we either need to find an adjacent mapping that we
80 * can re-use the anon_vma from (very common when the only
81 * reason for splitting a vma has been mprotect()), or we
82 * allocate a new one.
83 *
84 * Anon-vma allocations are very subtle, because we may have
85 * optimistically looked up an anon_vma in page_lock_anon_vma()
86 * and that may actually touch the spinlock even in the newly
87 * allocated vma (it depends on RCU to make sure that the
88 * anon_vma isn't actually destroyed).
89 *
90 * As a result, we need to do proper anon_vma locking even
91 * for the new allocation. At the same time, we do not want
92 * to do any locking for the common case of already having
93 * an anon_vma.
94 *
95 * This must be called with the mmap_sem held for reading.
96 */
59int anon_vma_prepare(struct vm_area_struct *vma) 97int anon_vma_prepare(struct vm_area_struct *vma)
60{ 98{
61 struct anon_vma *anon_vma = vma->anon_vma; 99 struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
63 might_sleep(); 101 might_sleep();
64 if (unlikely(!anon_vma)) { 102 if (unlikely(!anon_vma)) {
65 struct mm_struct *mm = vma->vm_mm; 103 struct mm_struct *mm = vma->vm_mm;
66 struct anon_vma *allocated, *locked; 104 struct anon_vma *allocated;
67 105
68 anon_vma = find_mergeable_anon_vma(vma); 106 anon_vma = find_mergeable_anon_vma(vma);
69 if (anon_vma) { 107 allocated = NULL;
70 allocated = NULL; 108 if (!anon_vma) {
71 locked = anon_vma;
72 spin_lock(&locked->lock);
73 } else {
74 anon_vma = anon_vma_alloc(); 109 anon_vma = anon_vma_alloc();
75 if (unlikely(!anon_vma)) 110 if (unlikely(!anon_vma))
76 return -ENOMEM; 111 return -ENOMEM;
77 allocated = anon_vma; 112 allocated = anon_vma;
78 locked = NULL;
79 } 113 }
114 spin_lock(&anon_vma->lock);
80 115
81 /* page_table_lock to protect against threads */ 116 /* page_table_lock to protect against threads */
82 spin_lock(&mm->page_table_lock); 117 spin_lock(&mm->page_table_lock);
@@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
87 } 122 }
88 spin_unlock(&mm->page_table_lock); 123 spin_unlock(&mm->page_table_lock);
89 124
90 if (locked) 125 spin_unlock(&anon_vma->lock);
91 spin_unlock(&locked->lock);
92 if (unlikely(allocated)) 126 if (unlikely(allocated))
93 anon_vma_free(allocated); 127 anon_vma_free(allocated);
94 } 128 }
@@ -157,7 +191,7 @@ void __init anon_vma_init(void)
157 * Getting a lock on a stable anon_vma from a page off the LRU is 191 * Getting a lock on a stable anon_vma from a page off the LRU is
158 * tricky: page_lock_anon_vma rely on RCU to guard against the races. 192 * tricky: page_lock_anon_vma rely on RCU to guard against the races.
159 */ 193 */
160static struct anon_vma *page_lock_anon_vma(struct page *page) 194struct anon_vma *page_lock_anon_vma(struct page *page)
161{ 195{
162 struct anon_vma *anon_vma; 196 struct anon_vma *anon_vma;
163 unsigned long anon_mapping; 197 unsigned long anon_mapping;
@@ -177,7 +211,7 @@ out:
177 return NULL; 211 return NULL;
178} 212}
179 213
180static void page_unlock_anon_vma(struct anon_vma *anon_vma) 214void page_unlock_anon_vma(struct anon_vma *anon_vma)
181{ 215{
182 spin_unlock(&anon_vma->lock); 216 spin_unlock(&anon_vma->lock);
183 rcu_read_unlock(); 217 rcu_read_unlock();
@@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
268 return NULL; 302 return NULL;
269} 303}
270 304
305/**
306 * page_mapped_in_vma - check whether a page is really mapped in a VMA
307 * @page: the page to test
308 * @vma: the VMA to test
309 *
310 * Returns 1 if the page is mapped into the page tables of the VMA, 0
311 * if the page is not mapped into the page tables of this VMA. Only
312 * valid for normal file or anonymous VMAs.
313 */
314static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
315{
316 unsigned long address;
317 pte_t *pte;
318 spinlock_t *ptl;
319
320 address = vma_address(page, vma);
321 if (address == -EFAULT) /* out of vma range */
322 return 0;
323 pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
324 if (!pte) /* the page is not in this mm */
325 return 0;
326 pte_unmap_unlock(pte, ptl);
327
328 return 1;
329}
330
271/* 331/*
272 * Subfunctions of page_referenced: page_referenced_one called 332 * Subfunctions of page_referenced: page_referenced_one called
273 * repeatedly from either page_referenced_anon or page_referenced_file. 333 * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page,
289 if (!pte) 349 if (!pte)
290 goto out; 350 goto out;
291 351
352 /*
353 * Don't want to elevate referenced for mlocked page that gets this far,
354 * in order that it progresses to try_to_unmap and is moved to the
355 * unevictable list.
356 */
292 if (vma->vm_flags & VM_LOCKED) { 357 if (vma->vm_flags & VM_LOCKED) {
293 referenced++;
294 *mapcount = 1; /* break early from loop */ 358 *mapcount = 1; /* break early from loop */
295 } else if (ptep_clear_flush_young_notify(vma, address, pte)) 359 goto out_unmap;
360 }
361
362 if (ptep_clear_flush_young_notify(vma, address, pte))
296 referenced++; 363 referenced++;
297 364
298 /* Pretend the page is referenced if the task has the 365 /* Pretend the page is referenced if the task has the
@@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page,
301 rwsem_is_locked(&mm->mmap_sem)) 368 rwsem_is_locked(&mm->mmap_sem))
302 referenced++; 369 referenced++;
303 370
371out_unmap:
304 (*mapcount)--; 372 (*mapcount)--;
305 pte_unmap_unlock(pte, ptl); 373 pte_unmap_unlock(pte, ptl);
306out: 374out:
@@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page,
390 */ 458 */
391 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 459 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
392 continue; 460 continue;
393 if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
394 == (VM_LOCKED|VM_MAYSHARE)) {
395 referenced++;
396 break;
397 }
398 referenced += page_referenced_one(page, vma, &mapcount); 461 referenced += page_referenced_one(page, vma, &mapcount);
399 if (!mapcount) 462 if (!mapcount)
400 break; 463 break;
@@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
674 page_clear_dirty(page); 737 page_clear_dirty(page);
675 set_page_dirty(page); 738 set_page_dirty(page);
676 } 739 }
677 740 if (PageAnon(page))
678 mem_cgroup_uncharge_page(page); 741 mem_cgroup_uncharge_page(page);
679 __dec_zone_page_state(page, 742 __dec_zone_page_state(page,
680 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); 743 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
681 /* 744 /*
@@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
717 * If it's recently referenced (perhaps page_referenced 780 * If it's recently referenced (perhaps page_referenced
718 * skipped over this mm) then we should reactivate it. 781 * skipped over this mm) then we should reactivate it.
719 */ 782 */
720 if (!migration && ((vma->vm_flags & VM_LOCKED) || 783 if (!migration) {
721 (ptep_clear_flush_young_notify(vma, address, pte)))) { 784 if (vma->vm_flags & VM_LOCKED) {
722 ret = SWAP_FAIL; 785 ret = SWAP_MLOCK;
723 goto out_unmap; 786 goto out_unmap;
724 } 787 }
788 if (ptep_clear_flush_young_notify(vma, address, pte)) {
789 ret = SWAP_FAIL;
790 goto out_unmap;
791 }
792 }
725 793
726 /* Nuke the page table entry. */ 794 /* Nuke the page table entry. */
727 flush_cache_page(vma, address, page_to_pfn(page)); 795 flush_cache_page(vma, address, page_to_pfn(page));
@@ -802,12 +870,17 @@ out:
802 * For very sparsely populated VMAs this is a little inefficient - chances are 870 * For very sparsely populated VMAs this is a little inefficient - chances are
803 * there won't be many ptes located within the scan cluster. In this case 871 * there won't be many ptes located within the scan cluster. In this case
804 * maybe we could scan further - to the end of the pte page, perhaps. 872 * maybe we could scan further - to the end of the pte page, perhaps.
873 *
874 * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
875 * acquire it without blocking. If vma locked, mlock the pages in the cluster,
876 * rather than unmapping them. If we encounter the "check_page" that vmscan is
877 * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
805 */ 878 */
806#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) 879#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
807#define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) 880#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
808 881
809static void try_to_unmap_cluster(unsigned long cursor, 882static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
810 unsigned int *mapcount, struct vm_area_struct *vma) 883 struct vm_area_struct *vma, struct page *check_page)
811{ 884{
812 struct mm_struct *mm = vma->vm_mm; 885 struct mm_struct *mm = vma->vm_mm;
813 pgd_t *pgd; 886 pgd_t *pgd;
@@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
819 struct page *page; 892 struct page *page;
820 unsigned long address; 893 unsigned long address;
821 unsigned long end; 894 unsigned long end;
895 int ret = SWAP_AGAIN;
896 int locked_vma = 0;
822 897
823 address = (vma->vm_start + cursor) & CLUSTER_MASK; 898 address = (vma->vm_start + cursor) & CLUSTER_MASK;
824 end = address + CLUSTER_SIZE; 899 end = address + CLUSTER_SIZE;
@@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
829 904
830 pgd = pgd_offset(mm, address); 905 pgd = pgd_offset(mm, address);
831 if (!pgd_present(*pgd)) 906 if (!pgd_present(*pgd))
832 return; 907 return ret;
833 908
834 pud = pud_offset(pgd, address); 909 pud = pud_offset(pgd, address);
835 if (!pud_present(*pud)) 910 if (!pud_present(*pud))
836 return; 911 return ret;
837 912
838 pmd = pmd_offset(pud, address); 913 pmd = pmd_offset(pud, address);
839 if (!pmd_present(*pmd)) 914 if (!pmd_present(*pmd))
840 return; 915 return ret;
916
917 /*
918 * MLOCK_PAGES => feature is configured.
919 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
920 * keep the sem while scanning the cluster for mlocking pages.
921 */
922 if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
923 locked_vma = (vma->vm_flags & VM_LOCKED);
924 if (!locked_vma)
925 up_read(&vma->vm_mm->mmap_sem); /* don't need it */
926 }
841 927
842 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 928 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
843 929
@@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
850 page = vm_normal_page(vma, address, *pte); 936 page = vm_normal_page(vma, address, *pte);
851 BUG_ON(!page || PageAnon(page)); 937 BUG_ON(!page || PageAnon(page));
852 938
939 if (locked_vma) {
940 mlock_vma_page(page); /* no-op if already mlocked */
941 if (page == check_page)
942 ret = SWAP_MLOCK;
943 continue; /* don't unmap */
944 }
945
853 if (ptep_clear_flush_young_notify(vma, address, pte)) 946 if (ptep_clear_flush_young_notify(vma, address, pte))
854 continue; 947 continue;
855 948
@@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
871 (*mapcount)--; 964 (*mapcount)--;
872 } 965 }
873 pte_unmap_unlock(pte - 1, ptl); 966 pte_unmap_unlock(pte - 1, ptl);
967 if (locked_vma)
968 up_read(&vma->vm_mm->mmap_sem);
969 return ret;
874} 970}
875 971
876static int try_to_unmap_anon(struct page *page, int migration) 972/*
973 * common handling for pages mapped in VM_LOCKED vmas
974 */
975static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
976{
977 int mlocked = 0;
978
979 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
980 if (vma->vm_flags & VM_LOCKED) {
981 mlock_vma_page(page);
982 mlocked++; /* really mlocked the page */
983 }
984 up_read(&vma->vm_mm->mmap_sem);
985 }
986 return mlocked;
987}
988
989/**
990 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
991 * rmap method
992 * @page: the page to unmap/unlock
993 * @unlock: request for unlock rather than unmap [unlikely]
994 * @migration: unmapping for migration - ignored if @unlock
995 *
996 * Find all the mappings of a page using the mapping pointer and the vma chains
997 * contained in the anon_vma struct it points to.
998 *
999 * This function is only called from try_to_unmap/try_to_munlock for
1000 * anonymous pages.
1001 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1002 * where the page was found will be held for write. So, we won't recheck
1003 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1004 * VM_LOCKED.
1005 */
1006static int try_to_unmap_anon(struct page *page, int unlock, int migration)
877{ 1007{
878 struct anon_vma *anon_vma; 1008 struct anon_vma *anon_vma;
879 struct vm_area_struct *vma; 1009 struct vm_area_struct *vma;
1010 unsigned int mlocked = 0;
880 int ret = SWAP_AGAIN; 1011 int ret = SWAP_AGAIN;
881 1012
1013 if (MLOCK_PAGES && unlikely(unlock))
1014 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
1015
882 anon_vma = page_lock_anon_vma(page); 1016 anon_vma = page_lock_anon_vma(page);
883 if (!anon_vma) 1017 if (!anon_vma)
884 return ret; 1018 return ret;
885 1019
886 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 1020 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
887 ret = try_to_unmap_one(page, vma, migration); 1021 if (MLOCK_PAGES && unlikely(unlock)) {
888 if (ret == SWAP_FAIL || !page_mapped(page)) 1022 if (!((vma->vm_flags & VM_LOCKED) &&
889 break; 1023 page_mapped_in_vma(page, vma)))
1024 continue; /* must visit all unlocked vmas */
1025 ret = SWAP_MLOCK; /* saw at least one mlocked vma */
1026 } else {
1027 ret = try_to_unmap_one(page, vma, migration);
1028 if (ret == SWAP_FAIL || !page_mapped(page))
1029 break;
1030 }
1031 if (ret == SWAP_MLOCK) {
1032 mlocked = try_to_mlock_page(page, vma);
1033 if (mlocked)
1034 break; /* stop if actually mlocked page */
1035 }
890 } 1036 }
891 1037
892 page_unlock_anon_vma(anon_vma); 1038 page_unlock_anon_vma(anon_vma);
1039
1040 if (mlocked)
1041 ret = SWAP_MLOCK; /* actually mlocked the page */
1042 else if (ret == SWAP_MLOCK)
1043 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
1044
893 return ret; 1045 return ret;
894} 1046}
895 1047
896/** 1048/**
897 * try_to_unmap_file - unmap file page using the object-based rmap method 1049 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
898 * @page: the page to unmap 1050 * @page: the page to unmap/unlock
899 * @migration: migration flag 1051 * @unlock: request for unlock rather than unmap [unlikely]
1052 * @migration: unmapping for migration - ignored if @unlock
900 * 1053 *
901 * Find all the mappings of a page using the mapping pointer and the vma chains 1054 * Find all the mappings of a page using the mapping pointer and the vma chains
902 * contained in the address_space struct it points to. 1055 * contained in the address_space struct it points to.
903 * 1056 *
904 * This function is only called from try_to_unmap for object-based pages. 1057 * This function is only called from try_to_unmap/try_to_munlock for
1058 * object-based pages.
1059 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1060 * where the page was found will be held for write. So, we won't recheck
1061 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1062 * VM_LOCKED.
905 */ 1063 */
906static int try_to_unmap_file(struct page *page, int migration) 1064static int try_to_unmap_file(struct page *page, int unlock, int migration)
907{ 1065{
908 struct address_space *mapping = page->mapping; 1066 struct address_space *mapping = page->mapping;
909 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1067 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration)
914 unsigned long max_nl_cursor = 0; 1072 unsigned long max_nl_cursor = 0;
915 unsigned long max_nl_size = 0; 1073 unsigned long max_nl_size = 0;
916 unsigned int mapcount; 1074 unsigned int mapcount;
1075 unsigned int mlocked = 0;
1076
1077 if (MLOCK_PAGES && unlikely(unlock))
1078 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
917 1079
918 spin_lock(&mapping->i_mmap_lock); 1080 spin_lock(&mapping->i_mmap_lock);
919 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1081 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
920 ret = try_to_unmap_one(page, vma, migration); 1082 if (MLOCK_PAGES && unlikely(unlock)) {
921 if (ret == SWAP_FAIL || !page_mapped(page)) 1083 if (!(vma->vm_flags & VM_LOCKED))
922 goto out; 1084 continue; /* must visit all vmas */
1085 ret = SWAP_MLOCK;
1086 } else {
1087 ret = try_to_unmap_one(page, vma, migration);
1088 if (ret == SWAP_FAIL || !page_mapped(page))
1089 goto out;
1090 }
1091 if (ret == SWAP_MLOCK) {
1092 mlocked = try_to_mlock_page(page, vma);
1093 if (mlocked)
1094 break; /* stop if actually mlocked page */
1095 }
923 } 1096 }
924 1097
1098 if (mlocked)
1099 goto out;
1100
925 if (list_empty(&mapping->i_mmap_nonlinear)) 1101 if (list_empty(&mapping->i_mmap_nonlinear))
926 goto out; 1102 goto out;
927 1103
928 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1104 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
929 shared.vm_set.list) { 1105 shared.vm_set.list) {
930 if ((vma->vm_flags & VM_LOCKED) && !migration) 1106 if (MLOCK_PAGES && unlikely(unlock)) {
1107 if (!(vma->vm_flags & VM_LOCKED))
1108 continue; /* must visit all vmas */
1109 ret = SWAP_MLOCK; /* leave mlocked == 0 */
1110 goto out; /* no need to look further */
1111 }
1112 if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
931 continue; 1113 continue;
932 cursor = (unsigned long) vma->vm_private_data; 1114 cursor = (unsigned long) vma->vm_private_data;
933 if (cursor > max_nl_cursor) 1115 if (cursor > max_nl_cursor)
@@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration)
937 max_nl_size = cursor; 1119 max_nl_size = cursor;
938 } 1120 }
939 1121
940 if (max_nl_size == 0) { /* any nonlinears locked or reserved */ 1122 if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
941 ret = SWAP_FAIL; 1123 ret = SWAP_FAIL;
942 goto out; 1124 goto out;
943 } 1125 }
@@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration)
961 do { 1143 do {
962 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1144 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
963 shared.vm_set.list) { 1145 shared.vm_set.list) {
964 if ((vma->vm_flags & VM_LOCKED) && !migration) 1146 if (!MLOCK_PAGES && !migration &&
1147 (vma->vm_flags & VM_LOCKED))
965 continue; 1148 continue;
966 cursor = (unsigned long) vma->vm_private_data; 1149 cursor = (unsigned long) vma->vm_private_data;
967 while ( cursor < max_nl_cursor && 1150 while ( cursor < max_nl_cursor &&
968 cursor < vma->vm_end - vma->vm_start) { 1151 cursor < vma->vm_end - vma->vm_start) {
969 try_to_unmap_cluster(cursor, &mapcount, vma); 1152 ret = try_to_unmap_cluster(cursor, &mapcount,
1153 vma, page);
1154 if (ret == SWAP_MLOCK)
1155 mlocked = 2; /* to return below */
970 cursor += CLUSTER_SIZE; 1156 cursor += CLUSTER_SIZE;
971 vma->vm_private_data = (void *) cursor; 1157 vma->vm_private_data = (void *) cursor;
972 if ((int)mapcount <= 0) 1158 if ((int)mapcount <= 0)
@@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration)
987 vma->vm_private_data = NULL; 1173 vma->vm_private_data = NULL;
988out: 1174out:
989 spin_unlock(&mapping->i_mmap_lock); 1175 spin_unlock(&mapping->i_mmap_lock);
1176 if (mlocked)
1177 ret = SWAP_MLOCK; /* actually mlocked the page */
1178 else if (ret == SWAP_MLOCK)
1179 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
990 return ret; 1180 return ret;
991} 1181}
992 1182
@@ -1002,6 +1192,7 @@ out:
1002 * SWAP_SUCCESS - we succeeded in removing all mappings 1192 * SWAP_SUCCESS - we succeeded in removing all mappings
1003 * SWAP_AGAIN - we missed a mapping, try again later 1193 * SWAP_AGAIN - we missed a mapping, try again later
1004 * SWAP_FAIL - the page is unswappable 1194 * SWAP_FAIL - the page is unswappable
1195 * SWAP_MLOCK - page is mlocked.
1005 */ 1196 */
1006int try_to_unmap(struct page *page, int migration) 1197int try_to_unmap(struct page *page, int migration)
1007{ 1198{
@@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration)
1010 BUG_ON(!PageLocked(page)); 1201 BUG_ON(!PageLocked(page));
1011 1202
1012 if (PageAnon(page)) 1203 if (PageAnon(page))
1013 ret = try_to_unmap_anon(page, migration); 1204 ret = try_to_unmap_anon(page, 0, migration);
1014 else 1205 else
1015 ret = try_to_unmap_file(page, migration); 1206 ret = try_to_unmap_file(page, 0, migration);
1016 1207 if (ret != SWAP_MLOCK && !page_mapped(page))
1017 if (!page_mapped(page))
1018 ret = SWAP_SUCCESS; 1208 ret = SWAP_SUCCESS;
1019 return ret; 1209 return ret;
1020} 1210}
1021 1211
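As a hedged, caller-side sketch (not code from this patch): reclaim is expected to dispatch on these return codes roughly as follows, with SWAP_MLOCK diverting the page to the unevictable list. The helper name below is hypothetical; the real handling lives in shrink_page_list() in mm/vmscan.c.

	/* Hypothetical dispatch on try_to_unmap() results in a reclaim path. */
	static int reclaim_one_page(struct page *page)
	{
		switch (try_to_unmap(page, 0)) {
		case SWAP_SUCCESS:
			return 1;	/* all ptes gone; page can be freed */
		case SWAP_MLOCK:
			return 0;	/* mlocked; cull to the unevictable list */
		case SWAP_AGAIN:
		case SWAP_FAIL:
		default:
			return 0;	/* retry later or keep the page */
		}
	}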
1212#ifdef CONFIG_UNEVICTABLE_LRU
1213/**
1214 * try_to_munlock - try to munlock a page
1215 * @page: the page to be munlocked
1216 *
1217 * Called from munlock code. Checks all of the VMAs mapping the page
1218 * to make sure nobody else has this page mlocked. The page will be
1219 * returned with PG_mlocked cleared if no other vmas have it mlocked.
1220 *
1221 * Return values are:
1222 *
 1223 * SWAP_SUCCESS - no vmas hold the page mlocked.
1224 * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
1225 * SWAP_MLOCK - page is now mlocked.
1226 */
1227int try_to_munlock(struct page *page)
1228{
1229 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1230
1231 if (PageAnon(page))
1232 return try_to_unmap_anon(page, 1, 0);
1233 else
1234 return try_to_unmap_file(page, 1, 0);
1235}
1236#endif
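A hedged sketch of the consumer side: the munlock/munmap path calls try_to_munlock() on an isolated, locked page to decide whether it may leave the unevictable state (the real caller is in mm/mlock.c in this patchset; the helper name here is hypothetical). Per the VM_BUG_ON above, the caller holds the page lock and has taken the page off the LRU.

	/* Hypothetical: decide whether a munlocked page stays unevictable. */
	static void check_page_after_munlock(struct page *page)
	{
		if (try_to_munlock(page) == SWAP_MLOCK)
			return;	/* another VM_LOCKED vma still maps it */
		/* PG_mlocked is now clear; LRU putback can re-evaluate the page */
	}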
diff --git a/mm/shmem.c b/mm/shmem.c
index d87958a5f03e..d38d7e61fcd0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
199 199
200static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 200static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
201 .ra_pages = 0, /* No readahead */ 201 .ra_pages = 0, /* No readahead */
202 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 202 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
203 .unplug_io_fn = default_unplug_io_fn, 203 .unplug_io_fn = default_unplug_io_fn,
204}; 204};
205 205
@@ -1367,6 +1367,7 @@ repeat:
1367 error = -ENOMEM; 1367 error = -ENOMEM;
1368 goto failed; 1368 goto failed;
1369 } 1369 }
1370 SetPageSwapBacked(filepage);
1370 1371
1371 /* Precharge page while we can wait, compensate after */ 1372 /* Precharge page while we can wait, compensate after */
1372 error = mem_cgroup_cache_charge(filepage, current->mm, 1373 error = mem_cgroup_cache_charge(filepage, current->mm,
@@ -1476,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
1476 if (!user_shm_lock(inode->i_size, user)) 1477 if (!user_shm_lock(inode->i_size, user))
1477 goto out_nomem; 1478 goto out_nomem;
1478 info->flags |= VM_LOCKED; 1479 info->flags |= VM_LOCKED;
1480 mapping_set_unevictable(file->f_mapping);
1479 } 1481 }
1480 if (!lock && (info->flags & VM_LOCKED) && user) { 1482 if (!lock && (info->flags & VM_LOCKED) && user) {
1481 user_shm_unlock(inode->i_size, user); 1483 user_shm_unlock(inode->i_size, user);
1482 info->flags &= ~VM_LOCKED; 1484 info->flags &= ~VM_LOCKED;
1485 mapping_clear_unevictable(file->f_mapping);
1486 scan_mapping_unevictable_pages(file->f_mapping);
1483 } 1487 }
1484 retval = 0; 1488 retval = 0;
1489
1485out_nomem: 1490out_nomem:
1486 spin_unlock(&info->lock); 1491 spin_unlock(&info->lock);
1487 return retval; 1492 return retval;
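For context, the userspace trigger for this path is shmctl(2) with SHM_LOCK/SHM_UNLOCK; a minimal (assumed, illustrative) example:

	#include <sys/ipc.h>
	#include <sys/shm.h>

	/*
	 * SHM_LOCK pins the segment; with this patch its pages are also
	 * marked unevictable. SHM_UNLOCK triggers the rescue scan above.
	 */
	int toggle_segment_lock(int shmid, int lock)
	{
		return shmctl(shmid, lock ? SHM_LOCK : SHM_UNLOCK, NULL);
	}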
diff --git a/mm/swap.c b/mm/swap.c
index 9e0cb3118079..2152e48a7b8f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,11 +31,12 @@
31#include <linux/backing-dev.h> 31#include <linux/backing-dev.h>
32#include <linux/memcontrol.h> 32#include <linux/memcontrol.h>
33 33
34#include "internal.h"
35
34/* How many pages do we try to swap or page in/out together? */ 36/* How many pages do we try to swap or page in/out together? */
35int page_cluster; 37int page_cluster;
36 38
37static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs); 39static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
38static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
39static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); 40static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
40 41
41/* 42/*
@@ -116,8 +117,9 @@ static void pagevec_move_tail(struct pagevec *pvec)
116 zone = pagezone; 117 zone = pagezone;
117 spin_lock(&zone->lru_lock); 118 spin_lock(&zone->lru_lock);
118 } 119 }
119 if (PageLRU(page) && !PageActive(page)) { 120 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
120 list_move_tail(&page->lru, &zone->inactive_list); 121 int lru = page_is_file_cache(page);
122 list_move_tail(&page->lru, &zone->lru[lru].list);
121 pgmoved++; 123 pgmoved++;
122 } 124 }
123 } 125 }
@@ -136,7 +138,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
136void rotate_reclaimable_page(struct page *page) 138void rotate_reclaimable_page(struct page *page)
137{ 139{
138 if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && 140 if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
139 PageLRU(page)) { 141 !PageUnevictable(page) && PageLRU(page)) {
140 struct pagevec *pvec; 142 struct pagevec *pvec;
141 unsigned long flags; 143 unsigned long flags;
142 144
@@ -157,12 +159,19 @@ void activate_page(struct page *page)
157 struct zone *zone = page_zone(page); 159 struct zone *zone = page_zone(page);
158 160
159 spin_lock_irq(&zone->lru_lock); 161 spin_lock_irq(&zone->lru_lock);
160 if (PageLRU(page) && !PageActive(page)) { 162 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
161 del_page_from_inactive_list(zone, page); 163 int file = page_is_file_cache(page);
164 int lru = LRU_BASE + file;
165 del_page_from_lru_list(zone, page, lru);
166
162 SetPageActive(page); 167 SetPageActive(page);
163 add_page_to_active_list(zone, page); 168 lru += LRU_ACTIVE;
169 add_page_to_lru_list(zone, page, lru);
164 __count_vm_event(PGACTIVATE); 170 __count_vm_event(PGACTIVATE);
165 mem_cgroup_move_lists(page, true); 171 mem_cgroup_move_lists(page, lru);
172
173 zone->recent_rotated[!!file]++;
174 zone->recent_scanned[!!file]++;
166 } 175 }
167 spin_unlock_irq(&zone->lru_lock); 176 spin_unlock_irq(&zone->lru_lock);
168} 177}
@@ -176,7 +185,8 @@ void activate_page(struct page *page)
176 */ 185 */
177void mark_page_accessed(struct page *page) 186void mark_page_accessed(struct page *page)
178{ 187{
179 if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { 188 if (!PageActive(page) && !PageUnevictable(page) &&
189 PageReferenced(page) && PageLRU(page)) {
180 activate_page(page); 190 activate_page(page);
181 ClearPageReferenced(page); 191 ClearPageReferenced(page);
182 } else if (!PageReferenced(page)) { 192 } else if (!PageReferenced(page)) {
@@ -186,28 +196,73 @@ void mark_page_accessed(struct page *page)
186 196
187EXPORT_SYMBOL(mark_page_accessed); 197EXPORT_SYMBOL(mark_page_accessed);
188 198
189/** 199void __lru_cache_add(struct page *page, enum lru_list lru)
190 * lru_cache_add: add a page to the page lists
191 * @page: the page to add
192 */
193void lru_cache_add(struct page *page)
194{ 200{
195 struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); 201 struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
196 202
197 page_cache_get(page); 203 page_cache_get(page);
198 if (!pagevec_add(pvec, page)) 204 if (!pagevec_add(pvec, page))
199 __pagevec_lru_add(pvec); 205 ____pagevec_lru_add(pvec, lru);
200 put_cpu_var(lru_add_pvecs); 206 put_cpu_var(lru_add_pvecs);
201} 207}
202 208
203void lru_cache_add_active(struct page *page) 209/**
210 * lru_cache_add_lru - add a page to a page list
211 * @page: the page to be added to the LRU.
212 * @lru: the LRU list to which the page is added.
213 */
214void lru_cache_add_lru(struct page *page, enum lru_list lru)
204{ 215{
205 struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs); 216 if (PageActive(page)) {
217 VM_BUG_ON(PageUnevictable(page));
218 ClearPageActive(page);
219 } else if (PageUnevictable(page)) {
220 VM_BUG_ON(PageActive(page));
221 ClearPageUnevictable(page);
222 }
206 223
207 page_cache_get(page); 224 VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
208 if (!pagevec_add(pvec, page)) 225 __lru_cache_add(page, lru);
209 __pagevec_lru_add_active(pvec); 226}
210 put_cpu_var(lru_add_active_pvecs); 227
228/**
229 * add_page_to_unevictable_list - add a page to the unevictable list
230 * @page: the page to be added to the unevictable list
231 *
232 * Add page directly to its zone's unevictable list. To avoid races with
 233 * tasks that might be making the page evictable, through e.g. munlock,
234 * munmap or exit, while it's not on the lru, we want to add the page
235 * while it's locked or otherwise "invisible" to other tasks. This is
236 * difficult to do when using the pagevec cache, so bypass that.
237 */
238void add_page_to_unevictable_list(struct page *page)
239{
240 struct zone *zone = page_zone(page);
241
242 spin_lock_irq(&zone->lru_lock);
243 SetPageUnevictable(page);
244 SetPageLRU(page);
245 add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
246 spin_unlock_irq(&zone->lru_lock);
247}
248
249/**
250 * lru_cache_add_active_or_unevictable
251 * @page: the page to be added to LRU
252 * @vma: vma in which page is mapped for determining reclaimability
253 *
254 * place @page on active or unevictable LRU list, depending on
255 * page_evictable(). Note that if the page is not evictable,
 256 * it goes directly back onto its zone's unevictable list. It does
257 * NOT use a per cpu pagevec.
258 */
259void lru_cache_add_active_or_unevictable(struct page *page,
260 struct vm_area_struct *vma)
261{
262 if (page_evictable(page, vma))
263 lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
264 else
265 add_page_to_unevictable_list(page);
211} 266}
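A minimal sketch of the intended call site, assuming the anonymous fault path (this mirrors the mm/memory.c caller this helper was added for, rather than quoting it):

	/* Illustrative: install a freshly allocated anonymous page. */
	static void add_new_anon_page(struct page *page,
				      struct vm_area_struct *vma)
	{
		SetPageSwapBacked(page);	/* anon pages are swap backed */
		lru_cache_add_active_or_unevictable(page, vma);
	}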
212 267
213/* 268/*
@@ -217,15 +272,15 @@ void lru_cache_add_active(struct page *page)
217 */ 272 */
218static void drain_cpu_pagevecs(int cpu) 273static void drain_cpu_pagevecs(int cpu)
219{ 274{
275 struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
220 struct pagevec *pvec; 276 struct pagevec *pvec;
277 int lru;
221 278
222 pvec = &per_cpu(lru_add_pvecs, cpu); 279 for_each_lru(lru) {
223 if (pagevec_count(pvec)) 280 pvec = &pvecs[lru - LRU_BASE];
224 __pagevec_lru_add(pvec); 281 if (pagevec_count(pvec))
225 282 ____pagevec_lru_add(pvec, lru);
226 pvec = &per_cpu(lru_add_active_pvecs, cpu); 283 }
227 if (pagevec_count(pvec))
228 __pagevec_lru_add_active(pvec);
229 284
230 pvec = &per_cpu(lru_rotate_pvecs, cpu); 285 pvec = &per_cpu(lru_rotate_pvecs, cpu);
231 if (pagevec_count(pvec)) { 286 if (pagevec_count(pvec)) {
@@ -244,7 +299,7 @@ void lru_add_drain(void)
244 put_cpu(); 299 put_cpu();
245} 300}
246 301
247#ifdef CONFIG_NUMA 302#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
248static void lru_add_drain_per_cpu(struct work_struct *dummy) 303static void lru_add_drain_per_cpu(struct work_struct *dummy)
249{ 304{
250 lru_add_drain(); 305 lru_add_drain();
@@ -308,6 +363,7 @@ void release_pages(struct page **pages, int nr, int cold)
308 363
309 if (PageLRU(page)) { 364 if (PageLRU(page)) {
310 struct zone *pagezone = page_zone(page); 365 struct zone *pagezone = page_zone(page);
366
311 if (pagezone != zone) { 367 if (pagezone != zone) {
312 if (zone) 368 if (zone)
313 spin_unlock_irqrestore(&zone->lru_lock, 369 spin_unlock_irqrestore(&zone->lru_lock,
@@ -380,10 +436,11 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
380 * Add the passed pages to the LRU, then drop the caller's refcount 436 * Add the passed pages to the LRU, then drop the caller's refcount
381 * on them. Reinitialises the caller's pagevec. 437 * on them. Reinitialises the caller's pagevec.
382 */ 438 */
383void __pagevec_lru_add(struct pagevec *pvec) 439void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
384{ 440{
385 int i; 441 int i;
386 struct zone *zone = NULL; 442 struct zone *zone = NULL;
443 VM_BUG_ON(is_unevictable_lru(lru));
387 444
388 for (i = 0; i < pagevec_count(pvec); i++) { 445 for (i = 0; i < pagevec_count(pvec); i++) {
389 struct page *page = pvec->pages[i]; 446 struct page *page = pvec->pages[i];
@@ -395,9 +452,13 @@ void __pagevec_lru_add(struct pagevec *pvec)
395 zone = pagezone; 452 zone = pagezone;
396 spin_lock_irq(&zone->lru_lock); 453 spin_lock_irq(&zone->lru_lock);
397 } 454 }
455 VM_BUG_ON(PageActive(page));
456 VM_BUG_ON(PageUnevictable(page));
398 VM_BUG_ON(PageLRU(page)); 457 VM_BUG_ON(PageLRU(page));
399 SetPageLRU(page); 458 SetPageLRU(page);
400 add_page_to_inactive_list(zone, page); 459 if (is_active_lru(lru))
460 SetPageActive(page);
461 add_page_to_lru_list(zone, page, lru);
401 } 462 }
402 if (zone) 463 if (zone)
403 spin_unlock_irq(&zone->lru_lock); 464 spin_unlock_irq(&zone->lru_lock);
@@ -405,48 +466,45 @@ void __pagevec_lru_add(struct pagevec *pvec)
405 pagevec_reinit(pvec); 466 pagevec_reinit(pvec);
406} 467}
407 468
408EXPORT_SYMBOL(__pagevec_lru_add); 469EXPORT_SYMBOL(____pagevec_lru_add);
409 470
410void __pagevec_lru_add_active(struct pagevec *pvec) 471/*
472 * Try to drop buffers from the pages in a pagevec
473 */
474void pagevec_strip(struct pagevec *pvec)
411{ 475{
412 int i; 476 int i;
413 struct zone *zone = NULL;
414 477
415 for (i = 0; i < pagevec_count(pvec); i++) { 478 for (i = 0; i < pagevec_count(pvec); i++) {
416 struct page *page = pvec->pages[i]; 479 struct page *page = pvec->pages[i];
417 struct zone *pagezone = page_zone(page);
418 480
419 if (pagezone != zone) { 481 if (PagePrivate(page) && trylock_page(page)) {
420 if (zone) 482 if (PagePrivate(page))
421 spin_unlock_irq(&zone->lru_lock); 483 try_to_release_page(page, 0);
422 zone = pagezone; 484 unlock_page(page);
423 spin_lock_irq(&zone->lru_lock);
424 } 485 }
425 VM_BUG_ON(PageLRU(page));
426 SetPageLRU(page);
427 VM_BUG_ON(PageActive(page));
428 SetPageActive(page);
429 add_page_to_active_list(zone, page);
430 } 486 }
431 if (zone)
432 spin_unlock_irq(&zone->lru_lock);
433 release_pages(pvec->pages, pvec->nr, pvec->cold);
434 pagevec_reinit(pvec);
435} 487}
436 488
437/* 489/**
438 * Try to drop buffers from the pages in a pagevec 490 * pagevec_swap_free - try to free swap space from the pages in a pagevec
491 * @pvec: pagevec with swapcache pages to free the swap space of
492 *
493 * The caller needs to hold an extra reference to each page and
494 * not hold the page lock on the pages. This function uses a
495 * trylock on the page lock so it may not always free the swap
496 * space associated with a page.
439 */ 497 */
440void pagevec_strip(struct pagevec *pvec) 498void pagevec_swap_free(struct pagevec *pvec)
441{ 499{
442 int i; 500 int i;
443 501
444 for (i = 0; i < pagevec_count(pvec); i++) { 502 for (i = 0; i < pagevec_count(pvec); i++) {
445 struct page *page = pvec->pages[i]; 503 struct page *page = pvec->pages[i];
446 504
447 if (PagePrivate(page) && trylock_page(page)) { 505 if (PageSwapCache(page) && trylock_page(page)) {
448 if (PagePrivate(page)) 506 if (PageSwapCache(page))
449 try_to_release_page(page, 0); 507 remove_exclusive_swap_page_ref(page);
450 unlock_page(page); 508 unlock_page(page);
451 } 509 }
452 } 510 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 797c3831cbec..3353c9029cef 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
33}; 33};
34 34
35static struct backing_dev_info swap_backing_dev_info = { 35static struct backing_dev_info swap_backing_dev_info = {
36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
37 .unplug_io_fn = swap_unplug_io_fn, 37 .unplug_io_fn = swap_unplug_io_fn,
38}; 38};
39 39
@@ -75,6 +75,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
75 BUG_ON(!PageLocked(page)); 75 BUG_ON(!PageLocked(page));
76 BUG_ON(PageSwapCache(page)); 76 BUG_ON(PageSwapCache(page));
77 BUG_ON(PagePrivate(page)); 77 BUG_ON(PagePrivate(page));
78 BUG_ON(!PageSwapBacked(page));
78 error = radix_tree_preload(gfp_mask); 79 error = radix_tree_preload(gfp_mask);
79 if (!error) { 80 if (!error) {
80 page_cache_get(page); 81 page_cache_get(page);
@@ -302,17 +303,19 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
302 * re-using the just freed swap entry for an existing page. 303 * re-using the just freed swap entry for an existing page.
303 * May fail (-ENOMEM) if radix-tree node allocation failed. 304 * May fail (-ENOMEM) if radix-tree node allocation failed.
304 */ 305 */
305 set_page_locked(new_page); 306 __set_page_locked(new_page);
307 SetPageSwapBacked(new_page);
306 err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); 308 err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
307 if (likely(!err)) { 309 if (likely(!err)) {
308 /* 310 /*
309 * Initiate read into locked page and return. 311 * Initiate read into locked page and return.
310 */ 312 */
311 lru_cache_add_active(new_page); 313 lru_cache_add_anon(new_page);
312 swap_readpage(NULL, new_page); 314 swap_readpage(NULL, new_page);
313 return new_page; 315 return new_page;
314 } 316 }
315 clear_page_locked(new_page); 317 ClearPageSwapBacked(new_page);
318 __clear_page_locked(new_page);
316 swap_free(entry); 319 swap_free(entry);
317 } while (err != -ENOMEM); 320 } while (err != -ENOMEM);
318 321
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e330f2998fa..90cb67a5417c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -344,7 +344,7 @@ int can_share_swap_page(struct page *page)
344 * Work out if there are any other processes sharing this 344 * Work out if there are any other processes sharing this
345 * swap cache page. Free it if you can. Return success. 345 * swap cache page. Free it if you can. Return success.
346 */ 346 */
347int remove_exclusive_swap_page(struct page *page) 347static int remove_exclusive_swap_page_count(struct page *page, int count)
348{ 348{
349 int retval; 349 int retval;
350 struct swap_info_struct * p; 350 struct swap_info_struct * p;
@@ -357,7 +357,7 @@ int remove_exclusive_swap_page(struct page *page)
357 return 0; 357 return 0;
358 if (PageWriteback(page)) 358 if (PageWriteback(page))
359 return 0; 359 return 0;
360 if (page_count(page) != 2) /* 2: us + cache */ 360 if (page_count(page) != count) /* us + cache + ptes */
361 return 0; 361 return 0;
362 362
363 entry.val = page_private(page); 363 entry.val = page_private(page);
@@ -370,7 +370,7 @@ int remove_exclusive_swap_page(struct page *page)
370 if (p->swap_map[swp_offset(entry)] == 1) { 370 if (p->swap_map[swp_offset(entry)] == 1) {
371 /* Recheck the page count with the swapcache lock held.. */ 371 /* Recheck the page count with the swapcache lock held.. */
372 spin_lock_irq(&swapper_space.tree_lock); 372 spin_lock_irq(&swapper_space.tree_lock);
373 if ((page_count(page) == 2) && !PageWriteback(page)) { 373 if ((page_count(page) == count) && !PageWriteback(page)) {
374 __delete_from_swap_cache(page); 374 __delete_from_swap_cache(page);
375 SetPageDirty(page); 375 SetPageDirty(page);
376 retval = 1; 376 retval = 1;
@@ -388,6 +388,25 @@ int remove_exclusive_swap_page(struct page *page)
388} 388}
389 389
390/* 390/*
391 * Most of the time the page should have two references: one for the
392 * process and one for the swap cache.
393 */
394int remove_exclusive_swap_page(struct page *page)
395{
396 return remove_exclusive_swap_page_count(page, 2);
397}
398
399/*
400 * The pageout code holds an extra reference to the page. That raises
401 * the reference count to test for to 2 for a page that is only in the
402 * swap cache plus 1 for each process that maps the page.
403 */
404int remove_exclusive_swap_page_ref(struct page *page)
405{
406 return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
407}
408
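To make the expected count concrete, a worked example (assumed scenario): a swapcache page mapped by two processes, examined from the pageout path. page_count() is then 1 (swap cache) + 2 (ptes, so page_mapcount(page) == 2) + 1 (pageout's own get_page()) = 4, which is exactly the 2 + page_mapcount(page) that remove_exclusive_swap_page_ref() tests against.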
409/*
391 * Free the swap entry like above, but also try to 410 * Free the swap entry like above, but also try to
392 * free the page cache entry if it is the last user. 411 * free the page cache entry if it is the last user.
393 */ 412 */
@@ -403,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
403 if (p) { 422 if (p) {
404 if (swap_entry_free(p, swp_offset(entry)) == 1) { 423 if (swap_entry_free(p, swp_offset(entry)) == 1) {
405 page = find_get_page(&swapper_space, entry.val); 424 page = find_get_page(&swapper_space, entry.val);
406 if (page && unlikely(!trylock_page(page))) { 425 if (page && !trylock_page(page)) {
407 page_cache_release(page); 426 page_cache_release(page);
408 page = NULL; 427 page = NULL;
409 } 428 }
diff --git a/mm/truncate.c b/mm/truncate.c
index e83e4b114ef1..1229211104f8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -18,6 +18,7 @@
18#include <linux/task_io_accounting_ops.h> 18#include <linux/task_io_accounting_ops.h>
19#include <linux/buffer_head.h> /* grr. try_to_release_page, 19#include <linux/buffer_head.h> /* grr. try_to_release_page,
20 do_invalidatepage */ 20 do_invalidatepage */
21#include "internal.h"
21 22
22 23
23/** 24/**
@@ -103,6 +104,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
103 104
104 cancel_dirty_page(page, PAGE_CACHE_SIZE); 105 cancel_dirty_page(page, PAGE_CACHE_SIZE);
105 106
107 clear_page_mlock(page);
106 remove_from_page_cache(page); 108 remove_from_page_cache(page);
107 ClearPageMappedToDisk(page); 109 ClearPageMappedToDisk(page);
108 page_cache_release(page); /* pagecache ref */ 110 page_cache_release(page); /* pagecache ref */
@@ -127,6 +129,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
127 if (PagePrivate(page) && !try_to_release_page(page, 0)) 129 if (PagePrivate(page) && !try_to_release_page(page, 0))
128 return 0; 130 return 0;
129 131
132 clear_page_mlock(page);
130 ret = remove_mapping(mapping, page); 133 ret = remove_mapping(mapping, page);
131 134
132 return ret; 135 return ret;
@@ -352,6 +355,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
352 if (PageDirty(page)) 355 if (PageDirty(page))
353 goto failed; 356 goto failed;
354 357
358 clear_page_mlock(page);
355 BUG_ON(PagePrivate(page)); 359 BUG_ON(PagePrivate(page));
356 __remove_from_page_cache(page); 360 __remove_from_page_cache(page);
357 spin_unlock_irq(&mapping->tree_lock); 361 spin_unlock_irq(&mapping->tree_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc59..712ae47af0bf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -8,6 +8,7 @@
8 * Numa awareness, Christoph Lameter, SGI, June 2005 8 * Numa awareness, Christoph Lameter, SGI, June 2005
9 */ 9 */
10 10
11#include <linux/vmalloc.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/highmem.h> 14#include <linux/highmem.h>
@@ -18,16 +19,17 @@
18#include <linux/debugobjects.h> 19#include <linux/debugobjects.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
20#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
22#include <linux/list.h>
23#include <linux/rbtree.h>
24#include <linux/radix-tree.h>
25#include <linux/rcupdate.h>
21 26
27#include <asm/atomic.h>
22#include <asm/uaccess.h> 28#include <asm/uaccess.h>
23#include <asm/tlbflush.h> 29#include <asm/tlbflush.h>
24 30
25 31
26DEFINE_RWLOCK(vmlist_lock); 32/*** Page table manipulation functions ***/
27struct vm_struct *vmlist;
28
29static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
30 int node, void *caller);
31 33
32static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) 34static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
33{ 35{
@@ -40,8 +42,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
40 } while (pte++, addr += PAGE_SIZE, addr != end); 42 } while (pte++, addr += PAGE_SIZE, addr != end);
41} 43}
42 44
43static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr, 45static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
44 unsigned long end)
45{ 46{
46 pmd_t *pmd; 47 pmd_t *pmd;
47 unsigned long next; 48 unsigned long next;
@@ -55,8 +56,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
55 } while (pmd++, addr = next, addr != end); 56 } while (pmd++, addr = next, addr != end);
56} 57}
57 58
58static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr, 59static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
59 unsigned long end)
60{ 60{
61 pud_t *pud; 61 pud_t *pud;
62 unsigned long next; 62 unsigned long next;
@@ -70,12 +70,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
70 } while (pud++, addr = next, addr != end); 70 } while (pud++, addr = next, addr != end);
71} 71}
72 72
73void unmap_kernel_range(unsigned long addr, unsigned long size) 73static void vunmap_page_range(unsigned long addr, unsigned long end)
74{ 74{
75 pgd_t *pgd; 75 pgd_t *pgd;
76 unsigned long next; 76 unsigned long next;
77 unsigned long start = addr;
78 unsigned long end = addr + size;
79 77
80 BUG_ON(addr >= end); 78 BUG_ON(addr >= end);
81 pgd = pgd_offset_k(addr); 79 pgd = pgd_offset_k(addr);
@@ -86,35 +84,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
86 continue; 84 continue;
87 vunmap_pud_range(pgd, addr, next); 85 vunmap_pud_range(pgd, addr, next);
88 } while (pgd++, addr = next, addr != end); 86 } while (pgd++, addr = next, addr != end);
89 flush_tlb_kernel_range(start, end);
90}
91
92static void unmap_vm_area(struct vm_struct *area)
93{
94 unmap_kernel_range((unsigned long)area->addr, area->size);
95} 87}
96 88
97static int vmap_pte_range(pmd_t *pmd, unsigned long addr, 89static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
98 unsigned long end, pgprot_t prot, struct page ***pages) 90 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
99{ 91{
100 pte_t *pte; 92 pte_t *pte;
101 93
94 /*
95 * nr is a running index into the array which helps higher level
96 * callers keep track of where we're up to.
97 */
98
102 pte = pte_alloc_kernel(pmd, addr); 99 pte = pte_alloc_kernel(pmd, addr);
103 if (!pte) 100 if (!pte)
104 return -ENOMEM; 101 return -ENOMEM;
105 do { 102 do {
106 struct page *page = **pages; 103 struct page *page = pages[*nr];
107 WARN_ON(!pte_none(*pte)); 104
108 if (!page) 105 if (WARN_ON(!pte_none(*pte)))
106 return -EBUSY;
107 if (WARN_ON(!page))
109 return -ENOMEM; 108 return -ENOMEM;
110 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); 109 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
111 (*pages)++; 110 (*nr)++;
112 } while (pte++, addr += PAGE_SIZE, addr != end); 111 } while (pte++, addr += PAGE_SIZE, addr != end);
113 return 0; 112 return 0;
114} 113}
115 114
116static inline int vmap_pmd_range(pud_t *pud, unsigned long addr, 115static int vmap_pmd_range(pud_t *pud, unsigned long addr,
117 unsigned long end, pgprot_t prot, struct page ***pages) 116 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
118{ 117{
119 pmd_t *pmd; 118 pmd_t *pmd;
120 unsigned long next; 119 unsigned long next;
@@ -124,14 +123,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
124 return -ENOMEM; 123 return -ENOMEM;
125 do { 124 do {
126 next = pmd_addr_end(addr, end); 125 next = pmd_addr_end(addr, end);
127 if (vmap_pte_range(pmd, addr, next, prot, pages)) 126 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
128 return -ENOMEM; 127 return -ENOMEM;
129 } while (pmd++, addr = next, addr != end); 128 } while (pmd++, addr = next, addr != end);
130 return 0; 129 return 0;
131} 130}
132 131
133static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr, 132static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
134 unsigned long end, pgprot_t prot, struct page ***pages) 133 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
135{ 134{
136 pud_t *pud; 135 pud_t *pud;
137 unsigned long next; 136 unsigned long next;
@@ -141,44 +140,49 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
141 return -ENOMEM; 140 return -ENOMEM;
142 do { 141 do {
143 next = pud_addr_end(addr, end); 142 next = pud_addr_end(addr, end);
144 if (vmap_pmd_range(pud, addr, next, prot, pages)) 143 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
145 return -ENOMEM; 144 return -ENOMEM;
146 } while (pud++, addr = next, addr != end); 145 } while (pud++, addr = next, addr != end);
147 return 0; 146 return 0;
148} 147}
149 148
150int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) 149/*
150 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
151 * will have pfns corresponding to the "pages" array.
152 *
 153 * I.e. the pte at addr+N*PAGE_SIZE shall point to the pfn of pages[N]
154 */
155static int vmap_page_range(unsigned long addr, unsigned long end,
156 pgprot_t prot, struct page **pages)
151{ 157{
152 pgd_t *pgd; 158 pgd_t *pgd;
153 unsigned long next; 159 unsigned long next;
154 unsigned long addr = (unsigned long) area->addr; 160 int err = 0;
155 unsigned long end = addr + area->size - PAGE_SIZE; 161 int nr = 0;
156 int err;
157 162
158 BUG_ON(addr >= end); 163 BUG_ON(addr >= end);
159 pgd = pgd_offset_k(addr); 164 pgd = pgd_offset_k(addr);
160 do { 165 do {
161 next = pgd_addr_end(addr, end); 166 next = pgd_addr_end(addr, end);
162 err = vmap_pud_range(pgd, addr, next, prot, pages); 167 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
163 if (err) 168 if (err)
164 break; 169 break;
165 } while (pgd++, addr = next, addr != end); 170 } while (pgd++, addr = next, addr != end);
166 flush_cache_vmap((unsigned long) area->addr, end); 171 flush_cache_vmap(addr, end);
167 return err; 172
173 if (unlikely(err))
174 return err;
175 return nr;
168} 176}
169EXPORT_SYMBOL_GPL(map_vm_area);
170 177
171/* 178/*
172 * Map a vmalloc()-space virtual address to the physical page. 179 * Walk a vmap address to the struct page it maps.
173 */ 180 */
174struct page *vmalloc_to_page(const void *vmalloc_addr) 181struct page *vmalloc_to_page(const void *vmalloc_addr)
175{ 182{
176 unsigned long addr = (unsigned long) vmalloc_addr; 183 unsigned long addr = (unsigned long) vmalloc_addr;
177 struct page *page = NULL; 184 struct page *page = NULL;
178 pgd_t *pgd = pgd_offset_k(addr); 185 pgd_t *pgd = pgd_offset_k(addr);
179 pud_t *pud;
180 pmd_t *pmd;
181 pte_t *ptep, pte;
182 186
183 /* 187 /*
184 * XXX we might need to change this if we add VIRTUAL_BUG_ON for 188 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -188,10 +192,12 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
188 !is_module_address(addr)); 192 !is_module_address(addr));
189 193
190 if (!pgd_none(*pgd)) { 194 if (!pgd_none(*pgd)) {
191 pud = pud_offset(pgd, addr); 195 pud_t *pud = pud_offset(pgd, addr);
192 if (!pud_none(*pud)) { 196 if (!pud_none(*pud)) {
193 pmd = pmd_offset(pud, addr); 197 pmd_t *pmd = pmd_offset(pud, addr);
194 if (!pmd_none(*pmd)) { 198 if (!pmd_none(*pmd)) {
199 pte_t *ptep, pte;
200
195 ptep = pte_offset_map(pmd, addr); 201 ptep = pte_offset_map(pmd, addr);
196 pte = *ptep; 202 pte = *ptep;
197 if (pte_present(pte)) 203 if (pte_present(pte))
@@ -213,13 +219,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
213} 219}
214EXPORT_SYMBOL(vmalloc_to_pfn); 220EXPORT_SYMBOL(vmalloc_to_pfn);
215 221
216static struct vm_struct * 222
217__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, 223/*** Global kva allocator ***/
218 unsigned long end, int node, gfp_t gfp_mask, void *caller) 224
225#define VM_LAZY_FREE 0x01
226#define VM_LAZY_FREEING 0x02
227#define VM_VM_AREA 0x04
228
229struct vmap_area {
230 unsigned long va_start;
231 unsigned long va_end;
232 unsigned long flags;
233 struct rb_node rb_node; /* address sorted rbtree */
234 struct list_head list; /* address sorted list */
235 struct list_head purge_list; /* "lazy purge" list */
236 void *private;
237 struct rcu_head rcu_head;
238};
239
240static DEFINE_SPINLOCK(vmap_area_lock);
241static struct rb_root vmap_area_root = RB_ROOT;
242static LIST_HEAD(vmap_area_list);
243
244static struct vmap_area *__find_vmap_area(unsigned long addr)
219{ 245{
220 struct vm_struct **p, *tmp, *area; 246 struct rb_node *n = vmap_area_root.rb_node;
221 unsigned long align = 1; 247
248 while (n) {
249 struct vmap_area *va;
250
251 va = rb_entry(n, struct vmap_area, rb_node);
252 if (addr < va->va_start)
253 n = n->rb_left;
254 else if (addr > va->va_start)
255 n = n->rb_right;
256 else
257 return va;
258 }
259
260 return NULL;
261}
262
263static void __insert_vmap_area(struct vmap_area *va)
264{
265 struct rb_node **p = &vmap_area_root.rb_node;
266 struct rb_node *parent = NULL;
267 struct rb_node *tmp;
268
269 while (*p) {
270 struct vmap_area *tmp;
271
272 parent = *p;
273 tmp = rb_entry(parent, struct vmap_area, rb_node);
274 if (va->va_start < tmp->va_end)
275 p = &(*p)->rb_left;
276 else if (va->va_end > tmp->va_start)
277 p = &(*p)->rb_right;
278 else
279 BUG();
280 }
281
282 rb_link_node(&va->rb_node, parent, p);
283 rb_insert_color(&va->rb_node, &vmap_area_root);
284
285 /* address-sort this list so it is usable like the vmlist */
286 tmp = rb_prev(&va->rb_node);
287 if (tmp) {
288 struct vmap_area *prev;
289 prev = rb_entry(tmp, struct vmap_area, rb_node);
290 list_add_rcu(&va->list, &prev->list);
291 } else
292 list_add_rcu(&va->list, &vmap_area_list);
293}
294
295static void purge_vmap_area_lazy(void);
296
297/*
 298 * Allocate a region of KVA of the specified size and alignment,
 299 * between vstart and vend.
300 */
301static struct vmap_area *alloc_vmap_area(unsigned long size,
302 unsigned long align,
303 unsigned long vstart, unsigned long vend,
304 int node, gfp_t gfp_mask)
305{
306 struct vmap_area *va;
307 struct rb_node *n;
308 unsigned long addr;
309 int purged = 0;
310
311 BUG_ON(size & ~PAGE_MASK);
312
313 addr = ALIGN(vstart, align);
314
315 va = kmalloc_node(sizeof(struct vmap_area),
316 gfp_mask & GFP_RECLAIM_MASK, node);
317 if (unlikely(!va))
318 return ERR_PTR(-ENOMEM);
319
320retry:
321 spin_lock(&vmap_area_lock);
322 /* XXX: could have a last_hole cache */
323 n = vmap_area_root.rb_node;
324 if (n) {
325 struct vmap_area *first = NULL;
326
327 do {
328 struct vmap_area *tmp;
329 tmp = rb_entry(n, struct vmap_area, rb_node);
330 if (tmp->va_end >= addr) {
331 if (!first && tmp->va_start < addr + size)
332 first = tmp;
333 n = n->rb_left;
334 } else {
335 first = tmp;
336 n = n->rb_right;
337 }
338 } while (n);
339
340 if (!first)
341 goto found;
342
343 if (first->va_end < addr) {
344 n = rb_next(&first->rb_node);
345 if (n)
346 first = rb_entry(n, struct vmap_area, rb_node);
347 else
348 goto found;
349 }
350
351 while (addr + size >= first->va_start && addr + size <= vend) {
352 addr = ALIGN(first->va_end + PAGE_SIZE, align);
353
354 n = rb_next(&first->rb_node);
355 if (n)
356 first = rb_entry(n, struct vmap_area, rb_node);
357 else
358 goto found;
359 }
360 }
361found:
362 if (addr + size > vend) {
363 spin_unlock(&vmap_area_lock);
364 if (!purged) {
365 purge_vmap_area_lazy();
366 purged = 1;
367 goto retry;
368 }
369 if (printk_ratelimit())
370 printk(KERN_WARNING "vmap allocation failed: "
371 "use vmalloc=<size> to increase size.\n");
372 return ERR_PTR(-EBUSY);
373 }
374
375 BUG_ON(addr & (align-1));
376
377 va->va_start = addr;
378 va->va_end = addr + size;
379 va->flags = 0;
380 __insert_vmap_area(va);
381 spin_unlock(&vmap_area_lock);
382
383 return va;
384}
385
386static void rcu_free_va(struct rcu_head *head)
387{
388 struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
389
390 kfree(va);
391}
392
393static void __free_vmap_area(struct vmap_area *va)
394{
395 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
396 rb_erase(&va->rb_node, &vmap_area_root);
397 RB_CLEAR_NODE(&va->rb_node);
398 list_del_rcu(&va->list);
399
400 call_rcu(&va->rcu_head, rcu_free_va);
401}
402
403/*
404 * Free a region of KVA allocated by alloc_vmap_area
405 */
406static void free_vmap_area(struct vmap_area *va)
407{
408 spin_lock(&vmap_area_lock);
409 __free_vmap_area(va);
410 spin_unlock(&vmap_area_lock);
411}
412
413/*
414 * Clear the pagetable entries of a given vmap_area
415 */
416static void unmap_vmap_area(struct vmap_area *va)
417{
418 vunmap_page_range(va->va_start, va->va_end);
419}
420
421/*
422 * lazy_max_pages is the maximum amount of virtual address space we gather up
423 * before attempting to purge with a TLB flush.
424 *
425 * There is a tradeoff here: a larger number will cover more kernel page tables
426 * and take slightly longer to purge, but it will linearly reduce the number of
427 * global TLB flushes that must be performed. It would seem natural to scale
428 * this number up linearly with the number of CPUs (because vmapping activity
429 * could also scale linearly with the number of CPUs), however it is likely
430 * that in practice, workloads might be constrained in other ways that mean
431 * vmap activity will not scale linearly with CPUs. Also, I want to be
432 * conservative and not introduce a big latency on huge systems, so go with
433 * a less aggressive log scale. It will still be an improvement over the old
434 * code, and it will be simple to change the scale factor if we find that it
435 * becomes a problem on bigger systems.
436 */
437static unsigned long lazy_max_pages(void)
438{
439 unsigned int log;
440
441 log = fls(num_online_cpus());
442
443 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
444}
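As a worked instance of the log scale (assuming 4K pages, so 32MB is 8192 pages):

	  1 CPU : fls(1)   == 1  ->  8192 lazy pages ( 32 MB)
	 16 CPUs: fls(16)  == 5  -> 40960 lazy pages (160 MB)
	128 CPUs: fls(128) == 8  -> 65536 lazy pages (256 MB)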
445
446static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
447
448/*
449 * Purges all lazily-freed vmap areas.
450 *
451 * If sync is 0 then don't purge if there is already a purge in progress.
452 * If force_flush is 1, then flush kernel TLBs between *start and *end even
453 * if we found no lazy vmap areas to unmap (callers can use this to optimise
454 * their own TLB flushing).
455 * Returns with *start = min(*start, lowest purged address)
456 * *end = max(*end, highest purged address)
457 */
458static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
459 int sync, int force_flush)
460{
461 static DEFINE_SPINLOCK(purge_lock);
462 LIST_HEAD(valist);
463 struct vmap_area *va;
464 int nr = 0;
465
466 /*
467 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
468 * should not expect such behaviour. This just simplifies locking for
469 * the case that isn't actually used at the moment anyway.
470 */
471 if (!sync && !force_flush) {
472 if (!spin_trylock(&purge_lock))
473 return;
474 } else
475 spin_lock(&purge_lock);
476
477 rcu_read_lock();
478 list_for_each_entry_rcu(va, &vmap_area_list, list) {
479 if (va->flags & VM_LAZY_FREE) {
480 if (va->va_start < *start)
481 *start = va->va_start;
482 if (va->va_end > *end)
483 *end = va->va_end;
484 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
485 unmap_vmap_area(va);
486 list_add_tail(&va->purge_list, &valist);
487 va->flags |= VM_LAZY_FREEING;
488 va->flags &= ~VM_LAZY_FREE;
489 }
490 }
491 rcu_read_unlock();
492
493 if (nr) {
494 BUG_ON(nr > atomic_read(&vmap_lazy_nr));
495 atomic_sub(nr, &vmap_lazy_nr);
496 }
497
498 if (nr || force_flush)
499 flush_tlb_kernel_range(*start, *end);
500
501 if (nr) {
502 spin_lock(&vmap_area_lock);
503 list_for_each_entry(va, &valist, purge_list)
504 __free_vmap_area(va);
505 spin_unlock(&vmap_area_lock);
506 }
507 spin_unlock(&purge_lock);
508}
509
510/*
511 * Kick off a purge of the outstanding lazy areas.
512 */
513static void purge_vmap_area_lazy(void)
514{
515 unsigned long start = ULONG_MAX, end = 0;
516
517 __purge_vmap_area_lazy(&start, &end, 0, 0);
518}
519
520/*
521 * Free and unmap a vmap area
522 */
523static void free_unmap_vmap_area(struct vmap_area *va)
524{
525 va->flags |= VM_LAZY_FREE;
526 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
527 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
528 purge_vmap_area_lazy();
529}
530
531static struct vmap_area *find_vmap_area(unsigned long addr)
532{
533 struct vmap_area *va;
534
535 spin_lock(&vmap_area_lock);
536 va = __find_vmap_area(addr);
537 spin_unlock(&vmap_area_lock);
538
539 return va;
540}
541
542static void free_unmap_vmap_area_addr(unsigned long addr)
543{
544 struct vmap_area *va;
545
546 va = find_vmap_area(addr);
547 BUG_ON(!va);
548 free_unmap_vmap_area(va);
549}
550
551
552/*** Per cpu kva allocator ***/
553
554/*
 555 * vmap space is limited, especially on 32-bit architectures. Ensure there is
556 * room for at least 16 percpu vmap blocks per CPU.
557 */
558/*
 559 * If VMALLOC_START and VMALLOC_END were compile-time constants, we could
 560 * simply #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). They are not,
 561 * so guess instead (we only need a rough idea)
562 */
563#if BITS_PER_LONG == 32
564#define VMALLOC_SPACE (128UL*1024*1024)
565#else
566#define VMALLOC_SPACE (128UL*1024*1024*1024)
567#endif
568
569#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
570#define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */
571#define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */
572#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
573#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */
574#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */
575#define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
576 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
577 VMALLOC_PAGES / NR_CPUS / 16))
578
579#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
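Worked through under one set of assumptions (BITS_PER_LONG == 32, PAGE_SIZE == 4096, NR_CPUS == 4): VMALLOC_PAGES is 128MB/4K = 32768; 32768/4/16 = 512 bits, which the VMAP_MIN/VMAP_MAX clamps leave between VMAP_BBMAP_BITS_MIN (64) and VMAP_BBMAP_BITS_MAX (1024), so VMAP_BBMAP_BITS == 512 and VMAP_BLOCK_SIZE == 2MB.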
580
581struct vmap_block_queue {
582 spinlock_t lock;
583 struct list_head free;
584 struct list_head dirty;
585 unsigned int nr_dirty;
586};
587
588struct vmap_block {
589 spinlock_t lock;
590 struct vmap_area *va;
591 struct vmap_block_queue *vbq;
592 unsigned long free, dirty;
593 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
594 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
595 union {
596 struct {
597 struct list_head free_list;
598 struct list_head dirty_list;
599 };
600 struct rcu_head rcu_head;
601 };
602};
603
604/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
605static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
606
607/*
608 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
609 * in the free path. Could get rid of this if we change the API to return a
610 * "cookie" from alloc, to be passed to free. But no big deal yet.
611 */
612static DEFINE_SPINLOCK(vmap_block_tree_lock);
613static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
614
615/*
616 * We should probably have a fallback mechanism to allocate virtual memory
617 * out of partially filled vmap blocks. However vmap block sizing should be
618 * fairly reasonable according to the vmalloc size, so it shouldn't be a
619 * big problem.
620 */
621
622static unsigned long addr_to_vb_idx(unsigned long addr)
623{
624 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
625 addr /= VMAP_BLOCK_SIZE;
626 return addr;
627}
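A numeric sketch (both constants assumed): with VMALLOC_START == 0xf0000000 (2MB-aligned) and VMAP_BLOCK_SIZE == 0x200000, addr_to_vb_idx(0xf0000000) == 0, addr_to_vb_idx(0xf0200000) == 1, and every address up to 0xf03fffff still maps to index 1.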
628
629static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
630{
631 struct vmap_block_queue *vbq;
632 struct vmap_block *vb;
633 struct vmap_area *va;
634 unsigned long vb_idx;
635 int node, err;
636
637 node = numa_node_id();
638
639 vb = kmalloc_node(sizeof(struct vmap_block),
640 gfp_mask & GFP_RECLAIM_MASK, node);
641 if (unlikely(!vb))
642 return ERR_PTR(-ENOMEM);
643
644 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
645 VMALLOC_START, VMALLOC_END,
646 node, gfp_mask);
647 if (unlikely(IS_ERR(va))) {
648 kfree(vb);
649 return ERR_PTR(PTR_ERR(va));
650 }
651
652 err = radix_tree_preload(gfp_mask);
653 if (unlikely(err)) {
654 kfree(vb);
655 free_vmap_area(va);
656 return ERR_PTR(err);
657 }
658
659 spin_lock_init(&vb->lock);
660 vb->va = va;
661 vb->free = VMAP_BBMAP_BITS;
662 vb->dirty = 0;
663 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
664 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
665 INIT_LIST_HEAD(&vb->free_list);
666 INIT_LIST_HEAD(&vb->dirty_list);
667
668 vb_idx = addr_to_vb_idx(va->va_start);
669 spin_lock(&vmap_block_tree_lock);
670 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
671 spin_unlock(&vmap_block_tree_lock);
672 BUG_ON(err);
673 radix_tree_preload_end();
674
675 vbq = &get_cpu_var(vmap_block_queue);
676 vb->vbq = vbq;
677 spin_lock(&vbq->lock);
678 list_add(&vb->free_list, &vbq->free);
679 spin_unlock(&vbq->lock);
 680 put_cpu_var(vmap_block_queue);
681
682 return vb;
683}
684
685static void rcu_free_vb(struct rcu_head *head)
686{
687 struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
688
689 kfree(vb);
690}
691
692static void free_vmap_block(struct vmap_block *vb)
693{
694 struct vmap_block *tmp;
695 unsigned long vb_idx;
696
697 spin_lock(&vb->vbq->lock);
698 if (!list_empty(&vb->free_list))
699 list_del(&vb->free_list);
700 if (!list_empty(&vb->dirty_list))
701 list_del(&vb->dirty_list);
702 spin_unlock(&vb->vbq->lock);
703
704 vb_idx = addr_to_vb_idx(vb->va->va_start);
705 spin_lock(&vmap_block_tree_lock);
706 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
707 spin_unlock(&vmap_block_tree_lock);
708 BUG_ON(tmp != vb);
709
710 free_unmap_vmap_area(vb->va);
711 call_rcu(&vb->rcu_head, rcu_free_vb);
712}
713
714static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
715{
716 struct vmap_block_queue *vbq;
717 struct vmap_block *vb;
718 unsigned long addr = 0;
719 unsigned int order;
720
721 BUG_ON(size & ~PAGE_MASK);
722 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
723 order = get_order(size);
724
725again:
726 rcu_read_lock();
727 vbq = &get_cpu_var(vmap_block_queue);
728 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
729 int i;
730
731 spin_lock(&vb->lock);
732 i = bitmap_find_free_region(vb->alloc_map,
733 VMAP_BBMAP_BITS, order);
734
735 if (i >= 0) {
736 addr = vb->va->va_start + (i << PAGE_SHIFT);
737 BUG_ON(addr_to_vb_idx(addr) !=
738 addr_to_vb_idx(vb->va->va_start));
739 vb->free -= 1UL << order;
740 if (vb->free == 0) {
741 spin_lock(&vbq->lock);
742 list_del_init(&vb->free_list);
743 spin_unlock(&vbq->lock);
744 }
745 spin_unlock(&vb->lock);
746 break;
747 }
748 spin_unlock(&vb->lock);
749 }
 750 put_cpu_var(vmap_block_queue);
751 rcu_read_unlock();
752
753 if (!addr) {
754 vb = new_vmap_block(gfp_mask);
755 if (IS_ERR(vb))
756 return vb;
757 goto again;
758 }
759
760 return (void *)addr;
761}
762
763static void vb_free(const void *addr, unsigned long size)
764{
765 unsigned long offset;
766 unsigned long vb_idx;
767 unsigned int order;
768 struct vmap_block *vb;
769
770 BUG_ON(size & ~PAGE_MASK);
771 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
772 order = get_order(size);
773
774 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
775
776 vb_idx = addr_to_vb_idx((unsigned long)addr);
777 rcu_read_lock();
778 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
779 rcu_read_unlock();
780 BUG_ON(!vb);
781
782 spin_lock(&vb->lock);
783 bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
784 if (!vb->dirty) {
785 spin_lock(&vb->vbq->lock);
786 list_add(&vb->dirty_list, &vb->vbq->dirty);
787 spin_unlock(&vb->vbq->lock);
788 }
789 vb->dirty += 1UL << order;
790 if (vb->dirty == VMAP_BBMAP_BITS) {
791 BUG_ON(vb->free || !list_empty(&vb->free_list));
792 spin_unlock(&vb->lock);
793 free_vmap_block(vb);
794 } else
795 spin_unlock(&vb->lock);
796}
797
798/**
799 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
800 *
801 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
802 * to amortize TLB flushing overheads. What this means is that any page you
 803 * have now may, in a former life, have been mapped into a kernel virtual
804 * address by the vmap layer and so there might be some CPUs with TLB entries
805 * still referencing that page (additional to the regular 1:1 kernel mapping).
806 *
807 * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
808 * be sure that none of the pages we have control over will have any aliases
809 * from the vmap layer.
810 */
811void vm_unmap_aliases(void)
812{
813 unsigned long start = ULONG_MAX, end = 0;
814 int cpu;
815 int flush = 0;
816
817 for_each_possible_cpu(cpu) {
818 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
819 struct vmap_block *vb;
820
821 rcu_read_lock();
822 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
823 int i;
824
825 spin_lock(&vb->lock);
826 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
827 while (i < VMAP_BBMAP_BITS) {
828 unsigned long s, e;
829 int j;
830 j = find_next_zero_bit(vb->dirty_map,
831 VMAP_BBMAP_BITS, i);
832
833 s = vb->va->va_start + (i << PAGE_SHIFT);
834 e = vb->va->va_start + (j << PAGE_SHIFT);
835 vunmap_page_range(s, e);
836 flush = 1;
837
838 if (s < start)
839 start = s;
840 if (e > end)
841 end = e;
842
843 i = j;
844 i = find_next_bit(vb->dirty_map,
845 VMAP_BBMAP_BITS, i);
846 }
847 spin_unlock(&vb->lock);
848 }
849 rcu_read_unlock();
850 }
851
852 __purge_vmap_area_lazy(&start, &end, 1, flush);
853}
854EXPORT_SYMBOL_GPL(vm_unmap_aliases);
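An illustrative caller, under stated assumptions (nothing here is from this patch): code that is about to change the cacheability of pages it owns, or hand them to a device, flushes lazy aliases first.

	/* Hypothetical sketch: ensure no lazily-kept vmap aliases remain. */
	static void prepare_pages_for_device(void)
	{
		vm_unmap_aliases();
		/* ... now change page attributes / hand the pages over ... */
	}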
855
856/**
857 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
858 * @mem: the pointer returned by vm_map_ram
859 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
860 */
861void vm_unmap_ram(const void *mem, unsigned int count)
862{
863 unsigned long size = count << PAGE_SHIFT;
864 unsigned long addr = (unsigned long)mem;
865
866 BUG_ON(!addr);
867 BUG_ON(addr < VMALLOC_START);
868 BUG_ON(addr > VMALLOC_END);
869 BUG_ON(addr & (PAGE_SIZE-1));
870
871 debug_check_no_locks_freed(mem, size);
872
873 if (likely(count <= VMAP_MAX_ALLOC))
874 vb_free(mem, size);
875 else
876 free_unmap_vmap_area_addr(addr);
877}
878EXPORT_SYMBOL(vm_unmap_ram);
879
880/**
881 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
882 * @pages: an array of pointers to the pages to be mapped
883 * @count: number of pages
884 * @node: prefer to allocate data structures on this node
885 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
886 * @returns: a pointer to the address that has been mapped, or NULL on failure
887 */
888void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
889{
890 unsigned long size = count << PAGE_SHIFT;
222 unsigned long addr; 891 unsigned long addr;
892 void *mem;
893
894 if (likely(count <= VMAP_MAX_ALLOC)) {
895 mem = vb_alloc(size, GFP_KERNEL);
896 if (IS_ERR(mem))
897 return NULL;
898 addr = (unsigned long)mem;
899 } else {
900 struct vmap_area *va;
901 va = alloc_vmap_area(size, PAGE_SIZE,
902 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
903 if (IS_ERR(va))
904 return NULL;
905
906 addr = va->va_start;
907 mem = (void *)addr;
908 }
909 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
910 vm_unmap_ram(mem, count);
911 return NULL;
912 }
913 return mem;
914}
915EXPORT_SYMBOL(vm_map_ram);
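The intended usage round trip, as a hedged sketch (in-kernel context assumed; error handling trimmed):

	/* Map two physically discontiguous pages at a contiguous kva. */
	static void touch_two_pages(struct page *pg[2])
	{
		void *va = vm_map_ram(pg, 2, -1, PAGE_KERNEL);	/* -1: any node */

		if (!va)
			return;
		memset(va, 0, 2 * PAGE_SIZE);	/* contiguous virtual view */
		vm_unmap_ram(va, 2);	/* count must match the map call */
	}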
916
917void __init vmalloc_init(void)
918{
919 int i;
920
921 for_each_possible_cpu(i) {
922 struct vmap_block_queue *vbq;
923
924 vbq = &per_cpu(vmap_block_queue, i);
925 spin_lock_init(&vbq->lock);
926 INIT_LIST_HEAD(&vbq->free);
927 INIT_LIST_HEAD(&vbq->dirty);
928 vbq->nr_dirty = 0;
929 }
930}
931
932void unmap_kernel_range(unsigned long addr, unsigned long size)
933{
934 unsigned long end = addr + size;
935 vunmap_page_range(addr, end);
936 flush_tlb_kernel_range(addr, end);
937}
938
939int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
940{
941 unsigned long addr = (unsigned long)area->addr;
942 unsigned long end = addr + area->size - PAGE_SIZE;
943 int err;
944
945 err = vmap_page_range(addr, end, prot, *pages);
946 if (err > 0) {
947 *pages += err;
948 err = 0;
949 }
950
951 return err;
952}
953EXPORT_SYMBOL_GPL(map_vm_area);
954
955/*** Old vmalloc interfaces ***/
956DEFINE_RWLOCK(vmlist_lock);
957struct vm_struct *vmlist;
958
959static struct vm_struct *__get_vm_area_node(unsigned long size,
960 unsigned long flags, unsigned long start, unsigned long end,
961 int node, gfp_t gfp_mask, void *caller)
962{
 963 struct vmap_area *va;
964 struct vm_struct *area;
965 struct vm_struct *tmp, **p;
966 unsigned long align = 1;
223 967
224 BUG_ON(in_interrupt()); 968 BUG_ON(in_interrupt());
225 if (flags & VM_IOREMAP) { 969 if (flags & VM_IOREMAP) {
@@ -232,13 +976,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
232 976
233 align = 1ul << bit; 977 align = 1ul << bit;
234 } 978 }
235 addr = ALIGN(start, align); 979
236 size = PAGE_ALIGN(size); 980 size = PAGE_ALIGN(size);
237 if (unlikely(!size)) 981 if (unlikely(!size))
238 return NULL; 982 return NULL;
239 983
240 area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); 984 area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
241
242 if (unlikely(!area)) 985 if (unlikely(!area))
243 return NULL; 986 return NULL;
244 987
@@ -247,48 +990,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
247 */ 990 */
248 size += PAGE_SIZE; 991 size += PAGE_SIZE;
249 992
250 write_lock(&vmlist_lock); 993 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
251 for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) { 994 if (IS_ERR(va)) {
252 if ((unsigned long)tmp->addr < addr) { 995 kfree(area);
253 if((unsigned long)tmp->addr + tmp->size >= addr) 996 return NULL;
254 addr = ALIGN(tmp->size +
255 (unsigned long)tmp->addr, align);
256 continue;
257 }
258 if ((size + addr) < addr)
259 goto out;
260 if (size + addr <= (unsigned long)tmp->addr)
261 goto found;
262 addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
263 if (addr > end - size)
264 goto out;
265 } 997 }
266 if ((size + addr) < addr)
267 goto out;
268 if (addr > end - size)
269 goto out;
270
271found:
272 area->next = *p;
273 *p = area;
274 998
275 area->flags = flags; 999 area->flags = flags;
276 area->addr = (void *)addr; 1000 area->addr = (void *)va->va_start;
277 area->size = size; 1001 area->size = size;
278 area->pages = NULL; 1002 area->pages = NULL;
279 area->nr_pages = 0; 1003 area->nr_pages = 0;
280 area->phys_addr = 0; 1004 area->phys_addr = 0;
281 area->caller = caller; 1005 area->caller = caller;
1006 va->private = area;
1007 va->flags |= VM_VM_AREA;
1008
1009 write_lock(&vmlist_lock);
1010 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1011 if (tmp->addr >= area->addr)
1012 break;
1013 }
1014 area->next = *p;
1015 *p = area;
282 write_unlock(&vmlist_lock); 1016 write_unlock(&vmlist_lock);
283 1017
284 return area; 1018 return area;
285
286out:
287 write_unlock(&vmlist_lock);
288 kfree(area);
289 if (printk_ratelimit())
290 printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
291 return NULL;
292} 1019}
293 1020
294struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, 1021struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
@@ -328,39 +1055,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
328 gfp_mask, __builtin_return_address(0)); 1055 gfp_mask, __builtin_return_address(0));
329} 1056}
330 1057
331/* Caller must hold vmlist_lock */ 1058static struct vm_struct *find_vm_area(const void *addr)
332static struct vm_struct *__find_vm_area(const void *addr)
333{ 1059{
334 struct vm_struct *tmp; 1060 struct vmap_area *va;
335 1061
336 for (tmp = vmlist; tmp != NULL; tmp = tmp->next) { 1062 va = find_vmap_area((unsigned long)addr);
337 if (tmp->addr == addr) 1063 if (va && va->flags & VM_VM_AREA)
338 break; 1064 return va->private;
339 }
340
341 return tmp;
342}
343
344/* Caller must hold vmlist_lock */
345static struct vm_struct *__remove_vm_area(const void *addr)
346{
347 struct vm_struct **p, *tmp;
348 1065
349 for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
350 if (tmp->addr == addr)
351 goto found;
352 }
353 return NULL; 1066 return NULL;
354
355found:
356 unmap_vm_area(tmp);
357 *p = tmp->next;
358
359 /*
360 * Remove the guard page.
361 */
362 tmp->size -= PAGE_SIZE;
363 return tmp;
364} 1067}
365 1068
366/** 1069/**
@@ -373,11 +1076,24 @@ found:
373 */ 1076 */
374struct vm_struct *remove_vm_area(const void *addr) 1077struct vm_struct *remove_vm_area(const void *addr)
375{ 1078{
376 struct vm_struct *v; 1079 struct vmap_area *va;
377 write_lock(&vmlist_lock); 1080
378 v = __remove_vm_area(addr); 1081 va = find_vmap_area((unsigned long)addr);
379 write_unlock(&vmlist_lock); 1082 if (va && va->flags & VM_VM_AREA) {
380 return v; 1083 struct vm_struct *vm = va->private;
1084 struct vm_struct *tmp, **p;
1085 free_unmap_vmap_area(va);
1086 vm->size -= PAGE_SIZE;
1087
1088 write_lock(&vmlist_lock);
1089 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1090 ;
1091 *p = tmp->next;
1092 write_unlock(&vmlist_lock);
1093
1094 return vm;
1095 }
1096 return NULL;
381} 1097}
382 1098
383static void __vunmap(const void *addr, int deallocate_pages) 1099static void __vunmap(const void *addr, int deallocate_pages)
@@ -487,6 +1203,8 @@ void *vmap(struct page **pages, unsigned int count,
487} 1203}
488EXPORT_SYMBOL(vmap); 1204EXPORT_SYMBOL(vmap);
489 1205
1206static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
1207 int node, void *caller);
490static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, 1208static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
491 pgprot_t prot, int node, void *caller) 1209 pgprot_t prot, int node, void *caller)
492{ 1210{
@@ -613,10 +1331,8 @@ void *vmalloc_user(unsigned long size)
613 1331
614 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); 1332 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
615 if (ret) { 1333 if (ret) {
616 write_lock(&vmlist_lock); 1334 area = find_vm_area(ret);
617 area = __find_vm_area(ret);
618 area->flags |= VM_USERMAP; 1335 area->flags |= VM_USERMAP;
619 write_unlock(&vmlist_lock);
620 } 1336 }
621 return ret; 1337 return ret;
622} 1338}
@@ -696,10 +1412,8 @@ void *vmalloc_32_user(unsigned long size)
696 1412
697 ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); 1413 ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
698 if (ret) { 1414 if (ret) {
699 write_lock(&vmlist_lock); 1415 area = find_vm_area(ret);
700 area = __find_vm_area(ret);
701 area->flags |= VM_USERMAP; 1416 area->flags |= VM_USERMAP;
702 write_unlock(&vmlist_lock);
703 } 1417 }
704 return ret; 1418 return ret;
705} 1419}
@@ -800,26 +1514,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
800 struct vm_struct *area; 1514 struct vm_struct *area;
801 unsigned long uaddr = vma->vm_start; 1515 unsigned long uaddr = vma->vm_start;
802 unsigned long usize = vma->vm_end - vma->vm_start; 1516 unsigned long usize = vma->vm_end - vma->vm_start;
803 int ret;
804 1517
805 if ((PAGE_SIZE-1) & (unsigned long)addr) 1518 if ((PAGE_SIZE-1) & (unsigned long)addr)
806 return -EINVAL; 1519 return -EINVAL;
807 1520
808 read_lock(&vmlist_lock); 1521 area = find_vm_area(addr);
809 area = __find_vm_area(addr);
810 if (!area) 1522 if (!area)
811 goto out_einval_locked; 1523 return -EINVAL;
812 1524
813 if (!(area->flags & VM_USERMAP)) 1525 if (!(area->flags & VM_USERMAP))
814 goto out_einval_locked; 1526 return -EINVAL;
815 1527
816 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) 1528 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
817 goto out_einval_locked; 1529 return -EINVAL;
818 read_unlock(&vmlist_lock);
819 1530
820 addr += pgoff << PAGE_SHIFT; 1531 addr += pgoff << PAGE_SHIFT;
821 do { 1532 do {
822 struct page *page = vmalloc_to_page(addr); 1533 struct page *page = vmalloc_to_page(addr);
1534 int ret;
1535
823 ret = vm_insert_page(vma, uaddr, page); 1536 ret = vm_insert_page(vma, uaddr, page);
824 if (ret) 1537 if (ret)
825 return ret; 1538 return ret;
@@ -832,11 +1545,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
832 /* Prevent "things" like memory migration? VM_flags need a cleanup... */ 1545 /* Prevent "things" like memory migration? VM_flags need a cleanup... */
833 vma->vm_flags |= VM_RESERVED; 1546 vma->vm_flags |= VM_RESERVED;
834 1547
835 return ret; 1548 return 0;
836
837out_einval_locked:
838 read_unlock(&vmlist_lock);
839 return -EINVAL;
840} 1549}
841EXPORT_SYMBOL(remap_vmalloc_range); 1550EXPORT_SYMBOL(remap_vmalloc_range);
842 1551
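For context, these simplified lookups keep the VM_USERMAP handshake intact: vmalloc_user()/vmalloc_32_user() tag the area, and remap_vmalloc_range() refuses anything untagged. A hedged driver-side sketch; demo_buf and demo_mmap are illustrative names, not from this patch:

static void *demo_buf;	/* assumed: demo_buf = vmalloc_user(buf_size) at init */

/* Illustrative mmap handler: remap_vmalloc_range() returns -EINVAL
 * unless demo_buf carries VM_USERMAP and the requested range fits.
 */
static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, demo_buf, vma->vm_pgoff);
}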
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ff1a58e7c10..3b5860294bb6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
39#include <linux/freezer.h> 39#include <linux/freezer.h>
40#include <linux/memcontrol.h> 40#include <linux/memcontrol.h>
41#include <linux/delayacct.h> 41#include <linux/delayacct.h>
42#include <linux/sysctl.h>
42 43
43#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
44#include <asm/div64.h> 45#include <asm/div64.h>
@@ -78,7 +79,7 @@ struct scan_control {
78 unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, 79 unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
79 unsigned long *scanned, int order, int mode, 80 unsigned long *scanned, int order, int mode,
80 struct zone *z, struct mem_cgroup *mem_cont, 81 struct zone *z, struct mem_cgroup *mem_cont,
81 int active); 82 int active, int file);
82}; 83};
83 84
84#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 85#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -470,6 +471,85 @@ int remove_mapping(struct address_space *mapping, struct page *page)
470 return 0; 471 return 0;
471} 472}
472 473
474/**
475 * putback_lru_page - put previously isolated page onto appropriate LRU list
476 * @page: page to be put back to appropriate lru list
477 *
478 * Add previously isolated @page to appropriate LRU list.
479 * Page may still be unevictable for other reasons.
480 *
481 * lru_lock must not be held, interrupts must be enabled.
482 */
483#ifdef CONFIG_UNEVICTABLE_LRU
484void putback_lru_page(struct page *page)
485{
486 int lru;
487 int active = !!TestClearPageActive(page);
488 int was_unevictable = PageUnevictable(page);
489
490 VM_BUG_ON(PageLRU(page));
491
492redo:
493 ClearPageUnevictable(page);
494
495 if (page_evictable(page, NULL)) {
496 /*
497 * For evictable pages, we can use the cache.
498 * In the event of a race, the worst case is that we end up
499 * with an unevictable page on an [in]active list.
500 * We know how to handle that.
501 */
502 lru = active + page_is_file_cache(page);
503 lru_cache_add_lru(page, lru);
504 } else {
505 /*
506 * Put unevictable pages directly on zone's unevictable
507 * list.
508 */
509 lru = LRU_UNEVICTABLE;
510 add_page_to_unevictable_list(page);
511 }
512 mem_cgroup_move_lists(page, lru);
513
514 /*
515 * page's status can change while we move it among the lru lists. If an
516 * evictable page ends up on the unevictable list, it will never be freed.
517 * To avoid that, check again after we add it to the list.
518 */
519 if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
520 if (!isolate_lru_page(page)) {
521 put_page(page);
522 goto redo;
523 }
524 /* This means someone else dropped this page from the LRU.
525 * It will be freed or put back on the LRU again; there is
526 * nothing to do here.
527 */
528 }
529
530 if (was_unevictable && lru != LRU_UNEVICTABLE)
531 count_vm_event(UNEVICTABLE_PGRESCUED);
532 else if (!was_unevictable && lru == LRU_UNEVICTABLE)
533 count_vm_event(UNEVICTABLE_PGCULLED);
534
535 put_page(page); /* drop ref from isolate */
536}
537
538#else /* CONFIG_UNEVICTABLE_LRU */
539
540void putback_lru_page(struct page *page)
541{
542 int lru;
543 VM_BUG_ON(PageLRU(page));
544
545 lru = !!TestClearPageActive(page) + page_is_file_cache(page);
546 lru_cache_add_lru(page, lru);
547 mem_cgroup_move_lists(page, lru);
548 put_page(page);
549}
550#endif /* CONFIG_UNEVICTABLE_LRU */
551
552
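The lru = active + page_is_file_cache(page) arithmetic above leans on the lru_list enum layout introduced elsewhere in this series, where page_is_file_cache() is assumed to return LRU_FILE rather than 1 for file-backed pages. A sketch of the equivalent computation, roughly what the page_lru() helper used further down is expected to do:

/* Illustrative, assuming LRU_BASE = 0, LRU_ACTIVE = 1, LRU_FILE = 2,
 * so the four evictable lists occupy indices 0..3 and
 * LRU_UNEVICTABLE follows them.
 */
static enum lru_list demo_page_lru(struct page *page)
{
	enum lru_list lru;

	if (PageUnevictable(page))
		return LRU_UNEVICTABLE;

	lru = page_is_file_cache(page);	/* 0 for anon, LRU_FILE for file */
	if (PageActive(page))
		lru += LRU_ACTIVE;
	return lru;
}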
473/* 553/*
474 * shrink_page_list() returns the number of reclaimed pages 554 * shrink_page_list() returns the number of reclaimed pages
475 */ 555 */
@@ -503,6 +583,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
503 583
504 sc->nr_scanned++; 584 sc->nr_scanned++;
505 585
586 if (unlikely(!page_evictable(page, NULL)))
587 goto cull_mlocked;
588
506 if (!sc->may_swap && page_mapped(page)) 589 if (!sc->may_swap && page_mapped(page))
507 goto keep_locked; 590 goto keep_locked;
508 591
@@ -539,9 +622,19 @@ static unsigned long shrink_page_list(struct list_head *page_list,
539 * Anonymous process memory has backing store? 622 * Anonymous process memory has backing store?
540 * Try to allocate it some swap space here. 623 * Try to allocate it some swap space here.
541 */ 624 */
542 if (PageAnon(page) && !PageSwapCache(page)) 625 if (PageAnon(page) && !PageSwapCache(page)) {
626 switch (try_to_munlock(page)) {
627 case SWAP_FAIL: /* shouldn't happen */
628 case SWAP_AGAIN:
629 goto keep_locked;
630 case SWAP_MLOCK:
631 goto cull_mlocked;
632 case SWAP_SUCCESS:
633 ; /* fall through: add to swap cache */
634 }
543 if (!add_to_swap(page, GFP_ATOMIC)) 635 if (!add_to_swap(page, GFP_ATOMIC))
544 goto activate_locked; 636 goto activate_locked;
637 }
545#endif /* CONFIG_SWAP */ 638#endif /* CONFIG_SWAP */
546 639
547 mapping = page_mapping(page); 640 mapping = page_mapping(page);
@@ -556,6 +649,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
556 goto activate_locked; 649 goto activate_locked;
557 case SWAP_AGAIN: 650 case SWAP_AGAIN:
558 goto keep_locked; 651 goto keep_locked;
652 case SWAP_MLOCK:
653 goto cull_mlocked;
559 case SWAP_SUCCESS: 654 case SWAP_SUCCESS:
560 ; /* try to free the page below */ 655 ; /* try to free the page below */
561 } 656 }
@@ -602,7 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
602 * possible for a page to have PageDirty set, but it is actually 697 * possible for a page to have PageDirty set, but it is actually
603 * clean (all its buffers are clean). This happens if the 698 * clean (all its buffers are clean). This happens if the
604 * buffers were written out directly, with submit_bh(). ext3 699 * buffers were written out directly, with submit_bh(). ext3
605 * will do this, as well as the blockdev mapping. 700 * will do this, as well as the blockdev mapping.
606 * try_to_release_page() will discover that cleanness and will 701 * try_to_release_page() will discover that cleanness and will
607 * drop the buffers and mark the page clean - it can be freed. 702 * drop the buffers and mark the page clean - it can be freed.
608 * 703 *
@@ -637,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
637 if (!mapping || !__remove_mapping(mapping, page)) 732 if (!mapping || !__remove_mapping(mapping, page))
638 goto keep_locked; 733 goto keep_locked;
639 734
640 unlock_page(page); 735 /*
736 * At this point, we have no other references and there is
737 * no way to pick any more up (removed from LRU, removed
738 * from pagecache). Can use non-atomic bitops now (and
739 * we obviously don't have to worry about waking up a process
740 * waiting on the page lock, because there are no references).
741 */
742 __clear_page_locked(page);
641free_it: 743free_it:
642 nr_reclaimed++; 744 nr_reclaimed++;
643 if (!pagevec_add(&freed_pvec, page)) { 745 if (!pagevec_add(&freed_pvec, page)) {
@@ -646,14 +748,23 @@ free_it:
646 } 748 }
647 continue; 749 continue;
648 750
751cull_mlocked:
752 unlock_page(page);
753 putback_lru_page(page);
754 continue;
755
649activate_locked: 756activate_locked:
757 /* Not a candidate for swapping, so reclaim swap space. */
758 if (PageSwapCache(page) && vm_swap_full())
759 remove_exclusive_swap_page_ref(page);
760 VM_BUG_ON(PageActive(page));
650 SetPageActive(page); 761 SetPageActive(page);
651 pgactivate++; 762 pgactivate++;
652keep_locked: 763keep_locked:
653 unlock_page(page); 764 unlock_page(page);
654keep: 765keep:
655 list_add(&page->lru, &ret_pages); 766 list_add(&page->lru, &ret_pages);
656 VM_BUG_ON(PageLRU(page)); 767 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
657 } 768 }
658 list_splice(&ret_pages, page_list); 769 list_splice(&ret_pages, page_list);
659 if (pagevec_count(&freed_pvec)) 770 if (pagevec_count(&freed_pvec))
@@ -677,7 +788,7 @@ keep:
677 * 788 *
678 * returns 0 on success, -ve errno on failure. 789 * returns 0 on success, -ve errno on failure.
679 */ 790 */
680int __isolate_lru_page(struct page *page, int mode) 791int __isolate_lru_page(struct page *page, int mode, int file)
681{ 792{
682 int ret = -EINVAL; 793 int ret = -EINVAL;
683 794
@@ -693,6 +804,17 @@ int __isolate_lru_page(struct page *page, int mode)
693 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) 804 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
694 return ret; 805 return ret;
695 806
807 if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
808 return ret;
809
810 /*
811 * When this function is being called for lumpy reclaim, we
812 * initially look into all LRU pages, active, inactive and
813 * unevictable; only give shrink_page_list evictable pages.
814 */
815 if (PageUnevictable(page))
816 return ret;
817
696 ret = -EBUSY; 818 ret = -EBUSY;
697 if (likely(get_page_unless_zero(page))) { 819 if (likely(get_page_unless_zero(page))) {
698 /* 820 /*
@@ -723,12 +845,13 @@ int __isolate_lru_page(struct page *page, int mode)
723 * @scanned: The number of pages that were scanned. 845 * @scanned: The number of pages that were scanned.
724 * @order: The caller's attempted allocation order 846 * @order: The caller's attempted allocation order
725 * @mode: One of the LRU isolation modes 847 * @mode: One of the LRU isolation modes
848 * @file: True [1] if isolating file [!anon] pages
726 * 849 *
727 * returns how many pages were moved onto *@dst. 850 * returns how many pages were moved onto *@dst.
728 */ 851 */
729static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 852static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
730 struct list_head *src, struct list_head *dst, 853 struct list_head *src, struct list_head *dst,
731 unsigned long *scanned, int order, int mode) 854 unsigned long *scanned, int order, int mode, int file)
732{ 855{
733 unsigned long nr_taken = 0; 856 unsigned long nr_taken = 0;
734 unsigned long scan; 857 unsigned long scan;
@@ -745,7 +868,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
745 868
746 VM_BUG_ON(!PageLRU(page)); 869 VM_BUG_ON(!PageLRU(page));
747 870
748 switch (__isolate_lru_page(page, mode)) { 871 switch (__isolate_lru_page(page, mode, file)) {
749 case 0: 872 case 0:
750 list_move(&page->lru, dst); 873 list_move(&page->lru, dst);
751 nr_taken++; 874 nr_taken++;
@@ -788,10 +911,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
788 break; 911 break;
789 912
790 cursor_page = pfn_to_page(pfn); 913 cursor_page = pfn_to_page(pfn);
914
791 /* Check that we have not crossed a zone boundary. */ 915 /* Check that we have not crossed a zone boundary. */
792 if (unlikely(page_zone_id(cursor_page) != zone_id)) 916 if (unlikely(page_zone_id(cursor_page) != zone_id))
793 continue; 917 continue;
794 switch (__isolate_lru_page(cursor_page, mode)) { 918 switch (__isolate_lru_page(cursor_page, mode, file)) {
795 case 0: 919 case 0:
796 list_move(&cursor_page->lru, dst); 920 list_move(&cursor_page->lru, dst);
797 nr_taken++; 921 nr_taken++;
@@ -802,7 +926,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
802 /* else it is being freed elsewhere */ 926 /* else it is being freed elsewhere */
803 list_move(&cursor_page->lru, src); 927 list_move(&cursor_page->lru, src);
804 default: 928 default:
805 break; 929 break; /* not on LRU, or on the wrong list */
806 } 930 }
807 } 931 }
808 } 932 }
@@ -816,40 +940,93 @@ static unsigned long isolate_pages_global(unsigned long nr,
816 unsigned long *scanned, int order, 940 unsigned long *scanned, int order,
817 int mode, struct zone *z, 941 int mode, struct zone *z,
818 struct mem_cgroup *mem_cont, 942 struct mem_cgroup *mem_cont,
819 int active) 943 int active, int file)
820{ 944{
945 int lru = LRU_BASE;
821 if (active) 946 if (active)
822 return isolate_lru_pages(nr, &z->active_list, dst, 947 lru += LRU_ACTIVE;
823 scanned, order, mode); 948 if (file)
824 else 949 lru += LRU_FILE;
825 return isolate_lru_pages(nr, &z->inactive_list, dst, 950 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
826 scanned, order, mode); 951 mode, !!file);
827} 952}
828 953
829/* 954/*
830 * clear_active_flags() is a helper for shrink_active_list(), clearing 955 * clear_active_flags() is a helper for shrink_active_list(), clearing
831 * any active bits from the pages in the list. 956 * any active bits from the pages in the list.
832 */ 957 */
833static unsigned long clear_active_flags(struct list_head *page_list) 958static unsigned long clear_active_flags(struct list_head *page_list,
959 unsigned int *count)
834{ 960{
835 int nr_active = 0; 961 int nr_active = 0;
962 int lru;
836 struct page *page; 963 struct page *page;
837 964
838 list_for_each_entry(page, page_list, lru) 965 list_for_each_entry(page, page_list, lru) {
966 lru = page_is_file_cache(page);
839 if (PageActive(page)) { 967 if (PageActive(page)) {
968 lru += LRU_ACTIVE;
840 ClearPageActive(page); 969 ClearPageActive(page);
841 nr_active++; 970 nr_active++;
842 } 971 }
972 count[lru]++;
973 }
843 974
844 return nr_active; 975 return nr_active;
845} 976}
846 977
978/**
979 * isolate_lru_page - tries to isolate a page from its LRU list
980 * @page: page to isolate from its LRU list
981 *
982 * Isolates a @page from an LRU list, clears PageLRU and adjusts the
983 * vmstat statistic corresponding to whatever LRU list the page was on.
984 *
985 * Returns 0 if the page was removed from an LRU list.
986 * Returns -EBUSY if the page was not on an LRU list.
987 *
988 * On success, the page will have PageLRU() cleared. If it was found on
989 * the active list, it will have PageActive set. If it was found on
990 * the unevictable list, it will have the PageUnevictable bit set. That flag
991 * may need to be cleared by the caller before letting the page go.
992 *
993 * The vmstat statistic corresponding to the list on which the page was
994 * found will be decremented.
995 *
996 * Restrictions:
997 * (1) Must be called with an elevated refcount on the page. This is a
998 * fundamental difference from isolate_lru_pages (which is called
999 * without a stable reference).
1000 * (2) the lru_lock must not be held.
1001 * (3) interrupts must be enabled.
1002 */
1003int isolate_lru_page(struct page *page)
1004{
1005 int ret = -EBUSY;
1006
1007 if (PageLRU(page)) {
1008 struct zone *zone = page_zone(page);
1009
1010 spin_lock_irq(&zone->lru_lock);
1011 if (PageLRU(page) && get_page_unless_zero(page)) {
1012 int lru = page_lru(page);
1013 ret = 0;
1014 ClearPageLRU(page);
1015
1016 del_page_from_lru_list(zone, page, lru);
1017 }
1018 spin_unlock_irq(&zone->lru_lock);
1019 }
1020 return ret;
1021}
1022
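A hedged usage sketch of the isolate/putback round trip under the three restrictions documented above; demo_requeue_page is an illustrative name:

/* Illustrative only: the caller holds its own page reference (1),
 * does not hold lru_lock (2), and runs with interrupts enabled (3).
 */
static void demo_requeue_page(struct page *page)
{
	get_page(page);
	if (isolate_lru_page(page) == 0) {
		/* off the LRU now; PageActive/PageUnevictable still
		 * record which list the page came from */
		putback_lru_page(page);	/* drops the isolation ref */
	}
	put_page(page);			/* drop our own reference */
}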
847/* 1023/*
848 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number 1024 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
849 * of reclaimed pages 1025 * of reclaimed pages
850 */ 1026 */
851static unsigned long shrink_inactive_list(unsigned long max_scan, 1027static unsigned long shrink_inactive_list(unsigned long max_scan,
852 struct zone *zone, struct scan_control *sc) 1028 struct zone *zone, struct scan_control *sc,
1029 int priority, int file)
853{ 1030{
854 LIST_HEAD(page_list); 1031 LIST_HEAD(page_list);
855 struct pagevec pvec; 1032 struct pagevec pvec;
@@ -866,20 +1043,43 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
866 unsigned long nr_scan; 1043 unsigned long nr_scan;
867 unsigned long nr_freed; 1044 unsigned long nr_freed;
868 unsigned long nr_active; 1045 unsigned long nr_active;
1046 unsigned int count[NR_LRU_LISTS] = { 0, };
1047 int mode = ISOLATE_INACTIVE;
1048
1049 /*
1050 * If we need a large contiguous chunk of memory, or have
1051 * trouble getting a small set of contiguous pages, we
1052 * will reclaim both active and inactive pages.
1053 *
1054 * We use the same threshold as pageout congestion_wait below.
1055 */
1056 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1057 mode = ISOLATE_BOTH;
1058 else if (sc->order && priority < DEF_PRIORITY - 2)
1059 mode = ISOLATE_BOTH;
869 1060
870 nr_taken = sc->isolate_pages(sc->swap_cluster_max, 1061 nr_taken = sc->isolate_pages(sc->swap_cluster_max,
871 &page_list, &nr_scan, sc->order, 1062 &page_list, &nr_scan, sc->order, mode,
872 (sc->order > PAGE_ALLOC_COSTLY_ORDER)? 1063 zone, sc->mem_cgroup, 0, file);
873 ISOLATE_BOTH : ISOLATE_INACTIVE, 1064 nr_active = clear_active_flags(&page_list, count);
874 zone, sc->mem_cgroup, 0);
875 nr_active = clear_active_flags(&page_list);
876 __count_vm_events(PGDEACTIVATE, nr_active); 1065 __count_vm_events(PGDEACTIVATE, nr_active);
877 1066
878 __mod_zone_page_state(zone, NR_ACTIVE, -nr_active); 1067 __mod_zone_page_state(zone, NR_ACTIVE_FILE,
879 __mod_zone_page_state(zone, NR_INACTIVE, 1068 -count[LRU_ACTIVE_FILE]);
880 -(nr_taken - nr_active)); 1069 __mod_zone_page_state(zone, NR_INACTIVE_FILE,
881 if (scan_global_lru(sc)) 1070 -count[LRU_INACTIVE_FILE]);
1071 __mod_zone_page_state(zone, NR_ACTIVE_ANON,
1072 -count[LRU_ACTIVE_ANON]);
1073 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
1074 -count[LRU_INACTIVE_ANON]);
1075
1076 if (scan_global_lru(sc)) {
882 zone->pages_scanned += nr_scan; 1077 zone->pages_scanned += nr_scan;
1078 zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
1079 zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
1080 zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
1081 zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
1082 }
883 spin_unlock_irq(&zone->lru_lock); 1083 spin_unlock_irq(&zone->lru_lock);
884 1084
885 nr_scanned += nr_scan; 1085 nr_scanned += nr_scan;
@@ -899,7 +1099,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
899 * The attempt at page out may have made some 1099 * The attempt at page out may have made some
900 * of the pages active, mark them inactive again. 1100 * of the pages active, mark them inactive again.
901 */ 1101 */
902 nr_active = clear_active_flags(&page_list); 1102 nr_active = clear_active_flags(&page_list, count);
903 count_vm_events(PGDEACTIVATE, nr_active); 1103 count_vm_events(PGDEACTIVATE, nr_active);
904 1104
905 nr_freed += shrink_page_list(&page_list, sc, 1105 nr_freed += shrink_page_list(&page_list, sc,
@@ -924,14 +1124,24 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
924 * Put back any unfreeable pages. 1124 * Put back any unfreeable pages.
925 */ 1125 */
926 while (!list_empty(&page_list)) { 1126 while (!list_empty(&page_list)) {
1127 int lru;
927 page = lru_to_page(&page_list); 1128 page = lru_to_page(&page_list);
928 VM_BUG_ON(PageLRU(page)); 1129 VM_BUG_ON(PageLRU(page));
929 SetPageLRU(page);
930 list_del(&page->lru); 1130 list_del(&page->lru);
931 if (PageActive(page)) 1131 if (unlikely(!page_evictable(page, NULL))) {
932 add_page_to_active_list(zone, page); 1132 spin_unlock_irq(&zone->lru_lock);
933 else 1133 putback_lru_page(page);
934 add_page_to_inactive_list(zone, page); 1134 spin_lock_irq(&zone->lru_lock);
1135 continue;
1136 }
1137 SetPageLRU(page);
1138 lru = page_lru(page);
1139 add_page_to_lru_list(zone, page, lru);
1140 mem_cgroup_move_lists(page, lru);
1141 if (PageActive(page) && scan_global_lru(sc)) {
1142 int file = !!page_is_file_cache(page);
1143 zone->recent_rotated[file]++;
1144 }
935 if (!pagevec_add(&pvec, page)) { 1145 if (!pagevec_add(&pvec, page)) {
936 spin_unlock_irq(&zone->lru_lock); 1146 spin_unlock_irq(&zone->lru_lock);
937 __pagevec_release(&pvec); 1147 __pagevec_release(&pvec);
@@ -962,115 +1172,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
962 1172
963static inline int zone_is_near_oom(struct zone *zone) 1173static inline int zone_is_near_oom(struct zone *zone)
964{ 1174{
965 return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) 1175 return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
966 + zone_page_state(zone, NR_INACTIVE))*3;
967}
968
969/*
970 * Determine whether we should try to reclaim mapped pages.
971 * This is called only when sc->mem_cgroup is NULL.
972 */
973static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
974 int priority)
975{
976 long mapped_ratio;
977 long distress;
978 long swap_tendency;
979 long imbalance;
980 int reclaim_mapped = 0;
981 int prev_priority;
982
983 if (scan_global_lru(sc) && zone_is_near_oom(zone))
984 return 1;
985 /*
986 * `distress' is a measure of how much trouble we're having
987 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
988 */
989 if (scan_global_lru(sc))
990 prev_priority = zone->prev_priority;
991 else
992 prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
993
994 distress = 100 >> min(prev_priority, priority);
995
996 /*
997 * The point of this algorithm is to decide when to start
998 * reclaiming mapped memory instead of just pagecache. Work out
999 * how much memory
1000 * is mapped.
1001 */
1002 if (scan_global_lru(sc))
1003 mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
1004 global_page_state(NR_ANON_PAGES)) * 100) /
1005 vm_total_pages;
1006 else
1007 mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
1008
1009 /*
1010 * Now decide how much we really want to unmap some pages. The
1011 * mapped ratio is downgraded - just because there's a lot of
1012 * mapped memory doesn't necessarily mean that page reclaim
1013 * isn't succeeding.
1014 *
1015 * The distress ratio is important - we don't want to start
1016 * going oom.
1017 *
1018 * A 100% value of vm_swappiness overrides this algorithm
1019 * altogether.
1020 */
1021 swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
1022
1023 /*
1024 * If there's huge imbalance between active and inactive
1025 * (think active 100 times larger than inactive) we should
1026 * become more permissive, or the system will take too much
1027 * cpu before it starts swapping during memory pressure.
1028 * Distress is about avoiding early-oom, this is about
1029 * making swappiness graceful despite setting it to low
1030 * values.
1031 *
1032 * Avoid div by zero with nr_inactive+1, and max resulting
1033 * value is vm_total_pages.
1034 */
1035 if (scan_global_lru(sc)) {
1036 imbalance = zone_page_state(zone, NR_ACTIVE);
1037 imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
1038 } else
1039 imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
1040
1041 /*
1042 * Reduce the effect of imbalance if swappiness is low,
1043 * this means for a swappiness very low, the imbalance
1044 * must be much higher than 100 for this logic to make
1045 * the difference.
1046 *
1047 * Max temporary value is vm_total_pages*100.
1048 */
1049 imbalance *= (vm_swappiness + 1);
1050 imbalance /= 100;
1051
1052 /*
1053 * If not much of the ram is mapped, make the imbalance
1054 * less relevant; refilling the inactive list with mapped
1055 * pages is a high priority only in the presence of a high
1056 * ratio of mapped pages.
1057 *
1058 * Max temporary value is vm_total_pages*100.
1059 */
1060 imbalance *= mapped_ratio;
1061 imbalance /= 100;
1062
1063 /* apply imbalance feedback to swap_tendency */
1064 swap_tendency += imbalance;
1065
1066 /*
1067 * Now use this metric to decide whether to start moving mapped
1068 * memory onto the inactive list.
1069 */
1070 if (swap_tendency >= 100)
1071 reclaim_mapped = 1;
1072
1073 return reclaim_mapped;
1074} 1176}
1075 1177
1076/* 1178/*
@@ -1093,53 +1195,71 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
1093 1195
1094 1196
1095static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 1197static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1096 struct scan_control *sc, int priority) 1198 struct scan_control *sc, int priority, int file)
1097{ 1199{
1098 unsigned long pgmoved; 1200 unsigned long pgmoved;
1099 int pgdeactivate = 0; 1201 int pgdeactivate = 0;
1100 unsigned long pgscanned; 1202 unsigned long pgscanned;
1101 LIST_HEAD(l_hold); /* The pages which were snipped off */ 1203 LIST_HEAD(l_hold); /* The pages which were snipped off */
1102 LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */ 1204 LIST_HEAD(l_inactive);
1103 LIST_HEAD(l_active); /* Pages to go onto the active_list */
1104 struct page *page; 1205 struct page *page;
1105 struct pagevec pvec; 1206 struct pagevec pvec;
1106 int reclaim_mapped = 0; 1207 enum lru_list lru;
1107
1108 if (sc->may_swap)
1109 reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
1110 1208
1111 lru_add_drain(); 1209 lru_add_drain();
1112 spin_lock_irq(&zone->lru_lock); 1210 spin_lock_irq(&zone->lru_lock);
1113 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, 1211 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
1114 ISOLATE_ACTIVE, zone, 1212 ISOLATE_ACTIVE, zone,
1115 sc->mem_cgroup, 1); 1213 sc->mem_cgroup, 1, file);
1116 /* 1214 /*
1117 * zone->pages_scanned is used to detect a zone's OOM state 1215 * zone->pages_scanned is used to detect a zone's OOM state
1118 * mem_cgroup remembers nr_scan by itself. 1216 * mem_cgroup remembers nr_scan by itself.
1119 */ 1217 */
1120 if (scan_global_lru(sc)) 1218 if (scan_global_lru(sc)) {
1121 zone->pages_scanned += pgscanned; 1219 zone->pages_scanned += pgscanned;
1220 zone->recent_scanned[!!file] += pgmoved;
1221 }
1122 1222
1123 __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved); 1223 if (file)
1224 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
1225 else
1226 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
1124 spin_unlock_irq(&zone->lru_lock); 1227 spin_unlock_irq(&zone->lru_lock);
1125 1228
1229 pgmoved = 0;
1126 while (!list_empty(&l_hold)) { 1230 while (!list_empty(&l_hold)) {
1127 cond_resched(); 1231 cond_resched();
1128 page = lru_to_page(&l_hold); 1232 page = lru_to_page(&l_hold);
1129 list_del(&page->lru); 1233 list_del(&page->lru);
1130 if (page_mapped(page)) { 1234
1131 if (!reclaim_mapped || 1235 if (unlikely(!page_evictable(page, NULL))) {
1132 (total_swap_pages == 0 && PageAnon(page)) || 1236 putback_lru_page(page);
1133 page_referenced(page, 0, sc->mem_cgroup)) { 1237 continue;
1134 list_add(&page->lru, &l_active);
1135 continue;
1136 }
1137 } 1238 }
1239
1240 /* page_referenced clears PageReferenced */
1241 if (page_mapping_inuse(page) &&
1242 page_referenced(page, 0, sc->mem_cgroup))
1243 pgmoved++;
1244
1138 list_add(&page->lru, &l_inactive); 1245 list_add(&page->lru, &l_inactive);
1139 } 1246 }
1140 1247
1248 /*
1249 * Count referenced pages from currently used mappings as
1250 * rotated, even though they are moved to the inactive list.
1251 * This helps balance scan pressure between file and anonymous
1252 * pages in get_scan_ratio.
1253 */
1254 zone->recent_rotated[!!file] += pgmoved;
1255
1256 /*
1257 * Move the pages to the [file or anon] inactive list.
1258 */
1141 pagevec_init(&pvec, 1); 1259 pagevec_init(&pvec, 1);
1260
1142 pgmoved = 0; 1261 pgmoved = 0;
1262 lru = LRU_BASE + file * LRU_FILE;
1143 spin_lock_irq(&zone->lru_lock); 1263 spin_lock_irq(&zone->lru_lock);
1144 while (!list_empty(&l_inactive)) { 1264 while (!list_empty(&l_inactive)) {
1145 page = lru_to_page(&l_inactive); 1265 page = lru_to_page(&l_inactive);
@@ -1149,11 +1269,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1149 VM_BUG_ON(!PageActive(page)); 1269 VM_BUG_ON(!PageActive(page));
1150 ClearPageActive(page); 1270 ClearPageActive(page);
1151 1271
1152 list_move(&page->lru, &zone->inactive_list); 1272 list_move(&page->lru, &zone->lru[lru].list);
1153 mem_cgroup_move_lists(page, false); 1273 mem_cgroup_move_lists(page, lru);
1154 pgmoved++; 1274 pgmoved++;
1155 if (!pagevec_add(&pvec, page)) { 1275 if (!pagevec_add(&pvec, page)) {
1156 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1276 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1157 spin_unlock_irq(&zone->lru_lock); 1277 spin_unlock_irq(&zone->lru_lock);
1158 pgdeactivate += pgmoved; 1278 pgdeactivate += pgmoved;
1159 pgmoved = 0; 1279 pgmoved = 0;
@@ -1163,104 +1283,189 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1163 spin_lock_irq(&zone->lru_lock); 1283 spin_lock_irq(&zone->lru_lock);
1164 } 1284 }
1165 } 1285 }
1166 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1286 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
1167 pgdeactivate += pgmoved; 1287 pgdeactivate += pgmoved;
1168 if (buffer_heads_over_limit) { 1288 if (buffer_heads_over_limit) {
1169 spin_unlock_irq(&zone->lru_lock); 1289 spin_unlock_irq(&zone->lru_lock);
1170 pagevec_strip(&pvec); 1290 pagevec_strip(&pvec);
1171 spin_lock_irq(&zone->lru_lock); 1291 spin_lock_irq(&zone->lru_lock);
1172 } 1292 }
1173
1174 pgmoved = 0;
1175 while (!list_empty(&l_active)) {
1176 page = lru_to_page(&l_active);
1177 prefetchw_prev_lru_page(page, &l_active, flags);
1178 VM_BUG_ON(PageLRU(page));
1179 SetPageLRU(page);
1180 VM_BUG_ON(!PageActive(page));
1181
1182 list_move(&page->lru, &zone->active_list);
1183 mem_cgroup_move_lists(page, true);
1184 pgmoved++;
1185 if (!pagevec_add(&pvec, page)) {
1186 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
1187 pgmoved = 0;
1188 spin_unlock_irq(&zone->lru_lock);
1189 __pagevec_release(&pvec);
1190 spin_lock_irq(&zone->lru_lock);
1191 }
1192 }
1193 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
1194
1195 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1293 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1196 __count_vm_events(PGDEACTIVATE, pgdeactivate); 1294 __count_vm_events(PGDEACTIVATE, pgdeactivate);
1197 spin_unlock_irq(&zone->lru_lock); 1295 spin_unlock_irq(&zone->lru_lock);
1296 if (vm_swap_full())
1297 pagevec_swap_free(&pvec);
1198 1298
1199 pagevec_release(&pvec); 1299 pagevec_release(&pvec);
1200} 1300}
1201 1301
1302static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1303 struct zone *zone, struct scan_control *sc, int priority)
1304{
1305 int file = is_file_lru(lru);
1306
1307 if (lru == LRU_ACTIVE_FILE) {
1308 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1309 return 0;
1310 }
1311
1312 if (lru == LRU_ACTIVE_ANON &&
1313 (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
1314 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1315 return 0;
1316 }
1317 return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
1318}
1319
1320/*
1321 * Determine how aggressively the anon and file LRU lists should be
1322 * scanned. The relative value of each set of LRU lists is determined
1323 * by looking at the fraction of the pages scanned we did rotate back
1324 * onto the active list instead of evict.
1325 *
1326 * percent[0] specifies how much pressure to put on ram/swap backed
1327 * memory, while percent[1] determines pressure on the file LRUs.
1328 */
1329static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1330 unsigned long *percent)
1331{
1332 unsigned long anon, file, free;
1333 unsigned long anon_prio, file_prio;
1334 unsigned long ap, fp;
1335
1336 anon = zone_page_state(zone, NR_ACTIVE_ANON) +
1337 zone_page_state(zone, NR_INACTIVE_ANON);
1338 file = zone_page_state(zone, NR_ACTIVE_FILE) +
1339 zone_page_state(zone, NR_INACTIVE_FILE);
1340 free = zone_page_state(zone, NR_FREE_PAGES);
1341
1342 /* If we have no swap space, do not bother scanning anon pages. */
1343 if (nr_swap_pages <= 0) {
1344 percent[0] = 0;
1345 percent[1] = 100;
1346 return;
1347 }
1348
1349 /* If we have very few page cache pages, force-scan anon pages. */
1350 if (unlikely(file + free <= zone->pages_high)) {
1351 percent[0] = 100;
1352 percent[1] = 0;
1353 return;
1354 }
1355
1356 /*
1357 * OK, so we have swap space and a fair amount of page cache
1358 * pages. We use the recently rotated / recently scanned
1359 * ratios to determine how valuable each cache is.
1360 *
1361 * Because workloads change over time (and to avoid overflow)
1362 * we keep these statistics as a floating average, which ends
1363 * up weighing recent references more than old ones.
1364 *
1365 * anon in [0], file in [1]
1366 */
1367 if (unlikely(zone->recent_scanned[0] > anon / 4)) {
1368 spin_lock_irq(&zone->lru_lock);
1369 zone->recent_scanned[0] /= 2;
1370 zone->recent_rotated[0] /= 2;
1371 spin_unlock_irq(&zone->lru_lock);
1372 }
1373
1374 if (unlikely(zone->recent_scanned[1] > file / 4)) {
1375 spin_lock_irq(&zone->lru_lock);
1376 zone->recent_scanned[1] /= 2;
1377 zone->recent_rotated[1] /= 2;
1378 spin_unlock_irq(&zone->lru_lock);
1379 }
1380
1381 /*
1382 * With swappiness at 100, anonymous and file have the same priority.
1383 * This scanning priority is essentially the inverse of IO cost.
1384 */
1385 anon_prio = sc->swappiness;
1386 file_prio = 200 - sc->swappiness;
1387
1388 /*
1389 *                  anon       recent_rotated[0]
1390 * %anon = 100 * ----------- / ----------------- * IO cost
1391 *               anon + file       rotate_sum
1392 */
1393 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
1394 ap /= zone->recent_rotated[0] + 1;
1395
1396 fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
1397 fp /= zone->recent_rotated[1] + 1;
1398
1399 /* Normalize to percentages */
1400 percent[0] = 100 * ap / (ap + fp + 1);
1401 percent[1] = 100 - percent[0];
1402}
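A worked instance of the ratio arithmetic, with assumed sample values:

/*
 * Worked example (assumed numbers, illustrative only):
 *   swappiness = 60  ->  anon_prio = 60, file_prio = 140
 *   recent_scanned[0] = 100, recent_rotated[0] = 50	(anon)
 *   recent_scanned[1] = 100, recent_rotated[1] = 10	(file)
 *
 *   ap = 61 * 101 / 51  = 120
 *   fp = 141 * 101 / 11 = 1294
 *
 *   percent[0] = 100 * 120 / (120 + 1294 + 1) = 8	(anon)
 *   percent[1] = 100 - 8 = 92				(file)
 *
 * The file lists, whose pages rotated back far less often, absorb
 * most of the scan pressure.
 */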
1403
1404
1202/* 1405/*
1203 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 1406 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
1204 */ 1407 */
1205static unsigned long shrink_zone(int priority, struct zone *zone, 1408static unsigned long shrink_zone(int priority, struct zone *zone,
1206 struct scan_control *sc) 1409 struct scan_control *sc)
1207{ 1410{
1208 unsigned long nr_active; 1411 unsigned long nr[NR_LRU_LISTS];
1209 unsigned long nr_inactive;
1210 unsigned long nr_to_scan; 1412 unsigned long nr_to_scan;
1211 unsigned long nr_reclaimed = 0; 1413 unsigned long nr_reclaimed = 0;
1414 unsigned long percent[2]; /* anon @ 0; file @ 1 */
1415 enum lru_list l;
1212 1416
1213 if (scan_global_lru(sc)) { 1417 get_scan_ratio(zone, sc, percent);
1214 /*
1215 * Add one to nr_to_scan just to make sure that the kernel
1216 * will slowly sift through the active list.
1217 */
1218 zone->nr_scan_active +=
1219 (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
1220 nr_active = zone->nr_scan_active;
1221 zone->nr_scan_inactive +=
1222 (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
1223 nr_inactive = zone->nr_scan_inactive;
1224 if (nr_inactive >= sc->swap_cluster_max)
1225 zone->nr_scan_inactive = 0;
1226 else
1227 nr_inactive = 0;
1228
1229 if (nr_active >= sc->swap_cluster_max)
1230 zone->nr_scan_active = 0;
1231 else
1232 nr_active = 0;
1233 } else {
1234 /*
1235 * This reclaim occurs not because of a zone memory shortage but
1236 * because the memory controller has hit its limit.
1237 * In that case, don't modify zone reclaim related data.
1238 */
1239 nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
1240 zone, priority);
1241
1242 nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
1243 zone, priority);
1244 }
1245 1418
1419 for_each_evictable_lru(l) {
1420 if (scan_global_lru(sc)) {
1421 int file = is_file_lru(l);
1422 int scan;
1246 1423
1247 while (nr_active || nr_inactive) { 1424 scan = zone_page_state(zone, NR_LRU_BASE + l);
1248 if (nr_active) { 1425 if (priority) {
1249 nr_to_scan = min(nr_active, 1426 scan >>= priority;
1250 (unsigned long)sc->swap_cluster_max); 1427 scan = (scan * percent[file]) / 100;
1251 nr_active -= nr_to_scan; 1428 }
1252 shrink_active_list(nr_to_scan, zone, sc, priority); 1429 zone->lru[l].nr_scan += scan;
1430 nr[l] = zone->lru[l].nr_scan;
1431 if (nr[l] >= sc->swap_cluster_max)
1432 zone->lru[l].nr_scan = 0;
1433 else
1434 nr[l] = 0;
1435 } else {
1436 /*
1437 * This reclaim occurs not because of a zone memory shortage
1438 * but because the memory controller has hit its limit.
1439 * Don't modify zone reclaim related data.
1440 */
1441 nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
1442 priority, l);
1253 } 1443 }
1444 }
1254 1445
1255 if (nr_inactive) { 1446 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
1256 nr_to_scan = min(nr_inactive, 1447 nr[LRU_INACTIVE_FILE]) {
1448 for_each_evictable_lru(l) {
1449 if (nr[l]) {
1450 nr_to_scan = min(nr[l],
1257 (unsigned long)sc->swap_cluster_max); 1451 (unsigned long)sc->swap_cluster_max);
1258 nr_inactive -= nr_to_scan; 1452 nr[l] -= nr_to_scan;
1259 nr_reclaimed += shrink_inactive_list(nr_to_scan, zone, 1453
1260 sc); 1454 nr_reclaimed += shrink_list(l, nr_to_scan,
1455 zone, sc, priority);
1456 }
1261 } 1457 }
1262 } 1458 }
1263 1459
1460 /*
1461 * Even if we did not try to evict anon pages at all, we want to
1462 * rebalance the anon lru active/inactive ratio.
1463 */
1464 if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
1465 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1466 else if (!scan_global_lru(sc))
1467 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1468
1264 throttle_vm_writeout(sc->gfp_mask); 1469 throttle_vm_writeout(sc->gfp_mask);
1265 return nr_reclaimed; 1470 return nr_reclaimed;
1266} 1471}
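A worked instance of the per-list scan-target arithmetic in the loop above, with assumed sample values:

/*
 * Worked example (assumed numbers, illustrative only):
 *   NR_INACTIVE_FILE = 40960 pages, priority = 12, percent[1] = 92
 *
 *   scan = 40960 >> 12     = 10
 *   scan = 10 * 92 / 100   = 9
 *
 * Nine pages are credited to zone->lru[LRU_INACTIVE_FILE].nr_scan;
 * nothing is scanned until the accumulated target reaches
 * sc->swap_cluster_max (normally SWAP_CLUSTER_MAX, i.e. 32).
 */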
@@ -1321,7 +1526,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
1321 1526
1322 return nr_reclaimed; 1527 return nr_reclaimed;
1323} 1528}
1324 1529
1325/* 1530/*
1326 * This is the main entry point to direct page reclaim. 1531 * This is the main entry point to direct page reclaim.
1327 * 1532 *
@@ -1364,8 +1569,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1364 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1569 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1365 continue; 1570 continue;
1366 1571
1367 lru_pages += zone_page_state(zone, NR_ACTIVE) 1572 lru_pages += zone_lru_pages(zone);
1368 + zone_page_state(zone, NR_INACTIVE);
1369 } 1573 }
1370 } 1574 }
1371 1575
@@ -1555,6 +1759,14 @@ loop_again:
1555 priority != DEF_PRIORITY) 1759 priority != DEF_PRIORITY)
1556 continue; 1760 continue;
1557 1761
1762 /*
1763 * Do some background aging of the anon list, to give
1764 * pages a chance to be referenced before reclaiming.
1765 */
1766 if (inactive_anon_is_low(zone))
1767 shrink_active_list(SWAP_CLUSTER_MAX, zone,
1768 &sc, priority, 0);
1769
1558 if (!zone_watermark_ok(zone, order, zone->pages_high, 1770 if (!zone_watermark_ok(zone, order, zone->pages_high,
1559 0, 0)) { 1771 0, 0)) {
1560 end_zone = i; 1772 end_zone = i;
@@ -1567,8 +1779,7 @@ loop_again:
1567 for (i = 0; i <= end_zone; i++) { 1779 for (i = 0; i <= end_zone; i++) {
1568 struct zone *zone = pgdat->node_zones + i; 1780 struct zone *zone = pgdat->node_zones + i;
1569 1781
1570 lru_pages += zone_page_state(zone, NR_ACTIVE) 1782 lru_pages += zone_lru_pages(zone);
1571 + zone_page_state(zone, NR_INACTIVE);
1572 } 1783 }
1573 1784
1574 /* 1785 /*
@@ -1612,8 +1823,7 @@ loop_again:
1612 if (zone_is_all_unreclaimable(zone)) 1823 if (zone_is_all_unreclaimable(zone))
1613 continue; 1824 continue;
1614 if (nr_slab == 0 && zone->pages_scanned >= 1825 if (nr_slab == 0 && zone->pages_scanned >=
1615 (zone_page_state(zone, NR_ACTIVE) 1826 (zone_lru_pages(zone) * 6))
1616 + zone_page_state(zone, NR_INACTIVE)) * 6)
1617 zone_set_flag(zone, 1827 zone_set_flag(zone,
1618 ZONE_ALL_UNRECLAIMABLE); 1828 ZONE_ALL_UNRECLAIMABLE);
1619 /* 1829 /*
@@ -1667,7 +1877,7 @@ out:
1667 1877
1668/* 1878/*
1669 * The background pageout daemon, started as a kernel thread 1879 * The background pageout daemon, started as a kernel thread
1670 * from the init process. 1880 * from the init process.
1671 * 1881 *
1672 * This basically trickles out pages so that we have _some_ 1882 * This basically trickles out pages so that we have _some_
1673 * free memory available even if there is no other activity 1883 * free memory available even if there is no other activity
@@ -1761,6 +1971,14 @@ void wakeup_kswapd(struct zone *zone, int order)
1761 wake_up_interruptible(&pgdat->kswapd_wait); 1971 wake_up_interruptible(&pgdat->kswapd_wait);
1762} 1972}
1763 1973
1974unsigned long global_lru_pages(void)
1975{
1976 return global_page_state(NR_ACTIVE_ANON)
1977 + global_page_state(NR_ACTIVE_FILE)
1978 + global_page_state(NR_INACTIVE_ANON)
1979 + global_page_state(NR_INACTIVE_FILE);
1980}
1981
1764#ifdef CONFIG_PM 1982#ifdef CONFIG_PM
1765/* 1983/*
1766 * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages 1984 * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
@@ -1774,6 +1992,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1774{ 1992{
1775 struct zone *zone; 1993 struct zone *zone;
1776 unsigned long nr_to_scan, ret = 0; 1994 unsigned long nr_to_scan, ret = 0;
1995 enum lru_list l;
1777 1996
1778 for_each_zone(zone) { 1997 for_each_zone(zone) {
1779 1998
@@ -1783,38 +2002,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1783 if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) 2002 if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
1784 continue; 2003 continue;
1785 2004
1786 /* For pass = 0 we don't shrink the active list */ 2005 for_each_evictable_lru(l) {
1787 if (pass > 0) { 2006 /* For pass = 0, we don't shrink the active list */
1788 zone->nr_scan_active += 2007 if (pass == 0 &&
1789 (zone_page_state(zone, NR_ACTIVE) >> prio) + 1; 2008 (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
1790 if (zone->nr_scan_active >= nr_pages || pass > 3) { 2009 continue;
1791 zone->nr_scan_active = 0; 2010
2011 zone->lru[l].nr_scan +=
2012 (zone_page_state(zone, NR_LRU_BASE + l)
2013 >> prio) + 1;
2014 if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
2015 zone->lru[l].nr_scan = 0;
1792 nr_to_scan = min(nr_pages, 2016 nr_to_scan = min(nr_pages,
1793 zone_page_state(zone, NR_ACTIVE)); 2017 zone_page_state(zone,
1794 shrink_active_list(nr_to_scan, zone, sc, prio); 2018 NR_LRU_BASE + l));
2019 ret += shrink_list(l, nr_to_scan, zone,
2020 sc, prio);
2021 if (ret >= nr_pages)
2022 return ret;
1795 } 2023 }
1796 } 2024 }
1797
1798 zone->nr_scan_inactive +=
1799 (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
1800 if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
1801 zone->nr_scan_inactive = 0;
1802 nr_to_scan = min(nr_pages,
1803 zone_page_state(zone, NR_INACTIVE));
1804 ret += shrink_inactive_list(nr_to_scan, zone, sc);
1805 if (ret >= nr_pages)
1806 return ret;
1807 }
1808 } 2025 }
1809 2026
1810 return ret; 2027 return ret;
1811} 2028}
1812 2029
1813static unsigned long count_lru_pages(void)
1814{
1815 return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
1816}
1817
1818/* 2030/*
1819 * Try to free `nr_pages' of memory, system-wide, and return the number of 2031 * Try to free `nr_pages' of memory, system-wide, and return the number of
1820 * freed pages. 2032 * freed pages.
@@ -1840,7 +2052,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1840 2052
1841 current->reclaim_state = &reclaim_state; 2053 current->reclaim_state = &reclaim_state;
1842 2054
1843 lru_pages = count_lru_pages(); 2055 lru_pages = global_lru_pages();
1844 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); 2056 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
1845 /* If slab caches are huge, it's better to hit them first */ 2057 /* If slab caches are huge, it's better to hit them first */
1846 while (nr_slab >= lru_pages) { 2058 while (nr_slab >= lru_pages) {
@@ -1883,7 +2095,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1883 2095
1884 reclaim_state.reclaimed_slab = 0; 2096 reclaim_state.reclaimed_slab = 0;
1885 shrink_slab(sc.nr_scanned, sc.gfp_mask, 2097 shrink_slab(sc.nr_scanned, sc.gfp_mask,
1886 count_lru_pages()); 2098 global_lru_pages());
1887 ret += reclaim_state.reclaimed_slab; 2099 ret += reclaim_state.reclaimed_slab;
1888 if (ret >= nr_pages) 2100 if (ret >= nr_pages)
1889 goto out; 2101 goto out;
@@ -1900,7 +2112,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1900 if (!ret) { 2112 if (!ret) {
1901 do { 2113 do {
1902 reclaim_state.reclaimed_slab = 0; 2114 reclaim_state.reclaimed_slab = 0;
1903 shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages()); 2115 shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
1904 ret += reclaim_state.reclaimed_slab; 2116 ret += reclaim_state.reclaimed_slab;
1905 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); 2117 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
1906 } 2118 }
@@ -2128,3 +2340,285 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2128 return ret; 2340 return ret;
2129} 2341}
2130#endif 2342#endif
2343
2344#ifdef CONFIG_UNEVICTABLE_LRU
2345/*
2346 * page_evictable - test whether a page is evictable
2347 * @page: the page to test
2348 * @vma: the VMA in which the page is or will be mapped, may be NULL
2349 *
2350 * Test whether @page is evictable, i.e. should be placed on active/inactive
2351 * lists rather than the unevictable list. The vma argument is !NULL when
2352 * called from the fault path to determine how to instantiate a new page.
2353 *
2354 * Reasons page might not be evictable:
2355 * (1) page's mapping marked unevictable
2356 * (2) page is part of an mlocked VMA
2357 *
2358 */
2359int page_evictable(struct page *page, struct vm_area_struct *vma)
2360{
2361
2362 if (mapping_unevictable(page_mapping(page)))
2363 return 0;
2364
2365 if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
2366 return 0;
2367
2368 return 1;
2369}
2370
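A hedged summary of how the two cull conditions above are normally triggered; the helpers are only named in the comment, not defined in this hunk:

/*
 * Illustrative mapping of the two conditions to their usual triggers:
 *
 *   (1) mapping marked unevictable: e.g. SHM_LOCK on a shm segment is
 *       assumed to set the address_space flag that
 *       mapping_unevictable() tests.
 *   (2) mlocked VMA: mlock()/MAP_LOCKED set VM_LOCKED, which
 *       is_mlocked_vma() checks and which leads to PageMlocked().
 *
 * Either one makes page_evictable() return 0, routing the page to the
 * unevictable list in putback_lru_page().
 */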
2371static void show_page_path(struct page *page)
2372{
2373 char buf[256];
2374 if (page_is_file_cache(page)) {
2375 struct address_space *mapping = page->mapping;
2376 struct dentry *dentry;
2377 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2378
2379 spin_lock(&mapping->i_mmap_lock);
2380 dentry = d_find_alias(mapping->host);
2381 printk(KERN_INFO "rescued: %s %lu\n",
2382 dentry_path(dentry, buf, 256), pgoff);
2383 spin_unlock(&mapping->i_mmap_lock);
2384 } else {
2385#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
2386 struct anon_vma *anon_vma;
2387 struct vm_area_struct *vma;
2388
2389 anon_vma = page_lock_anon_vma(page);
2390 if (!anon_vma)
2391 return;
2392
2393 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
2394 printk(KERN_INFO "rescued: anon %s\n",
2395 vma->vm_mm->owner->comm);
2396 break;
2397 }
2398 page_unlock_anon_vma(anon_vma);
2399#endif
2400 }
2401}
2402
2403
2404/**
2405 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
2406 * @page: page to check evictability and move to appropriate lru list
2407 * @zone: zone page is in
2408 *
2409 * Checks a page for evictability and moves the page to the appropriate
2410 * zone lru list.
2411 *
2412 * Restrictions: zone->lru_lock must be held, page must be on LRU and must
2413 * have PageUnevictable set.
2414 */
2415static void check_move_unevictable_page(struct page *page, struct zone *zone)
2416{
2417 VM_BUG_ON(PageActive(page));
2418
2419retry:
2420 ClearPageUnevictable(page);
2421 if (page_evictable(page, NULL)) {
2422 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
2423
2424 show_page_path(page);
2425
2426 __dec_zone_state(zone, NR_UNEVICTABLE);
2427 list_move(&page->lru, &zone->lru[l].list);
2428 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
2429 __count_vm_event(UNEVICTABLE_PGRESCUED);
2430 } else {
2431 /*
2432 * rotate unevictable list
2433 */
2434 SetPageUnevictable(page);
2435 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
2436 if (page_evictable(page, NULL))
2437 goto retry;
2438 }
2439}
2440
2441/**
2442 * scan_mapping_unevictable_pages - scan an address space for evictable pages
2443 * @mapping: struct address_space to scan for evictable pages
2444 *
2445 * Scan all pages in mapping. Check unevictable pages for
2446 * evictability and move them to the appropriate zone lru list.
2447 */
2448void scan_mapping_unevictable_pages(struct address_space *mapping)
2449{
2450 pgoff_t next = 0;
2451 pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
2452 PAGE_CACHE_SHIFT;
2453 struct zone *zone;
2454 struct pagevec pvec;
2455
2456 if (mapping->nrpages == 0)
2457 return;
2458
2459 pagevec_init(&pvec, 0);
2460 while (next < end &&
2461 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
2462 int i;
2463 int pg_scanned = 0;
2464
2465 zone = NULL;
2466
2467 for (i = 0; i < pagevec_count(&pvec); i++) {
2468 struct page *page = pvec.pages[i];
2469 pgoff_t page_index = page->index;
2470 struct zone *pagezone = page_zone(page);
2471
2472 pg_scanned++;
2473 if (page_index > next)
2474 next = page_index;
2475 next++;
2476
2477 if (pagezone != zone) {
2478 if (zone)
2479 spin_unlock_irq(&zone->lru_lock);
2480 zone = pagezone;
2481 spin_lock_irq(&zone->lru_lock);
2482 }
2483
2484 if (PageLRU(page) && PageUnevictable(page))
2485 check_move_unevictable_page(page, zone);
2486 }
2487 if (zone)
2488 spin_unlock_irq(&zone->lru_lock);
2489 pagevec_release(&pvec);
2490
2491 count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
2492 }
2493
2494}
2495
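A hedged sketch of the expected caller: dropping SHM_LOCK makes previously stranded pages evictable again, so the unlock path rescans the mapping. mapping_clear_unevictable() is assumed from elsewhere in this series:

/* Illustrative only; error handling and locking omitted. */
static void demo_shm_unlock(struct address_space *mapping)
{
	mapping_clear_unevictable(mapping);	/* assumed helper */
	scan_mapping_unevictable_pages(mapping);
}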
2496/**
2497 * scan_zone_unevictable_pages - check unevictable list for evictable pages
2498 * @zone: the zone whose unevictable list is to be scanned
2499 *
2500 * Scan @zone's unevictable LRU list to check for pages that have become
2501 * evictable. Move any that have to @zone's inactive list, where they
2502 * become candidates for reclaim, unless shrink_inactive_list() decides
2503 * to reactivate them. Pages that are still unevictable are rotated
2504 * back onto @zone's unevictable list.
2505 */
2506#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
2507void scan_zone_unevictable_pages(struct zone *zone)
2508{
2509 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
2510 unsigned long scan;
2511 unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
2512
2513 while (nr_to_scan > 0) {
2514 unsigned long batch_size = min(nr_to_scan,
2515 SCAN_UNEVICTABLE_BATCH_SIZE);
2516
2517 spin_lock_irq(&zone->lru_lock);
2518 for (scan = 0; scan < batch_size; scan++) {
2519 struct page *page = lru_to_page(l_unevictable);
2520
2521 if (!trylock_page(page))
2522 continue;
2523
2524 prefetchw_prev_lru_page(page, l_unevictable, flags);
2525
2526 if (likely(PageLRU(page) && PageUnevictable(page)))
2527 check_move_unevictable_page(page, zone);
2528
2529 unlock_page(page);
2530 }
2531 spin_unlock_irq(&zone->lru_lock);
2532
2533 nr_to_scan -= batch_size;
2534 }
2535}
2536
2537
2538/**
2539 * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
2540 *
2541 * A really big hammer: scan all zones' unevictable LRU lists to check for
2542 * pages that have become evictable. Move those back to the zones'
2543 * inactive list where they become candidates for reclaim.
2544 * This occurs when, e.g., we have unswappable pages on the unevictable lists,
2545 * and we add swap to the system. As such, it runs in the context of a task
2546 * that has possibly/probably made some previously unevictable pages
2547 * evictable.
2548 */
2549void scan_all_zones_unevictable_pages(void)
2550{
2551 struct zone *zone;
2552
2553 for_each_zone(zone) {
2554 scan_zone_unevictable_pages(zone);
2555 }
2556}
2557
2558/*
2559 * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of
2560 * all nodes' unevictable lists for evictable pages.
2561 */
2562unsigned long scan_unevictable_pages;
2563
2564int scan_unevictable_handler(struct ctl_table *table, int write,
2565 struct file *file, void __user *buffer,
2566 size_t *length, loff_t *ppos)
2567{
2568 proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
2569
2570 if (write && *(unsigned long *)table->data)
2571 scan_all_zones_unevictable_pages();
2572
2573 scan_unevictable_pages = 0;
2574 return 0;
2575}
2576
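A usage note for the handler above, as a hedged example; the sysctl name assumes the ctl_table entry added elsewhere in this series:

/*
 * Illustrative usage: any non-zero write triggers one full rescan,
 * and the knob always reads back as zero afterwards.
 *
 *   # sysctl -w vm.scan_unevictable_pages=1
 *   # sysctl vm.scan_unevictable_pages
 *   vm.scan_unevictable_pages = 0
 */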
2577/*
2578 * per-node 'scan_unevictable_pages' attribute. On demand re-scan of
2579 * a specified node's per-zone unevictable lists for evictable pages.
2580 */
2581
2582static ssize_t read_scan_unevictable_node(struct sys_device *dev,
2583 struct sysdev_attribute *attr,
2584 char *buf)
2585{
2586 return sprintf(buf, "0\n"); /* always zero; should fit... */
2587}
2588
2589static ssize_t write_scan_unevictable_node(struct sys_device *dev,
2590 struct sysdev_attribute *attr,
2591 const char *buf, size_t count)
2592{
2593 struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
2594 struct zone *zone;
2595 unsigned long res;
2596 int err = strict_strtoul(buf, 10, &res);
2597
2598 if (err || !res)
2599 return 1; /* parse error or zero is a no-op */
2600
2601 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2602 if (!populated_zone(zone))
2603 continue;
2604 scan_zone_unevictable_pages(zone);
2605 }
2606 return 1;
2607}
2608
2609
2610static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
2611 read_scan_unevictable_node,
2612 write_scan_unevictable_node);
2613
2614int scan_unevictable_register_node(struct node *node)
2615{
2616 return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
2617}
2618
2619void scan_unevictable_unregister_node(struct node *node)
2620{
2621 sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
2622}
2623
2624#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d7826af2fb07..9343227c5c60 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,14 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_free_pages",
-	"nr_inactive",
-	"nr_active",
+	"nr_inactive_anon",
+	"nr_active_anon",
+	"nr_inactive_file",
+	"nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"nr_unevictable",
+	"nr_mlock",
+#endif
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -675,6 +681,16 @@ static const char * const vmstat_text[] = {
 	"htlb_buddy_alloc_success",
 	"htlb_buddy_alloc_fail",
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	"unevictable_pgs_culled",
+	"unevictable_pgs_scanned",
+	"unevictable_pgs_rescued",
+	"unevictable_pgs_mlocked",
+	"unevictable_pgs_munlocked",
+	"unevictable_pgs_cleared",
+	"unevictable_pgs_stranded",
+	"unevictable_pgs_mlockfreed",
+#endif
 #endif
 };
 
@@ -688,7 +704,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        min      %lu"
 		   "\n        low      %lu"
 		   "\n        high     %lu"
-		   "\n        scanned  %lu (a: %lu i: %lu)"
+		   "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
@@ -696,7 +712,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   zone->pages_low,
 		   zone->pages_high,
 		   zone->pages_scanned,
-		   zone->nr_scan_active, zone->nr_scan_inactive,
+		   zone->lru[LRU_ACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_INACTIVE_ANON].nr_scan,
+		   zone->lru[LRU_ACTIVE_FILE].nr_scan,
+		   zone->lru[LRU_INACTIVE_FILE].nr_scan,
 		   zone->spanned_pages,
 		   zone->present_pages);
 
@@ -733,10 +752,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	seq_printf(m,
 		   "\n  all_unreclaimable: %u"
 		   "\n  prev_priority:     %i"
-		   "\n  start_pfn:         %lu",
+		   "\n  start_pfn:         %lu"
+		   "\n  inactive_ratio:    %u",
 		   zone_is_all_unreclaimable(zone),
 		   zone->prev_priority,
-		   zone->zone_start_pfn);
+		   zone->zone_start_pfn,
+		   zone->inactive_ratio);
 	seq_putc(m, '\n');
 }
 
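
The new counters land in /proc/vmstat under the names added to vmstat_text[]
above. A small reader, illustrative only (the field names come from the patch;
the program does not):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "nr_unevictable", 14) ||
		    !strncmp(line, "unevictable_pgs_", 16))
			fputs(line, stdout);
	fclose(f);
	return 0;
}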
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a4abed5b4c44..fa5cda4e552a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
-	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
 		(struct net_device *)out, br_nf_forward_finish);
 
 	return NF_STOLEN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 868ec0ba8b77..b8a4fd0806af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	err = device_rename(&dev->dev, dev->name);
-	if (err) {
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		return err;
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 11062780bb02..d4ce1224e008 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_sk_classify_flow(sk, &fl);
 
 	if (ip6_dst_lookup(sk, &dst, &fl))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8c..e6bf99e3e41a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked = 0;
 	req->rcv_wnd = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3df..809d803d5006 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport = inet_sk(sk)->sport;
+	dh->dccph_sport = inet_rsk(req)->loc_port;
 	dh->dccph_dport = inet_rsk(req)->rmt_port;
 	dh->dccph_doff = (dccp_header_size +
 			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b043eda60b04..1a9dd66511fc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -663,7 +663,7 @@ out:
 void arp_xmit(struct sk_buff *skb)
 {
 	/* Send it off, maybe filter it using firewalling first. */
-	NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
 
 /*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
-	return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
 
 freeskb:
 	kfree_skb(skb);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ffeaffc3fffe..8303e4b406c0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 		*obj = kmalloc(sizeof(struct snmp_object) + len,
 			       GFP_ATOMIC);
 		if (*obj == NULL) {
+			kfree(p);
 			kfree(id);
 			if (net_ratelimit())
 				printk("OOM in bsalg (%d)\n", __LINE__);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ec394cf5a19b..676c80b5b14b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->mss = mss;
 	ireq->rmt_port = th->source;
+	ireq->loc_port = th->dest;
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5310c9b84dc..b6b356b7912a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_sk(sk)->sport),
+		   ntohs(inet_rsk(req)->loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
 		   ntohs(inet_rsk(req)->rmt_port),
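
The recurring change in the DCCP and TCPv6 hunks above is one fix applied in
several places: the reply's source port comes from the port the client
actually addressed (saved in the request sock as loc_port) rather than from
the listening socket. A standalone sketch of the bookkeeping, using simplified
stand-in types rather than the kernel's structures:

#include <stdint.h>
#include <stdio.h>

struct hdr { uint16_t sport, dport; };
struct req { uint16_t rmt_port, loc_port; };

int main(void)
{
	/* incoming SYN: client port 40000 -> server port 8080 */
	struct hdr syn = { .sport = 40000, .dport = 8080 };
	/* dccp_reqsk_init()/cookie_v6_check() record both ports */
	struct req r = { .rmt_port = syn.sport, .loc_port = syn.dport };
	/* the reply echoes loc_port back as its source port */
	struct hdr synack = { .sport = r.loc_port, .dport = r.rmt_port };

	printf("SYN-ACK %u -> %u\n", synack.sport, synack.dport);
	return 0;
}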
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 78892cf2b021..25dcef9f2194 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
 	select NETFILTER_NETLINK
-	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 05048e403266..79a698052218 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -25,11 +25,13 @@ menuconfig IP_VS
 if IP_VS
 
 config IP_VS_IPV6
-	bool "IPv6 support for IPVS (DANGEROUS)"
+	bool "IPv6 support for IPVS"
 	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
 	---help---
 	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
 
+	  See http://www.mindbasket.com/ipvs for more information.
+
 	  Say N if unsure.
 
 config IP_VS_DEBUG
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2e4ad9671e19..a040d46f85d6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -813,6 +813,7 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
@@ -840,6 +841,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 
 	return parse_nat_setup(ct, manip, attr);
 }
+#endif
 
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 2cc1fff49307..f9977b3311f7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= NF_ARP,
+		.family		= NFPROTO_ARP,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 6f62c36948d9..7ac54eab0b00 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
 		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m) {
 			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_DST) {
 		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
 		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m) {
 			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
 	}
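
The !! in the hunks above is the whole fix: m holds a 0/1 match result, and
XOR-ing a raw flag mask into it only inverts correctly if the flag happens to
occupy bit 0. A standalone demonstration (the flag value is made up, not the
kernel's):

#include <stdbool.h>
#include <stdio.h>

#define SRC_INV 0x10	/* hypothetical flag bit, deliberately not bit 0 */

int main(void)
{
	unsigned int flags = SRC_INV;
	bool m = true;

	bool buggy = m ^ (flags & SRC_INV);	/* 1 ^ 0x10 = 0x11 -> true (wrong) */
	bool fixed = m ^ !!(flags & SRC_INV);	/* 1 ^ 1 = 0 -> false (right) */

	printf("buggy=%d fixed=%d\n", buggy, fixed);
	return 0;
}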
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4ebd4ca9a991..280c471bcdf4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
-			      &recent_mt_fops);
+	t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+				   &recent_mt_fops, t);
 	if (t->proc == NULL) {
 		kfree(t);
 		goto out;
 	}
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
-				  &recent_old_fops);
+	t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+				       &recent_old_fops, t);
 	if (t->proc_old == NULL) {
 		remove_proc_entry(t->name, proc_old_dir);
 		kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	}
 	t->proc_old->uid = ip_list_uid;
 	t->proc_old->gid = ip_list_gid;
-	t->proc_old->data = t;
 #endif
 	t->proc->uid = ip_list_uid;
 	t->proc->gid = ip_list_gid;
-	t->proc->data = t;
 #endif
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &tables);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7b5572d6beb5..93cd30ce6501 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 
 static struct netdev_queue noop_netdev_queue = {
 	.qdisc		= &noop_qdisc,
+	.qdisc_sleeping	= &noop_qdisc,
 };
 
 struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 static struct Qdisc noqueue_qdisc;
 static struct netdev_queue noqueue_netdev_queue = {
 	.qdisc		= &noqueue_qdisc,
+	.qdisc_sleeping	= &noqueue_qdisc,
 };
 
 static struct Qdisc noqueue_qdisc = {
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 46f23971f7e4..5ba78701adc3 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -1,5 +1,5 @@
 /*
- * dev_cgroup.c - device cgroup subsystem
+ * device_cgroup.c - device cgroup subsystem
  *
  * Copyright 2007 IBM Corp
  */
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/rcupdate.h>
 
 #define ACC_MKNOD 1
 #define ACC_READ  2
@@ -22,18 +23,8 @@
 
 /*
  * whitelist locking rules:
- * cgroup_lock() cannot be taken under dev_cgroup->lock.
- * dev_cgroup->lock can be taken with or without cgroup_lock().
- *
- * modifications always require cgroup_lock
- * modifications to a list which is visible require the
- *  dev_cgroup->lock *and* cgroup_lock()
- * walking the list requires dev_cgroup->lock or cgroup_lock().
- *
- * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
- *  a mutex, which the cgroup_lock() is.  Since modifying
- *  a visible list requires both locks, either lock can be
- *  taken for walking the list.
+ * hold cgroup_lock() for update/read.
+ * hold rcu_read_lock() for read.
  */
 
 struct dev_whitelist_item {
@@ -47,7 +38,6 @@ struct dev_whitelist_item {
 struct dev_cgroup {
 	struct cgroup_subsys_state css;
 	struct list_head whitelist;
-	spinlock_t lock;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
@@ -84,13 +74,9 @@ static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
 	struct dev_whitelist_item *wh, *tmp, *new;
 
 	list_for_each_entry(wh, orig, list) {
-		new = kmalloc(sizeof(*wh), GFP_KERNEL);
+		new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 		if (!new)
 			goto free_and_exit;
-		new->major = wh->major;
-		new->minor = wh->minor;
-		new->type = wh->type;
-		new->access = wh->access;
 		list_add_tail(&new->list, dest);
 	}
 
@@ -107,19 +93,16 @@ free_and_exit:
 /* Stupid prototype - don't bother combining existing entries */
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 			struct dev_whitelist_item *wh)
 {
 	struct dev_whitelist_item *whcopy, *walk;
 
-	whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
+	whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
 	if (!whcopy)
 		return -ENOMEM;
 
-	memcpy(whcopy, wh, sizeof(*whcopy));
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
 		if (walk->type != wh->type)
 			continue;
@@ -135,7 +118,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 
 	if (whcopy != NULL)
 		list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
-	spin_unlock(&dev_cgroup->lock);
 	return 0;
 }
 
@@ -149,14 +131,12 @@ static void whitelist_item_free(struct rcu_head *rcu)
 
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
 */
 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 			 struct dev_whitelist_item *wh)
 {
 	struct dev_whitelist_item *walk, *tmp;
 
-	spin_lock(&dev_cgroup->lock);
 	list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
 		if (walk->type == DEV_ALL)
 			goto remove;
@@ -174,7 +154,6 @@ remove:
 			call_rcu(&walk->rcu, whitelist_item_free);
 		}
 	}
-	spin_unlock(&dev_cgroup->lock);
 }
 
 /*
@@ -214,7 +193,6 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 		}
 	}
 
-	spin_lock_init(&dev_cgroup->lock);
 	return &dev_cgroup->css;
 }
 
@@ -330,15 +308,11 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 {
 	struct cgroup *pcg = childcg->css.cgroup->parent;
 	struct dev_cgroup *parent;
-	int ret;
 
 	if (!pcg)
 		return 1;
 	parent = cgroup_to_devcgroup(pcg);
-	spin_lock(&parent->lock);
-	ret = may_access_whitelist(parent, wh);
-	spin_unlock(&parent->lock);
-	return ret;
+	return may_access_whitelist(parent, wh);
 }
 
 /*
@@ -357,17 +331,14 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 				int filetype, const char *buffer)
 {
-	struct dev_cgroup *cur_devcgroup;
 	const char *b;
 	char *endp;
-	int retval = 0, count;
+	int count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	cur_devcgroup = task_devcgroup(current);
-
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
 
@@ -437,7 +408,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	}
 
 handle:
-	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
 		if (!parent_has_perm(devcgroup, &wh))
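
The kmemdup() conversions above replace an open-coded allocate-and-copy. For
reference, the helper behaves roughly like this userspace sketch (the real
kernel function additionally takes gfp allocation flags):

#include <stdlib.h>
#include <string.h>

/* rough userspace analogue of kmemdup(src, len, gfp) */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const char item[] = "whitelist entry";
	char *copy = memdup(item, sizeof(item));

	free(copy);
	return 0;
}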
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index 89b7f549bebd..ea2bf82c9373 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(snd_pcm_format_physical_width);
 /**
  * snd_pcm_format_size - return the byte size of samples on the given format
  * @format: the format to check
+ * @samples: sampling rate
  *
  * Returns the byte size of the given samples for the format, or a
  * negative error code if unknown format.
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index e5e749f3e0ef..73be7e14a603 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -51,7 +51,7 @@ static int emu10k1_playback_constraints(struct snd_pcm_runtime *runtime)
 	if (err < 0)
 		return err;
 	err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 256, UINT_MAX);
-	if (err) < 0)
+	if (err < 0)
 		return err;
 	return 0;
 }
diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index a7d89662acf6..88fbf285d2b7 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -759,7 +759,6 @@ static int snd_ca0106_pcm_prepare_playback(struct snd_pcm_substream *substream)
 		   SPCS_CHANNELNUM_LEFT | SPCS_SOURCENUM_UNSPEC |
 		   SPCS_GENERATIONSTATUS | 0x00001200 |
 		   0x00000000 | SPCS_EMPHASIS_NONE | SPCS_COPYRIGHT );
-	}
 #endif
 
 	return 0;
diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index 20d0e328288a..8f9e3859c37c 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -666,6 +666,7 @@ static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
 	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
 	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
 	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
 	ret = snd_ps3_change_avsetting(card);
 
@@ -685,6 +686,7 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
 {
 	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
 	struct snd_ps3_avsetting_info avs;
+	int ret;
 
 	avs = card->avs;
 
@@ -729,19 +731,92 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
 		return 1;
 	}
 
-	if ((card->avs.avs_audio_width != avs.avs_audio_width) ||
-	    (card->avs.avs_audio_rate != avs.avs_audio_rate)) {
-		card->avs = avs;
-		snd_ps3_change_avsetting(card);
+	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
+	if (memcmp(&card->avs, &avs, sizeof(avs))) {
 		pr_debug("%s: after freq=%d width=%d\n", __func__,
 			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
 
-		return 0;
+		card->avs = avs;
+		snd_ps3_change_avsetting(card);
+		ret = 0;
 	} else
+		ret = 1;
+
+	/* check CS non-audio bit and mute accordingly */
+	if (avs.avs_cs_info[0] & 0x02)
+		ps3av_audio_mute_analog(1); /* mute if non-audio */
+	else
+		ps3av_audio_mute_analog(0);
+
+	return ret;
+}
+
+/*
+ * SPDIF status bits controls
+ */
+static int snd_ps3_spdif_mask_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+	uinfo->count = 1;
+	return 0;
+}
+
+/* FIXME: ps3av_set_audio_mode() assumes only consumer mode */
+static int snd_ps3_spdif_cmask_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	memset(ucontrol->value.iec958.status, 0xff, 8);
+	return 0;
+}
+
+static int snd_ps3_spdif_pmask_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	return 0;
+}
+
+static int snd_ps3_spdif_default_get(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	memcpy(ucontrol->value.iec958.status, ps3av_mode_cs_info, 8);
+	return 0;
+}
+
+static int snd_ps3_spdif_default_put(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	if (memcmp(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8)) {
+		memcpy(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8);
 		return 1;
+	}
+	return 0;
 }
 
+static struct snd_kcontrol_new spdif_ctls[] = {
+	{
+		.access = SNDRV_CTL_ELEM_ACCESS_READ,
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_cmask_get,
+	},
+	{
+		.access = SNDRV_CTL_ELEM_ACCESS_READ,
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_pmask_get,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+		.info = snd_ps3_spdif_mask_info,
+		.get = snd_ps3_spdif_default_get,
+		.put = snd_ps3_spdif_default_put,
+	},
+};
 
 
 static int snd_ps3_map_mmio(void)
@@ -842,7 +917,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 
 static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
-	int ret;
+	int i, ret;
 	u64 lpar_addr, lpar_size;
 
 	BUG_ON(!firmware_has_feature(FW_FEATURE_PS3_LV1));
@@ -903,6 +978,15 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 	strcpy(the_card.card->driver, "PS3");
 	strcpy(the_card.card->shortname, "PS3");
 	strcpy(the_card.card->longname, "PS3 sound");
+
+	/* create control elements */
+	for (i = 0; i < ARRAY_SIZE(spdif_ctls); i++) {
+		ret = snd_ctl_add(the_card.card,
+				  snd_ctl_new1(&spdif_ctls[i], &the_card));
+		if (ret < 0)
+			goto clean_card;
+	}
+
 	/* create PCM devices instance */
 	/* NOTE:this driver works assuming pcm:substream = 1:1 */
 	ret = snd_pcm_new(the_card.card,
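
The avs_cs_info[0] & 0x02 test added above follows the IEC958 consumer
channel-status layout, where bit 1 of byte 0 marks the payload as non-audio;
the driver mutes the analog output when it is set. A minimal restatement of
the test, with made-up status bytes:

#include <stdbool.h>
#include <stdio.h>

static bool cs_is_nonaudio(const unsigned char cs[8])
{
	return cs[0] & 0x02;	/* same test as avs.avs_cs_info[0] & 0x02 */
}

int main(void)
{
	unsigned char pcm[8]  = { 0x00 };	/* linear PCM audio */
	unsigned char data[8] = { 0x02 };	/* non-audio bitstream */

	printf("pcm: %d, data: %d\n", cs_is_nonaudio(pcm), cs_is_nonaudio(data));
	return 0;
}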
diff --git a/sound/ppc/snd_ps3.h b/sound/ppc/snd_ps3.h
index 4b7e6fbbe500..326fb29e82d8 100644
--- a/sound/ppc/snd_ps3.h
+++ b/sound/ppc/snd_ps3.h
@@ -51,6 +51,7 @@ struct snd_ps3_avsetting_info {
 	uint32_t avs_audio_width;
 	uint32_t avs_audio_format; /* fixed */
 	uint32_t avs_audio_source; /* fixed */
+	unsigned char avs_cs_info[8];
 };
 /*
  * PS3 audio 'card' instance
55/* 56/*
56 * PS3 audio 'card' instance 57 * PS3 audio 'card' instance
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 0a063a98a661..853b33ae3435 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -43,6 +43,7 @@
 struct omap_mcbsp_data {
 	unsigned int			bus_id;
 	struct omap_mcbsp_reg_cfg	regs;
+	unsigned int			fmt;
 	/*
 	 * Flags indicating is the bus already activated and configured by
 	 * another substream
@@ -200,6 +201,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
 	int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
+	int wlen;
 	unsigned long port;
 
 	if (cpu_class_is_omap1()) {
@@ -244,19 +246,29 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	switch (params_format(params)) {
 	case SNDRV_PCM_FORMAT_S16_LE:
 		/* Set word lengths */
+		wlen = 16;
 		regs->rcr2	|= RWDLEN2(OMAP_MCBSP_WORD_16);
 		regs->rcr1	|= RWDLEN1(OMAP_MCBSP_WORD_16);
 		regs->xcr2	|= XWDLEN2(OMAP_MCBSP_WORD_16);
 		regs->xcr1	|= XWDLEN1(OMAP_MCBSP_WORD_16);
-		/* Set FS period and length in terms of bit clock periods */
-		regs->srgr2	|= FPER(16 * 2 - 1);
-		regs->srgr1	|= FWID(16 - 1);
 		break;
 	default:
 		/* Unsupported PCM format */
 		return -EINVAL;
 	}
 
+	/* Set FS period and length in terms of bit clock periods */
+	switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr1	|= FWID(wlen - 1);
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr1	|= FWID(0);
+		break;
+	}
+
 	omap_mcbsp_config(bus_id, &mcbsp_data->regs);
 	mcbsp_data->configured = 1;
 
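
Worked numbers for the SRGR arithmetic above, assuming wlen == 16 (the only
word length the driver accepts here): both fields are programmed as a count of
bit-clock periods minus one, so an I2S frame spans 2 * 16 = 32 bit clocks with
a frame-sync asserted for one 16-bit channel, while DSP_A keeps the same frame
length but uses a single-bit-clock frame-sync pulse:

#include <stdio.h>

int main(void)
{
	int wlen = 16;

	printf("I2S:   FPER=%d FWID=%d\n", wlen * 2 - 1, wlen - 1);	/* 31, 15 */
	printf("DSP_A: FPER=%d FWID=%d\n", wlen * 2 - 1, 0);		/* 31, 0 */
	return 0;
}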
@@ -272,10 +284,12 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 {
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
+	unsigned int temp_fmt = fmt;
 
 	if (mcbsp_data->configured)
 		return 0;
 
+	mcbsp_data->fmt = fmt;
 	memset(regs, 0, sizeof(*regs));
 	/* Generic McBSP register settings */
 	regs->spcr2 |= XINTM(3) | FREE;
@@ -293,6 +307,8 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		/* 0-bit data delay */
 		regs->rcr2	|= RDATDLY(0);
 		regs->xcr2	|= XDATDLY(0);
+		/* Invert bit clock and FS polarity configuration for DSP_A */
+		temp_fmt ^= SND_SOC_DAIFMT_IB_IF;
 		break;
 	default:
 		/* Unsupported data format */
 		return -EINVAL;
@@ -316,7 +332,7 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	}
 
 	/* Set bit clock (CLKX/CLKR) and FS polarities */
-	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	switch (temp_fmt & SND_SOC_DAIFMT_INV_MASK) {
 	case SND_SOC_DAIFMT_NB_NF:
 		/*
 		 * Normal BCLK + FS.