aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/ABI/testing/sysfs-devices-memory51
-rw-r--r--Documentation/DMA-mapping.txt2
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/proc.txt26
-rw-r--r--Documentation/hwmon/adt747019
-rw-r--r--Documentation/ide/warm-plug-howto.txt5
-rw-r--r--Documentation/ioctl/ioctl-number.txt8
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt34
-rw-r--r--Documentation/kernel-parameters.txt29
-rw-r--r--Documentation/kprobes.txt5
-rw-r--r--Documentation/magic-number.txt6
-rw-r--r--Documentation/memory-hotplug.txt16
-rw-r--r--Documentation/mips/AU1xxx_IDE.README2
-rw-r--r--Documentation/powerpc/cpu_features.txt2
-rw-r--r--Documentation/s390/Debugging390.txt2
-rw-r--r--Documentation/s390/cds.txt2
-rw-r--r--Documentation/s390/s390dbf.txt2
-rw-r--r--Documentation/sysctl/vm.txt3
-rw-r--r--Documentation/vm/unevictable-lru.txt63
-rw-r--r--Documentation/x86/zero-page.txt2
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/alpha/include/asm/atomic.h9
-rw-r--r--arch/arm/include/asm/atomic.h3
-rw-r--r--arch/arm/kernel/kprobes.c2
-rw-r--r--arch/arm/mach-s3c2410/include/mach/spi.h2
-rw-r--r--arch/avr32/include/asm/atomic.h2
-rw-r--r--arch/avr32/kernel/traps.c1
-rw-r--r--arch/blackfin/include/asm/atomic.h4
-rw-r--r--arch/cris/include/asm/atomic.h4
-rw-r--r--arch/h8300/include/asm/atomic.h3
-rw-r--r--arch/ia64/include/asm/atomic.h6
-rw-r--r--arch/ia64/kernel/kprobes.c8
-rw-r--r--arch/ia64/mm/init.c2
-rw-r--r--arch/m68knommu/include/asm/atomic.h2
-rw-r--r--arch/mips/include/asm/atomic.h5
-rw-r--r--arch/parisc/include/asm/atomic.h11
-rw-r--r--arch/powerpc/include/asm/atomic.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h6
-rw-r--r--arch/powerpc/kernel/kprobes.c7
-rw-r--r--arch/powerpc/mm/hugetlbpage.c7
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/s390/include/asm/atomic.h7
-rw-r--r--arch/s390/kernel/kprobes.c7
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/sh/include/asm/atomic.h7
-rw-r--r--arch/sh/kernel/traps_32.c1
-rw-r--r--arch/sh/mm/init.c3
-rw-r--r--arch/sparc/include/asm/atomic_32.h2
-rw-r--r--arch/sparc/include/asm/atomic_64.h3
-rw-r--r--arch/um/kernel/trap.c24
-rw-r--r--arch/x86/include/asm/atomic_32.h10
-rw-r--r--arch/x86/include/asm/atomic_64.h18
-rw-r--r--arch/x86/include/asm/unwind.h13
-rw-r--r--arch/x86/kernel/kprobes.c7
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/mm/fault.c24
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--drivers/acpi/processor_idle.c1
-rw-r--r--drivers/base/memory.c19
-rw-r--r--drivers/base/node.c103
-rw-r--r--drivers/char/Kconfig2
-rw-r--r--drivers/char/consolemap.c2
-rw-r--r--drivers/char/mem.c3
-rw-r--r--drivers/char/random.c5
-rw-r--r--drivers/char/sysrq.c20
-rw-r--r--drivers/edac/Kconfig7
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/edac_device.c2
-rw-r--r--drivers/edac/edac_mc.c2
-rw-r--r--drivers/edac/edac_pci.c2
-rw-r--r--drivers/edac/edac_pci_sysfs.c6
-rw-r--r--drivers/edac/i5400_edac.c1476
-rw-r--r--drivers/edac/i82875p_edac.c4
-rw-r--r--drivers/edac/mpc85xx_edac.c74
-rw-r--r--drivers/firmware/dmi_scan.c6
-rw-r--r--drivers/gpio/Kconfig7
-rw-r--r--drivers/gpio/pca953x.c12
-rw-r--r--drivers/gpio/twl4030-gpio.c54
-rw-r--r--drivers/gpu/drm/drm_fops.c4
-rw-r--r--drivers/hwmon/adt7462.c14
-rw-r--r--drivers/hwmon/adt7470.c227
-rw-r--r--drivers/hwmon/adt7473.c10
-rw-r--r--drivers/hwmon/applesmc.c10
-rw-r--r--drivers/hwmon/ibmpex.c2
-rw-r--r--drivers/ide/Kconfig7
-rw-r--r--drivers/ide/Makefile1
-rw-r--r--drivers/ide/aec62xx.c4
-rw-r--r--drivers/ide/alim15x3.c10
-rw-r--r--drivers/ide/amd74xx.c4
-rw-r--r--drivers/ide/au1xxx-ide.c11
-rw-r--r--drivers/ide/cmd640.c5
-rw-r--r--drivers/ide/cmd64x.c23
-rw-r--r--drivers/ide/cs5520.c2
-rw-r--r--drivers/ide/cy82c693.c2
-rw-r--r--drivers/ide/falconide.c1
-rw-r--r--drivers/ide/hpt366.c23
-rw-r--r--drivers/ide/icside.c12
-rw-r--r--drivers/ide/ide-acpi.c22
-rw-r--r--drivers/ide/ide-atapi.c13
-rw-r--r--drivers/ide/ide-cd.c118
-rw-r--r--drivers/ide/ide-cd.h34
-rw-r--r--drivers/ide/ide-disk.c4
-rw-r--r--drivers/ide/ide-dma-sff.c63
-rw-r--r--drivers/ide/ide-dma.c2
-rw-r--r--drivers/ide/ide-floppy.c2
-rw-r--r--drivers/ide/ide-gd.c3
-rw-r--r--drivers/ide/ide-gd.h10
-rw-r--r--drivers/ide/ide-h8300.c1
-rw-r--r--drivers/ide/ide-io.c410
-rw-r--r--drivers/ide/ide-iops.c100
-rw-r--r--drivers/ide/ide-lib.c9
-rw-r--r--drivers/ide/ide-park.c16
-rw-r--r--drivers/ide/ide-pm.c8
-rw-r--r--drivers/ide/ide-probe.c474
-rw-r--r--drivers/ide/ide-proc.c23
-rw-r--r--drivers/ide/ide-tape.c20
-rw-r--r--drivers/ide/ide-taskfile.c14
-rw-r--r--drivers/ide/ide.c187
-rw-r--r--drivers/ide/it8172.c166
-rw-r--r--drivers/ide/it8213.c4
-rw-r--r--drivers/ide/it821x.c13
-rw-r--r--drivers/ide/ns87415.c14
-rw-r--r--drivers/ide/palm_bk3710.c3
-rw-r--r--drivers/ide/pdc202xx_new.c4
-rw-r--r--drivers/ide/pdc202xx_old.c14
-rw-r--r--drivers/ide/piix.c8
-rw-r--r--drivers/ide/pmac.c7
-rw-r--r--drivers/ide/q40ide.c1
-rw-r--r--drivers/ide/qd65xx.c7
-rw-r--r--drivers/ide/qd65xx.h4
-rw-r--r--drivers/ide/sc1200.c9
-rw-r--r--drivers/ide/scc_pata.c33
-rw-r--r--drivers/ide/serverworks.c2
-rw-r--r--drivers/ide/setup-pci.c12
-rw-r--r--drivers/ide/sgiioc4.c15
-rw-r--r--drivers/ide/siimage.c11
-rw-r--r--drivers/ide/sis5513.c2
-rw-r--r--drivers/ide/sl82c105.c5
-rw-r--r--drivers/ide/slc90e66.c4
-rw-r--r--drivers/ide/tc86c001.c17
-rw-r--r--drivers/ide/triflex.c2
-rw-r--r--drivers/ide/trm290.c10
-rw-r--r--drivers/ide/tx4939ide.c23
-rw-r--r--drivers/ide/umc8672.c13
-rw-r--r--drivers/ide/via82cxxx.c2
-rw-r--r--drivers/macintosh/therm_adt746x.c8
-rw-r--r--drivers/media/dvb/dvb-core/dvbdev.c5
-rw-r--r--drivers/media/video/v4l1-compat.c4
-rw-r--r--drivers/message/i2o/device.c2
-rw-r--r--drivers/message/i2o/driver.c1
-rw-r--r--drivers/misc/ibmasm/module.c3
-rw-r--r--drivers/misc/ioc4.c36
-rw-r--r--drivers/misc/tifm_7xx1.c3
-rw-r--r--drivers/parport/ieee1284.c2
-rw-r--r--drivers/rapidio/rio-driver.c1
-rw-r--r--drivers/rtc/Kconfig94
-rw-r--r--drivers/rtc/Makefile4
-rw-r--r--drivers/rtc/class.c16
-rw-r--r--drivers/rtc/interface.c16
-rw-r--r--drivers/rtc/rtc-at32ap700x.c4
-rw-r--r--drivers/rtc/rtc-au1xxx.c153
-rw-r--r--drivers/rtc/rtc-bfin.c2
-rw-r--r--drivers/rtc/rtc-cmos.c15
-rw-r--r--drivers/rtc/rtc-ds1216.c30
-rw-r--r--drivers/rtc/rtc-ds1390.c72
-rw-r--r--drivers/rtc/rtc-ds1511.c19
-rw-r--r--drivers/rtc/rtc-ds1553.c15
-rw-r--r--drivers/rtc/rtc-ds1672.c22
-rw-r--r--drivers/rtc/rtc-ds3234.c172
-rw-r--r--drivers/rtc/rtc-ep93xx.c13
-rw-r--r--drivers/rtc/rtc-m48t59.c2
-rw-r--r--drivers/rtc/rtc-max6902.c176
-rw-r--r--drivers/rtc/rtc-mv.c163
-rw-r--r--drivers/rtc/rtc-pxa.c489
-rw-r--r--drivers/rtc/rtc-s3c.c3
-rw-r--r--drivers/rtc/rtc-sh.c13
-rw-r--r--drivers/rtc/rtc-stk17ta8.c15
-rw-r--r--drivers/rtc/rtc-test.c8
-rw-r--r--drivers/rtc/rtc-twl4030.c5
-rw-r--r--drivers/rtc/rtc-tx4939.c317
-rw-r--r--drivers/rtc/rtc-vr41xx.c11
-rw-r--r--drivers/spi/Kconfig18
-rw-r--r--drivers/spi/Makefile1
-rw-r--r--drivers/spi/atmel_spi.c131
-rw-r--r--drivers/spi/pxa2xx_spi.c3
-rw-r--r--drivers/spi/spi_gpio.c360
-rw-r--r--drivers/spi/spi_s3c24xx.c38
-rw-r--r--drivers/video/aty/aty128fb.c2
-rw-r--r--drivers/video/bfin-t350mcqb-fb.c6
-rw-r--r--drivers/video/carminefb.c2
-rw-r--r--drivers/video/cyber2000fb.c3
-rw-r--r--drivers/video/fbmem.c4
-rw-r--r--drivers/video/gbefb.c7
-rw-r--r--drivers/video/geode/gx1fb_core.c3
-rw-r--r--drivers/video/geode/gxfb_core.c8
-rw-r--r--drivers/video/geode/lxfb_core.c9
-rw-r--r--drivers/video/gxt4500.c4
-rw-r--r--drivers/video/i810/i810_accel.c18
-rw-r--r--drivers/video/intelfb/intelfbdrv.c24
-rw-r--r--drivers/video/modedb.c2
-rw-r--r--drivers/video/neofb.c6
-rw-r--r--drivers/video/nvidia/nv_accel.c12
-rw-r--r--drivers/video/pm3fb.c6
-rw-r--r--drivers/video/sm501fb.c6
-rw-r--r--drivers/video/via/viafbdev.c18
-rw-r--r--firmware/dsp56k/bootstrap.asm26
-rw-r--r--fs/Kconfig17
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/dev-ioctl.c75
-rw-r--r--fs/autofs4/expire.c4
-rw-r--r--fs/autofs4/inode.c14
-rw-r--r--fs/autofs4/waitq.c8
-rw-r--r--fs/bfs/inode.c45
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/compat.c6
-rw-r--r--fs/direct-io.c13
-rw-r--r--fs/ecryptfs/crypto.c514
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h105
-rw-r--r--fs/ecryptfs/file.c30
-rw-r--r--fs/ecryptfs/inode.c297
-rw-r--r--fs/ecryptfs/keystore.c651
-rw-r--r--fs/ecryptfs/main.c126
-rw-r--r--fs/ecryptfs/messaging.c4
-rw-r--r--fs/ecryptfs/miscdev.c18
-rw-r--r--fs/exec.c29
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/fs-writeback.c92
-rw-r--r--fs/fuse/control.c6
-rw-r--r--fs/fuse/dev.c113
-rw-r--r--fs/fuse/dir.c48
-rw-r--r--fs/fuse/file.c457
-rw-r--r--fs/fuse/fuse_i.h83
-rw-r--r--fs/fuse/inode.c157
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c12
-rw-r--r--fs/minix/dir.c2
-rw-r--r--fs/mpage.c6
-rw-r--r--fs/ncpfs/getopt.c1
-rw-r--r--fs/proc/task_mmu.c8
-rw-r--r--fs/select.c76
-rw-r--r--fs/sync.c2
-rw-r--r--fs/ubifs/super.c9
-rw-r--r--include/asm-frv/atomic.h4
-rw-r--r--include/asm-generic/bug.h17
-rw-r--r--include/asm-generic/local.h1
-rw-r--r--include/asm-generic/memory_model.h7
-rw-r--r--include/asm-m32r/atomic.h8
-rw-r--r--include/asm-m68k/atomic.h3
-rw-r--r--include/asm-mn10300/atomic.h9
-rw-r--r--include/asm-xtensa/atomic.h3
-rw-r--r--include/linux/auto_dev-ioctl.h75
-rw-r--r--include/linux/auto_fs4.h62
-rw-r--r--include/linux/binfmts.h3
-rw-r--r--include/linux/cgroup.h14
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/fuse.h79
-rw-r--r--include/linux/gfp.h6
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/i2c/twl4030.h9
-rw-r--r--include/linux/ide.h171
-rw-r--r--include/linux/interrupt.h1
-rw-r--r--include/linux/kernel.h6
-rw-r--r--include/linux/kprobes.h15
-rw-r--r--include/linux/memory.h6
-rw-r--r--include/linux/memory_hotplug.h2
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/miscdevice.h42
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/module.h15
-rw-r--r--include/linux/node.h13
-rw-r--r--include/linux/page-flags.h26
-rw-r--r--include/linux/pagevec.h7
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/percpu_counter.h8
-rw-r--r--include/linux/poll.h15
-rw-r--r--include/linux/rio_drv.h1
-rw-r--r--include/linux/rmap.h5
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/spi/spi_gpio.h60
-rw-r--r--include/linux/swap.h54
-rw-r--r--include/linux/types.h10
-rw-r--r--include/linux/unwind.h68
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/linux/writeback.h16
-rw-r--r--init/Kconfig4
-rw-r--r--init/do_mounts.c4
-rw-r--r--init/do_mounts_md.c2
-rw-r--r--init/main.c9
-rw-r--r--ipc/ipc_sysctl.c46
-rw-r--r--ipc/sem.c1
-rw-r--r--ipc/shm.c15
-rw-r--r--kernel/cgroup.c33
-rw-r--r--kernel/compat.c5
-rw-r--r--kernel/cpuset.c34
-rw-r--r--kernel/dma-coherent.c42
-rw-r--r--kernel/exit.c21
-rw-r--r--kernel/fork.c17
-rw-r--r--kernel/kmod.c4
-rw-r--r--kernel/kprobes.c281
-rw-r--r--kernel/module.c33
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/profile.c1
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/sysctl.c27
-rw-r--r--kernel/test_kprobes.c210
-rw-r--r--kernel/time.c4
-rw-r--r--kernel/tsacct.c4
-rw-r--r--lib/bust_spinlocks.c2
-rw-r--r--lib/fault-inject.c1
-rw-r--r--lib/percpu_counter.c18
-rw-r--r--lib/prio_heap.c2
-rw-r--r--lib/proportions.c2
-rw-r--r--lib/radix-tree.c2
-rw-r--r--lib/vsprintf.c4
-rw-r--r--mm/Kconfig6
-rw-r--r--mm/Makefile4
-rw-r--r--mm/backing-dev.c6
-rw-r--r--mm/bootmem.c8
-rw-r--r--mm/filemap.c30
-rw-r--r--mm/filemap_xip.c2
-rw-r--r--mm/fremap.c2
-rw-r--r--mm/hugetlb.c46
-rw-r--r--mm/internal.h2
-rw-r--r--mm/memcontrol.c3
-rw-r--r--mm/memory.c176
-rw-r--r--mm/memory_hotplug.c20
-rw-r--r--mm/migrate.c89
-rw-r--r--mm/mlock.c9
-rw-r--r--mm/mmap.c22
-rw-r--r--mm/mprotect.c6
-rw-r--r--mm/oom_kill.c109
-rw-r--r--mm/page-writeback.c245
-rw-r--r--mm/page_alloc.c135
-rw-r--r--mm/page_cgroup.c2
-rw-r--r--mm/page_io.c6
-rw-r--r--mm/rmap.c60
-rw-r--r--mm/shmem.c82
-rw-r--r--mm/swap.c44
-rw-r--r--mm/swap_state.c31
-rw-r--r--mm/swapfile.c576
-rw-r--r--mm/tiny-shmem.c134
-rw-r--r--mm/vmalloc.c50
-rw-r--r--mm/vmscan.c143
-rw-r--r--samples/firmware_class/firmware_sample_driver.c2
-rw-r--r--samples/kobject/kobject-example.c4
-rw-r--r--samples/kobject/kset-example.c4
-rw-r--r--samples/markers/marker-example.c4
-rw-r--r--samples/tracepoints/tracepoint-probe-sample.c4
-rw-r--r--samples/tracepoints/tracepoint-probe-sample2.c4
-rw-r--r--samples/tracepoints/tracepoint-sample.c4
-rwxr-xr-xscripts/checkpatch.pl172
-rw-r--r--scripts/markup_oops.pl162
-rw-r--r--sound/core/sound.c4
361 files changed, 10136 insertions, 4303 deletions
diff --git a/.mailmap b/.mailmap
index 97f7b4fb6139..4e83e7b52d15 100644
--- a/.mailmap
+++ b/.mailmap
@@ -32,6 +32,7 @@ Christoph Hellwig <hch@lst.de>
32Corey Minyard <minyard@acm.org> 32Corey Minyard <minyard@acm.org>
33David Brownell <david-b@pacbell.net> 33David Brownell <david-b@pacbell.net>
34David Woodhouse <dwmw2@shinybook.infradead.org> 34David Woodhouse <dwmw2@shinybook.infradead.org>
35Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
35Domen Puncer <domen@coderock.org> 36Domen Puncer <domen@coderock.org>
36Douglas Gilbert <dougg@torque.net> 37Douglas Gilbert <dougg@torque.net>
37Ed L. Cashin <ecashin@coraid.com> 38Ed L. Cashin <ecashin@coraid.com>
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 7a16fe1e2270..9fe91c02ee40 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -6,7 +6,6 @@ Description:
6 internal state of the kernel memory blocks. Files could be 6 internal state of the kernel memory blocks. Files could be
7 added or removed dynamically to represent hot-add/remove 7 added or removed dynamically to represent hot-add/remove
8 operations. 8 operations.
9
10Users: hotplug memory add/remove tools 9Users: hotplug memory add/remove tools
11 https://w3.opensource.ibm.com/projects/powerpc-utils/ 10 https://w3.opensource.ibm.com/projects/powerpc-utils/
12 11
@@ -19,6 +18,56 @@ Description:
19 This is useful for a user-level agent to determine 18 This is useful for a user-level agent to determine
20 identify removable sections of the memory before attempting 19 identify removable sections of the memory before attempting
21 potentially expensive hot-remove memory operation 20 potentially expensive hot-remove memory operation
21Users: hotplug memory remove tools
22 https://w3.opensource.ibm.com/projects/powerpc-utils/
23
24What: /sys/devices/system/memory/memoryX/phys_device
25Date: September 2008
26Contact: Badari Pulavarty <pbadari@us.ibm.com>
27Description:
28 The file /sys/devices/system/memory/memoryX/phys_device
29 is read-only and is designed to show the name of physical
30 memory device. Implementation is currently incomplete.
22 31
32What: /sys/devices/system/memory/memoryX/phys_index
33Date: September 2008
34Contact: Badari Pulavarty <pbadari@us.ibm.com>
35Description:
36 The file /sys/devices/system/memory/memoryX/phys_index
37 is read-only and contains the section ID in hexadecimal
38 which is equivalent to decimal X contained in the
39 memory section directory name.
40
41What: /sys/devices/system/memory/memoryX/state
42Date: September 2008
43Contact: Badari Pulavarty <pbadari@us.ibm.com>
44Description:
45 The file /sys/devices/system/memory/memoryX/state
46 is read-write. When read, it's contents show the
47 online/offline state of the memory section. When written,
48 root can toggle the the online/offline state of a removable
49 memory section (see removable file description above)
50 using the following commands.
51 # echo online > /sys/devices/system/memory/memoryX/state
52 # echo offline > /sys/devices/system/memory/memoryX/state
53
54 For example, if /sys/devices/system/memory/memory22/removable
55 contains a value of 1 and
56 /sys/devices/system/memory/memory22/state contains the
57 string "online" the following command can be executed by
58 by root to offline that section.
59 # echo offline > /sys/devices/system/memory/memory22/state
23Users: hotplug memory remove tools 60Users: hotplug memory remove tools
24 https://w3.opensource.ibm.com/projects/powerpc-utils/ 61 https://w3.opensource.ibm.com/projects/powerpc-utils/
62
63What: /sys/devices/system/node/nodeX/memoryY
64Date: September 2008
65Contact: Gary Hade <garyhade@us.ibm.com>
66Description:
67 When CONFIG_NUMA is enabled
68 /sys/devices/system/node/nodeX/memoryY is a symbolic link that
69 points to the corresponding /sys/devices/system/memory/memoryY
70 memory section directory. For example, the following symbolic
71 link is created for memory section 9 on node0.
72 /sys/devices/system/node/node0/memory9 -> ../../memory/memory9
73
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index c74fec8c2351..b2a4d6d244d9 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -26,7 +26,7 @@ mapped only for the time they are actually used and unmapped after the DMA
26transfer. 26transfer.
27 27
28The following API will work of course even on platforms where no such 28The following API will work of course even on platforms where no such
29hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on 29hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on
30top of the virt_to_bus interface. 30top of the virt_to_bus interface.
31 31
32First of all, you should make sure 32First of all, you should make sure
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ccec55394380..cfbfa15a46ba 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -397,7 +397,7 @@ prototypes:
397}; 397};
398 398
399locking rules: 399locking rules:
400 All except ->poll() may block. 400 All may block.
401 BKL 401 BKL
402llseek: no (see below) 402llseek: no (see below)
403read: no 403read: no
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 71df353e367c..32e94635484f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
1385to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 1385to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100
1386causes the kernel to prefer to reclaim dentries and inodes. 1386causes the kernel to prefer to reclaim dentries and inodes.
1387 1387
1388dirty_background_bytes
1389----------------------
1390
1391Contains the amount of dirty memory at which the pdflush background writeback
1392daemon will start writeback.
1393
1394If dirty_background_bytes is written, dirty_background_ratio becomes a function
1395of its value (dirty_background_bytes / the amount of dirtyable system memory).
1396
1388dirty_background_ratio 1397dirty_background_ratio
1389---------------------- 1398----------------------
1390 1399
@@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of
1393pages at which the pdflush background writeback daemon will start writing out 1402pages at which the pdflush background writeback daemon will start writing out
1394dirty data. 1403dirty data.
1395 1404
1405If dirty_background_ratio is written, dirty_background_bytes becomes a function
1406of its value (dirty_background_ratio * the amount of dirtyable system memory).
1407
1408dirty_bytes
1409-----------
1410
1411Contains the amount of dirty memory at which a process generating disk writes
1412will itself start writeback.
1413
1414If dirty_bytes is written, dirty_ratio becomes a function of its value
1415(dirty_bytes / the amount of dirtyable system memory).
1416
1396dirty_ratio 1417dirty_ratio
1397----------------- 1418-----------
1398 1419
1399Contains, as a percentage of the dirtyable system memory (free pages + mapped 1420Contains, as a percentage of the dirtyable system memory (free pages + mapped
1400pages + file cache, not including locked pages and HugePages), the number of 1421pages + file cache, not including locked pages and HugePages), the number of
1401pages at which a process which is generating disk writes will itself start 1422pages at which a process which is generating disk writes will itself start
1402writing out dirty data. 1423writing out dirty data.
1403 1424
1425If dirty_ratio is written, dirty_bytes becomes a function of its value
1426(dirty_ratio * the amount of dirtyable system memory).
1427
1404dirty_writeback_centisecs 1428dirty_writeback_centisecs
1405------------------------- 1429-------------------------
1406 1430
diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470
index 75d13ca147cc..8ce4aa0a0f55 100644
--- a/Documentation/hwmon/adt7470
+++ b/Documentation/hwmon/adt7470
@@ -31,15 +31,11 @@ Each of the measured inputs (temperature, fan speed) has corresponding high/low
31limit values. The ADT7470 will signal an ALARM if any measured value exceeds 31limit values. The ADT7470 will signal an ALARM if any measured value exceeds
32either limit. 32either limit.
33 33
34The ADT7470 DOES NOT sample all inputs continuously. A single pin on the 34The ADT7470 samples all inputs continuously. A kernel thread is started up for
35ADT7470 is connected to a multitude of thermal diodes, but the chip must be 35the purpose of periodically querying the temperature sensors, thus allowing the
36instructed explicitly to read the multitude of diodes. If you want to use 36automatic fan pwm control to set the fan speed. The driver will not read the
37automatic fan control mode, you must manually read any of the temperature 37registers more often than once every 5 seconds. Further, configuration data is
38sensors or the fan control algorithm will not run. The chip WILL NOT DO THIS 38only read once per minute.
39AUTOMATICALLY; this must be done from userspace. This may be a bug in the chip
40design, given that many other AD chips take care of this. The driver will not
41read the registers more often than once every 5 seconds. Further,
42configuration data is only read once per minute.
43 39
44Special Features 40Special Features
45---------------- 41----------------
@@ -72,5 +68,6 @@ pwm#_auto_point2_temp.
72Notes 68Notes
73----- 69-----
74 70
75As stated above, the temperature inputs must be read periodically from 71The temperature inputs no longer need to be read periodically from userspace in
76userspace in order for the automatic pwm algorithm to run. 72order for the automatic pwm algorithm to run. This was the case for earlier
73versions of the driver.
diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt
index d5885468b072..98152bcd515a 100644
--- a/Documentation/ide/warm-plug-howto.txt
+++ b/Documentation/ide/warm-plug-howto.txt
@@ -11,3 +11,8 @@ unplug old device(s) and plug new device(s)
11# echo -n "1" > /sys/class/ide_port/idex/scan 11# echo -n "1" > /sys/class/ide_port/idex/scan
12 12
13done 13done
14
15NOTE: please make sure that partitions are unmounted and that there are
16no other active references to devices before doing "delete_devices" step,
17also do not attempt "scan" step on devices currently in use -- otherwise
18results may be unpredictable and lead to data loss if you're unlucky
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 824699174436..f1d639903325 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -84,7 +84,7 @@ Code Seq# Include File Comments
84'B' C0-FF advanced bbus 84'B' C0-FF advanced bbus
85 <mailto:maassen@uni-freiburg.de> 85 <mailto:maassen@uni-freiburg.de>
86'C' all linux/soundcard.h 86'C' all linux/soundcard.h
87'D' all asm-s390/dasd.h 87'D' all arch/s390/include/asm/dasd.h
88'E' all linux/input.h 88'E' all linux/input.h
89'F' all linux/fb.h 89'F' all linux/fb.h
90'H' all linux/hiddev.h 90'H' all linux/hiddev.h
@@ -105,7 +105,7 @@ Code Seq# Include File Comments
105'S' 80-81 scsi/scsi_ioctl.h conflict! 105'S' 80-81 scsi/scsi_ioctl.h conflict!
106'S' 82-FF scsi/scsi.h conflict! 106'S' 82-FF scsi/scsi.h conflict!
107'T' all linux/soundcard.h conflict! 107'T' all linux/soundcard.h conflict!
108'T' all asm-i386/ioctls.h conflict! 108'T' all arch/x86/include/asm/ioctls.h conflict!
109'U' 00-EF linux/drivers/usb/usb.h 109'U' 00-EF linux/drivers/usb/usb.h
110'V' all linux/vt.h 110'V' all linux/vt.h
111'W' 00-1F linux/watchdog.h conflict! 111'W' 00-1F linux/watchdog.h conflict!
@@ -120,7 +120,7 @@ Code Seq# Include File Comments
120 <mailto:natalia@nikhefk.nikhef.nl> 120 <mailto:natalia@nikhefk.nikhef.nl>
121'c' 00-7F linux/comstats.h conflict! 121'c' 00-7F linux/comstats.h conflict!
122'c' 00-7F linux/coda.h conflict! 122'c' 00-7F linux/coda.h conflict!
123'c' 80-9F asm-s390/chsc.h 123'c' 80-9F arch/s390/include/asm/chsc.h
124'd' 00-FF linux/char/drm/drm/h conflict! 124'd' 00-FF linux/char/drm/drm/h conflict!
125'd' 00-DF linux/video_decoder.h conflict! 125'd' 00-DF linux/video_decoder.h conflict!
126'd' F0-FF linux/digi1.h 126'd' F0-FF linux/digi1.h
@@ -170,7 +170,7 @@ Code Seq# Include File Comments
170 <mailto:oe@port.de> 170 <mailto:oe@port.de>
1710x80 00-1F linux/fb.h 1710x80 00-1F linux/fb.h
1720x81 00-1F linux/videotext.h 1720x81 00-1F linux/videotext.h
1730x89 00-06 asm-i386/sockios.h 1730x89 00-06 arch/x86/include/asm/sockios.h
1740x89 0B-DF linux/sockios.h 1740x89 0B-DF linux/sockios.h
1750x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range 1750x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
1760x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range 1760x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index c6841eee9598..d73fbd2b2b45 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -71,6 +71,11 @@ The @argument descriptions must begin on the very next line following
71this opening short function description line, with no intervening 71this opening short function description line, with no intervening
72empty comment lines. 72empty comment lines.
73 73
74If a function parameter is "..." (varargs), it should be listed in
75kernel-doc notation as:
76 * @...: description
77
78
74Example kernel-doc data structure comment. 79Example kernel-doc data structure comment.
75 80
76/** 81/**
@@ -282,6 +287,32 @@ struct my_struct {
282}; 287};
283 288
284 289
290Including documentation blocks in source files
291----------------------------------------------
292
293To facilitate having source code and comments close together, you can
294include kernel-doc documentation blocks that are free-form comments
295instead of being kernel-doc for functions, structures, unions,
296enums, or typedefs. This could be used for something like a
297theory of operation for a driver or library code, for example.
298
299This is done by using a DOC: section keyword with a section title. E.g.:
300
301/**
302 * DOC: Theory of Operation
303 *
304 * The whizbang foobar is a dilly of a gizmo. It can do whatever you
305 * want it to do, at any time. It reads your mind. Here's how it works.
306 *
307 * foo bar splat
308 *
309 * The only drawback to this gizmo is that is can sometimes damage
310 * hardware, software, or its subject(s).
311 */
312
313DOC: sections are used in SGML templates files as indicated below.
314
315
285How to make new SGML template files 316How to make new SGML template files
286----------------------------------- 317-----------------------------------
287 318
@@ -302,6 +333,9 @@ exported using EXPORT_SYMBOL.
302!F<filename> <function [functions...]> is replaced by the 333!F<filename> <function [functions...]> is replaced by the
303documentation, in <filename>, for the functions listed. 334documentation, in <filename>, for the functions listed.
304 335
336!P<filename> <section title> is replaced by the contents of the DOC:
337section titled <section title> from <filename>.
338Spaces are allowed in <section title>; do not quote the <section title>.
305 339
306Tim. 340Tim.
307*/ <twaugh@redhat.com> 341*/ <twaugh@redhat.com>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a2d8805c03d5..0b3f6711d2f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -469,8 +469,8 @@ and is between 256 and 4096 characters. It is defined in the file
469 469
470 clearcpuid=BITNUM [X86] 470 clearcpuid=BITNUM [X86]
471 Disable CPUID feature X for the kernel. See 471 Disable CPUID feature X for the kernel. See
472 include/asm-x86/cpufeature.h for the valid bit numbers. 472 arch/x86/include/asm/cpufeature.h for the valid bit
473 Note the Linux specific bits are not necessarily 473 numbers. Note the Linux specific bits are not necessarily
474 stable over kernel options, but the vendor specific 474 stable over kernel options, but the vendor specific
475 ones should be. 475 ones should be.
476 Also note that user programs calling CPUID directly 476 Also note that user programs calling CPUID directly
@@ -551,6 +551,11 @@ and is between 256 and 4096 characters. It is defined in the file
551 not work reliably with all consoles, but is known 551 not work reliably with all consoles, but is known
552 to work with serial and VGA consoles. 552 to work with serial and VGA consoles.
553 553
554 coredump_filter=
555 [KNL] Change the default value for
556 /proc/<pid>/coredump_filter.
557 See also Documentation/filesystems/proc.txt.
558
554 cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver 559 cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
555 Format: 560 Format:
556 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] 561 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
@@ -1117,6 +1122,8 @@ and is between 256 and 4096 characters. It is defined in the file
1117 If there are multiple matching configurations changing 1122 If there are multiple matching configurations changing
1118 the same attribute, the last one is used. 1123 the same attribute, the last one is used.
1119 1124
1125 lmb=debug [KNL] Enable lmb debug messages.
1126
1120 load_ramdisk= [RAM] List of ramdisks to load from floppy 1127 load_ramdisk= [RAM] List of ramdisks to load from floppy
1121 See Documentation/blockdev/ramdisk.txt. 1128 See Documentation/blockdev/ramdisk.txt.
1122 1129
@@ -1569,6 +1576,10 @@ and is between 256 and 4096 characters. It is defined in the file
1569 1576
1570 nr_uarts= [SERIAL] maximum number of UARTs to be registered. 1577 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1571 1578
1579 ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
1580 See Documentation/debugging-via-ohci1394.txt for more
1581 info.
1582
1572 olpc_ec_timeout= [OLPC] ms delay when issuing EC commands 1583 olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
1573 Rather than timing out after 20 ms if an EC 1584 Rather than timing out after 20 ms if an EC
1574 command is not properly ACKed, override the length 1585 command is not properly ACKed, override the length
@@ -1793,10 +1804,10 @@ and is between 256 and 4096 characters. It is defined in the file
1793 autoconfiguration. 1804 autoconfiguration.
1794 Ranges are in pairs (memory base and size). 1805 Ranges are in pairs (memory base and size).
1795 1806
1796 dynamic_printk 1807 dynamic_printk Enables pr_debug()/dev_dbg() calls if
1797 Enables pr_debug()/dev_dbg() calls if 1808 CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
1798 CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also 1809 These can also be switched on/off via
1799 be switched on/off via <debugfs>/dynamic_printk/modules 1810 <debugfs>/dynamic_printk/modules
1800 1811
1801 print-fatal-signals= 1812 print-fatal-signals=
1802 [KNL] debug: print fatal signals 1813 [KNL] debug: print fatal signals
@@ -1884,7 +1895,7 @@ and is between 256 and 4096 characters. It is defined in the file
1884 1895
1885 reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode 1896 reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
1886 Format: <reboot_mode>[,<reboot_mode2>[,...]] 1897 Format: <reboot_mode>[,<reboot_mode2>[,...]]
1887 See arch/*/kernel/reboot.c or arch/*/kernel/process.c 1898 See arch/*/kernel/reboot.c or arch/*/kernel/process.c
1888 1899
1889 relax_domain_level= 1900 relax_domain_level=
1890 [KNL, SMP] Set scheduler's default relax_domain_level. 1901 [KNL, SMP] Set scheduler's default relax_domain_level.
@@ -2432,8 +2443,8 @@ and is between 256 and 4096 characters. It is defined in the file
2432 Format: 2443 Format:
2433 <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] 2444 <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
2434 2445
2435 norandmaps Don't use address space randomization 2446 norandmaps Don't use address space randomization. Equivalent to
2436 Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space 2447 echo 0 > /proc/sys/kernel/randomize_va_space
2437 2448
2438______________________________________________________________________ 2449______________________________________________________________________
2439 2450
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index a79633d702bf..48b3de90eb1e 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -497,7 +497,10 @@ The first column provides the kernel address where the probe is inserted.
497The second column identifies the type of probe (k - kprobe, r - kretprobe 497The second column identifies the type of probe (k - kprobe, r - kretprobe
498and j - jprobe), while the third column specifies the symbol+offset of 498and j - jprobe), while the third column specifies the symbol+offset of
499the probe. If the probed function belongs to a module, the module name 499the probe. If the probed function belongs to a module, the module name
500is also specified. 500is also specified. Following columns show probe status. If the probe is on
501a virtual address that is no longer valid (module init sections, module
502virtual addresses that correspond to modules that've been unloaded),
503such probes are marked with [GONE].
501 504
502/debug/kprobes/enabled: Turn kprobes ON/OFF 505/debug/kprobes/enabled: Turn kprobes ON/OFF
503 506
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
index 95070028d15e..505f19607542 100644
--- a/Documentation/magic-number.txt
+++ b/Documentation/magic-number.txt
@@ -125,14 +125,14 @@ TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
125ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h 125ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h
126SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h 126SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h
127SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c 127SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c
128GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h 128GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h
129RED_MAGIC1 0x5a2cf071 (any) mm/slab.c 129RED_MAGIC1 0x5a2cf071 (any) mm/slab.c
130STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h 130STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h
131EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c 131EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c
132HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h 132HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h
133EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h 133EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h
134PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h 134PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h
135KV_MAGIC 0x5f4b565f kernel_vars_s include/asm-mips64/sn/klkernvars.h 135KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h
136I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c 136I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c
137TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c 137TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c
138M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c 138M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c
@@ -158,7 +158,7 @@ CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c
158QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c 158QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c
159QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c 159QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c
160HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c 160HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c
161NMI_MAGIC 0x48414d4d455201 nmi_s include/asm-mips64/sn/nmi.h 161NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h
162 162
163Note that there are also defined special per-driver magic numbers in sound 163Note that there are also defined special per-driver magic numbers in sound
164memory management. See include/sound/sndmagic.h for complete list of them. Many 164memory management. See include/sound/sndmagic.h for complete list of them. Many
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 168117bd6ee8..4c2ecf537a4a 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -124,7 +124,7 @@ config options.
124 This option can be kernel module too. 124 This option can be kernel module too.
125 125
126-------------------------------- 126--------------------------------
1273 sysfs files for memory hotplug 1274 sysfs files for memory hotplug
128-------------------------------- 128--------------------------------
129All sections have their device information under /sys/devices/system/memory as 129All sections have their device information under /sys/devices/system/memory as
130 130
@@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at
138(0x100000000 / 1Gib = 4) 138(0x100000000 / 1Gib = 4)
139This device covers address range [0x100000000 ... 0x140000000) 139This device covers address range [0x100000000 ... 0x140000000)
140 140
141Under each section, you can see 3 files. 141Under each section, you can see 4 files.
142 142
143/sys/devices/system/memory/memoryXXX/phys_index 143/sys/devices/system/memory/memoryXXX/phys_index
144/sys/devices/system/memory/memoryXXX/phys_device 144/sys/devices/system/memory/memoryXXX/phys_device
145/sys/devices/system/memory/memoryXXX/state 145/sys/devices/system/memory/memoryXXX/state
146/sys/devices/system/memory/memoryXXX/removable
146 147
147'phys_index' : read-only and contains section id, same as XXX. 148'phys_index' : read-only and contains section id, same as XXX.
148'state' : read-write 149'state' : read-write
@@ -150,10 +151,20 @@ Under each section, you can see 3 files.
150 at write: user can specify "online", "offline" command 151 at write: user can specify "online", "offline" command
151'phys_device': read-only: designed to show the name of physical memory device. 152'phys_device': read-only: designed to show the name of physical memory device.
152 This is not well implemented now. 153 This is not well implemented now.
154'removable' : read-only: contains an integer value indicating
 155 		whether the memory section is removable or not.
 156 		A value of 1 indicates that the memory
157 section is removable and a value of 0 indicates that
158 it is not removable.
153 159
154NOTE: 160NOTE:
155 These directories/files appear after physical memory hotplug phase. 161 These directories/files appear after physical memory hotplug phase.
156 162
163If CONFIG_NUMA is enabled the
164/sys/devices/system/memory/memoryXXX memory section
165directories can also be accessed via symbolic links located in
166the /sys/devices/system/node/node* directories. For example:
167/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
157 168
158-------------------------------- 169--------------------------------
1594. Physical memory hot-add phase 1704. Physical memory hot-add phase
@@ -365,7 +376,6 @@ node if necessary.
365 - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like 376 - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
366 sysctl or new control file. 377 sysctl or new control file.
367 - showing memory section and physical device relationship. 378 - showing memory section and physical device relationship.
368 - showing memory section and node relationship (maybe good for NUMA)
369 - showing memory section is under ZONE_MOVABLE or not 379 - showing memory section is under ZONE_MOVABLE or not
370 - test and make it better memory offlining. 380 - test and make it better memory offlining.
371 - support HugeTLB page migration and offlining. 381 - support HugeTLB page migration and offlining.
diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README
index 25a6ed1aaa5b..f54962aea84d 100644
--- a/Documentation/mips/AU1xxx_IDE.README
+++ b/Documentation/mips/AU1xxx_IDE.README
@@ -44,7 +44,7 @@ FILES, CONFIGS AND COMPATABILITY
44 44
45Two files are introduced: 45Two files are introduced:
46 46
47 a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' 47 a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h'
 48 contains : struct _auide_hwif 48 contains : struct _auide_hwif
49 timing parameters for PIO mode 0/1/2/3/4 49 timing parameters for PIO mode 0/1/2/3/4
50 timing parameters for MWDMA 0/1/2 50 timing parameters for MWDMA 0/1/2
diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt
index 472739880e87..ffa4183fdb8b 100644
--- a/Documentation/powerpc/cpu_features.txt
+++ b/Documentation/powerpc/cpu_features.txt
@@ -31,7 +31,7 @@ anyways).
31 31
32After detecting the processor type, the kernel patches out sections of code 32After detecting the processor type, the kernel patches out sections of code
33that shouldn't be used by writing nop's over it. Using cpufeatures requires 33that shouldn't be used by writing nop's over it. Using cpufeatures requires
34just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S 34just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
35transfer_to_handler: 35transfer_to_handler:
36 36
37 #ifdef CONFIG_ALTIVEC 37 #ifdef CONFIG_ALTIVEC
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index d30a281c570f..10711d9f0788 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -1402,7 +1402,7 @@ Syscalls are implemented on Linux for S390 by the Supervisor call instruction (S
1402possibilities of these as the instruction is made up of a 0xA opcode & the second byte being 1402possibilities of these as the instruction is made up of a 0xA opcode & the second byte being
1403the syscall number. They are traced using the simple command. 1403the syscall number. They are traced using the simple command.
1404TR SVC <Optional value or range> 1404TR SVC <Optional value or range>
1405the syscalls are defined in linux/include/asm-s390/unistd.h 1405the syscalls are defined in linux/arch/s390/include/asm/unistd.h
1406e.g. to trace all file opens just do 1406e.g. to trace all file opens just do
1407TR SVC 5 ( as this is the syscall number of open ) 1407TR SVC 5 ( as this is the syscall number of open )
1408 1408
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt
index c4b7b2bd369a..480a78ef5a1e 100644
--- a/Documentation/s390/cds.txt
+++ b/Documentation/s390/cds.txt
@@ -98,7 +98,7 @@ platform. Some of the interface routines are specific to Linux/390 and some
98of them can be found on other Linux platforms implementations too. 98of them can be found on other Linux platforms implementations too.
99Miscellaneous function prototypes, data declarations, and macro definitions 99Miscellaneous function prototypes, data declarations, and macro definitions
100can be found in the architecture specific C header file 100can be found in the architecture specific C header file
101linux/include/asm-s390/irq.h. 101linux/arch/s390/include/asm/irq.h.
102 102
103Overview of CDS interface concepts 103Overview of CDS interface concepts
104 104
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index e05420973698..2d10053dd97e 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -2,7 +2,7 @@ S390 Debug Feature
2================== 2==================
3 3
4files: arch/s390/kernel/debug.c 4files: arch/s390/kernel/debug.c
5 include/asm-s390/debug.h 5 arch/s390/include/asm/debug.h
6 6
7Description: 7Description:
8------------ 8------------
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index d79eeda7a699..cd05994a49e6 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm:
41 41
42============================================================== 42==============================================================
43 43
44dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, 44dirty_bytes, dirty_ratio, dirty_background_bytes,
45dirty_background_ratio, dirty_expire_centisecs,
45dirty_writeback_centisecs, highmem_is_dirtyable, 46dirty_writeback_centisecs, highmem_is_dirtyable,
46vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, 47vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
47drop-caches, hugepages_treat_as_movable: 48drop-caches, hugepages_treat_as_movable:
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 125eed560e5a..0706a7282a8c 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -137,13 +137,6 @@ shrink_page_list() where they will be detected when vmscan walks the reverse
137map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() 137map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
138will cull the page at that point. 138will cull the page at that point.
139 139
140Note that for anonymous pages, shrink_page_list() attempts to add the page to
141the swap cache before it tries to unmap the page. To avoid this unnecessary
142consumption of swap space, shrink_page_list() calls try_to_munlock() to check
143whether any VM_LOCKED vmas map the page without attempting to unmap the page.
144If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
145without consuming swap space. try_to_munlock() will be described below.
146
147To "cull" an unevictable page, vmscan simply puts the page back on the lru 140To "cull" an unevictable page, vmscan simply puts the page back on the lru
148list using putback_lru_page()--the inverse operation to isolate_lru_page()-- 141list using putback_lru_page()--the inverse operation to isolate_lru_page()--
149after dropping the page lock. Because the condition which makes the page 142after dropping the page lock. Because the condition which makes the page
@@ -190,8 +183,8 @@ several places:
190 in the VM_LOCKED flag being set for the vma. 183 in the VM_LOCKED flag being set for the vma.
1913) in the fault path, if mlocked pages are "culled" in the fault path, 1843) in the fault path, if mlocked pages are "culled" in the fault path,
192 and when a VM_LOCKED stack segment is expanded. 185 and when a VM_LOCKED stack segment is expanded.
1934) as mentioned above, in vmscan:shrink_page_list() with attempting to 1864) as mentioned above, in vmscan:shrink_page_list() when attempting to
194 reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock(). 187 reclaim a page in a VM_LOCKED vma via try_to_unmap().
195 188
196Mlocked pages become unlocked and rescued from the unevictable list when: 189Mlocked pages become unlocked and rescued from the unevictable list when:
197 190
@@ -260,9 +253,9 @@ mlock_fixup() filters several classes of "special" vmas:
260 253
2612) vmas mapping hugetlbfs page are already effectively pinned into memory. 2542) vmas mapping hugetlbfs page are already effectively pinned into memory.
262 We don't need nor want to mlock() these pages. However, to preserve the 255 We don't need nor want to mlock() these pages. However, to preserve the
263 prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup() 256 prior behavior of mlock()--before the unevictable/mlock changes--
264 will call make_pages_present() in the hugetlbfs vma range to allocate the 257 mlock_fixup() will call make_pages_present() in the hugetlbfs vma range
265 huge pages and populate the ptes. 258 to allocate the huge pages and populate the ptes.
266 259
2673) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of 2603) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
268 kernel pages, such as the vdso page, relay channel pages, etc. These pages 261 kernel pages, such as the vdso page, relay channel pages, etc. These pages
@@ -322,7 +315,7 @@ __mlock_vma_pages_range()--the same function used to mlock a vma range--
322passing a flag to indicate that munlock() is being performed. 315passing a flag to indicate that munlock() is being performed.
323 316
324Because the vma access protections could have been changed to PROT_NONE after 317Because the vma access protections could have been changed to PROT_NONE after
325faulting in and mlocking some pages, get_user_pages() was unreliable for visiting 318faulting in and mlocking pages, get_user_pages() was unreliable for visiting
326these pages for munlocking. Because we don't want to leave pages mlocked(), 319these pages for munlocking. Because we don't want to leave pages mlocked(),
327get_user_pages() was enhanced to accept a flag to ignore the permissions when 320get_user_pages() was enhanced to accept a flag to ignore the permissions when
328fetching the pages--all of which should be resident as a result of previous 321fetching the pages--all of which should be resident as a result of previous
@@ -416,8 +409,8 @@ Mlocked Pages: munmap()/exit()/exec() System Call Handling
416When unmapping an mlocked region of memory, whether by an explicit call to 409When unmapping an mlocked region of memory, whether by an explicit call to
417munmap() or via an internal unmap from exit() or exec() processing, we must 410munmap() or via an internal unmap from exit() or exec() processing, we must
418munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. 411munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
419Before the unevictable/mlock changes, mlocking did not mark the pages in any way, 412Before the unevictable/mlock changes, mlocking did not mark the pages in any
420so unmapping them required no processing. 413way, so unmapping them required no processing.
421 414
422To munlock a range of memory under the unevictable/mlock infrastructure, the 415To munlock a range of memory under the unevictable/mlock infrastructure, the
 423munmap() handler and task address space tear down function call 416munmap() handler and task address space tear down function call
@@ -517,12 +510,10 @@ couldn't be mlocked.
517Mlocked pages: try_to_munlock() Reverse Map Scan 510Mlocked pages: try_to_munlock() Reverse Map Scan
518 511
519TODO/FIXME: a better name might be page_mlocked()--analogous to the 512TODO/FIXME: a better name might be page_mlocked()--analogous to the
520page_referenced() reverse map walker--especially if we continue to call this 513page_referenced() reverse map walker.
521from shrink_page_list(). See related TODO/FIXME below.
522 514
523When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System 515When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall()
524Call Handling" above--tries to munlock a page, or when shrink_page_list() 516System Call Handling" above--tries to munlock a page, it needs to
525encounters an anonymous page that is not yet in the swap cache, they need to
526determine whether or not the page is mapped by any VM_LOCKED vma, without 517determine whether or not the page is mapped by any VM_LOCKED vma, without
527actually attempting to unmap all ptes from the page. For this purpose, the 518actually attempting to unmap all ptes from the page. For this purpose, the
528unevictable/mlock infrastructure introduced a variant of try_to_unmap() called 519unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
@@ -535,10 +526,7 @@ for VM_LOCKED vmas. When such a vma is found for anonymous pages and file
535pages mapped in linear VMAs, as in the try_to_unmap() case, the functions 526pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
 536attempt to acquire the associated mmap semaphore, mlock the page via 527attempt to acquire the associated mmap semaphore, mlock the page via
537mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the 528mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
 538pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs 529pre-clearing of the page's PG_mlocked done by munlock_vma_page().
539shrink_page_list() that the anonymous page should be culled rather than added
540to the swap cache in preparation for a try_to_unmap() that will almost
541certainly fail.
542 530
543If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap 531If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
544semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() 532semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list()
@@ -557,10 +545,7 @@ However, the scan can terminate when it encounters a VM_LOCKED vma and can
 557successfully acquire the vma's mmap semaphore for read and mlock the page. 545successfully acquire the vma's mmap semaphore for read and mlock the page.
558Although try_to_munlock() can be called many [very many!] times when 546Although try_to_munlock() can be called many [very many!] times when
559munlock()ing a large region or tearing down a large address space that has been 547munlock()ing a large region or tearing down a large address space that has been
560mlocked via mlockall(), overall this is a fairly rare event. In addition, 548mlocked via mlockall(), overall this is a fairly rare event.
561although shrink_page_list() calls try_to_munlock() for every anonymous page that
562it handles that is not yet in the swap cache, on average anonymous pages will
563have very short reverse map lists.
564 549
565Mlocked Page: Page Reclaim in shrink_*_list() 550Mlocked Page: Page Reclaim in shrink_*_list()
566 551
@@ -588,8 +573,8 @@ Some examples of these unevictable pages on the LRU lists are:
588 munlock_vma_page() was forced to let the page back on to the normal 573 munlock_vma_page() was forced to let the page back on to the normal
589 LRU list for vmscan to handle. 574 LRU list for vmscan to handle.
590 575
591shrink_inactive_list() also culls any unevictable pages that it finds 576shrink_inactive_list() also culls any unevictable pages that it finds on
592on the inactive lists, again diverting them to the appropriate zone's unevictable 577the inactive lists, again diverting them to the appropriate zone's unevictable
593lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became 578lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became
594SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or 579SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
595pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from 580pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
@@ -597,19 +582,7 @@ the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice
597the latter, but will pass on to shrink_page_list(). 582the latter, but will pass on to shrink_page_list().
598 583
599shrink_page_list() again culls obviously unevictable pages that it could 584shrink_page_list() again culls obviously unevictable pages that it could
600encounter for similar reason to shrink_inactive_list(). As already discussed, 585encounter for similar reason to shrink_inactive_list(). Pages mapped into
601shrink_page_list() proactively looks for anonymous pages that should have
602PG_mlocked set but don't--these would not be detected by page_evictable()--to
603avoid adding them to the swap cache unnecessarily. File pages mapped into
604VM_LOCKED vmas but without PG_mlocked set will make it all the way to 586VM_LOCKED vmas but without PG_mlocked set will make it all the way to
605try_to_unmap(). shrink_page_list() will divert them to the unevictable list when 587try_to_unmap(). shrink_page_list() will divert them to the unevictable list
606try_to_unmap() returns SWAP_MLOCK, as discussed above. 588when try_to_unmap() returns SWAP_MLOCK, as discussed above.
607
608TODO/FIXME: If we can enhance the swap cache to reliably remove entries
609with page_count(page) > 2, as long as all ptes are mapped to the page and
610not the swap entry, we can probably remove the call to try_to_munlock() in
611shrink_page_list() and just remove the page from the swap cache when
612try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page()
613doesn't seem to allow that.
614
615
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 169ad423a3d1..4f913857b8a2 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit
3real-mode setup code of the kernel. References/settings to it mainly 3real-mode setup code of the kernel. References/settings to it mainly
4are in: 4are in:
5 5
6 include/asm-x86/bootparam.h 6 arch/x86/include/asm/bootparam.h
7 7
8 8
9Offset Proto Name Meaning 9Offset Proto Name Meaning
diff --git a/MAINTAINERS b/MAINTAINERS
index 141aff67bd6d..094dd52d7309 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -616,7 +616,7 @@ M: mkpetch@internode.on.net
616S: Maintained 616S: Maintained
617 617
618ARM/TOSA MACHINE SUPPORT 618ARM/TOSA MACHINE SUPPORT
619P: Dmitry Baryshkov 619P: Dmitry Eremin-Solenikov
620M: dbaryshkov@gmail.com 620M: dbaryshkov@gmail.com
621P: Dirk Opfer 621P: Dirk Opfer
622M: dirk@opfer-online.de 622M: dirk@opfer-online.de
@@ -1092,11 +1092,8 @@ S: Maintained
1092 1092
1093CHECKPATCH 1093CHECKPATCH
1094P: Andy Whitcroft 1094P: Andy Whitcroft
1095M: apw@shadowen.org 1095M: apw@canonical.com
1096P: Randy Dunlap 1096L: linux-kernel@vger.kernel.org
1097M: rdunlap@xenotime.net
1098P: Joel Schopp
1099M: jschopp@austin.ibm.com
1100S: Supported 1097S: Supported
1101 1098
1102CISCO 10G ETHERNET DRIVER 1099CISCO 10G ETHERNET DRIVER
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index ca88e54dec93..62b363584b2b 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -1,6 +1,7 @@
1#ifndef _ALPHA_ATOMIC_H 1#ifndef _ALPHA_ATOMIC_H
2#define _ALPHA_ATOMIC_H 2#define _ALPHA_ATOMIC_H
3 3
4#include <linux/types.h>
4#include <asm/barrier.h> 5#include <asm/barrier.h>
5#include <asm/system.h> 6#include <asm/system.h>
6 7
@@ -13,14 +14,6 @@
13 */ 14 */
14 15
15 16
16/*
17 * Counter is volatile to make sure gcc doesn't try to be clever
18 * and move things around on us. We need to use _exactly_ the address
19 * the user gave us, not some alias that contains the same information.
20 */
21typedef struct { volatile int counter; } atomic_t;
22typedef struct { volatile long counter; } atomic64_t;
23
24#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) 17#define ATOMIC_INIT(i) ( (atomic_t) { (i) } )
25#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) 18#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } )
26 19
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 325f881ccb50..ee99723b3a6c 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,10 +12,9 @@
12#define __ASM_ARM_ATOMIC_H 12#define __ASM_ARM_ATOMIC_H
13 13
14#include <linux/compiler.h> 14#include <linux/compiler.h>
15#include <linux/types.h>
15#include <asm/system.h> 16#include <asm/system.h>
16 17
17typedef struct { volatile int counter; } atomic_t;
18
19#define ATOMIC_INIT(i) { (i) } 18#define ATOMIC_INIT(i) { (i) }
20 19
21#ifdef __KERNEL__ 20#ifdef __KERNEL__
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 3f9abe0e9aff..f692efddd449 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -92,9 +92,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
92void __kprobes arch_remove_kprobe(struct kprobe *p) 92void __kprobes arch_remove_kprobe(struct kprobe *p)
93{ 93{
94 if (p->ainsn.insn) { 94 if (p->ainsn.insn) {
95 mutex_lock(&kprobe_mutex);
96 free_insn_slot(p->ainsn.insn, 0); 95 free_insn_slot(p->ainsn.insn, 0);
97 mutex_unlock(&kprobe_mutex);
98 p->ainsn.insn = NULL; 96 p->ainsn.insn = NULL;
99 } 97 }
100} 98}
diff --git a/arch/arm/mach-s3c2410/include/mach/spi.h b/arch/arm/mach-s3c2410/include/mach/spi.h
index 774f3adfe8ad..1d300fb112b1 100644
--- a/arch/arm/mach-s3c2410/include/mach/spi.h
+++ b/arch/arm/mach-s3c2410/include/mach/spi.h
@@ -14,7 +14,7 @@
14#define __ASM_ARCH_SPI_H __FILE__ 14#define __ASM_ARCH_SPI_H __FILE__
15 15
16struct s3c2410_spi_info { 16struct s3c2410_spi_info {
17 unsigned long pin_cs; /* simple gpio cs */ 17 int pin_cs; /* simple gpio cs */
18 unsigned int num_cs; /* total chipselects */ 18 unsigned int num_cs; /* total chipselects */
19 int bus_num; /* bus number to use. */ 19 int bus_num; /* bus number to use. */
20 20
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 7ef3862a73d0..318815107748 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -14,9 +14,9 @@
14#ifndef __ASM_AVR32_ATOMIC_H 14#ifndef __ASM_AVR32_ATOMIC_H
15#define __ASM_AVR32_ATOMIC_H 15#define __ASM_AVR32_ATOMIC_H
16 16
17#include <linux/types.h>
17#include <asm/system.h> 18#include <asm/system.h>
18 19
19typedef struct { volatile int counter; } atomic_t;
20#define ATOMIC_INIT(i) { (i) } 20#define ATOMIC_INIT(i) { (i) }
21 21
22#define atomic_read(v) ((v)->counter) 22#define atomic_read(v) ((v)->counter)
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 0d987373bc01..d547c8df157d 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/bug.h> 9#include <linux/bug.h>
10#include <linux/hardirq.h>
10#include <linux/init.h> 11#include <linux/init.h>
11#include <linux/kallsyms.h> 12#include <linux/kallsyms.h>
12#include <linux/kdebug.h> 13#include <linux/kdebug.h>
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 7cf508718605..25776c19064b 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -1,6 +1,7 @@
1#ifndef __ARCH_BLACKFIN_ATOMIC__ 1#ifndef __ARCH_BLACKFIN_ATOMIC__
2#define __ARCH_BLACKFIN_ATOMIC__ 2#define __ARCH_BLACKFIN_ATOMIC__
3 3
4#include <linux/types.h>
4#include <asm/system.h> /* local_irq_XXX() */ 5#include <asm/system.h> /* local_irq_XXX() */
5 6
6/* 7/*
@@ -13,9 +14,6 @@
13 * Tony Kou (tonyko@lineo.ca) Lineo Inc. 2001 14 * Tony Kou (tonyko@lineo.ca) Lineo Inc. 2001
14 */ 15 */
15 16
16typedef struct {
17 int counter;
18} atomic_t;
19#define ATOMIC_INIT(i) { (i) } 17#define ATOMIC_INIT(i) { (i) }
20 18
21#define atomic_read(v) ((v)->counter) 19#define atomic_read(v) ((v)->counter)
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
index f71ea686a2ea..5718dd8902a1 100644
--- a/arch/cris/include/asm/atomic.h
+++ b/arch/cris/include/asm/atomic.h
@@ -4,7 +4,7 @@
4#define __ASM_CRIS_ATOMIC__ 4#define __ASM_CRIS_ATOMIC__
5 5
6#include <linux/compiler.h> 6#include <linux/compiler.h>
7 7#include <linux/types.h>
8#include <asm/system.h> 8#include <asm/system.h>
9#include <arch/atomic.h> 9#include <arch/atomic.h>
10 10
@@ -13,8 +13,6 @@
13 * resource counting etc.. 13 * resource counting etc..
14 */ 14 */
15 15
16typedef struct { volatile int counter; } atomic_t;
17
18#define ATOMIC_INIT(i) { (i) } 16#define ATOMIC_INIT(i) { (i) }
19 17
20#define atomic_read(v) ((v)->counter) 18#define atomic_read(v) ((v)->counter)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b4cf0ea97ede..833186c8dc3b 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -1,12 +1,13 @@
1#ifndef __ARCH_H8300_ATOMIC__ 1#ifndef __ARCH_H8300_ATOMIC__
2#define __ARCH_H8300_ATOMIC__ 2#define __ARCH_H8300_ATOMIC__
3 3
4#include <linux/types.h>
5
4/* 6/*
5 * Atomic operations that C can't guarantee us. Useful for 7 * Atomic operations that C can't guarantee us. Useful for
6 * resource counting etc.. 8 * resource counting etc..
7 */ 9 */
8 10
9typedef struct { int counter; } atomic_t;
10#define ATOMIC_INIT(i) { (i) } 11#define ATOMIC_INIT(i) { (i) }
11 12
12#define atomic_read(v) ((v)->counter) 13#define atomic_read(v) ((v)->counter)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 50c2b83fd5a0..d37292bd9875 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -17,12 +17,6 @@
17#include <asm/intrinsics.h> 17#include <asm/intrinsics.h>
18#include <asm/system.h> 18#include <asm/system.h>
19 19
20/*
21 * On IA-64, counter must always be volatile to ensure that that the
22 * memory accesses are ordered.
23 */
24typedef struct { volatile __s32 counter; } atomic_t;
25typedef struct { volatile __s64 counter; } atomic64_t;
26 20
27#define ATOMIC_INIT(i) ((atomic_t) { (i) }) 21#define ATOMIC_INIT(i) ((atomic_t) { (i) })
28#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) 22#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index f07688da947c..097b84d54e73 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -670,9 +670,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
670 670
671void __kprobes arch_remove_kprobe(struct kprobe *p) 671void __kprobes arch_remove_kprobe(struct kprobe *p)
672{ 672{
673 mutex_lock(&kprobe_mutex); 673 if (p->ainsn.insn) {
674 free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); 674 free_insn_slot(p->ainsn.insn,
675 mutex_unlock(&kprobe_mutex); 675 p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
676 p->ainsn.insn = NULL;
677 }
676} 678}
677/* 679/*
678 * We are resuming execution after a single step fault, so the pt_regs 680 * We are resuming execution after a single step fault, so the pt_regs
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 054bcd9439aa..56e12903973c 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
692 pgdat = NODE_DATA(nid); 692 pgdat = NODE_DATA(nid);
693 693
694 zone = pgdat->node_zones + ZONE_NORMAL; 694 zone = pgdat->node_zones + ZONE_NORMAL;
695 ret = __add_pages(zone, start_pfn, nr_pages); 695 ret = __add_pages(nid, zone, start_pfn, nr_pages);
696 696
697 if (ret) 697 if (ret)
698 printk("%s: Problem encountered in __add_pages() as ret=%d\n", 698 printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/m68knommu/include/asm/atomic.h b/arch/m68knommu/include/asm/atomic.h
index d5632a305dae..6bb674855a3f 100644
--- a/arch/m68knommu/include/asm/atomic.h
+++ b/arch/m68knommu/include/asm/atomic.h
@@ -1,6 +1,7 @@
1#ifndef __ARCH_M68KNOMMU_ATOMIC__ 1#ifndef __ARCH_M68KNOMMU_ATOMIC__
2#define __ARCH_M68KNOMMU_ATOMIC__ 2#define __ARCH_M68KNOMMU_ATOMIC__
3 3
4#include <linux/types.h>
4#include <asm/system.h> 5#include <asm/system.h>
5 6
6/* 7/*
@@ -12,7 +13,6 @@
12 * We do not have SMP m68k systems, so we don't have to deal with that. 13 * We do not have SMP m68k systems, so we don't have to deal with that.
13 */ 14 */
14 15
15typedef struct { int counter; } atomic_t;
16#define ATOMIC_INIT(i) { (i) } 16#define ATOMIC_INIT(i) { (i) }
17 17
18#define atomic_read(v) ((v)->counter) 18#define atomic_read(v) ((v)->counter)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 1232be3885b0..c996c3b4d074 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -15,13 +15,12 @@
15#define _ASM_ATOMIC_H 15#define _ASM_ATOMIC_H
16 16
17#include <linux/irqflags.h> 17#include <linux/irqflags.h>
18#include <linux/types.h>
18#include <asm/barrier.h> 19#include <asm/barrier.h>
19#include <asm/cpu-features.h> 20#include <asm/cpu-features.h>
20#include <asm/war.h> 21#include <asm/war.h>
21#include <asm/system.h> 22#include <asm/system.h>
22 23
23typedef struct { volatile int counter; } atomic_t;
24
25#define ATOMIC_INIT(i) { (i) } 24#define ATOMIC_INIT(i) { (i) }
26 25
27/* 26/*
@@ -404,8 +403,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
404 403
405#ifdef CONFIG_64BIT 404#ifdef CONFIG_64BIT
406 405
407typedef struct { volatile long counter; } atomic64_t;
408
409#define ATOMIC64_INIT(i) { (i) } 406#define ATOMIC64_INIT(i) { (i) }
410 407
411/* 408/*
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 57fcc4a5ebb4..edbfe25c5fc1 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -155,14 +155,11 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
155#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) 155#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
156#endif 156#endif
157 157
158/* Note that we need not lock read accesses - aligned word writes/reads 158/*
159 * are atomic, so a reader never sees unconsistent values. 159 * Note that we need not lock read accesses - aligned word writes/reads
160 * 160 * are atomic, so a reader never sees inconsistent values.
161 * Cache-line alignment would conflict with, for example, linux/module.h
162 */ 161 */
163 162
164typedef struct { volatile int counter; } atomic_t;
165
166/* It's possible to reduce all atomic operations to either 163/* It's possible to reduce all atomic operations to either
167 * __atomic_add_return, atomic_set and atomic_read (the latter 164 * __atomic_add_return, atomic_set and atomic_read (the latter
168 * is there only for consistency). 165 * is there only for consistency).
@@ -260,8 +257,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
260 257
261#ifdef CONFIG_64BIT 258#ifdef CONFIG_64BIT
262 259
263typedef struct { volatile s64 counter; } atomic64_t;
264
265#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) 260#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
266 261
267static __inline__ int 262static __inline__ int
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 499be5bdd6fa..b401950f5259 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,7 +5,7 @@
5 * PowerPC atomic operations 5 * PowerPC atomic operations
6 */ 6 */
7 7
8typedef struct { int counter; } atomic_t; 8#include <linux/types.h>
9 9
10#ifdef __KERNEL__ 10#ifdef __KERNEL__
11#include <linux/compiler.h> 11#include <linux/compiler.h>
@@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
251 251
252#ifdef __powerpc64__ 252#ifdef __powerpc64__
253 253
254typedef struct { long counter; } atomic64_t;
255
256#define ATOMIC64_INIT(i) { (i) } 254#define ATOMIC64_INIT(i) { (i) }
257 255
258static __inline__ long atomic64_read(const atomic64_t *v) 256static __inline__ long atomic64_read(const atomic64_t *v)
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 26f0d0ab27a5..b1dafb6a9743 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -18,6 +18,12 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
18 pte_t *ptep); 18 pte_t *ptep);
19 19
20/* 20/*
21 * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
22 * to override the version in mm/hugetlb.c
23 */
24#define vma_mmu_pagesize vma_mmu_pagesize
25
26/*
21 * If the arch doesn't supply something else, assume that hugepage 27 * If the arch doesn't supply something else, assume that hugepage
22 * size aligned regions are ok without further preparation. 28 * size aligned regions are ok without further preparation.
23 */ 29 */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915452c8..989edcdf0297 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
96 96
97void __kprobes arch_remove_kprobe(struct kprobe *p) 97void __kprobes arch_remove_kprobe(struct kprobe *p)
98{ 98{
99 mutex_lock(&kprobe_mutex); 99 if (p->ainsn.insn) {
100 free_insn_slot(p->ainsn.insn, 0); 100 free_insn_slot(p->ainsn.insn, 0);
101 mutex_unlock(&kprobe_mutex); 101 p->ainsn.insn = NULL;
102 }
102} 103}
103 104
104static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 105static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 201c7a5486cb..9920d6a7cf29 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -512,6 +512,13 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
512 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 512 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
513} 513}
514 514
515unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
516{
517 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
518
519 return 1UL << mmu_psize_to_shift(psize);
520}
521
515/* 522/*
516 * Called by asm hashtable.S for doing lazy icache flush 523 * Called by asm hashtable.S for doing lazy icache flush
517 */ 524 */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06ebb3f2f..f00f09a77f12 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
132 /* this should work for most non-highmem platforms */ 132 /* this should work for most non-highmem platforms */
133 zone = pgdata->node_zones; 133 zone = pgdata->node_zones;
134 134
135 return __add_pages(zone, start_pfn, nr_pages); 135 return __add_pages(nid, zone, start_pfn, nr_pages);
136} 136}
137#endif /* CONFIG_MEMORY_HOTPLUG */ 137#endif /* CONFIG_MEMORY_HOTPLUG */
138 138
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 2d184655bc5d..de432f2de2d2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -2,6 +2,7 @@
2#define __ARCH_S390_ATOMIC__ 2#define __ARCH_S390_ATOMIC__
3 3
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <linux/types.h>
5 6
6/* 7/*
7 * include/asm-s390/atomic.h 8 * include/asm-s390/atomic.h
@@ -23,9 +24,6 @@
23 * S390 uses 'Compare And Swap' for atomicity in SMP enviroment 24 * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
24 */ 25 */
25 26
26typedef struct {
27 int counter;
28} __attribute__ ((aligned (4))) atomic_t;
29#define ATOMIC_INIT(i) { (i) } 27#define ATOMIC_INIT(i) { (i) }
30 28
31#ifdef __KERNEL__ 29#ifdef __KERNEL__
@@ -149,9 +147,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
149#undef __CS_LOOP 147#undef __CS_LOOP
150 148
151#ifdef __s390x__ 149#ifdef __s390x__
152typedef struct {
153 long long counter;
154} __attribute__ ((aligned (8))) atomic64_t;
155#define ATOMIC64_INIT(i) { (i) } 150#define ATOMIC64_INIT(i) { (i) }
156 151
157#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 152#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 569079ec4ff0..9b92856632cf 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -218,9 +218,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
218 218
219void __kprobes arch_remove_kprobe(struct kprobe *p) 219void __kprobes arch_remove_kprobe(struct kprobe *p)
220{ 220{
221 mutex_lock(&kprobe_mutex); 221 if (p->ainsn.insn) {
222 free_insn_slot(p->ainsn.insn, 0); 222 free_insn_slot(p->ainsn.insn, 0);
223 mutex_unlock(&kprobe_mutex); 223 p->ainsn.insn = NULL;
224 }
224} 225}
225 226
226static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 227static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 158b0d6d7046..f0258ca3b17e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
183 rc = vmem_add_mapping(start, size); 183 rc = vmem_add_mapping(start, size);
184 if (rc) 184 if (rc)
185 return rc; 185 return rc;
186 rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); 186 rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
187 if (rc) 187 if (rc)
188 vmem_remove_mapping(start, size); 188 vmem_remove_mapping(start, size);
189 return rc; 189 return rc;
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c043ef003028..6327ffbb1992 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -7,16 +7,15 @@
7 * 7 *
8 */ 8 */
9 9
10typedef struct { volatile int counter; } atomic_t; 10#include <linux/compiler.h>
11#include <linux/types.h>
12#include <asm/system.h>
11 13
12#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) 14#define ATOMIC_INIT(i) ( (atomic_t) { (i) } )
13 15
14#define atomic_read(v) ((v)->counter) 16#define atomic_read(v) ((v)->counter)
15#define atomic_set(v,i) ((v)->counter = (i)) 17#define atomic_set(v,i) ((v)->counter = (i))
16 18
17#include <linux/compiler.h>
18#include <asm/system.h>
19
20#if defined(CONFIG_GUSA_RB) 19#if defined(CONFIG_GUSA_RB)
21#include <asm/atomic-grb.h> 20#include <asm/atomic-grb.h>
22#elif defined(CONFIG_CPU_SH4A) 21#elif defined(CONFIG_CPU_SH4A)
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 88807a2aacc3..c0aa3d83ec0e 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -13,6 +13,7 @@
13 */ 13 */
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/ptrace.h> 15#include <linux/ptrace.h>
16#include <linux/hardirq.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
18#include <linux/module.h> 19#include <linux/module.h>
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 6cbef8caeb56..3edf297c829b 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
311 pgdat = NODE_DATA(nid); 311 pgdat = NODE_DATA(nid);
312 312
313 /* We only have ZONE_NORMAL, so this is easy.. */ 313 /* We only have ZONE_NORMAL, so this is easy.. */
314 ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages); 314 ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
315 start_pfn, nr_pages);
315 if (unlikely(ret)) 316 if (unlikely(ret))
316 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 317 printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
317 318
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 5c944b5a8040..ce465975a6a5 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -13,8 +13,6 @@
13 13
14#include <linux/types.h> 14#include <linux/types.h>
15 15
16typedef struct { volatile int counter; } atomic_t;
17
18#ifdef __KERNEL__ 16#ifdef __KERNEL__
19 17
20#define ATOMIC_INIT(i) { (i) } 18#define ATOMIC_INIT(i) { (i) }
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 5982c5ae7f07..a0a706492696 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -10,9 +10,6 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <asm/system.h> 11#include <asm/system.h>
12 12
13typedef struct { volatile int counter; } atomic_t;
14typedef struct { volatile __s64 counter; } atomic64_t;
15
16#define ATOMIC_INIT(i) { (i) } 13#define ATOMIC_INIT(i) { (i) }
17#define ATOMIC64_INIT(i) { (i) } 14#define ATOMIC64_INIT(i) { (i) }
18 15
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 44e490419495..7384d8accfe7 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -64,11 +64,10 @@ good_area:
64 64
65 do { 65 do {
66 int fault; 66 int fault;
67survive: 67
68 fault = handle_mm_fault(mm, vma, address, is_write); 68 fault = handle_mm_fault(mm, vma, address, is_write);
69 if (unlikely(fault & VM_FAULT_ERROR)) { 69 if (unlikely(fault & VM_FAULT_ERROR)) {
70 if (fault & VM_FAULT_OOM) { 70 if (fault & VM_FAULT_OOM) {
71 err = -ENOMEM;
72 goto out_of_memory; 71 goto out_of_memory;
73 } else if (fault & VM_FAULT_SIGBUS) { 72 } else if (fault & VM_FAULT_SIGBUS) {
74 err = -EACCES; 73 err = -EACCES;
@@ -104,18 +103,14 @@ out:
104out_nosemaphore: 103out_nosemaphore:
105 return err; 104 return err;
106 105
107/*
108 * We ran out of memory, or some other thing happened to us that made
109 * us unable to handle the page fault gracefully.
110 */
111out_of_memory: 106out_of_memory:
112 if (is_global_init(current)) { 107 /*
113 up_read(&mm->mmap_sem); 108 * We ran out of memory, call the OOM killer, and return the userspace
114 yield(); 109 * (which will retry the fault, or kill us if we got oom-killed).
115 down_read(&mm->mmap_sem); 110 */
116 goto survive; 111 up_read(&mm->mmap_sem);
117 } 112 pagefault_out_of_memory();
118 goto out; 113 return 0;
119} 114}
120 115
121static void bad_segv(struct faultinfo fi, unsigned long ip) 116static void bad_segv(struct faultinfo fi, unsigned long ip)
@@ -214,9 +209,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
214 si.si_addr = (void __user *)address; 209 si.si_addr = (void __user *)address;
215 current->thread.arch.faultinfo = fi; 210 current->thread.arch.faultinfo = fi;
216 force_sig_info(SIGBUS, &si, current); 211 force_sig_info(SIGBUS, &si, current);
217 } else if (err == -ENOMEM) {
218 printk(KERN_INFO "VM: killing process %s\n", current->comm);
219 do_exit(SIGKILL);
220 } else { 212 } else {
221 BUG_ON(err != -EFAULT); 213 BUG_ON(err != -EFAULT);
222 si.si_signo = SIGSEGV; 214 si.si_signo = SIGSEGV;
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ad5b9f6ecddf..85b46fba4229 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_ATOMIC_32_H 2#define _ASM_X86_ATOMIC_32_H
3 3
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <linux/types.h>
5#include <asm/processor.h> 6#include <asm/processor.h>
6#include <asm/cmpxchg.h> 7#include <asm/cmpxchg.h>
7 8
@@ -10,15 +11,6 @@
10 * resource counting etc.. 11 * resource counting etc..
11 */ 12 */
12 13
13/*
14 * Make sure gcc doesn't try to be clever and move things around
15 * on us. We need to use _exactly_ the address the user gave us,
16 * not some alias that contains the same information.
17 */
18typedef struct {
19 int counter;
20} atomic_t;
21
22#define ATOMIC_INIT(i) { (i) } 14#define ATOMIC_INIT(i) { (i) }
23 15
24/** 16/**
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 279d2a731f3f..8c21731984da 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -1,25 +1,15 @@
1#ifndef _ASM_X86_ATOMIC_64_H 1#ifndef _ASM_X86_ATOMIC_64_H
2#define _ASM_X86_ATOMIC_64_H 2#define _ASM_X86_ATOMIC_64_H
3 3
4#include <linux/types.h>
4#include <asm/alternative.h> 5#include <asm/alternative.h>
5#include <asm/cmpxchg.h> 6#include <asm/cmpxchg.h>
6 7
7/* atomic_t should be 32 bit signed type */
8
9/* 8/*
10 * Atomic operations that C can't guarantee us. Useful for 9 * Atomic operations that C can't guarantee us. Useful for
11 * resource counting etc.. 10 * resource counting etc..
12 */ 11 */
13 12
14/*
15 * Make sure gcc doesn't try to be clever and move things around
16 * on us. We need to use _exactly_ the address the user gave us,
17 * not some alias that contains the same information.
18 */
19typedef struct {
20 int counter;
21} atomic_t;
22
23#define ATOMIC_INIT(i) { (i) } 13#define ATOMIC_INIT(i) { (i) }
24 14
25/** 15/**
@@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
191#define atomic_inc_return(v) (atomic_add_return(1, v)) 181#define atomic_inc_return(v) (atomic_add_return(1, v))
192#define atomic_dec_return(v) (atomic_sub_return(1, v)) 182#define atomic_dec_return(v) (atomic_sub_return(1, v))
193 183
194/* An 64bit atomic type */ 184/* The 64-bit atomic type */
195
196typedef struct {
197 long counter;
198} atomic64_t;
199 185
200#define ATOMIC64_INIT(i) { (i) } 186#define ATOMIC64_INIT(i) { (i) }
201 187
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
deleted file mode 100644
index 8b064bd9c553..000000000000
--- a/arch/x86/include/asm/unwind.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef _ASM_X86_UNWIND_H
2#define _ASM_X86_UNWIND_H
3
4#define UNW_PC(frame) ((void)(frame), 0UL)
5#define UNW_SP(frame) ((void)(frame), 0UL)
6#define UNW_FP(frame) ((void)(frame), 0UL)
7
8static inline int arch_unw_user_mode(const void *info)
9{
10 return 0;
11}
12
13#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6aa..eead6f8f9218 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
376 376
377void __kprobes arch_remove_kprobe(struct kprobe *p) 377void __kprobes arch_remove_kprobe(struct kprobe *p)
378{ 378{
379 mutex_lock(&kprobe_mutex); 379 if (p->ainsn.insn) {
380 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); 380 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
381 mutex_unlock(&kprobe_mutex); 381 p->ainsn.insn = NULL;
382 }
382} 383}
383 384
384static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) 385static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ce6650eb64e9..c9a666cdd3db 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -20,7 +20,6 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/ptrace.h> 21#include <linux/ptrace.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/unwind.h>
24#include <linux/delay.h> 23#include <linux/delay.h>
25#include <linux/errno.h> 24#include <linux/errno.h>
26#include <linux/kexec.h> 25#include <linux/kexec.h>
@@ -51,7 +50,6 @@
51#include <asm/debugreg.h> 50#include <asm/debugreg.h>
52#include <asm/atomic.h> 51#include <asm/atomic.h>
53#include <asm/system.h> 52#include <asm/system.h>
54#include <asm/unwind.h>
55#include <asm/traps.h> 53#include <asm/traps.h>
56#include <asm/desc.h> 54#include <asm/desc.h>
57#include <asm/i387.h> 55#include <asm/i387.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 57ec8c86a877..9e268b6b204e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
667 if (unlikely(in_atomic() || !mm)) 667 if (unlikely(in_atomic() || !mm))
668 goto bad_area_nosemaphore; 668 goto bad_area_nosemaphore;
669 669
670again:
671 /* 670 /*
672 * When running in the kernel we expect faults to occur only to 671 * When running in the kernel we expect faults to occur only to
673 * addresses in user space. All other faults represent errors in the 672 * addresses in user space. All other faults represent errors in the
@@ -859,25 +858,14 @@ no_context:
859 oops_end(flags, regs, sig); 858 oops_end(flags, regs, sig);
860#endif 859#endif
861 860
862/*
863 * We ran out of memory, or some other thing happened to us that made
864 * us unable to handle the page fault gracefully.
865 */
866out_of_memory: 861out_of_memory:
862 /*
863 * We ran out of memory, call the OOM killer, and return the userspace
864 * (which will retry the fault, or kill us if we got oom-killed).
865 */
867 up_read(&mm->mmap_sem); 866 up_read(&mm->mmap_sem);
868 if (is_global_init(tsk)) { 867 pagefault_out_of_memory();
869 yield(); 868 return;
870 /*
871 * Re-lookup the vma - in theory the vma tree might
872 * have changed:
873 */
874 goto again;
875 }
876
877 printk("VM: killing process %s\n", tsk->comm);
878 if (error_code & PF_USER)
879 do_group_exit(SIGKILL);
880 goto no_context;
881 869
882do_sigbus: 870do_sigbus:
883 up_read(&mm->mmap_sem); 871 up_read(&mm->mmap_sem);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6c432e..544d724caeee 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
1079 unsigned long start_pfn = start >> PAGE_SHIFT; 1079 unsigned long start_pfn = start >> PAGE_SHIFT;
1080 unsigned long nr_pages = size >> PAGE_SHIFT; 1080 unsigned long nr_pages = size >> PAGE_SHIFT;
1081 1081
1082 return __add_pages(zone, start_pfn, nr_pages); 1082 return __add_pages(nid, zone, start_pfn, nr_pages);
1083} 1083}
1084#endif 1084#endif
1085 1085
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d24d42a..54c437e96541 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
857 if (last_mapped_pfn > max_pfn_mapped) 857 if (last_mapped_pfn > max_pfn_mapped)
858 max_pfn_mapped = last_mapped_pfn; 858 max_pfn_mapped = last_mapped_pfn;
859 859
860 ret = __add_pages(zone, start_pfn, nr_pages); 860 ret = __add_pages(nid, zone, start_pfn, nr_pages);
861 WARN_ON_ONCE(ret); 861 WARN_ON_ONCE(ret);
862 862
863 return ret; 863 return ret;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 38aca048e951..66a9d8145562 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -41,6 +41,7 @@
41#include <linux/pm_qos_params.h> 41#include <linux/pm_qos_params.h>
42#include <linux/clockchips.h> 42#include <linux/clockchips.h>
43#include <linux/cpuidle.h> 43#include <linux/cpuidle.h>
44#include <linux/irqflags.h>
44 45
45/* 46/*
46 * Include the apic definitions for x86 to have the APIC timer related defines 47 * Include the apic definitions for x86 to have the APIC timer related defines
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5260e9e0df48..989429cfed88 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -347,8 +347,9 @@ static inline int memory_probe_init(void)
347 * section belongs to... 347 * section belongs to...
348 */ 348 */
349 349
350static int add_memory_block(unsigned long node_id, struct mem_section *section, 350static int add_memory_block(int nid, struct mem_section *section,
351 unsigned long state, int phys_device) 351 unsigned long state, int phys_device,
352 enum mem_add_context context)
352{ 353{
353 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 354 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
354 int ret = 0; 355 int ret = 0;
@@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
370 ret = mem_create_simple_file(mem, phys_device); 371 ret = mem_create_simple_file(mem, phys_device);
371 if (!ret) 372 if (!ret)
372 ret = mem_create_simple_file(mem, removable); 373 ret = mem_create_simple_file(mem, removable);
374 if (!ret) {
375 if (context == HOTPLUG)
376 ret = register_mem_sect_under_node(mem, nid);
377 }
373 378
374 return ret; 379 return ret;
375} 380}
@@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
382 * 387 *
383 * This could be made generic for all sysdev classes. 388 * This could be made generic for all sysdev classes.
384 */ 389 */
385static struct memory_block *find_memory_block(struct mem_section *section) 390struct memory_block *find_memory_block(struct mem_section *section)
386{ 391{
387 struct kobject *kobj; 392 struct kobject *kobj;
388 struct sys_device *sysdev; 393 struct sys_device *sysdev;
@@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
411 struct memory_block *mem; 416 struct memory_block *mem;
412 417
413 mem = find_memory_block(section); 418 mem = find_memory_block(section);
419 unregister_mem_sect_under_nodes(mem);
414 mem_remove_simple_file(mem, phys_index); 420 mem_remove_simple_file(mem, phys_index);
415 mem_remove_simple_file(mem, state); 421 mem_remove_simple_file(mem, state);
416 mem_remove_simple_file(mem, phys_device); 422 mem_remove_simple_file(mem, phys_device);
@@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
424 * need an interface for the VM to add new memory regions, 430 * need an interface for the VM to add new memory regions,
425 * but without onlining it. 431 * but without onlining it.
426 */ 432 */
427int register_new_memory(struct mem_section *section) 433int register_new_memory(int nid, struct mem_section *section)
428{ 434{
429 return add_memory_block(0, section, MEM_OFFLINE, 0); 435 return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
430} 436}
431 437
432int unregister_memory_section(struct mem_section *section) 438int unregister_memory_section(struct mem_section *section)
@@ -458,7 +464,8 @@ int __init memory_dev_init(void)
458 for (i = 0; i < NR_MEM_SECTIONS; i++) { 464 for (i = 0; i < NR_MEM_SECTIONS; i++) {
459 if (!present_section_nr(i)) 465 if (!present_section_nr(i))
460 continue; 466 continue;
461 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 467 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
468 0, BOOT);
462 if (!ret) 469 if (!ret)
463 ret = err; 470 ret = err;
464 } 471 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91636cd8b6c9..43fa90b837ee 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/memory.h>
9#include <linux/node.h> 10#include <linux/node.h>
10#include <linux/hugetlb.h> 11#include <linux/hugetlb.h>
11#include <linux/cpumask.h> 12#include <linux/cpumask.h>
@@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
248 return 0; 249 return 0;
249} 250}
250 251
252#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
253#define page_initialized(page) (page->lru.next)
254
255static int get_nid_for_pfn(unsigned long pfn)
256{
257 struct page *page;
258
259 if (!pfn_valid_within(pfn))
260 return -1;
261 page = pfn_to_page(pfn);
262 if (!page_initialized(page))
263 return -1;
264 return pfn_to_nid(pfn);
265}
266
267/* register memory section under specified node if it spans that node */
268int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
269{
270 unsigned long pfn, sect_start_pfn, sect_end_pfn;
271
272 if (!mem_blk)
273 return -EFAULT;
274 if (!node_online(nid))
275 return 0;
276 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
277 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
278 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
279 int page_nid;
280
281 page_nid = get_nid_for_pfn(pfn);
282 if (page_nid < 0)
283 continue;
284 if (page_nid != nid)
285 continue;
286 return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
287 &mem_blk->sysdev.kobj,
288 kobject_name(&mem_blk->sysdev.kobj));
289 }
290 /* mem section does not span the specified node */
291 return 0;
292}
293
294/* unregister memory section under all nodes that it spans */
295int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
296{
297 nodemask_t unlinked_nodes;
298 unsigned long pfn, sect_start_pfn, sect_end_pfn;
299
300 if (!mem_blk)
301 return -EFAULT;
302 nodes_clear(unlinked_nodes);
303 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
304 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
305 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
306 unsigned int nid;
307
308 nid = get_nid_for_pfn(pfn);
309 if (nid < 0)
310 continue;
311 if (!node_online(nid))
312 continue;
313 if (node_test_and_set(nid, unlinked_nodes))
314 continue;
315 sysfs_remove_link(&node_devices[nid].sysdev.kobj,
316 kobject_name(&mem_blk->sysdev.kobj));
317 }
318 return 0;
319}
320
321static int link_mem_sections(int nid)
322{
323 unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
324 unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
325 unsigned long pfn;
326 int err = 0;
327
328 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
329 unsigned long section_nr = pfn_to_section_nr(pfn);
330 struct mem_section *mem_sect;
331 struct memory_block *mem_blk;
332 int ret;
333
334 if (!present_section_nr(section_nr))
335 continue;
336 mem_sect = __nr_to_section(section_nr);
337 mem_blk = find_memory_block(mem_sect);
338 ret = register_mem_sect_under_node(mem_blk, nid);
339 if (!err)
340 err = ret;
341
342 /* discard ref obtained in find_memory_block() */
343 kobject_put(&mem_blk->sysdev.kobj);
344 }
345 return err;
346}
347#else
348static int link_mem_sections(int nid) { return 0; }
349#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
350
251int register_one_node(int nid) 351int register_one_node(int nid)
252{ 352{
253 int error = 0; 353 int error = 0;
@@ -267,6 +367,9 @@ int register_one_node(int nid)
267 if (cpu_to_node(cpu) == nid) 367 if (cpu_to_node(cpu) == nid)
268 register_cpu_under_node(cpu, nid); 368 register_cpu_under_node(cpu, nid);
269 } 369 }
370
371 /* link memory sections under this node */
372 error = link_mem_sections(nid);
270 } 373 }
271 374
272 return error; 375 return error;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 1697043119bd..35914b6e1d2a 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -841,7 +841,7 @@ config JS_RTC
841 841
842config GEN_RTC 842config GEN_RTC
843 tristate "Generic /dev/rtc emulation" 843 tristate "Generic /dev/rtc emulation"
844 depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 844 depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN
845 ---help--- 845 ---help---
846 If you say Y here and create a character special file /dev/rtc with 846 If you say Y here and create a character special file /dev/rtc with
847 major number 10 and minor number 135 using mknod ("man mknod"), you 847 major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c
index 4246b8e36cb3..45d3e80156d4 100644
--- a/drivers/char/consolemap.c
+++ b/drivers/char/consolemap.c
@@ -554,7 +554,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
554 __get_user(fontpos, &list->fontpos); 554 __get_user(fontpos, &list->fontpos);
555 if ((err1 = con_insert_unipair(p, unicode,fontpos)) != 0) 555 if ((err1 = con_insert_unipair(p, unicode,fontpos)) != 0)
556 err = err1; 556 err = err1;
557 list++; 557 list++;
558 } 558 }
559 559
560 if (con_unify_unimap(vc, p)) 560 if (con_unify_unimap(vc, p))
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6431f6921a67..3586b3b3df3f 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -425,9 +425,6 @@ static ssize_t read_oldmem(struct file *file, char __user *buf,
425} 425}
426#endif 426#endif
427 427
428extern long vread(char *buf, char *addr, unsigned long count);
429extern long vwrite(char *buf, char *addr, unsigned long count);
430
431#ifdef CONFIG_DEVKMEM 428#ifdef CONFIG_DEVKMEM
432/* 429/*
433 * This function reads the *virtual* memory as seen by the kernel. 430 * This function reads the *virtual* memory as seen by the kernel.
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c7afc068c28d..7c13581ca9cd 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -407,7 +407,7 @@ struct entropy_store {
407 /* read-write data: */ 407 /* read-write data: */
408 spinlock_t lock; 408 spinlock_t lock;
409 unsigned add_ptr; 409 unsigned add_ptr;
410 int entropy_count; /* Must at no time exceed ->POOLBITS! */ 410 int entropy_count;
411 int input_rotate; 411 int input_rotate;
412}; 412};
413 413
@@ -767,11 +767,10 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
767{ 767{
768 unsigned long flags; 768 unsigned long flags;
769 769
770 BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
771
772 /* Hold lock while accounting */ 770 /* Hold lock while accounting */
773 spin_lock_irqsave(&r->lock, flags); 771 spin_lock_irqsave(&r->lock, flags);
774 772
773 BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
775 DEBUG_ENT("trying to extract %d bits from %s\n", 774 DEBUG_ENT("trying to extract %d bits from %s\n",
776 nbytes * 8, r->name); 775 nbytes * 8, r->name);
777 776
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 94966edfb44d..d41b9f6f7903 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -82,7 +82,7 @@ static void sysrq_handle_loglevel(int key, struct tty_struct *tty)
82} 82}
83static struct sysrq_key_op sysrq_loglevel_op = { 83static struct sysrq_key_op sysrq_loglevel_op = {
84 .handler = sysrq_handle_loglevel, 84 .handler = sysrq_handle_loglevel,
85 .help_msg = "loglevel0-8", 85 .help_msg = "loglevel(0-9)",
86 .action_msg = "Changing Loglevel", 86 .action_msg = "Changing Loglevel",
87 .enable_mask = SYSRQ_ENABLE_LOG, 87 .enable_mask = SYSRQ_ENABLE_LOG,
88}; 88};
@@ -233,7 +233,7 @@ static void sysrq_handle_showallcpus(int key, struct tty_struct *tty)
233 233
234static struct sysrq_key_op sysrq_showallcpus_op = { 234static struct sysrq_key_op sysrq_showallcpus_op = {
235 .handler = sysrq_handle_showallcpus, 235 .handler = sysrq_handle_showallcpus,
236 .help_msg = "aLlcpus", 236 .help_msg = "show-backtrace-all-active-cpus(L)",
237 .action_msg = "Show backtrace of all active CPUs", 237 .action_msg = "Show backtrace of all active CPUs",
238 .enable_mask = SYSRQ_ENABLE_DUMP, 238 .enable_mask = SYSRQ_ENABLE_DUMP,
239}; 239};
@@ -247,7 +247,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
247} 247}
248static struct sysrq_key_op sysrq_showregs_op = { 248static struct sysrq_key_op sysrq_showregs_op = {
249 .handler = sysrq_handle_showregs, 249 .handler = sysrq_handle_showregs,
250 .help_msg = "showPc", 250 .help_msg = "show-registers(P)",
251 .action_msg = "Show Regs", 251 .action_msg = "Show Regs",
252 .enable_mask = SYSRQ_ENABLE_DUMP, 252 .enable_mask = SYSRQ_ENABLE_DUMP,
253}; 253};
@@ -258,7 +258,7 @@ static void sysrq_handle_showstate(int key, struct tty_struct *tty)
258} 258}
259static struct sysrq_key_op sysrq_showstate_op = { 259static struct sysrq_key_op sysrq_showstate_op = {
260 .handler = sysrq_handle_showstate, 260 .handler = sysrq_handle_showstate,
261 .help_msg = "showTasks", 261 .help_msg = "show-task-states(T)",
262 .action_msg = "Show State", 262 .action_msg = "Show State",
263 .enable_mask = SYSRQ_ENABLE_DUMP, 263 .enable_mask = SYSRQ_ENABLE_DUMP,
264}; 264};
@@ -269,7 +269,7 @@ static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty)
269} 269}
270static struct sysrq_key_op sysrq_showstate_blocked_op = { 270static struct sysrq_key_op sysrq_showstate_blocked_op = {
271 .handler = sysrq_handle_showstate_blocked, 271 .handler = sysrq_handle_showstate_blocked,
272 .help_msg = "shoW-blocked-tasks", 272 .help_msg = "show-blocked-tasks(W)",
273 .action_msg = "Show Blocked State", 273 .action_msg = "Show Blocked State",
274 .enable_mask = SYSRQ_ENABLE_DUMP, 274 .enable_mask = SYSRQ_ENABLE_DUMP,
275}; 275};
@@ -297,7 +297,7 @@ static void sysrq_handle_showmem(int key, struct tty_struct *tty)
297} 297}
298static struct sysrq_key_op sysrq_showmem_op = { 298static struct sysrq_key_op sysrq_showmem_op = {
299 .handler = sysrq_handle_showmem, 299 .handler = sysrq_handle_showmem,
300 .help_msg = "showMem", 300 .help_msg = "show-memory-usage(M)",
301 .action_msg = "Show Memory", 301 .action_msg = "Show Memory",
302 .enable_mask = SYSRQ_ENABLE_DUMP, 302 .enable_mask = SYSRQ_ENABLE_DUMP,
303}; 303};
@@ -323,7 +323,7 @@ static void sysrq_handle_term(int key, struct tty_struct *tty)
323} 323}
324static struct sysrq_key_op sysrq_term_op = { 324static struct sysrq_key_op sysrq_term_op = {
325 .handler = sysrq_handle_term, 325 .handler = sysrq_handle_term,
326 .help_msg = "tErm", 326 .help_msg = "terminate-all-tasks(E)",
327 .action_msg = "Terminate All Tasks", 327 .action_msg = "Terminate All Tasks",
328 .enable_mask = SYSRQ_ENABLE_SIGNAL, 328 .enable_mask = SYSRQ_ENABLE_SIGNAL,
329}; 329};
@@ -341,7 +341,7 @@ static void sysrq_handle_moom(int key, struct tty_struct *tty)
341} 341}
342static struct sysrq_key_op sysrq_moom_op = { 342static struct sysrq_key_op sysrq_moom_op = {
343 .handler = sysrq_handle_moom, 343 .handler = sysrq_handle_moom,
344 .help_msg = "Full", 344 .help_msg = "memory-full-oom-kill(F)",
345 .action_msg = "Manual OOM execution", 345 .action_msg = "Manual OOM execution",
346 .enable_mask = SYSRQ_ENABLE_SIGNAL, 346 .enable_mask = SYSRQ_ENABLE_SIGNAL,
347}; 347};
@@ -353,7 +353,7 @@ static void sysrq_handle_kill(int key, struct tty_struct *tty)
353} 353}
354static struct sysrq_key_op sysrq_kill_op = { 354static struct sysrq_key_op sysrq_kill_op = {
355 .handler = sysrq_handle_kill, 355 .handler = sysrq_handle_kill,
356 .help_msg = "kIll", 356 .help_msg = "kill-all-tasks(I)",
357 .action_msg = "Kill All Tasks", 357 .action_msg = "Kill All Tasks",
358 .enable_mask = SYSRQ_ENABLE_SIGNAL, 358 .enable_mask = SYSRQ_ENABLE_SIGNAL,
359}; 359};
@@ -364,7 +364,7 @@ static void sysrq_handle_unrt(int key, struct tty_struct *tty)
364} 364}
365static struct sysrq_key_op sysrq_unrt_op = { 365static struct sysrq_key_op sysrq_unrt_op = {
366 .handler = sysrq_handle_unrt, 366 .handler = sysrq_handle_unrt,
367 .help_msg = "Nice", 367 .help_msg = "nice-all-RT-tasks(N)",
368 .action_msg = "Nice All RT Tasks", 368 .action_msg = "Nice All RT Tasks",
369 .enable_mask = SYSRQ_ENABLE_RTNICE, 369 .enable_mask = SYSRQ_ENABLE_RTNICE,
370}; 370};
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e2667a8c2997..eee47fd16d79 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -109,6 +109,13 @@ config EDAC_X38
109 Support for error detection and correction on the Intel 109 Support for error detection and correction on the Intel
110 X38 server chipsets. 110 X38 server chipsets.
111 111
112config EDAC_I5400
113 tristate "Intel 5400 (Seaburg) chipsets"
114 depends on EDAC_MM_EDAC && PCI && X86
115 help
116 Support for error detection and correction the Intel
117 i5400 MCH chipset (Seaburg).
118
112config EDAC_I82860 119config EDAC_I82860
113 tristate "Intel 82860" 120 tristate "Intel 82860"
114 depends on EDAC_MM_EDAC && PCI && X86_32 121 depends on EDAC_MM_EDAC && PCI && X86_32
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 62c2d9bad8dc..b75196927de3 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -20,6 +20,7 @@ endif
20obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 20obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
21obj-$(CONFIG_EDAC_I5000) += i5000_edac.o 21obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
22obj-$(CONFIG_EDAC_I5100) += i5100_edac.o 22obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
23obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
23obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o 24obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
24obj-$(CONFIG_EDAC_E752X) += e752x_edac.o 25obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
25obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o 26obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 4041e9143283..ca9113e1c106 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -333,7 +333,7 @@ static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
333fail0: 333fail0:
334 edac_printk(KERN_WARNING, EDAC_MC, 334 edac_printk(KERN_WARNING, EDAC_MC,
335 "%s (%s) %s %s already assigned %d\n", 335 "%s (%s) %s %s already assigned %d\n",
336 rover->dev->bus_id, edac_dev_name(rover), 336 dev_name(rover->dev), edac_dev_name(rover),
337 rover->mod_name, rover->ctl_name, rover->dev_idx); 337 rover->mod_name, rover->ctl_name, rover->dev_idx);
338 return 1; 338 return 1;
339 339
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d110392d48f4..25d66940b4fa 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -401,7 +401,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
401 401
402fail0: 402fail0:
403 edac_printk(KERN_WARNING, EDAC_MC, 403 edac_printk(KERN_WARNING, EDAC_MC,
404 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, 404 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
405 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 405 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
406 return 1; 406 return 1;
407 407
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 22ec9d5d4312..5d3c8083a40e 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -150,7 +150,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
150fail0: 150fail0:
151 edac_printk(KERN_WARNING, EDAC_PCI, 151 edac_printk(KERN_WARNING, EDAC_PCI,
152 "%s (%s) %s %s already assigned %d\n", 152 "%s (%s) %s %s already assigned %d\n",
153 rover->dev->bus_id, edac_dev_name(rover), 153 dev_name(rover->dev), edac_dev_name(rover),
154 rover->mod_name, rover->ctl_name, rover->pci_idx); 154 rover->mod_name, rover->ctl_name, rover->pci_idx);
155 return 1; 155 return 1;
156 156
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 5c153dccc95e..422728cfe994 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -569,7 +569,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
569 569
570 local_irq_restore(flags); 570 local_irq_restore(flags);
571 571
572 debugf4("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 572 debugf4("PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
573 573
574 /* check the status reg for errors on boards NOT marked as broken 574 /* check the status reg for errors on boards NOT marked as broken
575 * if broken, we cannot trust any of the status bits 575 * if broken, we cannot trust any of the status bits
@@ -600,13 +600,13 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
600 } 600 }
601 601
602 602
603 debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); 603 debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev));
604 604
605 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { 605 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
606 /* On bridges, need to examine secondary status register */ 606 /* On bridges, need to examine secondary status register */
607 status = get_pci_parity_status(dev, 1); 607 status = get_pci_parity_status(dev, 1);
608 608
609 debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 609 debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
610 610
611 /* check the secondary status reg for errors, 611 /* check the secondary status reg for errors,
612 * on NOT broken boards 612 * on NOT broken boards
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
new file mode 100644
index 000000000000..b08b6d8e2dc7
--- /dev/null
+++ b/drivers/edac/i5400_edac.c
@@ -0,0 +1,1476 @@
1/*
2 * Intel 5400 class Memory Controllers kernel module (Seaburg)
3 *
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Copyright (c) 2008 by:
8 * Ben Woodard <woodard@redhat.com>
9 * Mauro Carvalho Chehab <mchehab@redhat.com>
10 *
11 * Red Hat Inc. http://www.redhat.com
12 *
13 * Forked and adapted from the i5000_edac driver which was
14 * written by Douglas Thompson Linux Networx <norsk5@xmission.com>
15 *
16 * This module is based on the following document:
17 *
18 * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet
19 * http://developer.intel.com/design/chipsets/datashts/313070.htm
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
30
31#include "edac_core.h"
32
33/*
34 * Alter this version for the I5400 module when modifications are made
35 */
36#define I5400_REVISION " Ver: 1.0.0 " __DATE__
37
38#define EDAC_MOD_STR "i5400_edac"
39
40#define i5400_printk(level, fmt, arg...) \
41 edac_printk(level, "i5400", fmt, ##arg)
42
43#define i5400_mc_printk(mci, level, fmt, arg...) \
44 edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
45
46/* Limits for i5400 */
47#define NUM_MTRS_PER_BRANCH 4
48#define CHANNELS_PER_BRANCH 2
49#define MAX_CHANNELS 4
50#define MAX_DIMMS (MAX_CHANNELS * 4) /* Up to 4 DIMM's per channel */
51#define MAX_CSROWS (MAX_DIMMS * 2) /* max possible csrows per channel */
52
53/* Device 16,
54 * Function 0: System Address
55 * Function 1: Memory Branch Map, Control, Errors Register
56 * Function 2: FSB Error Registers
57 *
58 * All 3 functions of Device 16 (0,1,2) share the SAME DID and
59 * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2),
60 * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1
61 * for device 21 (0,1).
62 */
63
64 /* OFFSETS for Function 0 */
65#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
66#define MAXCH 0x56 /* Max Channel Number */
67#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
68
69 /* OFFSETS for Function 1 */
70#define TOLM 0x6C
71#define REDMEMB 0x7C
72#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */
73#define MIR0 0x80
74#define MIR1 0x84
75#define AMIR0 0x8c
76#define AMIR1 0x90
77
78 /* Fatal error registers */
79#define FERR_FAT_FBD 0x98 /* also called as FERR_FAT_FB_DIMM at datasheet */
80#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */
81
82#define NERR_FAT_FBD 0x9c
83#define FERR_NF_FBD 0xa0 /* also called as FERR_NFAT_FB_DIMM at datasheet */
84
85 /* Non-fatal error register */
86#define NERR_NF_FBD 0xa4
87
88 /* Enable error mask */
89#define EMASK_FBD 0xa8
90
91#define ERR0_FBD 0xac
92#define ERR1_FBD 0xb0
93#define ERR2_FBD 0xb4
94#define MCERR_FBD 0xb8
95
96 /* No OFFSETS for Device 16 Function 2 */
97
98/*
99 * Device 21,
100 * Function 0: Memory Map Branch 0
101 *
102 * Device 22,
103 * Function 0: Memory Map Branch 1
104 */
105
106 /* OFFSETS for Function 0 */
107#define AMBPRESENT_0 0x64
108#define AMBPRESENT_1 0x66
109#define MTR0 0x80
110#define MTR1 0x82
111#define MTR2 0x84
112#define MTR3 0x86
113
114 /* OFFSETS for Function 1 */
115#define NRECFGLOG 0x74
116#define RECFGLOG 0x78
117#define NRECMEMA 0xbe
118#define NRECMEMB 0xc0
119#define NRECFB_DIMMA 0xc4
120#define NRECFB_DIMMB 0xc8
121#define NRECFB_DIMMC 0xcc
122#define NRECFB_DIMMD 0xd0
123#define NRECFB_DIMME 0xd4
124#define NRECFB_DIMMF 0xd8
125#define REDMEMA 0xdC
126#define RECMEMA 0xf0
127#define RECMEMB 0xf4
128#define RECFB_DIMMA 0xf8
129#define RECFB_DIMMB 0xec
130#define RECFB_DIMMC 0xf0
131#define RECFB_DIMMD 0xf4
132#define RECFB_DIMME 0xf8
133#define RECFB_DIMMF 0xfC
134
135/*
136 * Error indicator bits and masks
137 * Error masks are according with Table 5-17 of i5400 datasheet
138 */
139
140enum error_mask {
141 EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */
142 EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */
143 EMASK_M3 = 1<<2, /* Reserved */
144 EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */
145 EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
146 EMASK_M6 = 1<<5, /* Unsupported on i5400 */
147 EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
148 EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */
149 EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
150 EMASK_M10 = 1<<9, /* Unsupported on i5400 */
151 EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
152 EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
153 EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
154 EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
155 EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
156 EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
157 EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
158 EMASK_M18 = 1<<17, /* Unsupported on i5400 */
159 EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
160 EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
161 EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
162 EMASK_M22 = 1<<21, /* SPD protocol Error */
163 EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
164 EMASK_M24 = 1<<23, /* Refresh error */
165 EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
166 EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
167 EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
168 EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
169 EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
170};
171
172/*
173 * Names to translate bit error into something useful
174 */
175static const char *error_name[] = {
176 [0] = "Memory Write error on non-redundant retry",
177 [1] = "Memory or FB-DIMM configuration CRC read error",
178 /* Reserved */
179 [3] = "Uncorrectable Data ECC on Replay",
180 [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
181 /* M6 Unsupported on i5400 */
182 [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
183 [7] = "Aliased Uncorrectable Patrol Data ECC",
184 [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
185 /* M10 Unsupported on i5400 */
186 [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
187 [11] = "Non-Aliased Uncorrectable Patrol Data ECC",
188 [12] = "Memory Write error on first attempt",
189 [13] = "FB-DIMM Configuration Write error on first attempt",
190 [14] = "Memory or FB-DIMM configuration CRC read error",
191 [15] = "Channel Failed-Over Occurred",
192 [16] = "Correctable Non-Mirrored Demand Data ECC",
193 /* M18 Unsupported on i5400 */
194 [18] = "Correctable Resilver- or Spare-Copy Data ECC",
195 [19] = "Correctable Patrol Data ECC",
196 [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
197 [21] = "SPD protocol Error",
198 [22] = "Non-Redundant Fast Reset Timeout",
199 [23] = "Refresh error",
200 [24] = "Memory Write error on redundant retry",
201 [25] = "Redundant Fast Reset Timeout",
202 [26] = "Correctable Counter Threshold Exceeded",
203 [27] = "DIMM-Spare Copy Completed",
204 [28] = "DIMM-Isolation Completed",
205};
206
207/* Fatal errors */
208#define ERROR_FAT_MASK (EMASK_M1 | \
209 EMASK_M2 | \
210 EMASK_M23)
211
212/* Correctable errors */
213#define ERROR_NF_CORRECTABLE (EMASK_M27 | \
214 EMASK_M20 | \
215 EMASK_M19 | \
216 EMASK_M18 | \
217 EMASK_M17 | \
218 EMASK_M16)
219#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \
220 EMASK_M28)
221#define ERROR_NF_SPD_PROTOCOL (EMASK_M22)
222#define ERROR_NF_NORTH_CRC (EMASK_M21)
223
224/* Recoverable errors */
225#define ERROR_NF_RECOVERABLE (EMASK_M26 | \
226 EMASK_M25 | \
227 EMASK_M24 | \
228 EMASK_M15 | \
229 EMASK_M14 | \
230 EMASK_M13 | \
231 EMASK_M12 | \
232 EMASK_M11 | \
233 EMASK_M9 | \
234 EMASK_M8 | \
235 EMASK_M7 | \
236 EMASK_M5)
237
238/* uncorrectable errors */
239#define ERROR_NF_UNCORRECTABLE (EMASK_M4)
240
241/* mask to all non-fatal errors */
242#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \
243 ERROR_NF_UNCORRECTABLE | \
244 ERROR_NF_RECOVERABLE | \
245 ERROR_NF_DIMM_SPARE | \
246 ERROR_NF_SPD_PROTOCOL | \
247 ERROR_NF_NORTH_CRC)
248
249/*
250 * Define error masks for the several registers
251 */
252
253/* Enable all fatal and non fatal errors */
254#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK)
255
256/* mask for fatal error registers */
257#define FERR_FAT_MASK ERROR_FAT_MASK
258
259/* masks for non-fatal error register */
260static inline int to_nf_mask(unsigned int mask)
261{
262 return (mask & EMASK_M29) | (mask >> 3);
263};
264
265static inline int from_nf_ferr(unsigned int mask)
266{
267 return (mask & EMASK_M29) | /* Bit 28 */
268 (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */
269};
270
271#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK)
272#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE)
273#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE)
274#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL)
275#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC)
276#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE)
277#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE)
278
279/* Defines to extract the vaious fields from the
280 * MTRx - Memory Technology Registers
281 */
282#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10))
283#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 9))
284#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 8)) ? 8 : 4)
285#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
286#define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2)
287#define MTR_DIMM_RANK(mtr) (((mtr) >> 5) & 0x1)
288#define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1)
289#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
290#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
291#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
292#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
293
294/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */
295static inline int extract_fbdchan_indx(u32 x)
296{
297 return (x>>28) & 0x3;
298}
299
300#ifdef CONFIG_EDAC_DEBUG
301/* MTR NUMROW */
302static const char *numrow_toString[] = {
303 "8,192 - 13 rows",
304 "16,384 - 14 rows",
305 "32,768 - 15 rows",
306 "65,536 - 16 rows"
307};
308
309/* MTR NUMCOL */
310static const char *numcol_toString[] = {
311 "1,024 - 10 columns",
312 "2,048 - 11 columns",
313 "4,096 - 12 columns",
314 "reserved"
315};
316#endif
317
318/* Device name and register DID (Device ID) */
319struct i5400_dev_info {
320 const char *ctl_name; /* name for this device */
321 u16 fsb_mapping_errors; /* DID for the branchmap,control */
322};
323
324/* Table of devices attributes supported by this driver */
325static const struct i5400_dev_info i5400_devs[] = {
326 {
327 .ctl_name = "I5400",
328 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR,
329 },
330};
331
332struct i5400_dimm_info {
333 int megabytes; /* size, 0 means not present */
334 int dual_rank;
335};
336
337/* driver private data structure */
338struct i5400_pvt {
339 struct pci_dev *system_address; /* 16.0 */
340 struct pci_dev *branchmap_werrors; /* 16.1 */
341 struct pci_dev *fsb_error_regs; /* 16.2 */
342 struct pci_dev *branch_0; /* 21.0 */
343 struct pci_dev *branch_1; /* 22.0 */
344
345 u16 tolm; /* top of low memory */
346 u64 ambase; /* AMB BAR */
347
348 u16 mir0, mir1;
349
350 u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
351 u16 b0_ambpresent0; /* Branch 0, Channel 0 */
352 u16 b0_ambpresent1; /* Brnach 0, Channel 1 */
353
354 u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
355 u16 b1_ambpresent0; /* Branch 1, Channel 8 */
356 u16 b1_ambpresent1; /* Branch 1, Channel 1 */
357
358 /* DIMM information matrix, allocating architecture maximums */
359 struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS];
360
361 /* Actual values for this controller */
362 int maxch; /* Max channels */
363 int maxdimmperch; /* Max DIMMs per channel */
364};
365
366/* I5400 MCH error information retrieved from Hardware */
367struct i5400_error_info {
368 /* These registers are always read from the MC */
369 u32 ferr_fat_fbd; /* First Errors Fatal */
370 u32 nerr_fat_fbd; /* Next Errors Fatal */
371 u32 ferr_nf_fbd; /* First Errors Non-Fatal */
372 u32 nerr_nf_fbd; /* Next Errors Non-Fatal */
373
374 /* These registers are input ONLY if there was a Recoverable Error */
375 u32 redmemb; /* Recoverable Mem Data Error log B */
376 u16 recmema; /* Recoverable Mem Error log A */
377 u32 recmemb; /* Recoverable Mem Error log B */
378
379 /* These registers are input ONLY if there was a Non-Rec Error */
380 u16 nrecmema; /* Non-Recoverable Mem log A */
381 u16 nrecmemb; /* Non-Recoverable Mem log B */
382
383};
384
385/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and
386 5400 better to use an inline function than a macro in this case */
387static inline int nrec_bank(struct i5400_error_info *info)
388{
389 return ((info->nrecmema) >> 12) & 0x7;
390}
391static inline int nrec_rank(struct i5400_error_info *info)
392{
393 return ((info->nrecmema) >> 8) & 0xf;
394}
395static inline int nrec_buf_id(struct i5400_error_info *info)
396{
397 return ((info->nrecmema)) & 0xff;
398}
399static inline int nrec_rdwr(struct i5400_error_info *info)
400{
401 return (info->nrecmemb) >> 31;
402}
403/* This applies to both NREC and REC string so it can be used with nrec_rdwr
404 and rec_rdwr */
405static inline const char *rdwr_str(int rdwr)
406{
407 return rdwr ? "Write" : "Read";
408}
409static inline int nrec_cas(struct i5400_error_info *info)
410{
411 return ((info->nrecmemb) >> 16) & 0x1fff;
412}
413static inline int nrec_ras(struct i5400_error_info *info)
414{
415 return (info->nrecmemb) & 0xffff;
416}
417static inline int rec_bank(struct i5400_error_info *info)
418{
419 return ((info->recmema) >> 12) & 0x7;
420}
421static inline int rec_rank(struct i5400_error_info *info)
422{
423 return ((info->recmema) >> 8) & 0xf;
424}
425static inline int rec_rdwr(struct i5400_error_info *info)
426{
427 return (info->recmemb) >> 31;
428}
429static inline int rec_cas(struct i5400_error_info *info)
430{
431 return ((info->recmemb) >> 16) & 0x1fff;
432}
433static inline int rec_ras(struct i5400_error_info *info)
434{
435 return (info->recmemb) & 0xffff;
436}
437
438static struct edac_pci_ctl_info *i5400_pci;
439
/*
 * i5400_get_error_info	Retrieve the hardware error information from
 *				the hardware and cache it in the 'info'
 *				structure.
 *
 * Reading-and-writing-back the FERR registers clears the latched error
 * bits, so the whole snapshot must be harvested in one pass; the order
 * of the reads below (FERR first, then the detail logs, then the
 * write-back) is significant.
 */
static void i5400_get_error_info(struct mem_ctl_info *mci,
				 struct i5400_error_info *info)
{
	struct i5400_pvt *pvt;
	u32 value;

	pvt = mci->pvt_info;

	/* read in the 1st FATAL error register */
	pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);

	/* Mask only the bits that the doc says are valid
	 */
	value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);

	/* If there is an error, then read in the
	   NEXT FATAL error register and the Memory Error Log Register A
	 */
	if (value & FERR_FAT_MASK) {
		info->ferr_fat_fbd = value;

		/* harvest the various error data we need */
		pci_read_config_dword(pvt->branchmap_werrors,
				NERR_FAT_FBD, &info->nerr_fat_fbd);
		/* NOTE(review): NRECMEMB is read word-sized here into a u16
		 * field, but nrec_rdwr()/nrec_cas() expect bits 16..31 of
		 * that register -- those helpers always see 0.  Compare the
		 * dword read of RECMEMB below. */
		pci_read_config_word(pvt->branchmap_werrors,
				NRECMEMA, &info->nrecmema);
		pci_read_config_word(pvt->branchmap_werrors,
				NRECMEMB, &info->nrecmemb);

		/* Clear the error bits, by writing them back */
		pci_write_config_dword(pvt->branchmap_werrors,
				FERR_FAT_FBD, value);
	} else {
		/* No fatal error pending: zero the cached snapshot */
		info->ferr_fat_fbd = 0;
		info->nerr_fat_fbd = 0;
		info->nrecmema = 0;
		info->nrecmemb = 0;
	}

	/* read in the 1st NON-FATAL error register */
	pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);

	/* If there is an error, then read in the 1st NON-FATAL error
	 * register as well */
	if (value & FERR_NF_MASK) {
		info->ferr_nf_fbd = value;

		/* harvest the various error data we need */
		pci_read_config_dword(pvt->branchmap_werrors,
				NERR_NF_FBD, &info->nerr_nf_fbd);
		pci_read_config_word(pvt->branchmap_werrors,
				RECMEMA, &info->recmema);
		pci_read_config_dword(pvt->branchmap_werrors,
				RECMEMB, &info->recmemb);
		pci_read_config_dword(pvt->branchmap_werrors,
				REDMEMB, &info->redmemb);

		/* Clear the error bits, by writing them back */
		pci_write_config_dword(pvt->branchmap_werrors,
				FERR_NF_FBD, value);
	} else {
		/* No non-fatal error pending: zero the cached snapshot */
		info->ferr_nf_fbd = 0;
		info->nerr_nf_fbd = 0;
		info->recmema = 0;
		info->recmemb = 0;
		info->redmemb = 0;
	}
}
513
/*
 * i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
 *					struct i5400_error_info *info,
 *					unsigned long allErrors);
 *
 * handle the Intel FATAL and unrecoverable errors, if any
 *
 * NOTE(review): the misspelled "proccess" name is kept because both call
 * sites in this file use it.
 *
 * NOTE(review): this is also invoked from the non-fatal path
 * (i5400_process_nonfatal_error_info), yet the branch index and the
 * nrec_* decoders below always read the FATAL-side registers
 * (ferr_fat_fbd / NRECMEMA / NRECMEMB) -- confirm that is intended for
 * non-fatal uncorrectable errors.
 */
static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
					struct i5400_error_info *info,
					unsigned long allErrors)
{
	/* Room for the csrow label plus the formatted details below */
	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
	int branch;
	int channel;
	int bank;
	int buf_id;
	int rank;
	int rdwr;
	int ras, cas;
	int errnum;
	char *type = NULL;

	if (!allErrors)
		return;		/* if no error, return now */

	/* Classify the severity from the error bit mask */
	if (allErrors & ERROR_FAT_MASK)
		type = "FATAL";
	else if (allErrors & FERR_NF_UNCORRECTABLE)
		type = "NON-FATAL uncorrected";
	else
		type = "NON-FATAL recoverable";

	/* ONLY ONE of the possible error bits will be set, as per the docs */

	branch = extract_fbdchan_indx(info->ferr_fat_fbd);
	channel = branch;

	/* Use the NON-Recoverable macros to extract data */
	bank = nrec_bank(info);
	rank = nrec_rank(info);
	buf_id = nrec_buf_id(info);
	/* NOTE(review): rdwr and cas are always 0 here today because
	 * nrecmemb is captured as a u16 -- see nrec_rdwr()/nrec_cas(). */
	rdwr = nrec_rdwr(info);
	ras = nrec_ras(info);
	cas = nrec_cas(info);

	debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
		"DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
		rank, channel, channel + 1, branch >> 1, bank,
		buf_id, rdwr_str(rdwr), ras, cas);

	/* Only 1 bit will be on */
	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));

	/* Form out message */
	snprintf(msg, sizeof(msg),
		 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
		 "RAS=%d CAS=%d %s Err=0x%lx (%s))",
		 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
		 type, allErrors, error_name[errnum]);

	/* Call the helper to output message */
	edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
}
577
/*
 * i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
 *					struct i5400_error_info *info);
 *
 * handle the Intel NON-FATAL errors, if any
 *
 * (The header comment previously named "i5400_process_fatal_error_info";
 * corrected to match the function it documents.)
 *
 * Uncorrectable/recoverable errors are delegated to the fatal-path
 * helper; correctable (CE) errors are decoded from the REC* logs and
 * reported here; anything else is logged as a miscellaneous error.
 */
static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
					struct i5400_error_info *info)
{
	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
	unsigned long allErrors;
	int branch;
	int channel;
	int bank;
	int rank;
	int rdwr;
	int ras, cas;
	int errnum;

	/* mask off the Error bits that are possible */
	allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
	if (!allErrors)
		return;		/* if no error, return now */

	/* ONLY ONE of the possible error bits will be set, as per the docs */

	if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
		i5400_proccess_non_recoverable_info(mci, info, allErrors);
		return;
	}

	/* Correctable errors */
	if (allErrors & ERROR_NF_CORRECTABLE) {
		debugf0("\tCorrected bits= 0x%lx\n", allErrors);

		branch = extract_fbdchan_indx(info->ferr_nf_fbd);

		/* The ECC locator's parity selects the odd channel of the
		 * branch pair */
		channel = 0;
		if (REC_ECC_LOCATOR_ODD(info->redmemb))
			channel = 1;

		/* Convert channel to be based from zero, instead of
		 * from branch base of 0 */
		channel += branch;

		bank = rec_bank(info);
		rank = rec_rank(info);
		rdwr = rec_rdwr(info);
		ras = rec_ras(info);
		cas = rec_cas(info);

		/* Only 1 bit will be on */
		errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));

		debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
			"DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
			rank, channel, branch >> 1, bank,
			rdwr_str(rdwr), ras, cas);

		/* Form out message */
		snprintf(msg, sizeof(msg),
			 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
			 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
			 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
			 allErrors, error_name[errnum]);

		/* Call the helper to output message */
		edac_mc_handle_fbd_ce(mci, rank, channel, msg);

		return;
	}

	/* Miscelaneous errors */
	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));

	branch = extract_fbdchan_indx(info->ferr_nf_fbd);

	i5400_mc_printk(mci, KERN_EMERG,
			"Non-Fatal misc error (Branch=%d Err=%#lx (%s))",
			branch >> 1, allErrors, error_name[errnum]);
}
660
661/*
662 * i5400_process_error_info Process the error info that is
663 * in the 'info' structure, previously retrieved from hardware
664 */
665static void i5400_process_error_info(struct mem_ctl_info *mci,
666 struct i5400_error_info *info)
667{ u32 allErrors;
668
669 /* First handle any fatal errors that occurred */
670 allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
671 i5400_proccess_non_recoverable_info(mci, info, allErrors);
672
673 /* now handle any non-fatal errors that occurred */
674 i5400_process_nonfatal_error_info(mci, info);
675}
676
677/*
678 * i5400_clear_error Retrieve any error from the hardware
679 * but do NOT process that error.
680 * Used for 'clearing' out of previous errors
681 * Called by the Core module.
682 */
683static void i5400_clear_error(struct mem_ctl_info *mci)
684{
685 struct i5400_error_info info;
686
687 i5400_get_error_info(mci, &info);
688}
689
690/*
691 * i5400_check_error Retrieve and process errors reported by the
692 * hardware. Called by the Core module.
693 */
694static void i5400_check_error(struct mem_ctl_info *mci)
695{
696 struct i5400_error_info info;
697 debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
698 i5400_get_error_info(mci, &info);
699 i5400_process_error_info(mci, &info);
700}
701
/*
 * i5400_put_devices	'put' all the devices that we have
 *			reserved via 'get'
 */
static void i5400_put_devices(struct mem_ctl_info *mci)
{
	struct i5400_pvt *pvt;

	pvt = mci->pvt_info;

	/* Decrement usage count for devices, in reverse order of
	 * acquisition in i5400_get_devices().  On the partial-failure
	 * path some pointers are still NULL (they are pre-initialized in
	 * i5400_get_devices()), so this relies on pci_dev_put() tolerating
	 * NULL. */
	pci_dev_put(pvt->branch_1);
	pci_dev_put(pvt->branch_0);
	pci_dev_put(pvt->fsb_error_regs);
	pci_dev_put(pvt->branchmap_werrors);
}
718
/*
 * i5400_get_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 * Need to 'get' device 16 func 1 and func 2
 *
 * Returns 0 on success, -ENODEV on failure (all references released).
 *
 * NOTE(review): the dev_idx parameter is currently unused.
 */
static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
{
	struct i5400_pvt *pvt;
	struct pci_dev *pdev;

	pvt = mci->pvt_info;
	/* Pre-clear so the error path can safely 'put' whatever subset
	 * was acquired */
	pvt->branchmap_werrors = NULL;
	pvt->fsb_error_regs = NULL;
	pvt->branch_0 = NULL;
	pvt->branch_1 = NULL;

	/* Attempt to 'get' the MCH register we want */
	pdev = NULL;
	/* Walk all 5400_ERR functions until both func 1 and func 2 are
	 * found; pci_get_device() continues the scan from 'pdev' */
	while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
		if (!pdev) {
			/* End of list, leave */
			i5400_printk(KERN_ERR,
				"'system address,Process Bus' "
				"device not found:"
				"vendor 0x%x device 0x%x ERR funcs "
				"(broken BIOS?)\n",
				PCI_VENDOR_ID_INTEL,
				PCI_DEVICE_ID_INTEL_5400_ERR);
			goto error;
		}

		/* Store device 16 funcs 1 and 2 */
		switch (PCI_FUNC(pdev->devfn)) {
		case 1:
			pvt->branchmap_werrors = pdev;
			break;
		case 2:
			pvt->fsb_error_regs = pdev;
			break;
		}
	}

	debugf1("System Address, processor bus- PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->system_address),
		pvt->system_address->vendor, pvt->system_address->device);
	debugf1("Branchmap, control and errors - PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->branchmap_werrors),
		pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
	debugf1("FSB Error Regs - PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->fsb_error_regs),
		pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);

	pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL,
				       PCI_DEVICE_ID_INTEL_5400_FBD0, NULL);
	if (!pvt->branch_0) {
		i5400_printk(KERN_ERR,
			"MC: 'BRANCH 0' device not found:"
			"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
			PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
		goto error;
	}

	/* If this device claims to have more than 2 channels then
	 * fetch Branch 1's information
	 */
	if (pvt->maxch < CHANNELS_PER_BRANCH)
		return 0;

	pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL,
				       PCI_DEVICE_ID_INTEL_5400_FBD1, NULL);
	if (!pvt->branch_1) {
		i5400_printk(KERN_ERR,
			"MC: 'BRANCH 1' device not found:"
			"vendor 0x%x device 0x%x Func 0 "
			"(broken BIOS?)\n",
			PCI_VENDOR_ID_INTEL,
			PCI_DEVICE_ID_INTEL_5400_FBD1);
		goto error;
	}

	return 0;

error:
	/* Release whatever subset of devices was acquired */
	i5400_put_devices(mci);
	return -ENODEV;
}
808
809/*
810 * determine_amb_present
811 *
812 * the information is contained in NUM_MTRS_PER_BRANCH different
813 * registers determining which of the NUM_MTRS_PER_BRANCH requires
814 * knowing which channel is in question
815 *
816 * 2 branches, each with 2 channels
817 * b0_ambpresent0 for channel '0'
818 * b0_ambpresent1 for channel '1'
819 * b1_ambpresent0 for channel '2'
820 * b1_ambpresent1 for channel '3'
821 */
822static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
823{
824 int amb_present;
825
826 if (channel < CHANNELS_PER_BRANCH) {
827 if (channel & 0x1)
828 amb_present = pvt->b0_ambpresent1;
829 else
830 amb_present = pvt->b0_ambpresent0;
831 } else {
832 if (channel & 0x1)
833 amb_present = pvt->b1_ambpresent1;
834 else
835 amb_present = pvt->b1_ambpresent0;
836 }
837
838 return amb_present;
839}
840
841/*
842 * determine_mtr(pvt, csrow, channel)
843 *
844 * return the proper MTR register as determine by the csrow and desired channel
845 */
846static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
847{
848 int mtr;
849 int n;
850
851 /* There is one MTR for each slot pair of FB-DIMMs,
852 Each slot may have one or two ranks (2 csrows),
853 Each slot pair may be at branch 0 or branch 1.
854 So, csrow should be divided by eight
855 */
856 n = csrow >> 3;
857
858 if (n >= NUM_MTRS_PER_BRANCH) {
859 debugf0("ERROR: trying to access an invalid csrow: %d\n",
860 csrow);
861 return 0;
862 }
863
864 if (channel < CHANNELS_PER_BRANCH)
865 mtr = pvt->b0_mtr[n];
866 else
867 mtr = pvt->b1_mtr[n];
868
869 return mtr;
870}
871
872/*
873 */
874static void decode_mtr(int slot_row, u16 mtr)
875{
876 int ans;
877
878 ans = MTR_DIMMS_PRESENT(mtr);
879
880 debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr,
881 ans ? "Present" : "NOT Present");
882 if (!ans)
883 return;
884
885 debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
886
887 debugf2("\t\tELECTRICAL THROTTLING is %s\n",
888 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
889
890 debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
891 debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single");
892 debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
893 debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
894}
895
896static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
897 struct i5400_dimm_info *dinfo)
898{
899 int mtr;
900 int amb_present_reg;
901 int addrBits;
902
903 mtr = determine_mtr(pvt, csrow, channel);
904 if (MTR_DIMMS_PRESENT(mtr)) {
905 amb_present_reg = determine_amb_present_reg(pvt, channel);
906
907 /* Determine if there is a DIMM present in this DIMM slot */
908 if (amb_present_reg & (1 << (csrow >> 1))) {
909 dinfo->dual_rank = MTR_DIMM_RANK(mtr);
910
911 if (!((dinfo->dual_rank == 0) &&
912 ((csrow & 0x1) == 0x1))) {
913 /* Start with the number of bits for a Bank
914 * on the DRAM */
915 addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
916 /* Add thenumber of ROW bits */
917 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
918 /* add the number of COLUMN bits */
919 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
920
921 addrBits += 6; /* add 64 bits per DIMM */
922 addrBits -= 20; /* divide by 2^^20 */
923 addrBits -= 3; /* 8 bits per bytes */
924
925 dinfo->megabytes = 1 << addrBits;
926 }
927 }
928 }
929}
930
/*
 * calculate_dimm_size
 *
 * Fill pvt->dimm_info[][] (via handle_channel()) for every csrow/channel
 * and, if debug is enabled, print a DIMM matrix map showing how the
 * DIMMs are populated.
 *
 * NOTE(review): the snprintf() accounting assumes each row of output
 * fits in PAGE_SIZE; on truncation snprintf() returns the would-be
 * length, which could drive 'space' negative -- harmless only as long
 * as rows stay well under PAGE_SIZE.
 */
static void calculate_dimm_size(struct i5400_pvt *pvt)
{
	struct i5400_dimm_info *dinfo;
	int csrow, max_csrows;
	char *p, *mem_buffer;
	int space, n;
	int channel;

	/* ================= Generate some debug output ================= */
	space = PAGE_SIZE;
	mem_buffer = p = kmalloc(space, GFP_KERNEL);
	if (p == NULL) {
		i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
			__FILE__, __func__);
		return;
	}

	/* Scan all the actual CSROWS (which is # of DIMMS * 2)
	 * and calculate the information for each DIMM
	 * Start with the highest csrow first, to display it first
	 * and work toward the 0th csrow
	 */
	max_csrows = pvt->maxdimmperch * 2;
	for (csrow = max_csrows - 1; csrow >= 0; csrow--) {

		/* on an odd csrow, first output a 'boundary' marker,
		 * then reset the message buffer  */
		if (csrow & 0x1) {
			n = snprintf(p, space, "---------------------------"
				"--------------------------------");
			p += n;
			space -= n;
			debugf2("%s\n", mem_buffer);
			p = mem_buffer;
			space = PAGE_SIZE;
		}
		n = snprintf(p, space, "csrow %2d    ", csrow);
		p += n;
		space -= n;

		/* One column per channel; handle_channel() computes the
		 * megabyte count as a side effect */
		for (channel = 0; channel < pvt->maxch; channel++) {
			dinfo = &pvt->dimm_info[csrow][channel];
			handle_channel(pvt, csrow, channel, dinfo);
			n = snprintf(p, space, "%4d MB   | ", dinfo->megabytes);
			p += n;
			space -= n;
		}
		debugf2("%s\n", mem_buffer);
		p = mem_buffer;
		space = PAGE_SIZE;
	}

	/* Output the last bottom 'boundary' marker */
	n = snprintf(p, space, "---------------------------"
		"--------------------------------");
	p += n;
	space -= n;
	debugf2("%s\n", mem_buffer);
	p = mem_buffer;
	space = PAGE_SIZE;

	/* now output the 'channel' labels */
	n = snprintf(p, space, "            ");
	p += n;
	space -= n;
	for (channel = 0; channel < pvt->maxch; channel++) {
		n = snprintf(p, space, "channel %d | ", channel);
		p += n;
		space -= n;
	}

	/* output the last message and free buffer */
	debugf2("%s\n", mem_buffer);
	kfree(mem_buffer);
}
1012
1013/*
1014 * i5400_get_mc_regs read in the necessary registers and
1015 * cache locally
1016 *
1017 * Fills in the private data members
1018 */
1019static void i5400_get_mc_regs(struct mem_ctl_info *mci)
1020{
1021 struct i5400_pvt *pvt;
1022 u32 actual_tolm;
1023 u16 limit;
1024 int slot_row;
1025 int maxch;
1026 int maxdimmperch;
1027 int way0, way1;
1028
1029 pvt = mci->pvt_info;
1030
1031 pci_read_config_dword(pvt->system_address, AMBASE,
1032 (u32 *) &pvt->ambase);
1033 pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
1034 ((u32 *) &pvt->ambase) + sizeof(u32));
1035
1036 maxdimmperch = pvt->maxdimmperch;
1037 maxch = pvt->maxch;
1038
1039 debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
1040 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
1041
1042 /* Get the Branch Map regs */
1043 pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
1044 pvt->tolm >>= 12;
1045 debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
1046 pvt->tolm);
1047
1048 actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
1049 debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
1050 actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
1051
1052 pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
1053 pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
1054
1055 /* Get the MIR[0-1] regs */
1056 limit = (pvt->mir0 >> 4) & 0x0fff;
1057 way0 = pvt->mir0 & 0x1;
1058 way1 = pvt->mir0 & 0x2;
1059 debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
1060 limit = (pvt->mir1 >> 4) & 0xfff;
1061 way0 = pvt->mir1 & 0x1;
1062 way1 = pvt->mir1 & 0x2;
1063 debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
1064
1065 /* Get the set of MTR[0-3] regs by each branch */
1066 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
1067 int where = MTR0 + (slot_row * sizeof(u32));
1068
1069 /* Branch 0 set of MTR registers */
1070 pci_read_config_word(pvt->branch_0, where,
1071 &pvt->b0_mtr[slot_row]);
1072
1073 debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where,
1074 pvt->b0_mtr[slot_row]);
1075
1076 if (pvt->maxch < CHANNELS_PER_BRANCH) {
1077 pvt->b1_mtr[slot_row] = 0;
1078 continue;
1079 }
1080
1081 /* Branch 1 set of MTR registers */
1082 pci_read_config_word(pvt->branch_1, where,
1083 &pvt->b1_mtr[slot_row]);
1084 debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where,
1085 pvt->b1_mtr[slot_row]);
1086 }
1087
1088 /* Read and dump branch 0's MTRs */
1089 debugf2("\nMemory Technology Registers:\n");
1090 debugf2(" Branch 0:\n");
1091 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
1092 decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
1093
1094 pci_read_config_word(pvt->branch_0, AMBPRESENT_0,
1095 &pvt->b0_ambpresent0);
1096 debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
1097 pci_read_config_word(pvt->branch_0, AMBPRESENT_1,
1098 &pvt->b0_ambpresent1);
1099 debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
1100
1101 /* Only if we have 2 branchs (4 channels) */
1102 if (pvt->maxch < CHANNELS_PER_BRANCH) {
1103 pvt->b1_ambpresent0 = 0;
1104 pvt->b1_ambpresent1 = 0;
1105 } else {
1106 /* Read and dump branch 1's MTRs */
1107 debugf2(" Branch 1:\n");
1108 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
1109 decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
1110
1111 pci_read_config_word(pvt->branch_1, AMBPRESENT_0,
1112 &pvt->b1_ambpresent0);
1113 debugf2("\t\tAMB-Branch 1-present0 0x%x:\n",
1114 pvt->b1_ambpresent0);
1115 pci_read_config_word(pvt->branch_1, AMBPRESENT_1,
1116 &pvt->b1_ambpresent1);
1117 debugf2("\t\tAMB-Branch 1-present1 0x%x:\n",
1118 pvt->b1_ambpresent1);
1119 }
1120
1121 /* Go and determine the size of each DIMM and place in an
1122 * orderly matrix */
1123 calculate_dimm_size(pvt);
1124}
1125
/*
 * i5400_init_csrows	Initialize the 'csrows' table within
 *			the mci control structure with the
 *			addressing of memory.
 *
 * Relies on pvt->dimm_info[][] having been populated beforehand by
 * calculate_dimm_size() (called from i5400_get_mc_regs()).
 *
 * return:
 *	0	success
 *	1	no actual memory found on this MC
 */
static int i5400_init_csrows(struct mem_ctl_info *mci)
{
	struct i5400_pvt *pvt;
	struct csrow_info *p_csrow;
	int empty, channel_count;
	int max_csrows;
	int mtr;
	int csrow_megs;
	int channel;
	int csrow;

	pvt = mci->pvt_info;

	channel_count = pvt->maxch;
	max_csrows = pvt->maxdimmperch * 2;

	empty = 1;		/* Assume NO memory */

	for (csrow = 0; csrow < max_csrows; csrow++) {
		p_csrow = &mci->csrows[csrow];

		p_csrow->csrow_idx = csrow;

		/* use branch 0 for the basis */
		mtr = determine_mtr(pvt, csrow, 0);

		/* if no DIMMS on this row, continue */
		if (!MTR_DIMMS_PRESENT(mtr))
			continue;

		/* FAKE OUT VALUES, FIXME */
		/* NOTE(review): these page ranges are placeholders, not
		 * real physical address mappings -- see the FIXME above. */
		p_csrow->first_page = 0 + csrow * 20;
		p_csrow->last_page = 9 + csrow * 20;
		p_csrow->page_mask = 0xFFF;

		p_csrow->grain = 8;

		/* Row capacity is the sum of the per-channel DIMM sizes */
		csrow_megs = 0;
		for (channel = 0; channel < pvt->maxch; channel++)
			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;

		/* Convert megabytes to 4K pages (<< 8) */
		p_csrow->nr_pages = csrow_megs << 8;

		/* Assume DDR2 for now */
		p_csrow->mtype = MEM_FB_DDR2;

		/* ask what device type on this row */
		if (MTR_DRAM_WIDTH(mtr))
			p_csrow->dtype = DEV_X8;
		else
			p_csrow->dtype = DEV_X4;

		p_csrow->edac_mode = EDAC_S8ECD8ED;

		empty = 0;
	}

	return empty;
}
1194
1195/*
1196 * i5400_enable_error_reporting
1197 * Turn on the memory reporting features of the hardware
1198 */
1199static void i5400_enable_error_reporting(struct mem_ctl_info *mci)
1200{
1201 struct i5400_pvt *pvt;
1202 u32 fbd_error_mask;
1203
1204 pvt = mci->pvt_info;
1205
1206 /* Read the FBD Error Mask Register */
1207 pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
1208 &fbd_error_mask);
1209
1210 /* Enable with a '0' */
1211 fbd_error_mask &= ~(ENABLE_EMASK_ALL);
1212
1213 pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
1214 fbd_error_mask);
1215}
1216
1217/*
1218 * i5400_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels)
1219 *
1220 * ask the device how many channels are present and how many CSROWS
1221 * as well
1222 */
1223static void i5400_get_dimm_and_channel_counts(struct pci_dev *pdev,
1224 int *num_dimms_per_channel,
1225 int *num_channels)
1226{
1227 u8 value;
1228
1229 /* Need to retrieve just how many channels and dimms per channel are
1230 * supported on this memory controller
1231 */
1232 pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
1233 *num_dimms_per_channel = (int)value * 2;
1234
1235 pci_read_config_byte(pdev, MAXCH, &value);
1236 *num_channels = (int)value;
1237}
1238
/*
 * i5400_probe1	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 *
 * Allocates the mem_ctl_info, acquires the companion PCI functions,
 * reads the hardware registers, builds the csrow table and registers
 * with the EDAC core.  The fail1/fail0 labels unwind in reverse order.
 */
static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
{
	struct mem_ctl_info *mci;
	struct i5400_pvt *pvt;
	int num_channels;
	int num_dimms_per_channel;
	int num_csrows;

	if (dev_idx >= ARRAY_SIZE(i5400_devs))
		return -EINVAL;

	debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
		__func__,
		pdev->bus->number,
		PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

	/* We only are looking for func 0 of the set */
	if (PCI_FUNC(pdev->devfn) != 0)
		return -ENODEV;

	/* Ask the devices for the number of CSROWS and CHANNELS so
	 * that we can calculate the memory resources, etc
	 *
	 * The Chipset will report what it can handle which will be greater
	 * or equal to what the motherboard manufacturer will implement.
	 *
	 * As we don't have a motherboard identification routine to determine
	 * actual number of slots/dimms per channel, we thus utilize the
	 * resource as specified by the chipset. Thus, we might have
	 * have more DIMMs per channel than actually on the mobo, but this
	 * allows the driver to support upto the chipset max, without
	 * some fancy mobo determination.
	 */
	i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
					&num_channels);
	num_csrows = num_dimms_per_channel * 2;

	debugf0("MC: %s(): Number of - Channels= %d  DIMMS= %d  CSROWS= %d\n",
		__func__, num_channels, num_dimms_per_channel, num_csrows);

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);

	if (mci == NULL)
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	mci->dev = &pdev->dev;	/* record ptr  to the generic device */

	pvt = mci->pvt_info;
	pvt->system_address = pdev;	/* Record this device in our private */
	pvt->maxch = num_channels;
	pvt->maxdimmperch = num_dimms_per_channel;

	/* 'get' the pci devices we want to reserve for our use */
	if (i5400_get_devices(mci, dev_idx))
		goto fail0;

	/* Time to get serious */
	i5400_get_mc_regs(mci);	/* retrieve the hardware registers */

	mci->mc_idx = 0;
	mci->mtype_cap = MEM_FLAG_FB_DDR2;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i5400_edac.c";
	mci->mod_ver = I5400_REVISION;
	mci->ctl_name = i5400_devs[dev_idx].ctl_name;
	mci->dev_name = pci_name(pdev);
	mci->ctl_page_to_phys = NULL;

	/* Set the function pointer to an actual operation function */
	mci->edac_check = i5400_check_error;

	/* initialize the MC control structure 'csrows' table
	 * with the mapping and control information */
	if (i5400_init_csrows(mci)) {
		debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
			"    because i5400_init_csrows() returned nonzero "
			"value\n");
		mci->edac_cap = EDAC_FLAG_NONE;	/* no csrows found */
	} else {
		debugf1("MC: Enable error reporting now\n");
		i5400_enable_error_reporting(mci);
	}

	/* add this new MC control structure to EDAC's list of MCs */
	if (edac_mc_add_mc(mci)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */
		goto fail1;
	}

	/* Discard any error state that was latched before we attached */
	i5400_clear_error(mci);

	/* allocating generic PCI control info */
	i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
	if (!i5400_pci) {
		/* Non-fatal: MC reporting still works without the PCI ctl */
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

	return 0;

	/* Error exit unwinding stack */
fail1:

	i5400_put_devices(mci);

fail0:
	edac_mc_free(mci);
	return -ENODEV;
}
1367
/*
 * i5400_init_one	constructor for one instance of device
 *
 * 	returns:
 *		negative on error
 *		count (>= 0)
 */
static int __devinit i5400_init_one(struct pci_dev *pdev,
				const struct pci_device_id *id)
{
	int rc;

	debugf0("MC: " __FILE__ ": %s()\n", __func__);

	/* wake up device */
	rc = pci_enable_device(pdev);
	/* NOTE(review): only -EIO aborts here; other negative return codes
	 * from pci_enable_device() fall through to the probe.  Confirm this
	 * is intentional. */
	if (rc == -EIO)
		return rc;

	/* now probe and enable the device */
	return i5400_probe1(pdev, id->driver_data);
}
1390
1391/*
1392 * i5400_remove_one destructor for one instance of device
1393 *
1394 */
1395static void __devexit i5400_remove_one(struct pci_dev *pdev)
1396{
1397 struct mem_ctl_info *mci;
1398
1399 debugf0(__FILE__ ": %s()\n", __func__);
1400
1401 if (i5400_pci)
1402 edac_pci_release_generic_ctl(i5400_pci);
1403
1404 mci = edac_mc_del_mc(&pdev->dev);
1405 if (!mci)
1406 return;
1407
1408 /* retrieve references to resources, and free those resources */
1409 i5400_put_devices(mci);
1410
1411 edac_mc_free(mci);
1412}
1413
/*
 * pci_device_id table for which devices we are looking for
 *
 * The "E500P" device is the first device supported.  Matching is done on
 * the 5400 ERR function; i5400_probe1() then filters for function 0.
 */
static const struct pci_device_id i5400_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)},
	{0,}			/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, i5400_pci_tbl);
1425
/*
 * i5400_driver	pci_driver structure for this module
 *
 * Registered in i5400_init(), unregistered in i5400_exit().
 */
static struct pci_driver i5400_driver = {
	.name = "i5400_edac",
	.probe = i5400_init_one,
	.remove = __devexit_p(i5400_remove_one),
	.id_table = i5400_pci_tbl,
};
1436
1437/*
1438 * i5400_init Module entry function
1439 * Try to initialize this module for its devices
1440 */
1441static int __init i5400_init(void)
1442{
1443 int pci_rc;
1444
1445 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1446
1447 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1448 opstate_init();
1449
1450 pci_rc = pci_register_driver(&i5400_driver);
1451
1452 return (pci_rc < 0) ? pci_rc : 0;
1453}
1454
1455/*
1456 * i5400_exit() Module exit function
1457 * Unregister the driver
1458 */
1459static void __exit i5400_exit(void)
1460{
1461 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1462 pci_unregister_driver(&i5400_driver);
1463}
1464
1465module_init(i5400_init);
1466module_exit(i5400_exit);
1467
1468MODULE_LICENSE("GPL");
1469MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
1470MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1471MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1472MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
1473 I5400_REVISION);
1474
1475module_param(edac_op_state, int, 0444);
1476MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index ebb037b78758..b2d83b95033d 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -311,9 +311,7 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
311 } 311 }
312 312
313 /* cache is irrelevant for PCI bus reads/writes */ 313 /* cache is irrelevant for PCI bus reads/writes */
314 window = ioremap_nocache(pci_resource_start(dev, 0), 314 window = pci_ioremap_bar(dev, 0);
315 pci_resource_len(dev, 0));
316
317 if (window == NULL) { 315 if (window == NULL) {
318 i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n", 316 i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n",
319 __func__); 317 __func__);
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 0cfcb2d075a0..853ef37ec006 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -630,27 +630,22 @@ static int mpc85xx_l2_err_remove(struct of_device *op)
630} 630}
631 631
632static struct of_device_id mpc85xx_l2_err_of_match[] = { 632static struct of_device_id mpc85xx_l2_err_of_match[] = {
633 { 633/* deprecate the fsl,85.. forms in the future, 2.6.30? */
634 .compatible = "fsl,8540-l2-cache-controller", 634 { .compatible = "fsl,8540-l2-cache-controller", },
635 }, 635 { .compatible = "fsl,8541-l2-cache-controller", },
636 { 636 { .compatible = "fsl,8544-l2-cache-controller", },
637 .compatible = "fsl,8541-l2-cache-controller", 637 { .compatible = "fsl,8548-l2-cache-controller", },
638 }, 638 { .compatible = "fsl,8555-l2-cache-controller", },
639 { 639 { .compatible = "fsl,8568-l2-cache-controller", },
640 .compatible = "fsl,8544-l2-cache-controller", 640 { .compatible = "fsl,mpc8536-l2-cache-controller", },
641 }, 641 { .compatible = "fsl,mpc8540-l2-cache-controller", },
642 { 642 { .compatible = "fsl,mpc8541-l2-cache-controller", },
643 .compatible = "fsl,8548-l2-cache-controller", 643 { .compatible = "fsl,mpc8544-l2-cache-controller", },
644 }, 644 { .compatible = "fsl,mpc8548-l2-cache-controller", },
645 { 645 { .compatible = "fsl,mpc8555-l2-cache-controller", },
646 .compatible = "fsl,8555-l2-cache-controller", 646 { .compatible = "fsl,mpc8560-l2-cache-controller", },
647 }, 647 { .compatible = "fsl,mpc8568-l2-cache-controller", },
648 { 648 { .compatible = "fsl,mpc8572-l2-cache-controller", },
649 .compatible = "fsl,8568-l2-cache-controller",
650 },
651 {
652 .compatible = "fsl,mpc8572-l2-cache-controller",
653 },
654 {}, 649 {},
655}; 650};
656 651
@@ -967,27 +962,22 @@ static int mpc85xx_mc_err_remove(struct of_device *op)
967} 962}
968 963
969static struct of_device_id mpc85xx_mc_err_of_match[] = { 964static struct of_device_id mpc85xx_mc_err_of_match[] = {
970 { 965/* deprecate the fsl,85.. forms in the future, 2.6.30? */
971 .compatible = "fsl,8540-memory-controller", 966 { .compatible = "fsl,8540-memory-controller", },
972 }, 967 { .compatible = "fsl,8541-memory-controller", },
973 { 968 { .compatible = "fsl,8544-memory-controller", },
974 .compatible = "fsl,8541-memory-controller", 969 { .compatible = "fsl,8548-memory-controller", },
975 }, 970 { .compatible = "fsl,8555-memory-controller", },
976 { 971 { .compatible = "fsl,8568-memory-controller", },
977 .compatible = "fsl,8544-memory-controller", 972 { .compatible = "fsl,mpc8536-memory-controller", },
978 }, 973 { .compatible = "fsl,mpc8540-memory-controller", },
979 { 974 { .compatible = "fsl,mpc8541-memory-controller", },
980 .compatible = "fsl,8548-memory-controller", 975 { .compatible = "fsl,mpc8544-memory-controller", },
981 }, 976 { .compatible = "fsl,mpc8548-memory-controller", },
982 { 977 { .compatible = "fsl,mpc8555-memory-controller", },
983 .compatible = "fsl,8555-memory-controller", 978 { .compatible = "fsl,mpc8560-memory-controller", },
984 }, 979 { .compatible = "fsl,mpc8568-memory-controller", },
985 { 980 { .compatible = "fsl,mpc8572-memory-controller", },
986 .compatible = "fsl,8568-memory-controller",
987 },
988 {
989 .compatible = "fsl,mpc8572-memory-controller",
990 },
991 {}, 981 {},
992}; 982};
993 983
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 78b989d202a3..d76adfea5df7 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -468,8 +468,8 @@ const char *dmi_get_system_info(int field)
468EXPORT_SYMBOL(dmi_get_system_info); 468EXPORT_SYMBOL(dmi_get_system_info);
469 469
470/** 470/**
471 * dmi_name_in_serial - Check if string is in the DMI product serial 471 * dmi_name_in_serial - Check if string is in the DMI product serial information
472 * information. 472 * @str: string to check for
473 */ 473 */
474int dmi_name_in_serial(const char *str) 474int dmi_name_in_serial(const char *str)
475{ 475{
@@ -585,6 +585,8 @@ EXPORT_SYMBOL_GPL(dmi_walk);
585 585
586/** 586/**
587 * dmi_match - compare a string to the dmi field (if exists) 587 * dmi_match - compare a string to the dmi field (if exists)
588 * @f: DMI field identifier
589 * @str: string to compare the DMI field to
588 * 590 *
589 * Returns true if the requested field equals to the str (including NULL). 591 * Returns true if the requested field equals to the str (including NULL).
590 */ 592 */
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 48f49d93d249..3d2565441b36 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -95,7 +95,7 @@ config GPIO_MAX732X
95 number for these GPIOs. 95 number for these GPIOs.
96 96
97config GPIO_PCA953X 97config GPIO_PCA953X
98 tristate "PCA953x, PCA955x, and MAX7310 I/O ports" 98 tristate "PCA953x, PCA955x, TCA64xx, and MAX7310 I/O ports"
99 depends on I2C 99 depends on I2C
100 help 100 help
101 Say yes here to provide access to several register-oriented 101 Say yes here to provide access to several register-oriented
@@ -104,9 +104,10 @@ config GPIO_PCA953X
104 104
105 4 bits: pca9536, pca9537 105 4 bits: pca9536, pca9537
106 106
107 8 bits: max7310, pca9534, pca9538, pca9554, pca9557 107 8 bits: max7310, pca9534, pca9538, pca9554, pca9557,
108 tca6408
108 109
109 16 bits: pca9535, pca9539, pca9555 110 16 bits: pca9535, pca9539, pca9555, tca6416
110 111
111 This driver can also be built as a module. If so, the module 112 This driver can also be built as a module. If so, the module
112 will be called pca953x. 113 will be called pca953x.
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 9ceeb89f1325..37f35388a2ae 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -33,7 +33,12 @@ static const struct i2c_device_id pca953x_id[] = {
33 { "pca9554", 8, }, 33 { "pca9554", 8, },
34 { "pca9555", 16, }, 34 { "pca9555", 16, },
35 { "pca9557", 8, }, 35 { "pca9557", 8, },
36
36 { "max7310", 8, }, 37 { "max7310", 8, },
38 { "pca6107", 8, },
39 { "tca6408", 8, },
40 { "tca6416", 16, },
41 /* NYET: { "tca6424", 24, }, */
37 { } 42 { }
38}; 43};
39MODULE_DEVICE_TABLE(i2c, pca953x_id); 44MODULE_DEVICE_TABLE(i2c, pca953x_id);
@@ -47,9 +52,6 @@ struct pca953x_chip {
47 struct gpio_chip gpio_chip; 52 struct gpio_chip gpio_chip;
48}; 53};
49 54
50/* NOTE: we can't currently rely on fault codes to come from SMBus
51 * calls, so we map all errors to EIO here and return zero otherwise.
52 */
53static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val) 55static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
54{ 56{
55 int ret; 57 int ret;
@@ -61,7 +63,7 @@ static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
61 63
62 if (ret < 0) { 64 if (ret < 0) {
63 dev_err(&chip->client->dev, "failed writing register\n"); 65 dev_err(&chip->client->dev, "failed writing register\n");
64 return -EIO; 66 return ret;
65 } 67 }
66 68
67 return 0; 69 return 0;
@@ -78,7 +80,7 @@ static int pca953x_read_reg(struct pca953x_chip *chip, int reg, uint16_t *val)
78 80
79 if (ret < 0) { 81 if (ret < 0) {
80 dev_err(&chip->client->dev, "failed reading register\n"); 82 dev_err(&chip->client->dev, "failed reading register\n");
81 return -EIO; 83 return ret;
82 } 84 }
83 85
84 *val = (uint16_t)ret; 86 *val = (uint16_t)ret;
diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
index 37d3eec8730a..afad14792141 100644
--- a/drivers/gpio/twl4030-gpio.c
+++ b/drivers/gpio/twl4030-gpio.c
@@ -202,37 +202,6 @@ static int twl4030_get_gpio_datain(int gpio)
202 return ret; 202 return ret;
203} 203}
204 204
205/*
206 * Configure debounce timing value for a GPIO pin on TWL4030
207 */
208int twl4030_set_gpio_debounce(int gpio, int enable)
209{
210 u8 d_bnk = gpio >> 3;
211 u8 d_msk = BIT(gpio & 0x7);
212 u8 reg = 0;
213 u8 base = 0;
214 int ret = 0;
215
216 if (unlikely((gpio >= TWL4030_GPIO_MAX)
217 || !(gpio_usage_count & BIT(gpio))))
218 return -EPERM;
219
220 base = REG_GPIO_DEBEN1 + d_bnk;
221 mutex_lock(&gpio_lock);
222 ret = gpio_twl4030_read(base);
223 if (ret >= 0) {
224 if (enable)
225 reg = ret | d_msk;
226 else
227 reg = ret & ~d_msk;
228
229 ret = gpio_twl4030_write(base, reg);
230 }
231 mutex_unlock(&gpio_lock);
232 return ret;
233}
234EXPORT_SYMBOL(twl4030_set_gpio_debounce);
235
236/*----------------------------------------------------------------------*/ 205/*----------------------------------------------------------------------*/
237 206
238static int twl_request(struct gpio_chip *chip, unsigned offset) 207static int twl_request(struct gpio_chip *chip, unsigned offset)
@@ -405,6 +374,23 @@ static int __devinit gpio_twl4030_pulls(u32 ups, u32 downs)
405 REG_GPIOPUPDCTR1, 5); 374 REG_GPIOPUPDCTR1, 5);
406} 375}
407 376
377static int __devinit gpio_twl4030_debounce(u32 debounce, u8 mmc_cd)
378{
379 u8 message[4];
380
381 /* 30 msec of debouncing is always used for MMC card detect,
382 * and is optional for everything else.
383 */
384 message[1] = (debounce & 0xff) | (mmc_cd & 0x03);
385 debounce >>= 8;
386 message[2] = (debounce & 0xff);
387 debounce >>= 8;
388 message[3] = (debounce & 0x03);
389
390 return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
391 REG_GPIO_DEBEN1, 3);
392}
393
408static int gpio_twl4030_remove(struct platform_device *pdev); 394static int gpio_twl4030_remove(struct platform_device *pdev);
409 395
410static int __devinit gpio_twl4030_probe(struct platform_device *pdev) 396static int __devinit gpio_twl4030_probe(struct platform_device *pdev)
@@ -439,6 +425,12 @@ no_irqs:
439 pdata->pullups, pdata->pulldowns, 425 pdata->pullups, pdata->pulldowns,
440 ret); 426 ret);
441 427
428 ret = gpio_twl4030_debounce(pdata->debounce, pdata->mmc_cd);
429 if (ret)
430 dev_dbg(&pdev->dev, "debounce %.03x %.01x --> %d\n",
431 pdata->debounce, pdata->mmc_cd,
432 ret);
433
442 twl_gpiochip.base = pdata->gpio_base; 434 twl_gpiochip.base = pdata->gpio_base;
443 twl_gpiochip.ngpio = TWL4030_GPIO_MAX; 435 twl_gpiochip.ngpio = TWL4030_GPIO_MAX;
444 twl_gpiochip.dev = &pdev->dev; 436 twl_gpiochip.dev = &pdev->dev;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3733e36d135e..b06a53715853 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -183,6 +183,10 @@ int drm_stub_open(struct inode *inode, struct file *filp)
183 183
184 old_fops = filp->f_op; 184 old_fops = filp->f_op;
185 filp->f_op = fops_get(&dev->driver->fops); 185 filp->f_op = fops_get(&dev->driver->fops);
186 if (filp->f_op == NULL) {
187 filp->f_op = old_fops;
188 goto out;
189 }
186 if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) { 190 if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
187 fops_put(filp->f_op); 191 fops_put(filp->f_op);
188 filp->f_op = fops_get(old_fops); 192 filp->f_op = fops_get(old_fops);
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index 66107b4dc12a..1852f27bac51 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -204,8 +204,6 @@ I2C_CLIENT_INSMOD_1(adt7462);
204#define MASK_AND_SHIFT(value, prefix) \ 204#define MASK_AND_SHIFT(value, prefix) \
205 (((value) & prefix##_MASK) >> prefix##_SHIFT) 205 (((value) & prefix##_MASK) >> prefix##_SHIFT)
206 206
207#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor))
208
209struct adt7462_data { 207struct adt7462_data {
210 struct device *hwmon_dev; 208 struct device *hwmon_dev;
211 struct attribute_group attrs; 209 struct attribute_group attrs;
@@ -840,7 +838,7 @@ static ssize_t set_temp_min(struct device *dev,
840 if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index)) 838 if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index))
841 return -EINVAL; 839 return -EINVAL;
842 840
843 temp = ROUND_DIV(temp, 1000) + 64; 841 temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
844 temp = SENSORS_LIMIT(temp, 0, 255); 842 temp = SENSORS_LIMIT(temp, 0, 255);
845 843
846 mutex_lock(&data->lock); 844 mutex_lock(&data->lock);
@@ -878,7 +876,7 @@ static ssize_t set_temp_max(struct device *dev,
878 if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index)) 876 if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index))
879 return -EINVAL; 877 return -EINVAL;
880 878
881 temp = ROUND_DIV(temp, 1000) + 64; 879 temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
882 temp = SENSORS_LIMIT(temp, 0, 255); 880 temp = SENSORS_LIMIT(temp, 0, 255);
883 881
884 mutex_lock(&data->lock); 882 mutex_lock(&data->lock);
@@ -943,7 +941,7 @@ static ssize_t set_volt_max(struct device *dev,
943 return -EINVAL; 941 return -EINVAL;
944 942
945 temp *= 1000; /* convert mV to uV */ 943 temp *= 1000; /* convert mV to uV */
946 temp = ROUND_DIV(temp, x); 944 temp = DIV_ROUND_CLOSEST(temp, x);
947 temp = SENSORS_LIMIT(temp, 0, 255); 945 temp = SENSORS_LIMIT(temp, 0, 255);
948 946
949 mutex_lock(&data->lock); 947 mutex_lock(&data->lock);
@@ -985,7 +983,7 @@ static ssize_t set_volt_min(struct device *dev,
985 return -EINVAL; 983 return -EINVAL;
986 984
987 temp *= 1000; /* convert mV to uV */ 985 temp *= 1000; /* convert mV to uV */
988 temp = ROUND_DIV(temp, x); 986 temp = DIV_ROUND_CLOSEST(temp, x);
989 temp = SENSORS_LIMIT(temp, 0, 255); 987 temp = SENSORS_LIMIT(temp, 0, 255);
990 988
991 mutex_lock(&data->lock); 989 mutex_lock(&data->lock);
@@ -1250,7 +1248,7 @@ static ssize_t set_pwm_hyst(struct device *dev,
1250 if (strict_strtol(buf, 10, &temp)) 1248 if (strict_strtol(buf, 10, &temp))
1251 return -EINVAL; 1249 return -EINVAL;
1252 1250
1253 temp = ROUND_DIV(temp, 1000); 1251 temp = DIV_ROUND_CLOSEST(temp, 1000);
1254 temp = SENSORS_LIMIT(temp, 0, 15); 1252 temp = SENSORS_LIMIT(temp, 0, 15);
1255 1253
1256 /* package things up */ 1254 /* package things up */
@@ -1337,7 +1335,7 @@ static ssize_t set_pwm_tmin(struct device *dev,
1337 if (strict_strtol(buf, 10, &temp)) 1335 if (strict_strtol(buf, 10, &temp))
1338 return -EINVAL; 1336 return -EINVAL;
1339 1337
1340 temp = ROUND_DIV(temp, 1000) + 64; 1338 temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
1341 temp = SENSORS_LIMIT(temp, 0, 255); 1339 temp = SENSORS_LIMIT(temp, 0, 255);
1342 1340
1343 mutex_lock(&data->lock); 1341 mutex_lock(&data->lock);
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 1311a595147e..633e1a1e9d79 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -28,6 +28,7 @@
28#include <linux/mutex.h> 28#include <linux/mutex.h>
29#include <linux/delay.h> 29#include <linux/delay.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/kthread.h>
31 32
32/* Addresses to scan */ 33/* Addresses to scan */
33static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END }; 34static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
@@ -74,6 +75,7 @@ I2C_CLIENT_INSMOD_1(adt7470);
74#define ADT7470_REG_PWM12_CFG 0x68 75#define ADT7470_REG_PWM12_CFG 0x68
75#define ADT7470_PWM2_AUTO_MASK 0x40 76#define ADT7470_PWM2_AUTO_MASK 0x40
76#define ADT7470_PWM1_AUTO_MASK 0x80 77#define ADT7470_PWM1_AUTO_MASK 0x80
78#define ADT7470_PWM_AUTO_MASK 0xC0
77#define ADT7470_REG_PWM34_CFG 0x69 79#define ADT7470_REG_PWM34_CFG 0x69
78#define ADT7470_PWM3_AUTO_MASK 0x40 80#define ADT7470_PWM3_AUTO_MASK 0x40
79#define ADT7470_PWM4_AUTO_MASK 0x80 81#define ADT7470_PWM4_AUTO_MASK 0x80
@@ -128,8 +130,11 @@ I2C_CLIENT_INSMOD_1(adt7470);
128/* How often do we reread sensor limit values? (In jiffies) */ 130/* How often do we reread sensor limit values? (In jiffies) */
129#define LIMIT_REFRESH_INTERVAL (60 * HZ) 131#define LIMIT_REFRESH_INTERVAL (60 * HZ)
130 132
131/* sleep 1s while gathering temperature data */ 133/* Wait at least 200ms per sensor for 10 sensors */
132#define TEMP_COLLECTION_TIME 1000 134#define TEMP_COLLECTION_TIME 2000
135
136/* auto update thing won't fire more than every 2s */
137#define AUTO_UPDATE_INTERVAL 2000
133 138
134/* datasheet says to divide this number by the fan reading to get fan rpm */ 139/* datasheet says to divide this number by the fan reading to get fan rpm */
135#define FAN_PERIOD_TO_RPM(x) ((90000 * 60) / (x)) 140#define FAN_PERIOD_TO_RPM(x) ((90000 * 60) / (x))
@@ -137,8 +142,6 @@ I2C_CLIENT_INSMOD_1(adt7470);
137#define FAN_PERIOD_INVALID 65535 142#define FAN_PERIOD_INVALID 65535
138#define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) 143#define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID)
139 144
140#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor))
141
142struct adt7470_data { 145struct adt7470_data {
143 struct device *hwmon_dev; 146 struct device *hwmon_dev;
144 struct attribute_group attrs; 147 struct attribute_group attrs;
@@ -148,6 +151,9 @@ struct adt7470_data {
148 unsigned long sensors_last_updated; /* In jiffies */ 151 unsigned long sensors_last_updated; /* In jiffies */
149 unsigned long limits_last_updated; /* In jiffies */ 152 unsigned long limits_last_updated; /* In jiffies */
150 153
154 int num_temp_sensors; /* -1 = probe */
155 int temperatures_probed;
156
151 s8 temp[ADT7470_TEMP_COUNT]; 157 s8 temp[ADT7470_TEMP_COUNT];
152 s8 temp_min[ADT7470_TEMP_COUNT]; 158 s8 temp_min[ADT7470_TEMP_COUNT];
153 s8 temp_max[ADT7470_TEMP_COUNT]; 159 s8 temp_max[ADT7470_TEMP_COUNT];
@@ -163,6 +169,10 @@ struct adt7470_data {
163 u8 pwm_min[ADT7470_PWM_COUNT]; 169 u8 pwm_min[ADT7470_PWM_COUNT];
164 s8 pwm_tmin[ADT7470_PWM_COUNT]; 170 s8 pwm_tmin[ADT7470_PWM_COUNT];
165 u8 pwm_auto_temp[ADT7470_PWM_COUNT]; 171 u8 pwm_auto_temp[ADT7470_PWM_COUNT];
172
173 struct task_struct *auto_update;
174 struct completion auto_update_stop;
175 unsigned int auto_update_interval;
166}; 176};
167 177
168static int adt7470_probe(struct i2c_client *client, 178static int adt7470_probe(struct i2c_client *client,
@@ -220,40 +230,126 @@ static void adt7470_init_client(struct i2c_client *client)
220 } 230 }
221} 231}
222 232
223static struct adt7470_data *adt7470_update_device(struct device *dev) 233/* Probe for temperature sensors. Assumes lock is held */
234static int adt7470_read_temperatures(struct i2c_client *client,
235 struct adt7470_data *data)
224{ 236{
225 struct i2c_client *client = to_i2c_client(dev); 237 unsigned long res;
226 struct adt7470_data *data = i2c_get_clientdata(client);
227 unsigned long local_jiffies = jiffies;
228 u8 cfg;
229 int i; 238 int i;
239 u8 cfg, pwm[4], pwm_cfg[2];
230 240
231 mutex_lock(&data->lock); 241 /* save pwm[1-4] config register */
232 if (time_before(local_jiffies, data->sensors_last_updated + 242 pwm_cfg[0] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(0));
233 SENSOR_REFRESH_INTERVAL) 243 pwm_cfg[1] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(2));
234 && data->sensors_valid) 244
235 goto no_sensor_update; 245 /* set manual pwm to whatever it is set to now */
246 for (i = 0; i < ADT7470_FAN_COUNT; i++)
247 pwm[i] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM(i));
248
249 /* put pwm in manual mode */
250 i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0),
251 pwm_cfg[0] & ~(ADT7470_PWM_AUTO_MASK));
252 i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2),
253 pwm_cfg[1] & ~(ADT7470_PWM_AUTO_MASK));
254
255 /* write pwm control to whatever it was */
256 for (i = 0; i < ADT7470_FAN_COUNT; i++)
257 i2c_smbus_write_byte_data(client, ADT7470_REG_PWM(i), pwm[i]);
236 258
237 /* start reading temperature sensors */ 259 /* start reading temperature sensors */
238 cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG); 260 cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
239 cfg |= 0x80; 261 cfg |= 0x80;
240 i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg); 262 i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
241 263
242 /* 264 /* Delay is 200ms * number of temp sensors. */
243 * Delay is 200ms * number of tmp05 sensors. Too bad 265 res = msleep_interruptible((data->num_temp_sensors >= 0 ?
244 * there's no way to figure out how many are connected. 266 data->num_temp_sensors * 200 :
245 * For now, assume 1s will work. 267 TEMP_COLLECTION_TIME));
246 */
247 msleep(TEMP_COLLECTION_TIME);
248 268
249 /* done reading temperature sensors */ 269 /* done reading temperature sensors */
250 cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG); 270 cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
251 cfg &= ~0x80; 271 cfg &= ~0x80;
252 i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg); 272 i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
253 273
254 for (i = 0; i < ADT7470_TEMP_COUNT; i++) 274 /* restore pwm[1-4] config registers */
275 i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]);
276 i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]);
277
278 if (res) {
279 printk(KERN_ERR "ha ha, interrupted");
280 return -EAGAIN;
281 }
282
283 /* Only count fans if we have to */
284 if (data->num_temp_sensors >= 0)
285 return 0;
286
287 for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
255 data->temp[i] = i2c_smbus_read_byte_data(client, 288 data->temp[i] = i2c_smbus_read_byte_data(client,
256 ADT7470_TEMP_REG(i)); 289 ADT7470_TEMP_REG(i));
290 if (data->temp[i])
291 data->num_temp_sensors = i + 1;
292 }
293 data->temperatures_probed = 1;
294 return 0;
295}
296
297static int adt7470_update_thread(void *p)
298{
299 struct i2c_client *client = p;
300 struct adt7470_data *data = i2c_get_clientdata(client);
301
302 while (!kthread_should_stop()) {
303 mutex_lock(&data->lock);
304 adt7470_read_temperatures(client, data);
305 mutex_unlock(&data->lock);
306 if (kthread_should_stop())
307 break;
308 msleep_interruptible(data->auto_update_interval);
309 }
310
311 complete_all(&data->auto_update_stop);
312 return 0;
313}
314
315static struct adt7470_data *adt7470_update_device(struct device *dev)
316{
317 struct i2c_client *client = to_i2c_client(dev);
318 struct adt7470_data *data = i2c_get_clientdata(client);
319 unsigned long local_jiffies = jiffies;
320 u8 cfg;
321 int i;
322 int need_sensors = 1;
323 int need_limits = 1;
324
325 /*
326 * Figure out if we need to update the shadow registers.
327 * Lockless means that we may occasionally report out of
328 * date data.
329 */
330 if (time_before(local_jiffies, data->sensors_last_updated +
331 SENSOR_REFRESH_INTERVAL) &&
332 data->sensors_valid)
333 need_sensors = 0;
334
335 if (time_before(local_jiffies, data->limits_last_updated +
336 LIMIT_REFRESH_INTERVAL) &&
337 data->limits_valid)
338 need_limits = 0;
339
340 if (!need_sensors && !need_limits)
341 return data;
342
343 mutex_lock(&data->lock);
344 if (!need_sensors)
345 goto no_sensor_update;
346
347 if (!data->temperatures_probed)
348 adt7470_read_temperatures(client, data);
349 else
350 for (i = 0; i < ADT7470_TEMP_COUNT; i++)
351 data->temp[i] = i2c_smbus_read_byte_data(client,
352 ADT7470_TEMP_REG(i));
257 353
258 for (i = 0; i < ADT7470_FAN_COUNT; i++) 354 for (i = 0; i < ADT7470_FAN_COUNT; i++)
259 data->fan[i] = adt7470_read_word_data(client, 355 data->fan[i] = adt7470_read_word_data(client,
@@ -302,9 +398,7 @@ static struct adt7470_data *adt7470_update_device(struct device *dev)
302 data->sensors_valid = 1; 398 data->sensors_valid = 1;
303 399
304no_sensor_update: 400no_sensor_update:
305 if (time_before(local_jiffies, data->limits_last_updated + 401 if (!need_limits)
306 LIMIT_REFRESH_INTERVAL)
307 && data->limits_valid)
308 goto out; 402 goto out;
309 403
310 for (i = 0; i < ADT7470_TEMP_COUNT; i++) { 404 for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
@@ -338,6 +432,66 @@ out:
338 return data; 432 return data;
339} 433}
340 434
435static ssize_t show_auto_update_interval(struct device *dev,
436 struct device_attribute *devattr,
437 char *buf)
438{
439 struct adt7470_data *data = adt7470_update_device(dev);
440 return sprintf(buf, "%d\n", data->auto_update_interval);
441}
442
443static ssize_t set_auto_update_interval(struct device *dev,
444 struct device_attribute *devattr,
445 const char *buf,
446 size_t count)
447{
448 struct i2c_client *client = to_i2c_client(dev);
449 struct adt7470_data *data = i2c_get_clientdata(client);
450 long temp;
451
452 if (strict_strtol(buf, 10, &temp))
453 return -EINVAL;
454
455 temp = SENSORS_LIMIT(temp, 0, 60000);
456
457 mutex_lock(&data->lock);
458 data->auto_update_interval = temp;
459 mutex_unlock(&data->lock);
460
461 return count;
462}
463
464static ssize_t show_num_temp_sensors(struct device *dev,
465 struct device_attribute *devattr,
466 char *buf)
467{
468 struct adt7470_data *data = adt7470_update_device(dev);
469 return sprintf(buf, "%d\n", data->num_temp_sensors);
470}
471
472static ssize_t set_num_temp_sensors(struct device *dev,
473 struct device_attribute *devattr,
474 const char *buf,
475 size_t count)
476{
477 struct i2c_client *client = to_i2c_client(dev);
478 struct adt7470_data *data = i2c_get_clientdata(client);
479 long temp;
480
481 if (strict_strtol(buf, 10, &temp))
482 return -EINVAL;
483
484 temp = SENSORS_LIMIT(temp, -1, 10);
485
486 mutex_lock(&data->lock);
487 data->num_temp_sensors = temp;
488 if (temp < 0)
489 data->temperatures_probed = 0;
490 mutex_unlock(&data->lock);
491
492 return count;
493}
494
341static ssize_t show_temp_min(struct device *dev, 495static ssize_t show_temp_min(struct device *dev,
342 struct device_attribute *devattr, 496 struct device_attribute *devattr,
343 char *buf) 497 char *buf)
@@ -360,7 +514,7 @@ static ssize_t set_temp_min(struct device *dev,
360 if (strict_strtol(buf, 10, &temp)) 514 if (strict_strtol(buf, 10, &temp))
361 return -EINVAL; 515 return -EINVAL;
362 516
363 temp = ROUND_DIV(temp, 1000); 517 temp = DIV_ROUND_CLOSEST(temp, 1000);
364 temp = SENSORS_LIMIT(temp, 0, 255); 518 temp = SENSORS_LIMIT(temp, 0, 255);
365 519
366 mutex_lock(&data->lock); 520 mutex_lock(&data->lock);
@@ -394,7 +548,7 @@ static ssize_t set_temp_max(struct device *dev,
394 if (strict_strtol(buf, 10, &temp)) 548 if (strict_strtol(buf, 10, &temp))
395 return -EINVAL; 549 return -EINVAL;
396 550
397 temp = ROUND_DIV(temp, 1000); 551 temp = DIV_ROUND_CLOSEST(temp, 1000);
398 temp = SENSORS_LIMIT(temp, 0, 255); 552 temp = SENSORS_LIMIT(temp, 0, 255);
399 553
400 mutex_lock(&data->lock); 554 mutex_lock(&data->lock);
@@ -671,7 +825,7 @@ static ssize_t set_pwm_tmin(struct device *dev,
671 if (strict_strtol(buf, 10, &temp)) 825 if (strict_strtol(buf, 10, &temp))
672 return -EINVAL; 826 return -EINVAL;
673 827
674 temp = ROUND_DIV(temp, 1000); 828 temp = DIV_ROUND_CLOSEST(temp, 1000);
675 temp = SENSORS_LIMIT(temp, 0, 255); 829 temp = SENSORS_LIMIT(temp, 0, 255);
676 830
677 mutex_lock(&data->lock); 831 mutex_lock(&data->lock);
@@ -804,6 +958,10 @@ static ssize_t show_alarm(struct device *dev,
804} 958}
805 959
806static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL); 960static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL);
961static DEVICE_ATTR(num_temp_sensors, S_IWUSR | S_IRUGO, show_num_temp_sensors,
962 set_num_temp_sensors);
963static DEVICE_ATTR(auto_update_interval, S_IWUSR | S_IRUGO,
964 show_auto_update_interval, set_auto_update_interval);
807 965
808static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, 966static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max,
809 set_temp_max, 0); 967 set_temp_max, 0);
@@ -976,6 +1134,8 @@ static SENSOR_DEVICE_ATTR(pwm4_auto_channels_temp, S_IWUSR | S_IRUGO,
976static struct attribute *adt7470_attr[] = 1134static struct attribute *adt7470_attr[] =
977{ 1135{
978 &dev_attr_alarm_mask.attr, 1136 &dev_attr_alarm_mask.attr,
1137 &dev_attr_num_temp_sensors.attr,
1138 &dev_attr_auto_update_interval.attr,
979 &sensor_dev_attr_temp1_max.dev_attr.attr, 1139 &sensor_dev_attr_temp1_max.dev_attr.attr,
980 &sensor_dev_attr_temp2_max.dev_attr.attr, 1140 &sensor_dev_attr_temp2_max.dev_attr.attr,
981 &sensor_dev_attr_temp3_max.dev_attr.attr, 1141 &sensor_dev_attr_temp3_max.dev_attr.attr,
@@ -1108,6 +1268,9 @@ static int adt7470_probe(struct i2c_client *client,
1108 goto exit; 1268 goto exit;
1109 } 1269 }
1110 1270
1271 data->num_temp_sensors = -1;
1272 data->auto_update_interval = AUTO_UPDATE_INTERVAL;
1273
1111 i2c_set_clientdata(client, data); 1274 i2c_set_clientdata(client, data);
1112 mutex_init(&data->lock); 1275 mutex_init(&data->lock);
1113 1276
@@ -1127,8 +1290,16 @@ static int adt7470_probe(struct i2c_client *client,
1127 goto exit_remove; 1290 goto exit_remove;
1128 } 1291 }
1129 1292
1293 init_completion(&data->auto_update_stop);
1294 data->auto_update = kthread_run(adt7470_update_thread, client,
1295 dev_name(data->hwmon_dev));
1296 if (IS_ERR(data->auto_update))
1297 goto exit_unregister;
1298
1130 return 0; 1299 return 0;
1131 1300
1301exit_unregister:
1302 hwmon_device_unregister(data->hwmon_dev);
1132exit_remove: 1303exit_remove:
1133 sysfs_remove_group(&client->dev.kobj, &data->attrs); 1304 sysfs_remove_group(&client->dev.kobj, &data->attrs);
1134exit_free: 1305exit_free:
@@ -1141,6 +1312,8 @@ static int adt7470_remove(struct i2c_client *client)
1141{ 1312{
1142 struct adt7470_data *data = i2c_get_clientdata(client); 1313 struct adt7470_data *data = i2c_get_clientdata(client);
1143 1314
1315 kthread_stop(data->auto_update);
1316 wait_for_completion(&data->auto_update_stop);
1144 hwmon_device_unregister(data->hwmon_dev); 1317 hwmon_device_unregister(data->hwmon_dev);
1145 sysfs_remove_group(&client->dev.kobj, &data->attrs); 1318 sysfs_remove_group(&client->dev.kobj, &data->attrs);
1146 kfree(data); 1319 kfree(data);
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index 18aa30866a6c..0a6ce2367b42 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -129,8 +129,6 @@ I2C_CLIENT_INSMOD_1(adt7473);
129#define FAN_PERIOD_INVALID 65535 129#define FAN_PERIOD_INVALID 65535
130#define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) 130#define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID)
131 131
132#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor))
133
134struct adt7473_data { 132struct adt7473_data {
135 struct device *hwmon_dev; 133 struct device *hwmon_dev;
136 struct attribute_group attrs; 134 struct attribute_group attrs;
@@ -459,7 +457,7 @@ static ssize_t set_temp_min(struct device *dev,
459 if (strict_strtol(buf, 10, &temp)) 457 if (strict_strtol(buf, 10, &temp))
460 return -EINVAL; 458 return -EINVAL;
461 459
462 temp = ROUND_DIV(temp, 1000); 460 temp = DIV_ROUND_CLOSEST(temp, 1000);
463 temp = encode_temp(data->temp_twos_complement, temp); 461 temp = encode_temp(data->temp_twos_complement, temp);
464 462
465 mutex_lock(&data->lock); 463 mutex_lock(&data->lock);
@@ -495,7 +493,7 @@ static ssize_t set_temp_max(struct device *dev,
495 if (strict_strtol(buf, 10, &temp)) 493 if (strict_strtol(buf, 10, &temp))
496 return -EINVAL; 494 return -EINVAL;
497 495
498 temp = ROUND_DIV(temp, 1000); 496 temp = DIV_ROUND_CLOSEST(temp, 1000);
499 temp = encode_temp(data->temp_twos_complement, temp); 497 temp = encode_temp(data->temp_twos_complement, temp);
500 498
501 mutex_lock(&data->lock); 499 mutex_lock(&data->lock);
@@ -720,7 +718,7 @@ static ssize_t set_temp_tmax(struct device *dev,
720 if (strict_strtol(buf, 10, &temp)) 718 if (strict_strtol(buf, 10, &temp))
721 return -EINVAL; 719 return -EINVAL;
722 720
723 temp = ROUND_DIV(temp, 1000); 721 temp = DIV_ROUND_CLOSEST(temp, 1000);
724 temp = encode_temp(data->temp_twos_complement, temp); 722 temp = encode_temp(data->temp_twos_complement, temp);
725 723
726 mutex_lock(&data->lock); 724 mutex_lock(&data->lock);
@@ -756,7 +754,7 @@ static ssize_t set_temp_tmin(struct device *dev,
756 if (strict_strtol(buf, 10, &temp)) 754 if (strict_strtol(buf, 10, &temp))
757 return -EINVAL; 755 return -EINVAL;
758 756
759 temp = ROUND_DIV(temp, 1000); 757 temp = DIV_ROUND_CLOSEST(temp, 1000);
760 temp = encode_temp(data->temp_twos_complement, temp); 758 temp = encode_temp(data->temp_twos_complement, temp);
761 759
762 mutex_lock(&data->lock); 760 mutex_lock(&data->lock);
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 086c2a5cef0b..dca47a591baf 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -131,6 +131,10 @@ static const char* temperature_sensors_sets[][36] = {
131/* Set 14: iMac 6,1 */ 131/* Set 14: iMac 6,1 */
132 { "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P", 132 { "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P",
133 "TO0P", "Tp0P", NULL }, 133 "TO0P", "Tp0P", NULL },
134/* Set 15: MacBook Air 2,1 */
135 { "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TN0D", "TTF0",
136 "TV0P", "TVFP", "TW0P", "Th0P", "Tp0P", "Tp1P", "TpFP", "Ts0P",
137 "Ts0S", NULL },
134}; 138};
135 139
136/* List of keys used to read/write fan speeds */ 140/* List of keys used to read/write fan speeds */
@@ -1301,11 +1305,17 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
1301 { .accelerometer = 0, .light = 0, .temperature_set = 13 }, 1305 { .accelerometer = 0, .light = 0, .temperature_set = 13 },
1302/* iMac 6: light sensor only, temperature set 14 */ 1306/* iMac 6: light sensor only, temperature set 14 */
1303 { .accelerometer = 0, .light = 0, .temperature_set = 14 }, 1307 { .accelerometer = 0, .light = 0, .temperature_set = 14 },
1308/* MacBook Air 2,1: accelerometer, backlight and temperature set 15 */
1309 { .accelerometer = 1, .light = 1, .temperature_set = 15 },
1304}; 1310};
1305 1311
1306/* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". 1312/* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
1307 * So we need to put "Apple MacBook Pro" before "Apple MacBook". */ 1313 * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
1308static __initdata struct dmi_system_id applesmc_whitelist[] = { 1314static __initdata struct dmi_system_id applesmc_whitelist[] = {
1315 { applesmc_dmi_match, "Apple MacBook Air 2", {
1316 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
1317 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir2") },
1318 &applesmc_dmi_data[15]},
1309 { applesmc_dmi_match, "Apple MacBook Air", { 1319 { applesmc_dmi_match, "Apple MacBook Air", {
1310 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), 1320 DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
1311 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") }, 1321 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index 537d9fb2ff88..a36363312f2f 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -40,7 +40,7 @@
40 40
41static inline u16 extract_value(const char *data, int offset) 41static inline u16 extract_value(const char *data, int offset)
42{ 42{
43 return be16_to_cpup((u16 *)&data[offset]); 43 return be16_to_cpup((__be16 *)&data[offset]);
44} 44}
45 45
46#define TEMP_SENSOR 1 46#define TEMP_SENSOR 1
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 4ee85fcf9aaf..3f9503867e6b 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -511,6 +511,13 @@ config BLK_DEV_PIIX
511 This allows the kernel to change PIO, DMA and UDMA speeds and to 511 This allows the kernel to change PIO, DMA and UDMA speeds and to
512 configure the chip to optimum performance. 512 configure the chip to optimum performance.
513 513
514config BLK_DEV_IT8172
515 tristate "IT8172 IDE support"
516 select BLK_DEV_IDEDMA_PCI
517 help
518 This driver adds support for the IDE controller on the
519 IT8172 System Controller.
520
514config BLK_DEV_IT8213 521config BLK_DEV_IT8213
515 tristate "IT8213 IDE support" 522 tristate "IT8213 IDE support"
516 select BLK_DEV_IDEDMA_PCI 523 select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 410728992e6a..c2b9c93f0095 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_BLK_DEV_SC1200) += sc1200.o
47obj-$(CONFIG_BLK_DEV_CY82C693) += cy82c693.o 47obj-$(CONFIG_BLK_DEV_CY82C693) += cy82c693.o
48obj-$(CONFIG_BLK_DEV_DELKIN) += delkin_cb.o 48obj-$(CONFIG_BLK_DEV_DELKIN) += delkin_cb.o
49obj-$(CONFIG_BLK_DEV_HPT366) += hpt366.o 49obj-$(CONFIG_BLK_DEV_HPT366) += hpt366.o
50obj-$(CONFIG_BLK_DEV_IT8172) += it8172.o
50obj-$(CONFIG_BLK_DEV_IT8213) += it8213.o 51obj-$(CONFIG_BLK_DEV_IT8213) += it8213.o
51obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o 52obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o
52obj-$(CONFIG_BLK_DEV_JMICRON) += jmicron.o 53obj-$(CONFIG_BLK_DEV_JMICRON) += jmicron.o
diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 4142c698e0d3..4485b9c6f0e6 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -83,7 +83,7 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr
83 83
84static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) 84static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
85{ 85{
86 ide_hwif_t *hwif = HWIF(drive); 86 ide_hwif_t *hwif = drive->hwif;
87 struct pci_dev *dev = to_pci_dev(hwif->dev); 87 struct pci_dev *dev = to_pci_dev(hwif->dev);
88 struct ide_host *host = pci_get_drvdata(dev); 88 struct ide_host *host = pci_get_drvdata(dev);
89 struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; 89 struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
@@ -111,7 +111,7 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
111 111
112static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) 112static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
113{ 113{
114 ide_hwif_t *hwif = HWIF(drive); 114 ide_hwif_t *hwif = drive->hwif;
115 struct pci_dev *dev = to_pci_dev(hwif->dev); 115 struct pci_dev *dev = to_pci_dev(hwif->dev);
116 struct ide_host *host = pci_get_drvdata(dev); 116 struct ide_host *host = pci_get_drvdata(dev);
117 struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; 117 struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 45d2356bb725..66f43083408b 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -68,7 +68,7 @@ static struct pci_dev *isa_dev;
68 68
69static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) 69static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
70{ 70{
71 ide_hwif_t *hwif = HWIF(drive); 71 ide_hwif_t *hwif = drive->hwif;
72 struct pci_dev *dev = to_pci_dev(hwif->dev); 72 struct pci_dev *dev = to_pci_dev(hwif->dev);
73 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); 73 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
74 int s_time = t->setup, a_time = t->active, c_time = t->cycle; 74 int s_time = t->setup, a_time = t->active, c_time = t->cycle;
@@ -150,7 +150,7 @@ static u8 ali_udma_filter(ide_drive_t *drive)
150 150
151static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) 151static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
152{ 152{
153 ide_hwif_t *hwif = HWIF(drive); 153 ide_hwif_t *hwif = drive->hwif;
154 struct pci_dev *dev = to_pci_dev(hwif->dev); 154 struct pci_dev *dev = to_pci_dev(hwif->dev);
155 u8 speed1 = speed; 155 u8 speed1 = speed;
156 u8 unit = drive->dn & 1; 156 u8 unit = drive->dn & 1;
@@ -198,7 +198,7 @@ static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
198static int ali15x3_dma_setup(ide_drive_t *drive) 198static int ali15x3_dma_setup(ide_drive_t *drive)
199{ 199{
200 if (m5229_revision < 0xC2 && drive->media != ide_disk) { 200 if (m5229_revision < 0xC2 && drive->media != ide_disk) {
201 if (rq_data_dir(drive->hwif->hwgroup->rq)) 201 if (rq_data_dir(drive->hwif->rq))
202 return 1; /* try PIO instead of DMA */ 202 return 1; /* try PIO instead of DMA */
203 } 203 }
204 return ide_dma_setup(drive); 204 return ide_dma_setup(drive);
@@ -490,8 +490,6 @@ static int __devinit init_dma_ali15x3(ide_hwif_t *hwif,
490 if (ide_allocate_dma_engine(hwif)) 490 if (ide_allocate_dma_engine(hwif))
491 return -1; 491 return -1;
492 492
493 hwif->dma_ops = &sff_dma_ops;
494
495 return 0; 493 return 0;
496} 494}
497 495
@@ -511,6 +509,7 @@ static const struct ide_dma_ops ali_dma_ops = {
511 .dma_test_irq = ide_dma_test_irq, 509 .dma_test_irq = ide_dma_test_irq,
512 .dma_lost_irq = ide_dma_lost_irq, 510 .dma_lost_irq = ide_dma_lost_irq,
513 .dma_timeout = ide_dma_timeout, 511 .dma_timeout = ide_dma_timeout,
512 .dma_sff_read_status = ide_dma_sff_read_status,
514}; 513};
515 514
516static const struct ide_port_info ali15x3_chipset __devinitdata = { 515static const struct ide_port_info ali15x3_chipset __devinitdata = {
@@ -519,6 +518,7 @@ static const struct ide_port_info ali15x3_chipset __devinitdata = {
519 .init_hwif = init_hwif_ali15x3, 518 .init_hwif = init_hwif_ali15x3,
520 .init_dma = init_dma_ali15x3, 519 .init_dma = init_dma_ali15x3,
521 .port_ops = &ali_port_ops, 520 .port_ops = &ali_port_ops,
521 .dma_ops = &sff_dma_ops,
522 .pio_mask = ATA_PIO5, 522 .pio_mask = ATA_PIO5,
523 .swdma_mask = ATA_SWDMA2, 523 .swdma_mask = ATA_SWDMA2,
524 .mwdma_mask = ATA_MWDMA2, 524 .mwdma_mask = ATA_MWDMA2,
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
index c6bcd3014a29..69660a431cd9 100644
--- a/drivers/ide/amd74xx.c
+++ b/drivers/ide/amd74xx.c
@@ -82,7 +82,7 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed)
82{ 82{
83 ide_hwif_t *hwif = drive->hwif; 83 ide_hwif_t *hwif = drive->hwif;
84 struct pci_dev *dev = to_pci_dev(hwif->dev); 84 struct pci_dev *dev = to_pci_dev(hwif->dev);
85 ide_drive_t *peer = hwif->drives + (~drive->dn & 1); 85 ide_drive_t *peer = ide_get_pair_dev(drive);
86 struct ide_timing t, p; 86 struct ide_timing t, p;
87 int T, UT; 87 int T, UT;
88 u8 udma_mask = hwif->ultra_mask; 88 u8 udma_mask = hwif->ultra_mask;
@@ -92,7 +92,7 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed)
92 92
93 ide_timing_compute(drive, speed, &t, T, UT); 93 ide_timing_compute(drive, speed, &t, T, UT);
94 94
95 if (peer->dev_flags & IDE_DFLAG_PRESENT) { 95 if (peer) {
96 ide_timing_compute(peer, peer->current_speed, &p, T, UT); 96 ide_timing_compute(peer, peer->current_speed, &p, T, UT);
97 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); 97 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
98 } 98 }
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 0ec8fd1e4dcb..79a2dfed8eb7 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -212,8 +212,8 @@ static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed)
212static int auide_build_dmatable(ide_drive_t *drive) 212static int auide_build_dmatable(ide_drive_t *drive)
213{ 213{
214 int i, iswrite, count = 0; 214 int i, iswrite, count = 0;
215 ide_hwif_t *hwif = HWIF(drive); 215 ide_hwif_t *hwif = drive->hwif;
216 struct request *rq = HWGROUP(drive)->rq; 216 struct request *rq = hwif->rq;
217 _auide_hwif *ahwif = &auide_hwif; 217 _auide_hwif *ahwif = &auide_hwif;
218 struct scatterlist *sg; 218 struct scatterlist *sg;
219 219
@@ -286,7 +286,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
286 286
287static int auide_dma_end(ide_drive_t *drive) 287static int auide_dma_end(ide_drive_t *drive)
288{ 288{
289 ide_hwif_t *hwif = HWIF(drive); 289 ide_hwif_t *hwif = drive->hwif;
290 290
291 if (hwif->sg_nents) { 291 if (hwif->sg_nents) {
292 ide_destroy_dmatable(drive); 292 ide_destroy_dmatable(drive);
@@ -309,8 +309,8 @@ static void auide_dma_exec_cmd(ide_drive_t *drive, u8 command)
309} 309}
310 310
311static int auide_dma_setup(ide_drive_t *drive) 311static int auide_dma_setup(ide_drive_t *drive)
312{ 312{
313 struct request *rq = HWGROUP(drive)->rq; 313 struct request *rq = drive->hwif->rq;
314 314
315 if (!auide_build_dmatable(drive)) { 315 if (!auide_build_dmatable(drive)) {
316 ide_map_sg(drive, rq); 316 ide_map_sg(drive, rq);
@@ -502,7 +502,6 @@ static const struct ide_tp_ops au1xxx_tp_ops = {
502 .exec_command = ide_exec_command, 502 .exec_command = ide_exec_command,
503 .read_status = ide_read_status, 503 .read_status = ide_read_status,
504 .read_altstatus = ide_read_altstatus, 504 .read_altstatus = ide_read_altstatus,
505 .read_sff_dma_status = ide_read_sff_dma_status,
506 505
507 .set_irq = ide_set_irq, 506 .set_irq = ide_set_irq,
508 507
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index e4306647d00d..8890276fef7f 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -467,11 +467,10 @@ static void program_drive_counts(ide_drive_t *drive, unsigned int index)
467 * so we merge the timings, using the slowest value for each timing. 467 * so we merge the timings, using the slowest value for each timing.
468 */ 468 */
469 if (index > 1) { 469 if (index > 1) {
470 ide_hwif_t *hwif = drive->hwif; 470 ide_drive_t *peer = ide_get_pair_dev(drive);
471 ide_drive_t *peer = &hwif->drives[!(drive->dn & 1)];
472 unsigned int mate = index ^ 1; 471 unsigned int mate = index ^ 1;
473 472
474 if (peer->dev_flags & IDE_DFLAG_PRESENT) { 473 if (peer) {
475 if (setup_count < setup_counts[mate]) 474 if (setup_count < setup_counts[mate])
476 setup_count = setup_counts[mate]; 475 setup_count = setup_counts[mate];
477 if (active_count < active_counts[mate]) 476 if (active_count < active_counts[mate])
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 3623bf013bcf..2f9688d87ecd 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -115,7 +115,7 @@ static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_
115 */ 115 */
116static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) 116static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
117{ 117{
118 ide_hwif_t *hwif = HWIF(drive); 118 ide_hwif_t *hwif = drive->hwif;
119 struct pci_dev *dev = to_pci_dev(hwif->dev); 119 struct pci_dev *dev = to_pci_dev(hwif->dev);
120 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); 120 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
121 unsigned int cycle_time; 121 unsigned int cycle_time;
@@ -138,10 +138,12 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
138 * the slowest address setup timing ourselves. 138 * the slowest address setup timing ourselves.
139 */ 139 */
140 if (hwif->channel) { 140 if (hwif->channel) {
141 ide_drive_t *drives = hwif->drives; 141 ide_drive_t *pair = ide_get_pair_dev(drive);
142 142
143 drive->drive_data = setup_count; 143 drive->drive_data = setup_count;
144 setup_count = max(drives[0].drive_data, drives[1].drive_data); 144
145 if (pair)
146 setup_count = max_t(u8, setup_count, pair->drive_data);
145 } 147 }
146 148
147 if (setup_count > 5) /* shouldn't actually happen... */ 149 if (setup_count > 5) /* shouldn't actually happen... */
@@ -180,7 +182,7 @@ static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio)
180 182
181static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed) 183static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
182{ 184{
183 ide_hwif_t *hwif = HWIF(drive); 185 ide_hwif_t *hwif = drive->hwif;
184 struct pci_dev *dev = to_pci_dev(hwif->dev); 186 struct pci_dev *dev = to_pci_dev(hwif->dev);
185 u8 unit = drive->dn & 0x01; 187 u8 unit = drive->dn & 0x01;
186 u8 regU = 0, pciU = hwif->channel ? UDIDETCR1 : UDIDETCR0; 188 u8 regU = 0, pciU = hwif->channel ? UDIDETCR1 : UDIDETCR0;
@@ -226,7 +228,7 @@ static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
226 228
227static int cmd648_dma_end(ide_drive_t *drive) 229static int cmd648_dma_end(ide_drive_t *drive)
228{ 230{
229 ide_hwif_t *hwif = HWIF(drive); 231 ide_hwif_t *hwif = drive->hwif;
230 unsigned long base = hwif->dma_base - (hwif->channel * 8); 232 unsigned long base = hwif->dma_base - (hwif->channel * 8);
231 int err = ide_dma_end(drive); 233 int err = ide_dma_end(drive);
232 u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 : 234 u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 :
@@ -242,7 +244,7 @@ static int cmd648_dma_end(ide_drive_t *drive)
242 244
243static int cmd64x_dma_end(ide_drive_t *drive) 245static int cmd64x_dma_end(ide_drive_t *drive)
244{ 246{
245 ide_hwif_t *hwif = HWIF(drive); 247 ide_hwif_t *hwif = drive->hwif;
246 struct pci_dev *dev = to_pci_dev(hwif->dev); 248 struct pci_dev *dev = to_pci_dev(hwif->dev);
247 int irq_reg = hwif->channel ? ARTTIM23 : CFR; 249 int irq_reg = hwif->channel ? ARTTIM23 : CFR;
248 u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 : 250 u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -259,7 +261,7 @@ static int cmd64x_dma_end(ide_drive_t *drive)
259 261
260static int cmd648_dma_test_irq(ide_drive_t *drive) 262static int cmd648_dma_test_irq(ide_drive_t *drive)
261{ 263{
262 ide_hwif_t *hwif = HWIF(drive); 264 ide_hwif_t *hwif = drive->hwif;
263 unsigned long base = hwif->dma_base - (hwif->channel * 8); 265 unsigned long base = hwif->dma_base - (hwif->channel * 8);
264 u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 : 266 u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 :
265 MRDMODE_INTR_CH0; 267 MRDMODE_INTR_CH0;
@@ -282,7 +284,7 @@ static int cmd648_dma_test_irq(ide_drive_t *drive)
282 284
283static int cmd64x_dma_test_irq(ide_drive_t *drive) 285static int cmd64x_dma_test_irq(ide_drive_t *drive)
284{ 286{
285 ide_hwif_t *hwif = HWIF(drive); 287 ide_hwif_t *hwif = drive->hwif;
286 struct pci_dev *dev = to_pci_dev(hwif->dev); 288 struct pci_dev *dev = to_pci_dev(hwif->dev);
287 int irq_reg = hwif->channel ? ARTTIM23 : CFR; 289 int irq_reg = hwif->channel ? ARTTIM23 : CFR;
288 u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 : 290 u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -313,7 +315,7 @@ static int cmd64x_dma_test_irq(ide_drive_t *drive)
313 315
314static int cmd646_1_dma_end(ide_drive_t *drive) 316static int cmd646_1_dma_end(ide_drive_t *drive)
315{ 317{
316 ide_hwif_t *hwif = HWIF(drive); 318 ide_hwif_t *hwif = drive->hwif;
317 u8 dma_stat = 0, dma_cmd = 0; 319 u8 dma_stat = 0, dma_cmd = 0;
318 320
319 drive->waiting_for_dma = 0; 321 drive->waiting_for_dma = 0;
@@ -383,6 +385,7 @@ static const struct ide_dma_ops cmd64x_dma_ops = {
383 .dma_test_irq = cmd64x_dma_test_irq, 385 .dma_test_irq = cmd64x_dma_test_irq,
384 .dma_lost_irq = ide_dma_lost_irq, 386 .dma_lost_irq = ide_dma_lost_irq,
385 .dma_timeout = ide_dma_timeout, 387 .dma_timeout = ide_dma_timeout,
388 .dma_sff_read_status = ide_dma_sff_read_status,
386}; 389};
387 390
388static const struct ide_dma_ops cmd646_rev1_dma_ops = { 391static const struct ide_dma_ops cmd646_rev1_dma_ops = {
@@ -394,6 +397,7 @@ static const struct ide_dma_ops cmd646_rev1_dma_ops = {
394 .dma_test_irq = ide_dma_test_irq, 397 .dma_test_irq = ide_dma_test_irq,
395 .dma_lost_irq = ide_dma_lost_irq, 398 .dma_lost_irq = ide_dma_lost_irq,
396 .dma_timeout = ide_dma_timeout, 399 .dma_timeout = ide_dma_timeout,
400 .dma_sff_read_status = ide_dma_sff_read_status,
397}; 401};
398 402
399static const struct ide_dma_ops cmd648_dma_ops = { 403static const struct ide_dma_ops cmd648_dma_ops = {
@@ -405,6 +409,7 @@ static const struct ide_dma_ops cmd648_dma_ops = {
405 .dma_test_irq = cmd648_dma_test_irq, 409 .dma_test_irq = cmd648_dma_test_irq,
406 .dma_lost_irq = ide_dma_lost_irq, 410 .dma_lost_irq = ide_dma_lost_irq,
407 .dma_timeout = ide_dma_timeout, 411 .dma_timeout = ide_dma_timeout,
412 .dma_sff_read_status = ide_dma_sff_read_status,
408}; 413};
409 414
410static const struct ide_port_info cmd64x_chipsets[] __devinitdata = { 415static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 5efb467f8fa0..d003bec56ff9 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -59,7 +59,7 @@ static struct pio_clocks cs5520_pio_clocks[]={
59 59
60static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio) 60static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio)
61{ 61{
62 ide_hwif_t *hwif = HWIF(drive); 62 ide_hwif_t *hwif = drive->hwif;
63 struct pci_dev *pdev = to_pci_dev(hwif->dev); 63 struct pci_dev *pdev = to_pci_dev(hwif->dev);
64 int controller = drive->dn > 1 ? 1 : 0; 64 int controller = drive->dn > 1 ? 1 : 0;
65 65
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index d37baf8ecc5f..74fc5401f407 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -203,7 +203,7 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode)
203 203
204static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) 204static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
205{ 205{
206 ide_hwif_t *hwif = HWIF(drive); 206 ide_hwif_t *hwif = drive->hwif;
207 struct pci_dev *dev = to_pci_dev(hwif->dev); 207 struct pci_dev *dev = to_pci_dev(hwif->dev);
208 pio_clocks_t pclk; 208 pio_clocks_t pclk;
209 unsigned int addrCtrl; 209 unsigned int addrCtrl;
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 39d500d84b07..a5ba820d69bb 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -70,7 +70,6 @@ static const struct ide_tp_ops falconide_tp_ops = {
70 .exec_command = ide_exec_command, 70 .exec_command = ide_exec_command,
71 .read_status = ide_read_status, 71 .read_status = ide_read_status,
72 .read_altstatus = ide_read_altstatus, 72 .read_altstatus = ide_read_altstatus,
73 .read_sff_dma_status = ide_read_sff_dma_status,
74 73
75 .set_irq = ide_set_irq, 74 .set_irq = ide_set_irq,
76 75
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index b18e10d99d2e..3eb9b5c63a0f 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -626,7 +626,7 @@ static struct hpt_info *hpt3xx_get_info(struct device *dev)
626 626
627static u8 hpt3xx_udma_filter(ide_drive_t *drive) 627static u8 hpt3xx_udma_filter(ide_drive_t *drive)
628{ 628{
629 ide_hwif_t *hwif = HWIF(drive); 629 ide_hwif_t *hwif = drive->hwif;
630 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 630 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
631 u8 mask = hwif->ultra_mask; 631 u8 mask = hwif->ultra_mask;
632 632
@@ -665,7 +665,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive)
665 665
666static u8 hpt3xx_mdma_filter(ide_drive_t *drive) 666static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
667{ 667{
668 ide_hwif_t *hwif = HWIF(drive); 668 ide_hwif_t *hwif = drive->hwif;
669 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 669 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
670 670
671 switch (info->chip_type) { 671 switch (info->chip_type) {
@@ -743,7 +743,7 @@ static void hpt3xx_quirkproc(ide_drive_t *drive)
743 743
744static void hpt3xx_maskproc(ide_drive_t *drive, int mask) 744static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
745{ 745{
746 ide_hwif_t *hwif = HWIF(drive); 746 ide_hwif_t *hwif = drive->hwif;
747 struct pci_dev *dev = to_pci_dev(hwif->dev); 747 struct pci_dev *dev = to_pci_dev(hwif->dev);
748 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 748 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
749 749
@@ -788,7 +788,7 @@ static void hpt366_dma_lost_irq(ide_drive_t *drive)
788 788
789static void hpt370_clear_engine(ide_drive_t *drive) 789static void hpt370_clear_engine(ide_drive_t *drive)
790{ 790{
791 ide_hwif_t *hwif = HWIF(drive); 791 ide_hwif_t *hwif = drive->hwif;
792 struct pci_dev *dev = to_pci_dev(hwif->dev); 792 struct pci_dev *dev = to_pci_dev(hwif->dev);
793 793
794 pci_write_config_byte(dev, hwif->select_data, 0x37); 794 pci_write_config_byte(dev, hwif->select_data, 0x37);
@@ -797,7 +797,7 @@ static void hpt370_clear_engine(ide_drive_t *drive)
797 797
798static void hpt370_irq_timeout(ide_drive_t *drive) 798static void hpt370_irq_timeout(ide_drive_t *drive)
799{ 799{
800 ide_hwif_t *hwif = HWIF(drive); 800 ide_hwif_t *hwif = drive->hwif;
801 struct pci_dev *dev = to_pci_dev(hwif->dev); 801 struct pci_dev *dev = to_pci_dev(hwif->dev);
802 u16 bfifo = 0; 802 u16 bfifo = 0;
803 u8 dma_cmd; 803 u8 dma_cmd;
@@ -822,7 +822,7 @@ static void hpt370_dma_start(ide_drive_t *drive)
822 822
823static int hpt370_dma_end(ide_drive_t *drive) 823static int hpt370_dma_end(ide_drive_t *drive)
824{ 824{
825 ide_hwif_t *hwif = HWIF(drive); 825 ide_hwif_t *hwif = drive->hwif;
826 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); 826 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
827 827
828 if (dma_stat & 0x01) { 828 if (dma_stat & 0x01) {
@@ -844,7 +844,7 @@ static void hpt370_dma_timeout(ide_drive_t *drive)
844/* returns 1 if DMA IRQ issued, 0 otherwise */ 844/* returns 1 if DMA IRQ issued, 0 otherwise */
845static int hpt374_dma_test_irq(ide_drive_t *drive) 845static int hpt374_dma_test_irq(ide_drive_t *drive)
846{ 846{
847 ide_hwif_t *hwif = HWIF(drive); 847 ide_hwif_t *hwif = drive->hwif;
848 struct pci_dev *dev = to_pci_dev(hwif->dev); 848 struct pci_dev *dev = to_pci_dev(hwif->dev);
849 u16 bfifo = 0; 849 u16 bfifo = 0;
850 u8 dma_stat; 850 u8 dma_stat;
@@ -865,7 +865,7 @@ static int hpt374_dma_test_irq(ide_drive_t *drive)
865 865
866static int hpt374_dma_end(ide_drive_t *drive) 866static int hpt374_dma_end(ide_drive_t *drive)
867{ 867{
868 ide_hwif_t *hwif = HWIF(drive); 868 ide_hwif_t *hwif = drive->hwif;
869 struct pci_dev *dev = to_pci_dev(hwif->dev); 869 struct pci_dev *dev = to_pci_dev(hwif->dev);
870 u8 mcr = 0, mcr_addr = hwif->select_data; 870 u8 mcr = 0, mcr_addr = hwif->select_data;
871 u8 bwsr = 0, mask = hwif->channel ? 0x02 : 0x01; 871 u8 bwsr = 0, mask = hwif->channel ? 0x02 : 0x01;
@@ -927,7 +927,7 @@ static void hpt3xxn_set_clock(ide_hwif_t *hwif, u8 mode)
927 927
928static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq) 928static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq)
929{ 929{
930 hpt3xxn_set_clock(HWIF(drive), rq_data_dir(rq) ? 0x23 : 0x21); 930 hpt3xxn_set_clock(drive->hwif, rq_data_dir(rq) ? 0x23 : 0x21);
931} 931}
932 932
933/** 933/**
@@ -1349,8 +1349,6 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif,
1349 if (ide_allocate_dma_engine(hwif)) 1349 if (ide_allocate_dma_engine(hwif))
1350 return -1; 1350 return -1;
1351 1351
1352 hwif->dma_ops = &sff_dma_ops;
1353
1354 return 0; 1352 return 0;
1355} 1353}
1356 1354
@@ -1426,6 +1424,7 @@ static const struct ide_dma_ops hpt37x_dma_ops = {
1426 .dma_test_irq = hpt374_dma_test_irq, 1424 .dma_test_irq = hpt374_dma_test_irq,
1427 .dma_lost_irq = ide_dma_lost_irq, 1425 .dma_lost_irq = ide_dma_lost_irq,
1428 .dma_timeout = ide_dma_timeout, 1426 .dma_timeout = ide_dma_timeout,
1427 .dma_sff_read_status = ide_dma_sff_read_status,
1429}; 1428};
1430 1429
1431static const struct ide_dma_ops hpt370_dma_ops = { 1430static const struct ide_dma_ops hpt370_dma_ops = {
@@ -1437,6 +1436,7 @@ static const struct ide_dma_ops hpt370_dma_ops = {
1437 .dma_test_irq = ide_dma_test_irq, 1436 .dma_test_irq = ide_dma_test_irq,
1438 .dma_lost_irq = ide_dma_lost_irq, 1437 .dma_lost_irq = ide_dma_lost_irq,
1439 .dma_timeout = hpt370_dma_timeout, 1438 .dma_timeout = hpt370_dma_timeout,
1439 .dma_sff_read_status = ide_dma_sff_read_status,
1440}; 1440};
1441 1441
1442static const struct ide_dma_ops hpt36x_dma_ops = { 1442static const struct ide_dma_ops hpt36x_dma_ops = {
@@ -1448,6 +1448,7 @@ static const struct ide_dma_ops hpt36x_dma_ops = {
1448 .dma_test_irq = ide_dma_test_irq, 1448 .dma_test_irq = ide_dma_test_irq,
1449 .dma_lost_irq = hpt366_dma_lost_irq, 1449 .dma_lost_irq = hpt366_dma_lost_irq,
1450 .dma_timeout = ide_dma_timeout, 1450 .dma_timeout = ide_dma_timeout,
1451 .dma_sff_read_status = ide_dma_sff_read_status,
1451}; 1452};
1452 1453
1453static const struct ide_port_info hpt366_chipsets[] __devinitdata = { 1454static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 81f70caeb40f..97a35c667aee 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -166,7 +166,7 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
166 */ 166 */
167static void icside_maskproc(ide_drive_t *drive, int mask) 167static void icside_maskproc(ide_drive_t *drive, int mask)
168{ 168{
169 ide_hwif_t *hwif = HWIF(drive); 169 ide_hwif_t *hwif = drive->hwif;
170 struct expansion_card *ec = ECARD_DEV(hwif->dev); 170 struct expansion_card *ec = ECARD_DEV(hwif->dev);
171 struct icside_state *state = ecard_get_drvdata(ec); 171 struct icside_state *state = ecard_get_drvdata(ec);
172 unsigned long flags; 172 unsigned long flags;
@@ -284,7 +284,7 @@ static void icside_dma_host_set(ide_drive_t *drive, int on)
284 284
285static int icside_dma_end(ide_drive_t *drive) 285static int icside_dma_end(ide_drive_t *drive)
286{ 286{
287 ide_hwif_t *hwif = HWIF(drive); 287 ide_hwif_t *hwif = drive->hwif;
288 struct expansion_card *ec = ECARD_DEV(hwif->dev); 288 struct expansion_card *ec = ECARD_DEV(hwif->dev);
289 289
290 drive->waiting_for_dma = 0; 290 drive->waiting_for_dma = 0;
@@ -299,7 +299,7 @@ static int icside_dma_end(ide_drive_t *drive)
299 299
300static void icside_dma_start(ide_drive_t *drive) 300static void icside_dma_start(ide_drive_t *drive)
301{ 301{
302 ide_hwif_t *hwif = HWIF(drive); 302 ide_hwif_t *hwif = drive->hwif;
303 struct expansion_card *ec = ECARD_DEV(hwif->dev); 303 struct expansion_card *ec = ECARD_DEV(hwif->dev);
304 304
305 /* We can not enable DMA on both channels simultaneously. */ 305 /* We can not enable DMA on both channels simultaneously. */
@@ -309,10 +309,10 @@ static void icside_dma_start(ide_drive_t *drive)
309 309
310static int icside_dma_setup(ide_drive_t *drive) 310static int icside_dma_setup(ide_drive_t *drive)
311{ 311{
312 ide_hwif_t *hwif = HWIF(drive); 312 ide_hwif_t *hwif = drive->hwif;
313 struct expansion_card *ec = ECARD_DEV(hwif->dev); 313 struct expansion_card *ec = ECARD_DEV(hwif->dev);
314 struct icside_state *state = ecard_get_drvdata(ec); 314 struct icside_state *state = ecard_get_drvdata(ec);
315 struct request *rq = hwif->hwgroup->rq; 315 struct request *rq = hwif->rq;
316 unsigned int dma_mode; 316 unsigned int dma_mode;
317 317
318 if (rq_data_dir(rq)) 318 if (rq_data_dir(rq))
@@ -362,7 +362,7 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd)
362 362
363static int icside_dma_test_irq(ide_drive_t *drive) 363static int icside_dma_test_irq(ide_drive_t *drive)
364{ 364{
365 ide_hwif_t *hwif = HWIF(drive); 365 ide_hwif_t *hwif = drive->hwif;
366 struct expansion_card *ec = ECARD_DEV(hwif->dev); 366 struct expansion_card *ec = ECARD_DEV(hwif->dev);
367 struct icside_state *state = ecard_get_drvdata(ec); 367 struct icside_state *state = ecard_get_drvdata(ec);
368 368
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index fd4a36433050..2f9e941968d6 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -218,7 +218,7 @@ static acpi_handle ide_acpi_hwif_get_handle(ide_hwif_t *hwif)
218 */ 218 */
219static acpi_handle ide_acpi_drive_get_handle(ide_drive_t *drive) 219static acpi_handle ide_acpi_drive_get_handle(ide_drive_t *drive)
220{ 220{
221 ide_hwif_t *hwif = HWIF(drive); 221 ide_hwif_t *hwif = drive->hwif;
222 int port; 222 int port;
223 acpi_handle drive_handle; 223 acpi_handle drive_handle;
224 224
@@ -263,7 +263,7 @@ static int do_drive_get_GTF(ide_drive_t *drive,
263 acpi_status status; 263 acpi_status status;
264 struct acpi_buffer output; 264 struct acpi_buffer output;
265 union acpi_object *out_obj; 265 union acpi_object *out_obj;
266 ide_hwif_t *hwif = HWIF(drive); 266 ide_hwif_t *hwif = drive->hwif;
267 struct device *dev = hwif->gendev.parent; 267 struct device *dev = hwif->gendev.parent;
268 int err = -ENODEV; 268 int err = -ENODEV;
269 int port; 269 int port;
@@ -641,7 +641,8 @@ void ide_acpi_push_timing(ide_hwif_t *hwif)
641 */ 641 */
642void ide_acpi_set_state(ide_hwif_t *hwif, int on) 642void ide_acpi_set_state(ide_hwif_t *hwif, int on)
643{ 643{
644 int unit; 644 ide_drive_t *drive;
645 int i;
645 646
646 if (ide_noacpi || ide_noacpi_psx) 647 if (ide_noacpi || ide_noacpi_psx)
647 return; 648 return;
@@ -655,9 +656,8 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on)
655 /* channel first and then drives for power on and verse versa for power off */ 656 /* channel first and then drives for power on and verse versa for power off */
656 if (on) 657 if (on)
657 acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0); 658 acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0);
658 for (unit = 0; unit < MAX_DRIVES; ++unit) {
659 ide_drive_t *drive = &hwif->drives[unit];
660 659
660 ide_port_for_each_dev(i, drive, hwif) {
661 if (!drive->acpidata->obj_handle) 661 if (!drive->acpidata->obj_handle)
662 drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive); 662 drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive);
663 663
@@ -711,15 +711,13 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
711 * for both drives, regardless whether they are connected 711 * for both drives, regardless whether they are connected
712 * or not. 712 * or not.
713 */ 713 */
714 hwif->drives[0].acpidata = &hwif->acpidata->master; 714 hwif->devices[0]->acpidata = &hwif->acpidata->master;
715 hwif->drives[1].acpidata = &hwif->acpidata->slave; 715 hwif->devices[1]->acpidata = &hwif->acpidata->slave;
716 716
717 /* 717 /*
718 * Send IDENTIFY for each drive 718 * Send IDENTIFY for each drive
719 */ 719 */
720 for (i = 0; i < MAX_DRIVES; i++) { 720 ide_port_for_each_dev(i, drive, hwif) {
721 drive = &hwif->drives[i];
722
723 memset(drive->acpidata, 0, sizeof(*drive->acpidata)); 721 memset(drive->acpidata, 0, sizeof(*drive->acpidata));
724 722
725 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 723 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
@@ -744,9 +742,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
744 ide_acpi_get_timing(hwif); 742 ide_acpi_get_timing(hwif);
745 ide_acpi_push_timing(hwif); 743 ide_acpi_push_timing(hwif);
746 744
747 for (i = 0; i < MAX_DRIVES; i++) { 745 ide_port_for_each_dev(i, drive, hwif) {
748 drive = &hwif->drives[i];
749
750 if (drive->dev_flags & IDE_DFLAG_PRESENT) 746 if (drive->dev_flags & IDE_DFLAG_PRESENT)
751 /* Execute ACPI startup code */ 747 /* Execute ACPI startup code */
752 ide_acpi_exec_tfs(drive); 748 ide_acpi_exec_tfs(drive);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e8688c0f8645..e96c01260598 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -243,7 +243,7 @@ EXPORT_SYMBOL_GPL(ide_retry_pc);
243 243
244int ide_cd_expiry(ide_drive_t *drive) 244int ide_cd_expiry(ide_drive_t *drive)
245{ 245{
246 struct request *rq = HWGROUP(drive)->rq; 246 struct request *rq = drive->hwif->rq;
247 unsigned long wait = 0; 247 unsigned long wait = 0;
248 248
249 debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]); 249 debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
@@ -294,7 +294,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
294{ 294{
295 struct ide_atapi_pc *pc = drive->pc; 295 struct ide_atapi_pc *pc = drive->pc;
296 ide_hwif_t *hwif = drive->hwif; 296 ide_hwif_t *hwif = drive->hwif;
297 struct request *rq = hwif->hwgroup->rq; 297 struct request *rq = hwif->rq;
298 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 298 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
299 xfer_func_t *xferfunc; 299 xfer_func_t *xferfunc;
300 unsigned int timeout, temp; 300 unsigned int timeout, temp;
@@ -491,7 +491,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
491{ 491{
492 struct ide_atapi_pc *uninitialized_var(pc); 492 struct ide_atapi_pc *uninitialized_var(pc);
493 ide_hwif_t *hwif = drive->hwif; 493 ide_hwif_t *hwif = drive->hwif;
494 struct request *rq = hwif->hwgroup->rq; 494 struct request *rq = hwif->rq;
495 ide_expiry_t *expiry; 495 ide_expiry_t *expiry;
496 unsigned int timeout; 496 unsigned int timeout;
497 int cmd_len; 497 int cmd_len;
@@ -549,7 +549,10 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
549 } 549 }
550 550
551 /* Set the interrupt routine */ 551 /* Set the interrupt routine */
552 ide_set_handler(drive, ide_pc_intr, timeout, expiry); 552 ide_set_handler(drive,
553 (dev_is_idecd(drive) ? drive->irq_handler
554 : ide_pc_intr),
555 timeout, expiry);
553 556
554 /* Begin DMA, if necessary */ 557 /* Begin DMA, if necessary */
555 if (dev_is_idecd(drive)) { 558 if (dev_is_idecd(drive)) {
@@ -580,7 +583,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive)
580 583
581 if (dev_is_idecd(drive)) { 584 if (dev_is_idecd(drive)) {
582 tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL; 585 tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
583 bcount = ide_cd_get_xferlen(hwif->hwgroup->rq); 586 bcount = ide_cd_get_xferlen(hwif->rq);
584 expiry = ide_cd_expiry; 587 expiry = ide_cd_expiry;
585 timeout = ATAPI_WAIT_PC; 588 timeout = ATAPI_WAIT_PC;
586 589
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 1a7410f88249..cae69372cf45 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -239,7 +239,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
239 239
240static void cdrom_end_request(ide_drive_t *drive, int uptodate) 240static void cdrom_end_request(ide_drive_t *drive, int uptodate)
241{ 241{
242 struct request *rq = HWGROUP(drive)->rq; 242 struct request *rq = drive->hwif->rq;
243 int nsectors = rq->hard_cur_sectors; 243 int nsectors = rq->hard_cur_sectors;
244 244
245 ide_debug_log(IDE_DBG_FUNC, "Call %s, cmd: 0x%x, uptodate: 0x%x, " 245 ide_debug_log(IDE_DBG_FUNC, "Call %s, cmd: 0x%x, uptodate: 0x%x, "
@@ -306,8 +306,7 @@ static void ide_dump_status_no_sense(ide_drive_t *drive, const char *msg, u8 st)
306static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) 306static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
307{ 307{
308 ide_hwif_t *hwif = drive->hwif; 308 ide_hwif_t *hwif = drive->hwif;
309 ide_hwgroup_t *hwgroup = hwif->hwgroup; 309 struct request *rq = hwif->rq;
310 struct request *rq = hwgroup->rq;
311 int stat, err, sense_key; 310 int stat, err, sense_key;
312 311
313 /* check for errors */ 312 /* check for errors */
@@ -502,7 +501,7 @@ end_request:
502 blkdev_dequeue_request(rq); 501 blkdev_dequeue_request(rq);
503 spin_unlock_irqrestore(q->queue_lock, flags); 502 spin_unlock_irqrestore(q->queue_lock, flags);
504 503
505 hwgroup->rq = NULL; 504 hwif->rq = NULL;
506 505
507 cdrom_queue_request_sense(drive, rq->sense, rq); 506 cdrom_queue_request_sense(drive, rq->sense, rq);
508 } else 507 } else
@@ -511,106 +510,6 @@ end_request:
511 return 1; 510 return 1;
512} 511}
513 512
514static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *);
515static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
516
517/*
518 * Set up the device registers for transferring a packet command on DEV,
519 * expecting to later transfer XFERLEN bytes. HANDLER is the routine
520 * which actually transfers the command to the drive. If this is a
521 * drq_interrupt device, this routine will arrange for HANDLER to be
522 * called when the interrupt from the drive arrives. Otherwise, HANDLER
523 * will be called immediately after the drive is prepared for the transfer.
524 */
525static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
526{
527 ide_hwif_t *hwif = drive->hwif;
528 struct request *rq = hwif->hwgroup->rq;
529 int xferlen;
530
531 xferlen = ide_cd_get_xferlen(rq);
532
533 ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen);
534
535 /* FIXME: for Virtual DMA we must check harder */
536 if (drive->dma)
537 drive->dma = !hwif->dma_ops->dma_setup(drive);
538
539 /* set up the controller registers */
540 ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL,
541 xferlen, drive->dma);
542
543 if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
544 /* waiting for CDB interrupt, not DMA yet. */
545 if (drive->dma)
546 drive->waiting_for_dma = 0;
547
548 /* packet command */
549 ide_execute_command(drive, ATA_CMD_PACKET,
550 cdrom_transfer_packet_command,
551 ATAPI_WAIT_PC, ide_cd_expiry);
552 return ide_started;
553 } else {
554 ide_execute_pkt_cmd(drive);
555
556 return cdrom_transfer_packet_command(drive);
557 }
558}
559
560/*
561 * Send a packet command to DRIVE described by CMD_BUF and CMD_LEN. The device
562 * registers must have already been prepared by cdrom_start_packet_command.
563 * HANDLER is the interrupt handler to call when the command completes or
564 * there's data ready.
565 */
566#define ATAPI_MIN_CDB_BYTES 12
567static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
568{
569 ide_hwif_t *hwif = drive->hwif;
570 struct request *rq = hwif->hwgroup->rq;
571 int cmd_len;
572 ide_startstop_t startstop;
573
574 ide_debug_log(IDE_DBG_PC, "Call %s\n", __func__);
575
576 if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
577 /*
578 * Here we should have been called after receiving an interrupt
579 * from the device. DRQ should how be set.
580 */
581
582 /* check for errors */
583 if (cdrom_decode_status(drive, ATA_DRQ, NULL))
584 return ide_stopped;
585
586 /* ok, next interrupt will be DMA interrupt */
587 if (drive->dma)
588 drive->waiting_for_dma = 1;
589 } else {
590 /* otherwise, we must wait for DRQ to get set */
591 if (ide_wait_stat(&startstop, drive, ATA_DRQ,
592 ATA_BUSY, WAIT_READY))
593 return startstop;
594 }
595
596 /* arm the interrupt handler */
597 ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry);
598
599 /* ATAPI commands get padded out to 12 bytes minimum */
600 cmd_len = COMMAND_SIZE(rq->cmd[0]);
601 if (cmd_len < ATAPI_MIN_CDB_BYTES)
602 cmd_len = ATAPI_MIN_CDB_BYTES;
603
604 /* send the command to the device */
605 hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
606
607 /* start the DMA if need be */
608 if (drive->dma)
609 hwif->dma_ops->dma_start(drive);
610
611 return ide_started;
612}
613
614/* 513/*
615 * Check the contents of the interrupt reason register from the cdrom 514 * Check the contents of the interrupt reason register from the cdrom
616 * and attempt to recover if there are problems. Returns 0 if everything's 515 * and attempt to recover if there are problems. Returns 0 if everything's
@@ -854,8 +753,7 @@ static int cdrom_newpc_intr_dummy_cb(struct request *rq)
854static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) 753static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
855{ 754{
856 ide_hwif_t *hwif = drive->hwif; 755 ide_hwif_t *hwif = drive->hwif;
857 ide_hwgroup_t *hwgroup = hwif->hwgroup; 756 struct request *rq = hwif->rq;
858 struct request *rq = hwgroup->rq;
859 xfer_func_t *xferfunc; 757 xfer_func_t *xferfunc;
860 ide_expiry_t *expiry = NULL; 758 ide_expiry_t *expiry = NULL;
861 int dma_error = 0, dma, stat, thislen, uptodate = 0; 759 int dma_error = 0, dma, stat, thislen, uptodate = 0;
@@ -1061,7 +959,7 @@ end_request:
1061 if (blk_end_request(rq, 0, dlen)) 959 if (blk_end_request(rq, 0, dlen))
1062 BUG(); 960 BUG();
1063 961
1064 hwgroup->rq = NULL; 962 hwif->rq = NULL;
1065 } else { 963 } else {
1066 if (!uptodate) 964 if (!uptodate)
1067 rq->cmd_flags |= REQ_FAILED; 965 rq->cmd_flags |= REQ_FAILED;
@@ -1183,7 +1081,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
1183 return ide_stopped; 1081 return ide_stopped;
1184 } 1082 }
1185 1083
1186 return cdrom_start_packet_command(drive); 1084 return ide_issue_pc(drive);
1187} 1085}
1188 1086
1189/* 1087/*
@@ -1916,7 +1814,7 @@ static void ide_cd_release(struct kref *kref)
1916 1814
1917static int ide_cd_probe(ide_drive_t *); 1815static int ide_cd_probe(ide_drive_t *);
1918 1816
1919static ide_driver_t ide_cdrom_driver = { 1817static struct ide_driver ide_cdrom_driver = {
1920 .gen_driver = { 1818 .gen_driver = {
1921 .owner = THIS_MODULE, 1819 .owner = THIS_MODULE,
1922 .name = "ide-cdrom", 1820 .name = "ide-cdrom",
@@ -1927,7 +1825,6 @@ static ide_driver_t ide_cdrom_driver = {
1927 .version = IDECD_VERSION, 1825 .version = IDECD_VERSION,
1928 .do_request = ide_cd_do_request, 1826 .do_request = ide_cd_do_request,
1929 .end_request = ide_end_request, 1827 .end_request = ide_end_request,
1930 .error = __ide_error,
1931#ifdef CONFIG_IDE_PROC_FS 1828#ifdef CONFIG_IDE_PROC_FS
1932 .proc_entries = ide_cd_proc_entries, 1829 .proc_entries = ide_cd_proc_entries,
1933 .proc_devsets = ide_cd_proc_devsets, 1830 .proc_devsets = ide_cd_proc_devsets,
@@ -2082,6 +1979,7 @@ static int ide_cd_probe(ide_drive_t *drive)
2082 } 1979 }
2083 1980
2084 drive->debug_mask = debug_mask; 1981 drive->debug_mask = debug_mask;
1982 drive->irq_handler = cdrom_newpc_intr;
2085 1983
2086 info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL); 1984 info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
2087 if (info == NULL) { 1985 if (info == NULL) {
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index bf676b262181..ac40d6cb90a2 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -33,33 +33,33 @@
33 33
34/* Structure of a MSF cdrom address. */ 34/* Structure of a MSF cdrom address. */
35struct atapi_msf { 35struct atapi_msf {
36 byte reserved; 36 u8 reserved;
37 byte minute; 37 u8 minute;
38 byte second; 38 u8 second;
39 byte frame; 39 u8 frame;
40}; 40};
41 41
42/* Space to hold the disk TOC. */ 42/* Space to hold the disk TOC. */
43#define MAX_TRACKS 99 43#define MAX_TRACKS 99
44struct atapi_toc_header { 44struct atapi_toc_header {
45 unsigned short toc_length; 45 unsigned short toc_length;
46 byte first_track; 46 u8 first_track;
47 byte last_track; 47 u8 last_track;
48}; 48};
49 49
50struct atapi_toc_entry { 50struct atapi_toc_entry {
51 byte reserved1; 51 u8 reserved1;
52#if defined(__BIG_ENDIAN_BITFIELD) 52#if defined(__BIG_ENDIAN_BITFIELD)
53 __u8 adr : 4; 53 u8 adr : 4;
54 __u8 control : 4; 54 u8 control : 4;
55#elif defined(__LITTLE_ENDIAN_BITFIELD) 55#elif defined(__LITTLE_ENDIAN_BITFIELD)
56 __u8 control : 4; 56 u8 control : 4;
57 __u8 adr : 4; 57 u8 adr : 4;
58#else 58#else
59#error "Please fix <asm/byteorder.h>" 59#error "Please fix <asm/byteorder.h>"
60#endif 60#endif
61 byte track; 61 u8 track;
62 byte reserved2; 62 u8 reserved2;
63 union { 63 union {
64 unsigned lba; 64 unsigned lba;
65 struct atapi_msf msf; 65 struct atapi_msf msf;
@@ -77,10 +77,10 @@ struct atapi_toc {
77 77
78/* Extra per-device info for cdrom drives. */ 78/* Extra per-device info for cdrom drives. */
79struct cdrom_info { 79struct cdrom_info {
80 ide_drive_t *drive; 80 ide_drive_t *drive;
81 ide_driver_t *driver; 81 struct ide_driver *driver;
82 struct gendisk *disk; 82 struct gendisk *disk;
83 struct kref kref; 83 struct kref kref;
84 84
85 /* Buffer for table of contents. NULL if we haven't allocated 85 /* Buffer for table of contents. NULL if we haven't allocated
86 a TOC buffer for this device yet. */ 86 a TOC buffer for this device yet. */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index eb9fac4d0f0c..4088a622873e 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -89,7 +89,7 @@ static void ide_tf_set_cmd(ide_drive_t *drive, ide_task_t *task, u8 dma)
89static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, 89static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
90 sector_t block) 90 sector_t block)
91{ 91{
92 ide_hwif_t *hwif = HWIF(drive); 92 ide_hwif_t *hwif = drive->hwif;
93 u16 nsectors = (u16)rq->nr_sectors; 93 u16 nsectors = (u16)rq->nr_sectors;
94 u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48); 94 u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48);
95 u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); 95 u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
@@ -187,7 +187,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
187static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, 187static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
188 sector_t block) 188 sector_t block)
189{ 189{
190 ide_hwif_t *hwif = HWIF(drive); 190 ide_hwif_t *hwif = drive->hwif;
191 191
192 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); 192 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
193 193
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index f6d2d44d8a9a..123d393658af 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -50,6 +50,27 @@ int config_drive_for_dma(ide_drive_t *drive)
50 return 0; 50 return 0;
51} 51}
52 52
53u8 ide_dma_sff_read_status(ide_hwif_t *hwif)
54{
55 unsigned long addr = hwif->dma_base + ATA_DMA_STATUS;
56
57 if (hwif->host_flags & IDE_HFLAG_MMIO)
58 return readb((void __iomem *)addr);
59 else
60 return inb(addr);
61}
62EXPORT_SYMBOL_GPL(ide_dma_sff_read_status);
63
64static void ide_dma_sff_write_status(ide_hwif_t *hwif, u8 val)
65{
66 unsigned long addr = hwif->dma_base + ATA_DMA_STATUS;
67
68 if (hwif->host_flags & IDE_HFLAG_MMIO)
69 writeb(val, (void __iomem *)addr);
70 else
71 outb(val, addr);
72}
73
53/** 74/**
54 * ide_dma_host_set - Enable/disable DMA on a host 75 * ide_dma_host_set - Enable/disable DMA on a host
55 * @drive: drive to control 76 * @drive: drive to control
@@ -62,18 +83,14 @@ void ide_dma_host_set(ide_drive_t *drive, int on)
62{ 83{
63 ide_hwif_t *hwif = drive->hwif; 84 ide_hwif_t *hwif = drive->hwif;
64 u8 unit = drive->dn & 1; 85 u8 unit = drive->dn & 1;
65 u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 86 u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
66 87
67 if (on) 88 if (on)
68 dma_stat |= (1 << (5 + unit)); 89 dma_stat |= (1 << (5 + unit));
69 else 90 else
70 dma_stat &= ~(1 << (5 + unit)); 91 dma_stat &= ~(1 << (5 + unit));
71 92
72 if (hwif->host_flags & IDE_HFLAG_MMIO) 93 ide_dma_sff_write_status(hwif, dma_stat);
73 writeb(dma_stat,
74 (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
75 else
76 outb(dma_stat, hwif->dma_base + ATA_DMA_STATUS);
77} 94}
78EXPORT_SYMBOL_GPL(ide_dma_host_set); 95EXPORT_SYMBOL_GPL(ide_dma_host_set);
79 96
@@ -175,7 +192,7 @@ EXPORT_SYMBOL_GPL(ide_build_dmatable);
175int ide_dma_setup(ide_drive_t *drive) 192int ide_dma_setup(ide_drive_t *drive)
176{ 193{
177 ide_hwif_t *hwif = drive->hwif; 194 ide_hwif_t *hwif = drive->hwif;
178 struct request *rq = hwif->hwgroup->rq; 195 struct request *rq = hwif->rq;
179 unsigned int reading = rq_data_dir(rq) ? 0 : ATA_DMA_WR; 196 unsigned int reading = rq_data_dir(rq) ? 0 : ATA_DMA_WR;
180 u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; 197 u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
181 u8 dma_stat; 198 u8 dma_stat;
@@ -187,7 +204,7 @@ int ide_dma_setup(ide_drive_t *drive)
187 } 204 }
188 205
189 /* PRD table */ 206 /* PRD table */
190 if (hwif->host_flags & IDE_HFLAG_MMIO) 207 if (mmio)
191 writel(hwif->dmatable_dma, 208 writel(hwif->dmatable_dma,
192 (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS)); 209 (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS));
193 else 210 else
@@ -200,15 +217,10 @@ int ide_dma_setup(ide_drive_t *drive)
200 outb(reading, hwif->dma_base + ATA_DMA_CMD); 217 outb(reading, hwif->dma_base + ATA_DMA_CMD);
201 218
202 /* read DMA status for INTR & ERROR flags */ 219 /* read DMA status for INTR & ERROR flags */
203 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 220 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
204 221
205 /* clear INTR & ERROR flags */ 222 /* clear INTR & ERROR flags */
206 if (mmio) 223 ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
207 writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
208 (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
209 else
210 outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
211 hwif->dma_base + ATA_DMA_STATUS);
212 224
213 drive->waiting_for_dma = 1; 225 drive->waiting_for_dma = 1;
214 return 0; 226 return 0;
@@ -232,7 +244,7 @@ EXPORT_SYMBOL_GPL(ide_dma_setup);
232static int dma_timer_expiry(ide_drive_t *drive) 244static int dma_timer_expiry(ide_drive_t *drive)
233{ 245{
234 ide_hwif_t *hwif = drive->hwif; 246 ide_hwif_t *hwif = drive->hwif;
235 u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 247 u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
236 248
237 printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n", 249 printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n",
238 drive->name, __func__, dma_stat); 250 drive->name, __func__, dma_stat);
@@ -240,7 +252,7 @@ static int dma_timer_expiry(ide_drive_t *drive)
240 if ((dma_stat & 0x18) == 0x18) /* BUSY Stupid Early Timer !! */ 252 if ((dma_stat & 0x18) == 0x18) /* BUSY Stupid Early Timer !! */
241 return WAIT_CMD; 253 return WAIT_CMD;
242 254
243 hwif->hwgroup->expiry = NULL; /* one free ride for now */ 255 hwif->expiry = NULL; /* one free ride for now */
244 256
245 if (dma_stat & ATA_DMA_ERR) /* ERROR */ 257 if (dma_stat & ATA_DMA_ERR) /* ERROR */
246 return -1; 258 return -1;
@@ -289,13 +301,12 @@ EXPORT_SYMBOL_GPL(ide_dma_start);
289int ide_dma_end(ide_drive_t *drive) 301int ide_dma_end(ide_drive_t *drive)
290{ 302{
291 ide_hwif_t *hwif = drive->hwif; 303 ide_hwif_t *hwif = drive->hwif;
292 u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
293 u8 dma_stat = 0, dma_cmd = 0, mask; 304 u8 dma_stat = 0, dma_cmd = 0, mask;
294 305
295 drive->waiting_for_dma = 0; 306 drive->waiting_for_dma = 0;
296 307
297 /* stop DMA */ 308 /* stop DMA */
298 if (mmio) { 309 if (hwif->host_flags & IDE_HFLAG_MMIO) {
299 dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD)); 310 dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
300 writeb(dma_cmd & ~ATA_DMA_START, 311 writeb(dma_cmd & ~ATA_DMA_START,
301 (void __iomem *)(hwif->dma_base + ATA_DMA_CMD)); 312 (void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
@@ -305,15 +316,10 @@ int ide_dma_end(ide_drive_t *drive)
305 } 316 }
306 317
307 /* get DMA status */ 318 /* get DMA status */
308 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 319 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
309 320
310 if (mmio) 321 /* clear INTR & ERROR bits */
311 /* clear the INTR & ERROR bits */ 322 ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
312 writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
313 (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
314 else
315 outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
316 hwif->dma_base + ATA_DMA_STATUS);
317 323
318 /* purge DMA mappings */ 324 /* purge DMA mappings */
319 ide_destroy_dmatable(drive); 325 ide_destroy_dmatable(drive);
@@ -331,7 +337,7 @@ EXPORT_SYMBOL_GPL(ide_dma_end);
331int ide_dma_test_irq(ide_drive_t *drive) 337int ide_dma_test_irq(ide_drive_t *drive)
332{ 338{
333 ide_hwif_t *hwif = drive->hwif; 339 ide_hwif_t *hwif = drive->hwif;
334 u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 340 u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
335 341
336 return (dma_stat & ATA_DMA_INTR) ? 1 : 0; 342 return (dma_stat & ATA_DMA_INTR) ? 1 : 0;
337} 343}
@@ -346,5 +352,6 @@ const struct ide_dma_ops sff_dma_ops = {
346 .dma_test_irq = ide_dma_test_irq, 352 .dma_test_irq = ide_dma_test_irq,
347 .dma_timeout = ide_dma_timeout, 353 .dma_timeout = ide_dma_timeout,
348 .dma_lost_irq = ide_dma_lost_irq, 354 .dma_lost_irq = ide_dma_lost_irq,
355 .dma_sff_read_status = ide_dma_sff_read_status,
349}; 356};
350EXPORT_SYMBOL_GPL(sff_dma_ops); 357EXPORT_SYMBOL_GPL(sff_dma_ops);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index fffd11717b2d..72ebab0bc755 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -96,7 +96,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
96 96
97 if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { 97 if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) {
98 if (!dma_stat) { 98 if (!dma_stat) {
99 struct request *rq = hwif->hwgroup->rq; 99 struct request *rq = hwif->rq;
100 100
101 task_end_request(drive, rq, stat); 101 task_end_request(drive, rq, stat);
102 return ide_stopped; 102 return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 0a48e2dc53a2..3eab1c6c9b31 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -71,7 +71,7 @@
71static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) 71static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
72{ 72{
73 struct ide_disk_obj *floppy = drive->driver_data; 73 struct ide_disk_obj *floppy = drive->driver_data;
74 struct request *rq = HWGROUP(drive)->rq; 74 struct request *rq = drive->hwif->rq;
75 int error; 75 int error;
76 76
77 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); 77 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index b8078b3231f7..7857b209c6df 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -149,7 +149,7 @@ static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
149 return drive->disk_ops->end_request(drive, uptodate, nrsecs); 149 return drive->disk_ops->end_request(drive, uptodate, nrsecs);
150} 150}
151 151
152static ide_driver_t ide_gd_driver = { 152static struct ide_driver ide_gd_driver = {
153 .gen_driver = { 153 .gen_driver = {
154 .owner = THIS_MODULE, 154 .owner = THIS_MODULE,
155 .name = "ide-gd", 155 .name = "ide-gd",
@@ -162,7 +162,6 @@ static ide_driver_t ide_gd_driver = {
162 .version = IDE_GD_VERSION, 162 .version = IDE_GD_VERSION,
163 .do_request = ide_gd_do_request, 163 .do_request = ide_gd_do_request,
164 .end_request = ide_gd_end_request, 164 .end_request = ide_gd_end_request,
165 .error = __ide_error,
166#ifdef CONFIG_IDE_PROC_FS 165#ifdef CONFIG_IDE_PROC_FS
167 .proc_entries = ide_disk_proc_entries, 166 .proc_entries = ide_disk_proc_entries,
168 .proc_devsets = ide_disk_proc_devsets, 167 .proc_devsets = ide_disk_proc_devsets,
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
index 7d3d101713e0..a86779f0756b 100644
--- a/drivers/ide/ide-gd.h
+++ b/drivers/ide/ide-gd.h
@@ -14,11 +14,11 @@
14#endif 14#endif
15 15
16struct ide_disk_obj { 16struct ide_disk_obj {
17 ide_drive_t *drive; 17 ide_drive_t *drive;
18 ide_driver_t *driver; 18 struct ide_driver *driver;
19 struct gendisk *disk; 19 struct gendisk *disk;
20 struct kref kref; 20 struct kref kref;
21 unsigned int openers; /* protected by BKL for now */ 21 unsigned int openers; /* protected by BKL for now */
22 22
23 /* Last failed packet command */ 23 /* Last failed packet command */
24 struct ide_atapi_pc *failed_pc; 24 struct ide_atapi_pc *failed_pc;
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index e2cdd2e9cdec..9270d3255ee0 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -159,7 +159,6 @@ static const struct ide_tp_ops h8300_tp_ops = {
159 .exec_command = ide_exec_command, 159 .exec_command = ide_exec_command,
160 .read_status = ide_read_status, 160 .read_status = ide_read_status,
161 .read_altstatus = ide_read_altstatus, 161 .read_altstatus = ide_read_altstatus,
162 .read_sff_dma_status = ide_read_sff_dma_status,
163 162
164 .set_irq = ide_set_irq, 163 .set_irq = ide_set_irq,
165 164
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1c36a8e83d36..cc163319dfbd 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -88,7 +88,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
88 ret = 0; 88 ret = 0;
89 89
90 if (ret == 0 && dequeue) 90 if (ret == 0 && dequeue)
91 drive->hwif->hwgroup->rq = NULL; 91 drive->hwif->rq = NULL;
92 92
93 return ret; 93 return ret;
94} 94}
@@ -107,7 +107,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
107int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) 107int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
108{ 108{
109 unsigned int nr_bytes = nr_sectors << 9; 109 unsigned int nr_bytes = nr_sectors << 9;
110 struct request *rq = drive->hwif->hwgroup->rq; 110 struct request *rq = drive->hwif->rq;
111 111
112 if (!nr_bytes) { 112 if (!nr_bytes) {
113 if (blk_pc_request(rq)) 113 if (blk_pc_request(rq))
@@ -160,8 +160,8 @@ EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
160 160
161void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) 161void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
162{ 162{
163 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; 163 ide_hwif_t *hwif = drive->hwif;
164 struct request *rq = hwgroup->rq; 164 struct request *rq = hwif->rq;
165 165
166 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { 166 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
167 ide_task_t *task = (ide_task_t *)rq->special; 167 ide_task_t *task = (ide_task_t *)rq->special;
@@ -186,7 +186,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
186 return; 186 return;
187 } 187 }
188 188
189 hwgroup->rq = NULL; 189 hwif->rq = NULL;
190 190
191 rq->errors = err; 191 rq->errors = err;
192 192
@@ -199,9 +199,9 @@ EXPORT_SYMBOL(ide_end_drive_cmd);
199static void ide_kill_rq(ide_drive_t *drive, struct request *rq) 199static void ide_kill_rq(ide_drive_t *drive, struct request *rq)
200{ 200{
201 if (rq->rq_disk) { 201 if (rq->rq_disk) {
202 ide_driver_t *drv; 202 struct ide_driver *drv;
203 203
204 drv = *(ide_driver_t **)rq->rq_disk->private_data; 204 drv = *(struct ide_driver **)rq->rq_disk->private_data;
205 drv->end_request(drive, 0, 0); 205 drv->end_request(drive, 0, 0);
206 } else 206 } else
207 ide_end_request(drive, 0, 0); 207 ide_end_request(drive, 0, 0);
@@ -291,7 +291,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u
291 return ide_stopped; 291 return ide_stopped;
292} 292}
293 293
294ide_startstop_t 294static ide_startstop_t
295__ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) 295__ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
296{ 296{
297 if (drive->media == ide_disk) 297 if (drive->media == ide_disk)
@@ -299,8 +299,6 @@ __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
299 return ide_atapi_error(drive, rq, stat, err); 299 return ide_atapi_error(drive, rq, stat, err);
300} 300}
301 301
302EXPORT_SYMBOL_GPL(__ide_error);
303
304/** 302/**
305 * ide_error - handle an error on the IDE 303 * ide_error - handle an error on the IDE
306 * @drive: drive the error occurred on 304 * @drive: drive the error occurred on
@@ -321,7 +319,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
321 319
322 err = ide_dump_status(drive, msg, stat); 320 err = ide_dump_status(drive, msg, stat);
323 321
324 if ((rq = HWGROUP(drive)->rq) == NULL) 322 rq = drive->hwif->rq;
323 if (rq == NULL)
325 return ide_stopped; 324 return ide_stopped;
326 325
327 /* retry only "normal" I/O: */ 326 /* retry only "normal" I/O: */
@@ -331,15 +330,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
331 return ide_stopped; 330 return ide_stopped;
332 } 331 }
333 332
334 if (rq->rq_disk) { 333 return __ide_error(drive, rq, stat, err);
335 ide_driver_t *drv;
336
337 drv = *(ide_driver_t **)rq->rq_disk->private_data;
338 return drv->error(drive, rq, stat, err);
339 } else
340 return __ide_error(drive, rq, stat, err);
341} 334}
342
343EXPORT_SYMBOL_GPL(ide_error); 335EXPORT_SYMBOL_GPL(ide_error);
344 336
345static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) 337static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -462,7 +454,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd);
462static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, 454static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
463 struct request *rq) 455 struct request *rq)
464{ 456{
465 ide_hwif_t *hwif = HWIF(drive); 457 ide_hwif_t *hwif = drive->hwif;
466 ide_task_t *task = rq->special; 458 ide_task_t *task = rq->special;
467 459
468 if (task) { 460 if (task) {
@@ -586,7 +578,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
586 578
587#ifdef DEBUG 579#ifdef DEBUG
588 printk("%s: start_request: current=0x%08lx\n", 580 printk("%s: start_request: current=0x%08lx\n",
589 HWIF(drive)->name, (unsigned long) rq); 581 drive->hwif->name, (unsigned long) rq);
590#endif 582#endif
591 583
592 /* bail early if we've exceeded max_failures */ 584 /* bail early if we've exceeded max_failures */
@@ -605,7 +597,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
605 return startstop; 597 return startstop;
606 } 598 }
607 if (!drive->special.all) { 599 if (!drive->special.all) {
608 ide_driver_t *drv; 600 struct ide_driver *drv;
609 601
610 /* 602 /*
611 * We reset the drive so we need to issue a SETFEATURES. 603 * We reset the drive so we need to issue a SETFEATURES.
@@ -638,7 +630,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
638 */ 630 */
639 return ide_special_rq(drive, rq); 631 return ide_special_rq(drive, rq);
640 632
641 drv = *(ide_driver_t **)rq->rq_disk->private_data; 633 drv = *(struct ide_driver **)rq->rq_disk->private_data;
642 634
643 return drv->do_request(drive, rq, rq->sector); 635 return drv->do_request(drive, rq, rq->sector);
644 } 636 }
@@ -654,7 +646,7 @@ kill_rq:
654 * @timeout: time to stall for (jiffies) 646 * @timeout: time to stall for (jiffies)
655 * 647 *
656 * ide_stall_queue() can be used by a drive to give excess bandwidth back 648 * ide_stall_queue() can be used by a drive to give excess bandwidth back
657 * to the hwgroup by sleeping for timeout jiffies. 649 * to the port by sleeping for timeout jiffies.
658 */ 650 */
659 651
660void ide_stall_queue (ide_drive_t *drive, unsigned long timeout) 652void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
@@ -666,45 +658,53 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
666} 658}
667EXPORT_SYMBOL(ide_stall_queue); 659EXPORT_SYMBOL(ide_stall_queue);
668 660
661static inline int ide_lock_port(ide_hwif_t *hwif)
662{
663 if (hwif->busy)
664 return 1;
665
666 hwif->busy = 1;
667
668 return 0;
669}
670
671static inline void ide_unlock_port(ide_hwif_t *hwif)
672{
673 hwif->busy = 0;
674}
675
676static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif)
677{
678 int rc = 0;
679
680 if (host->host_flags & IDE_HFLAG_SERIALIZE) {
681 rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy);
682 if (rc == 0) {
683 /* for atari only */
684 ide_get_lock(ide_intr, hwif);
685 }
686 }
687 return rc;
688}
689
690static inline void ide_unlock_host(struct ide_host *host)
691{
692 if (host->host_flags & IDE_HFLAG_SERIALIZE) {
693 /* for atari only */
694 ide_release_lock();
695 clear_bit_unlock(IDE_HOST_BUSY, &host->host_busy);
696 }
697}
698
669/* 699/*
670 * Issue a new request to a drive from hwgroup 700 * Issue a new request to a device.
671 *
672 * A hwgroup is a serialized group of IDE interfaces. Usually there is
673 * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
674 * may have both interfaces in a single hwgroup to "serialize" access.
675 * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
676 * together into one hwgroup for serialized access.
677 *
678 * Note also that several hwgroups can end up sharing a single IRQ,
679 * possibly along with many other devices. This is especially common in
680 * PCI-based systems with off-board IDE controller cards.
681 *
682 * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
683 *
684 * The first thread into the driver for a particular hwgroup sets the
685 * hwgroup->busy flag to indicate that this hwgroup is now active,
686 * and then initiates processing of the top request from the request queue.
687 *
688 * Other threads attempting entry notice the busy setting, and will simply
689 * queue their new requests and exit immediately. Note that hwgroup->busy
690 * remains set even when the driver is merely awaiting the next interrupt.
691 * Thus, the meaning is "this hwgroup is busy processing a request".
692 *
693 * When processing of a request completes, the completing thread or IRQ-handler
694 * will start the next request from the queue. If no more work remains,
695 * the driver will clear the hwgroup->busy flag and exit.
696 *
697 * The per-hwgroup spinlock is used to protect all access to the
698 * hwgroup->busy flag, but is otherwise not needed for most processing in
699 * the driver. This makes the driver much more friendlier to shared IRQs
700 * than previous designs, while remaining 100% (?) SMP safe and capable.
701 */ 701 */
702void do_ide_request(struct request_queue *q) 702void do_ide_request(struct request_queue *q)
703{ 703{
704 ide_drive_t *drive = q->queuedata; 704 ide_drive_t *drive = q->queuedata;
705 ide_hwif_t *hwif = drive->hwif; 705 ide_hwif_t *hwif = drive->hwif;
706 ide_hwgroup_t *hwgroup = hwif->hwgroup; 706 struct ide_host *host = hwif->host;
707 struct request *rq; 707 struct request *rq = NULL;
708 ide_startstop_t startstop; 708 ide_startstop_t startstop;
709 709
710 /* 710 /*
@@ -721,32 +721,40 @@ void do_ide_request(struct request_queue *q)
721 blk_remove_plug(q); 721 blk_remove_plug(q);
722 722
723 spin_unlock_irq(q->queue_lock); 723 spin_unlock_irq(q->queue_lock);
724 spin_lock_irq(&hwgroup->lock);
725 724
726 if (!ide_lock_hwgroup(hwgroup)) { 725 if (ide_lock_host(host, hwif))
726 goto plug_device_2;
727
728 spin_lock_irq(&hwif->lock);
729
730 if (!ide_lock_port(hwif)) {
731 ide_hwif_t *prev_port;
727repeat: 732repeat:
728 hwgroup->rq = NULL; 733 prev_port = hwif->host->cur_port;
734 hwif->rq = NULL;
729 735
730 if (drive->dev_flags & IDE_DFLAG_SLEEPING) { 736 if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
731 if (time_before(drive->sleep, jiffies)) { 737 if (time_before(drive->sleep, jiffies)) {
732 ide_unlock_hwgroup(hwgroup); 738 ide_unlock_port(hwif);
733 goto plug_device; 739 goto plug_device;
734 } 740 }
735 } 741 }
736 742
737 if (hwif != hwgroup->hwif) { 743 if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
744 hwif != prev_port) {
738 /* 745 /*
739 * set nIEN for previous hwif, drives in the 746 * set nIEN for previous port, drives in the
740 * quirk_list may not like intr setups/cleanups 747 * quirk_list may not like intr setups/cleanups
741 */ 748 */
742 if (drive->quirk_list == 0) 749 if (prev_port && prev_port->cur_dev->quirk_list == 0)
743 hwif->tp_ops->set_irq(hwif, 0); 750 prev_port->tp_ops->set_irq(prev_port, 0);
751
752 hwif->host->cur_port = hwif;
744 } 753 }
745 hwgroup->hwif = hwif; 754 hwif->cur_dev = drive;
746 hwgroup->drive = drive;
747 drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); 755 drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
748 756
749 spin_unlock_irq(&hwgroup->lock); 757 spin_unlock_irq(&hwif->lock);
750 spin_lock_irq(q->queue_lock); 758 spin_lock_irq(q->queue_lock);
751 /* 759 /*
752 * we know that the queue isn't empty, but this can happen 760 * we know that the queue isn't empty, but this can happen
@@ -754,10 +762,10 @@ repeat:
754 */ 762 */
755 rq = elv_next_request(drive->queue); 763 rq = elv_next_request(drive->queue);
756 spin_unlock_irq(q->queue_lock); 764 spin_unlock_irq(q->queue_lock);
757 spin_lock_irq(&hwgroup->lock); 765 spin_lock_irq(&hwif->lock);
758 766
759 if (!rq) { 767 if (!rq) {
760 ide_unlock_hwgroup(hwgroup); 768 ide_unlock_port(hwif);
761 goto out; 769 goto out;
762 } 770 }
763 771
@@ -778,27 +786,31 @@ repeat:
778 blk_pm_request(rq) == 0 && 786 blk_pm_request(rq) == 0 &&
779 (rq->cmd_flags & REQ_PREEMPT) == 0) { 787 (rq->cmd_flags & REQ_PREEMPT) == 0) {
780 /* there should be no pending command at this point */ 788 /* there should be no pending command at this point */
781 ide_unlock_hwgroup(hwgroup); 789 ide_unlock_port(hwif);
782 goto plug_device; 790 goto plug_device;
783 } 791 }
784 792
785 hwgroup->rq = rq; 793 hwif->rq = rq;
786 794
787 spin_unlock_irq(&hwgroup->lock); 795 spin_unlock_irq(&hwif->lock);
788 startstop = start_request(drive, rq); 796 startstop = start_request(drive, rq);
789 spin_lock_irq(&hwgroup->lock); 797 spin_lock_irq(&hwif->lock);
790 798
791 if (startstop == ide_stopped) 799 if (startstop == ide_stopped)
792 goto repeat; 800 goto repeat;
793 } else 801 } else
794 goto plug_device; 802 goto plug_device;
795out: 803out:
796 spin_unlock_irq(&hwgroup->lock); 804 spin_unlock_irq(&hwif->lock);
805 if (rq == NULL)
806 ide_unlock_host(host);
797 spin_lock_irq(q->queue_lock); 807 spin_lock_irq(q->queue_lock);
798 return; 808 return;
799 809
800plug_device: 810plug_device:
801 spin_unlock_irq(&hwgroup->lock); 811 spin_unlock_irq(&hwif->lock);
812 ide_unlock_host(host);
813plug_device_2:
802 spin_lock_irq(q->queue_lock); 814 spin_lock_irq(q->queue_lock);
803 815
804 if (!elv_queue_empty(q)) 816 if (!elv_queue_empty(q))
@@ -806,13 +818,13 @@ plug_device:
806} 818}
807 819
808/* 820/*
809 * un-busy the hwgroup etc, and clear any pending DMA status. we want to 821 * un-busy the port etc, and clear any pending DMA status. we want to
810 * retry the current request in pio mode instead of risking tossing it 822 * retry the current request in pio mode instead of risking tossing it
811 * all away 823 * all away
812 */ 824 */
813static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) 825static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
814{ 826{
815 ide_hwif_t *hwif = HWIF(drive); 827 ide_hwif_t *hwif = drive->hwif;
816 struct request *rq; 828 struct request *rq;
817 ide_startstop_t ret = ide_stopped; 829 ide_startstop_t ret = ide_stopped;
818 830
@@ -840,15 +852,14 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
840 ide_dma_off_quietly(drive); 852 ide_dma_off_quietly(drive);
841 853
842 /* 854 /*
843 * un-busy drive etc (hwgroup->busy is cleared on return) and 855 * un-busy drive etc and make sure request is sane
844 * make sure request is sane
845 */ 856 */
846 rq = HWGROUP(drive)->rq;
847 857
858 rq = hwif->rq;
848 if (!rq) 859 if (!rq)
849 goto out; 860 goto out;
850 861
851 HWGROUP(drive)->rq = NULL; 862 hwif->rq = NULL;
852 863
853 rq->errors = 0; 864 rq->errors = 0;
854 865
@@ -876,7 +887,7 @@ static void ide_plug_device(ide_drive_t *drive)
876 887
877/** 888/**
878 * ide_timer_expiry - handle lack of an IDE interrupt 889 * ide_timer_expiry - handle lack of an IDE interrupt
879 * @data: timer callback magic (hwgroup) 890 * @data: timer callback magic (hwif)
880 * 891 *
881 * An IDE command has timed out before the expected drive return 892 * An IDE command has timed out before the expected drive return
882 * occurred. At this point we attempt to clean up the current 893 * occurred. At this point we attempt to clean up the current
@@ -890,18 +901,18 @@ static void ide_plug_device(ide_drive_t *drive)
890 901
891void ide_timer_expiry (unsigned long data) 902void ide_timer_expiry (unsigned long data)
892{ 903{
893 ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; 904 ide_hwif_t *hwif = (ide_hwif_t *)data;
894 ide_drive_t *uninitialized_var(drive); 905 ide_drive_t *uninitialized_var(drive);
895 ide_handler_t *handler; 906 ide_handler_t *handler;
896 ide_expiry_t *expiry;
897 unsigned long flags; 907 unsigned long flags;
898 unsigned long wait = -1; 908 unsigned long wait = -1;
899 int plug_device = 0; 909 int plug_device = 0;
900 910
901 spin_lock_irqsave(&hwgroup->lock, flags); 911 spin_lock_irqsave(&hwif->lock, flags);
902 912
903 if (((handler = hwgroup->handler) == NULL) || 913 handler = hwif->handler;
904 (hwgroup->req_gen != hwgroup->req_gen_timer)) { 914
915 if (handler == NULL || hwif->req_gen != hwif->req_gen_timer) {
905 /* 916 /*
906 * Either a marginal timeout occurred 917 * Either a marginal timeout occurred
907 * (got the interrupt just as timer expired), 918 * (got the interrupt just as timer expired),
@@ -909,72 +920,68 @@ void ide_timer_expiry (unsigned long data)
909 * Either way, we don't really want to complain about anything. 920 * Either way, we don't really want to complain about anything.
910 */ 921 */
911 } else { 922 } else {
912 drive = hwgroup->drive; 923 ide_expiry_t *expiry = hwif->expiry;
913 if (!drive) { 924 ide_startstop_t startstop = ide_stopped;
914 printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n"); 925
915 hwgroup->handler = NULL; 926 drive = hwif->cur_dev;
916 } else { 927
917 ide_hwif_t *hwif; 928 if (expiry) {
918 ide_startstop_t startstop = ide_stopped; 929 wait = expiry(drive);
919 930 if (wait > 0) { /* continue */
920 if ((expiry = hwgroup->expiry) != NULL) { 931 /* reset timer */
921 /* continue */ 932 hwif->timer.expires = jiffies + wait;
922 if ((wait = expiry(drive)) > 0) { 933 hwif->req_gen_timer = hwif->req_gen;
923 /* reset timer */ 934 add_timer(&hwif->timer);
924 hwgroup->timer.expires = jiffies + wait; 935 spin_unlock_irqrestore(&hwif->lock, flags);
925 hwgroup->req_gen_timer = hwgroup->req_gen; 936 return;
926 add_timer(&hwgroup->timer);
927 spin_unlock_irqrestore(&hwgroup->lock, flags);
928 return;
929 }
930 }
931 hwgroup->handler = NULL;
932 /*
933 * We need to simulate a real interrupt when invoking
934 * the handler() function, which means we need to
935 * globally mask the specific IRQ:
936 */
937 spin_unlock(&hwgroup->lock);
938 hwif = HWIF(drive);
939 /* disable_irq_nosync ?? */
940 disable_irq(hwif->irq);
941 /* local CPU only,
942 * as if we were handling an interrupt */
943 local_irq_disable();
944 if (hwgroup->polling) {
945 startstop = handler(drive);
946 } else if (drive_is_ready(drive)) {
947 if (drive->waiting_for_dma)
948 hwif->dma_ops->dma_lost_irq(drive);
949 (void)ide_ack_intr(hwif);
950 printk(KERN_WARNING "%s: lost interrupt\n", drive->name);
951 startstop = handler(drive);
952 } else {
953 if (drive->waiting_for_dma) {
954 startstop = ide_dma_timeout_retry(drive, wait);
955 } else
956 startstop =
957 ide_error(drive, "irq timeout",
958 hwif->tp_ops->read_status(hwif));
959 }
960 spin_lock_irq(&hwgroup->lock);
961 enable_irq(hwif->irq);
962 if (startstop == ide_stopped) {
963 ide_unlock_hwgroup(hwgroup);
964 plug_device = 1;
965 } 937 }
966 } 938 }
939 hwif->handler = NULL;
940 /*
941 * We need to simulate a real interrupt when invoking
942 * the handler() function, which means we need to
943 * globally mask the specific IRQ:
944 */
945 spin_unlock(&hwif->lock);
946 /* disable_irq_nosync ?? */
947 disable_irq(hwif->irq);
948 /* local CPU only, as if we were handling an interrupt */
949 local_irq_disable();
950 if (hwif->polling) {
951 startstop = handler(drive);
952 } else if (drive_is_ready(drive)) {
953 if (drive->waiting_for_dma)
954 hwif->dma_ops->dma_lost_irq(drive);
955 (void)ide_ack_intr(hwif);
956 printk(KERN_WARNING "%s: lost interrupt\n",
957 drive->name);
958 startstop = handler(drive);
959 } else {
960 if (drive->waiting_for_dma)
961 startstop = ide_dma_timeout_retry(drive, wait);
962 else
963 startstop = ide_error(drive, "irq timeout",
964 hwif->tp_ops->read_status(hwif));
965 }
966 spin_lock_irq(&hwif->lock);
967 enable_irq(hwif->irq);
968 if (startstop == ide_stopped) {
969 ide_unlock_port(hwif);
970 plug_device = 1;
971 }
967 } 972 }
968 spin_unlock_irqrestore(&hwgroup->lock, flags); 973 spin_unlock_irqrestore(&hwif->lock, flags);
969 974
970 if (plug_device) 975 if (plug_device) {
976 ide_unlock_host(hwif->host);
971 ide_plug_device(drive); 977 ide_plug_device(drive);
978 }
972} 979}
973 980
974/** 981/**
975 * unexpected_intr - handle an unexpected IDE interrupt 982 * unexpected_intr - handle an unexpected IDE interrupt
976 * @irq: interrupt line 983 * @irq: interrupt line
977 * @hwgroup: hwgroup being processed 984 * @hwif: port being processed
978 * 985 *
979 * There's nothing really useful we can do with an unexpected interrupt, 986 * There's nothing really useful we can do with an unexpected interrupt,
980 * other than reading the status register (to clear it), and logging it. 987 * other than reading the status register (to clear it), and logging it.
@@ -998,52 +1005,38 @@ void ide_timer_expiry (unsigned long data)
998 * before completing the issuance of any new drive command, so we will not 1005 * before completing the issuance of any new drive command, so we will not
999 * be accidentally invoked as a result of any valid command completion 1006 * be accidentally invoked as a result of any valid command completion
1000 * interrupt. 1007 * interrupt.
1001 *
1002 * Note that we must walk the entire hwgroup here. We know which hwif
1003 * is doing the current command, but we don't know which hwif burped
1004 * mysteriously.
1005 */ 1008 */
1006
1007static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
1008{
1009 u8 stat;
1010 ide_hwif_t *hwif = hwgroup->hwif;
1011 1009
1012 /* 1010static void unexpected_intr(int irq, ide_hwif_t *hwif)
1013 * handle the unexpected interrupt 1011{
1014 */ 1012 u8 stat = hwif->tp_ops->read_status(hwif);
1015 do { 1013
1016 if (hwif->irq == irq) { 1014 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
1017 stat = hwif->tp_ops->read_status(hwif); 1015 /* Try to not flood the console with msgs */
1018 1016 static unsigned long last_msgtime, count;
1019 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { 1017 ++count;
1020 /* Try to not flood the console with msgs */ 1018
1021 static unsigned long last_msgtime, count; 1019 if (time_after(jiffies, last_msgtime + HZ)) {
1022 ++count; 1020 last_msgtime = jiffies;
1023 if (time_after(jiffies, last_msgtime + HZ)) { 1021 printk(KERN_ERR "%s: unexpected interrupt, "
1024 last_msgtime = jiffies; 1022 "status=0x%02x, count=%ld\n",
1025 printk(KERN_ERR "%s%s: unexpected interrupt, " 1023 hwif->name, stat, count);
1026 "status=0x%02x, count=%ld\n",
1027 hwif->name,
1028 (hwif->next==hwgroup->hwif) ? "" : "(?)", stat, count);
1029 }
1030 }
1031 } 1024 }
1032 } while ((hwif = hwif->next) != hwgroup->hwif); 1025 }
1033} 1026}
1034 1027
1035/** 1028/**
1036 * ide_intr - default IDE interrupt handler 1029 * ide_intr - default IDE interrupt handler
1037 * @irq: interrupt number 1030 * @irq: interrupt number
1038 * @dev_id: hwif group 1031 * @dev_id: hwif
1039 * @regs: unused weirdness from the kernel irq layer 1032 * @regs: unused weirdness from the kernel irq layer
1040 * 1033 *
1041 * This is the default IRQ handler for the IDE layer. You should 1034 * This is the default IRQ handler for the IDE layer. You should
1042 * not need to override it. If you do be aware it is subtle in 1035 * not need to override it. If you do be aware it is subtle in
1043 * places 1036 * places
1044 * 1037 *
1045 * hwgroup->hwif is the interface in the group currently performing 1038 * hwif is the interface in the group currently performing
1046 * a command. hwgroup->drive is the drive and hwgroup->handler is 1039 * a command. hwif->cur_dev is the drive and hwif->handler is
1047 * the IRQ handler to call. As we issue a command the handlers 1040 * the IRQ handler to call. As we issue a command the handlers
1048 * step through multiple states, reassigning the handler to the 1041 * step through multiple states, reassigning the handler to the
1049 * next step in the process. Unlike a smart SCSI controller IDE 1042 * next step in the process. Unlike a smart SCSI controller IDE
@@ -1054,26 +1047,32 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
1054 * 1047 *
1055 * The handler eventually returns ide_stopped to indicate the 1048 * The handler eventually returns ide_stopped to indicate the
1056 * request completed. At this point we issue the next request 1049 * request completed. At this point we issue the next request
1057 * on the hwgroup and the process begins again. 1050 * on the port and the process begins again.
1058 */ 1051 */
1059 1052
1060irqreturn_t ide_intr (int irq, void *dev_id) 1053irqreturn_t ide_intr (int irq, void *dev_id)
1061{ 1054{
1062 unsigned long flags; 1055 ide_hwif_t *hwif = (ide_hwif_t *)dev_id;
1063 ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
1064 ide_hwif_t *hwif = hwgroup->hwif;
1065 ide_drive_t *uninitialized_var(drive); 1056 ide_drive_t *uninitialized_var(drive);
1066 ide_handler_t *handler; 1057 ide_handler_t *handler;
1058 unsigned long flags;
1067 ide_startstop_t startstop; 1059 ide_startstop_t startstop;
1068 irqreturn_t irq_ret = IRQ_NONE; 1060 irqreturn_t irq_ret = IRQ_NONE;
1069 int plug_device = 0; 1061 int plug_device = 0;
1070 1062
1071 spin_lock_irqsave(&hwgroup->lock, flags); 1063 if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
1064 if (hwif != hwif->host->cur_port)
1065 goto out_early;
1066 }
1067
1068 spin_lock_irqsave(&hwif->lock, flags);
1072 1069
1073 if (!ide_ack_intr(hwif)) 1070 if (!ide_ack_intr(hwif))
1074 goto out; 1071 goto out;
1075 1072
1076 if ((handler = hwgroup->handler) == NULL || hwgroup->polling) { 1073 handler = hwif->handler;
1074
1075 if (handler == NULL || hwif->polling) {
1077 /* 1076 /*
1078 * Not expecting an interrupt from this drive. 1077 * Not expecting an interrupt from this drive.
1079 * That means this could be: 1078 * That means this could be:
@@ -1097,7 +1096,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1097 * Probably not a shared PCI interrupt, 1096 * Probably not a shared PCI interrupt,
1098 * so we can safely try to do something about it: 1097 * so we can safely try to do something about it:
1099 */ 1098 */
1100 unexpected_intr(irq, hwgroup); 1099 unexpected_intr(irq, hwif);
1101#ifdef CONFIG_BLK_DEV_IDEPCI 1100#ifdef CONFIG_BLK_DEV_IDEPCI
1102 } else { 1101 } else {
1103 /* 1102 /*
@@ -1110,16 +1109,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1110 goto out; 1109 goto out;
1111 } 1110 }
1112 1111
1113 drive = hwgroup->drive; 1112 drive = hwif->cur_dev;
1114 if (!drive) {
1115 /*
1116 * This should NEVER happen, and there isn't much
1117 * we could do about it here.
1118 *
1119 * [Note - this can occur if the drive is hot unplugged]
1120 */
1121 goto out_handled;
1122 }
1123 1113
1124 if (!drive_is_ready(drive)) 1114 if (!drive_is_ready(drive))
1125 /* 1115 /*
@@ -1131,10 +1121,10 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1131 */ 1121 */
1132 goto out; 1122 goto out;
1133 1123
1134 hwgroup->handler = NULL; 1124 hwif->handler = NULL;
1135 hwgroup->req_gen++; 1125 hwif->req_gen++;
1136 del_timer(&hwgroup->timer); 1126 del_timer(&hwif->timer);
1137 spin_unlock(&hwgroup->lock); 1127 spin_unlock(&hwif->lock);
1138 1128
1139 if (hwif->port_ops && hwif->port_ops->clear_irq) 1129 if (hwif->port_ops && hwif->port_ops->clear_irq)
1140 hwif->port_ops->clear_irq(drive); 1130 hwif->port_ops->clear_irq(drive);
@@ -1145,7 +1135,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1145 /* service this interrupt, may set handler for next interrupt */ 1135 /* service this interrupt, may set handler for next interrupt */
1146 startstop = handler(drive); 1136 startstop = handler(drive);
1147 1137
1148 spin_lock_irq(&hwgroup->lock); 1138 spin_lock_irq(&hwif->lock);
1149 /* 1139 /*
1150 * Note that handler() may have set things up for another 1140 * Note that handler() may have set things up for another
1151 * interrupt to occur soon, but it cannot happen until 1141 * interrupt to occur soon, but it cannot happen until
@@ -1154,20 +1144,18 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1154 * won't allow another of the same (on any CPU) until we return. 1144 * won't allow another of the same (on any CPU) until we return.
1155 */ 1145 */
1156 if (startstop == ide_stopped) { 1146 if (startstop == ide_stopped) {
1157 if (hwgroup->handler == NULL) { /* paranoia */ 1147 BUG_ON(hwif->handler);
1158 ide_unlock_hwgroup(hwgroup); 1148 ide_unlock_port(hwif);
1159 plug_device = 1; 1149 plug_device = 1;
1160 } else
1161 printk(KERN_ERR "%s: %s: huh? expected NULL handler "
1162 "on exit\n", __func__, drive->name);
1163 } 1150 }
1164out_handled:
1165 irq_ret = IRQ_HANDLED; 1151 irq_ret = IRQ_HANDLED;
1166out: 1152out:
1167 spin_unlock_irqrestore(&hwgroup->lock, flags); 1153 spin_unlock_irqrestore(&hwif->lock, flags);
1168 1154out_early:
1169 if (plug_device) 1155 if (plug_device) {
1156 ide_unlock_host(hwif->host);
1170 ide_plug_device(drive); 1157 ide_plug_device(drive);
1158 }
1171 1159
1172 return irq_ret; 1160 return irq_ret;
1173} 1161}
@@ -1189,15 +1177,13 @@ out:
1189 1177
1190void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq) 1178void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
1191{ 1179{
1192 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
1193 struct request_queue *q = drive->queue; 1180 struct request_queue *q = drive->queue;
1194 unsigned long flags; 1181 unsigned long flags;
1195 1182
1196 hwgroup->rq = NULL; 1183 drive->hwif->rq = NULL;
1197 1184
1198 spin_lock_irqsave(q->queue_lock, flags); 1185 spin_lock_irqsave(q->queue_lock, flags);
1199 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); 1186 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
1200 blk_start_queueing(q);
1201 spin_unlock_irqrestore(q->queue_lock, flags); 1187 spin_unlock_irqrestore(q->queue_lock, flags);
1202} 1188}
1203EXPORT_SYMBOL(ide_do_drive_cmd); 1189EXPORT_SYMBOL(ide_do_drive_cmd);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index ad8bd6539283..e728cfe7273f 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -105,15 +105,6 @@ u8 ide_read_altstatus(ide_hwif_t *hwif)
105} 105}
106EXPORT_SYMBOL_GPL(ide_read_altstatus); 106EXPORT_SYMBOL_GPL(ide_read_altstatus);
107 107
108u8 ide_read_sff_dma_status(ide_hwif_t *hwif)
109{
110 if (hwif->host_flags & IDE_HFLAG_MMIO)
111 return readb((void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
112 else
113 return inb(hwif->dma_base + ATA_DMA_STATUS);
114}
115EXPORT_SYMBOL_GPL(ide_read_sff_dma_status);
116
117void ide_set_irq(ide_hwif_t *hwif, int on) 108void ide_set_irq(ide_hwif_t *hwif, int on)
118{ 109{
119 u8 ctl = ATA_DEVCTL_OBS; 110 u8 ctl = ATA_DEVCTL_OBS;
@@ -388,7 +379,6 @@ const struct ide_tp_ops default_tp_ops = {
388 .exec_command = ide_exec_command, 379 .exec_command = ide_exec_command,
389 .read_status = ide_read_status, 380 .read_status = ide_read_status,
390 .read_altstatus = ide_read_altstatus, 381 .read_altstatus = ide_read_altstatus,
391 .read_sff_dma_status = ide_read_sff_dma_status,
392 382
393 .set_irq = ide_set_irq, 383 .set_irq = ide_set_irq,
394 384
@@ -451,7 +441,7 @@ EXPORT_SYMBOL(ide_fixstring);
451 */ 441 */
452int drive_is_ready (ide_drive_t *drive) 442int drive_is_ready (ide_drive_t *drive)
453{ 443{
454 ide_hwif_t *hwif = HWIF(drive); 444 ide_hwif_t *hwif = drive->hwif;
455 u8 stat = 0; 445 u8 stat = 0;
456 446
457 if (drive->waiting_for_dma) 447 if (drive->waiting_for_dma)
@@ -503,7 +493,8 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti
503 stat = tp_ops->read_status(hwif); 493 stat = tp_ops->read_status(hwif);
504 494
505 if (stat & ATA_BUSY) { 495 if (stat & ATA_BUSY) {
506 local_irq_set(flags); 496 local_irq_save(flags);
497 local_irq_enable_in_hardirq();
507 timeout += jiffies; 498 timeout += jiffies;
508 while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { 499 while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
509 if (time_after(jiffies, timeout)) { 500 if (time_after(jiffies, timeout)) {
@@ -822,25 +813,25 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
822static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, 813static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
823 unsigned int timeout, ide_expiry_t *expiry) 814 unsigned int timeout, ide_expiry_t *expiry)
824{ 815{
825 ide_hwgroup_t *hwgroup = HWGROUP(drive); 816 ide_hwif_t *hwif = drive->hwif;
826 817
827 BUG_ON(hwgroup->handler); 818 BUG_ON(hwif->handler);
828 hwgroup->handler = handler; 819 hwif->handler = handler;
829 hwgroup->expiry = expiry; 820 hwif->expiry = expiry;
830 hwgroup->timer.expires = jiffies + timeout; 821 hwif->timer.expires = jiffies + timeout;
831 hwgroup->req_gen_timer = hwgroup->req_gen; 822 hwif->req_gen_timer = hwif->req_gen;
832 add_timer(&hwgroup->timer); 823 add_timer(&hwif->timer);
833} 824}
834 825
835void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, 826void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
836 unsigned int timeout, ide_expiry_t *expiry) 827 unsigned int timeout, ide_expiry_t *expiry)
837{ 828{
838 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; 829 ide_hwif_t *hwif = drive->hwif;
839 unsigned long flags; 830 unsigned long flags;
840 831
841 spin_lock_irqsave(&hwgroup->lock, flags); 832 spin_lock_irqsave(&hwif->lock, flags);
842 __ide_set_handler(drive, handler, timeout, expiry); 833 __ide_set_handler(drive, handler, timeout, expiry);
843 spin_unlock_irqrestore(&hwgroup->lock, flags); 834 spin_unlock_irqrestore(&hwif->lock, flags);
844} 835}
845 836
846EXPORT_SYMBOL(ide_set_handler); 837EXPORT_SYMBOL(ide_set_handler);
@@ -863,10 +854,9 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
863 unsigned timeout, ide_expiry_t *expiry) 854 unsigned timeout, ide_expiry_t *expiry)
864{ 855{
865 ide_hwif_t *hwif = drive->hwif; 856 ide_hwif_t *hwif = drive->hwif;
866 ide_hwgroup_t *hwgroup = hwif->hwgroup;
867 unsigned long flags; 857 unsigned long flags;
868 858
869 spin_lock_irqsave(&hwgroup->lock, flags); 859 spin_lock_irqsave(&hwif->lock, flags);
870 __ide_set_handler(drive, handler, timeout, expiry); 860 __ide_set_handler(drive, handler, timeout, expiry);
871 hwif->tp_ops->exec_command(hwif, cmd); 861 hwif->tp_ops->exec_command(hwif, cmd);
872 /* 862 /*
@@ -876,26 +866,25 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
876 * FIXME: we could skip this delay with care on non shared devices 866 * FIXME: we could skip this delay with care on non shared devices
877 */ 867 */
878 ndelay(400); 868 ndelay(400);
879 spin_unlock_irqrestore(&hwgroup->lock, flags); 869 spin_unlock_irqrestore(&hwif->lock, flags);
880} 870}
881EXPORT_SYMBOL(ide_execute_command); 871EXPORT_SYMBOL(ide_execute_command);
882 872
883void ide_execute_pkt_cmd(ide_drive_t *drive) 873void ide_execute_pkt_cmd(ide_drive_t *drive)
884{ 874{
885 ide_hwif_t *hwif = drive->hwif; 875 ide_hwif_t *hwif = drive->hwif;
886 ide_hwgroup_t *hwgroup = hwif->hwgroup;
887 unsigned long flags; 876 unsigned long flags;
888 877
889 spin_lock_irqsave(&hwgroup->lock, flags); 878 spin_lock_irqsave(&hwif->lock, flags);
890 hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET); 879 hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
891 ndelay(400); 880 ndelay(400);
892 spin_unlock_irqrestore(&hwgroup->lock, flags); 881 spin_unlock_irqrestore(&hwif->lock, flags);
893} 882}
894EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); 883EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
895 884
896static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) 885static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
897{ 886{
898 struct request *rq = drive->hwif->hwgroup->rq; 887 struct request *rq = drive->hwif->rq;
899 888
900 if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) 889 if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET)
901 ide_end_request(drive, err ? err : 1, 0); 890 ide_end_request(drive, err ? err : 1, 0);
@@ -913,7 +902,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *, int);
913static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) 902static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
914{ 903{
915 ide_hwif_t *hwif = drive->hwif; 904 ide_hwif_t *hwif = drive->hwif;
916 ide_hwgroup_t *hwgroup = hwif->hwgroup;
917 u8 stat; 905 u8 stat;
918 906
919 SELECT_DRIVE(drive); 907 SELECT_DRIVE(drive);
@@ -923,20 +911,20 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
923 if (OK_STAT(stat, 0, ATA_BUSY)) 911 if (OK_STAT(stat, 0, ATA_BUSY))
924 printk("%s: ATAPI reset complete\n", drive->name); 912 printk("%s: ATAPI reset complete\n", drive->name);
925 else { 913 else {
926 if (time_before(jiffies, hwgroup->poll_timeout)) { 914 if (time_before(jiffies, hwif->poll_timeout)) {
927 ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL); 915 ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
928 /* continue polling */ 916 /* continue polling */
929 return ide_started; 917 return ide_started;
930 } 918 }
931 /* end of polling */ 919 /* end of polling */
932 hwgroup->polling = 0; 920 hwif->polling = 0;
933 printk("%s: ATAPI reset timed-out, status=0x%02x\n", 921 printk("%s: ATAPI reset timed-out, status=0x%02x\n",
934 drive->name, stat); 922 drive->name, stat);
935 /* do it the old fashioned way */ 923 /* do it the old fashioned way */
936 return do_reset1(drive, 1); 924 return do_reset1(drive, 1);
937 } 925 }
938 /* done polling */ 926 /* done polling */
939 hwgroup->polling = 0; 927 hwif->polling = 0;
940 ide_complete_drive_reset(drive, 0); 928 ide_complete_drive_reset(drive, 0);
941 return ide_stopped; 929 return ide_stopped;
942} 930}
@@ -968,8 +956,7 @@ static void ide_reset_report_error(ide_hwif_t *hwif, u8 err)
968 */ 956 */
969static ide_startstop_t reset_pollfunc (ide_drive_t *drive) 957static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
970{ 958{
971 ide_hwgroup_t *hwgroup = HWGROUP(drive); 959 ide_hwif_t *hwif = drive->hwif;
972 ide_hwif_t *hwif = HWIF(drive);
973 const struct ide_port_ops *port_ops = hwif->port_ops; 960 const struct ide_port_ops *port_ops = hwif->port_ops;
974 u8 tmp; 961 u8 tmp;
975 int err = 0; 962 int err = 0;
@@ -986,7 +973,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
986 tmp = hwif->tp_ops->read_status(hwif); 973 tmp = hwif->tp_ops->read_status(hwif);
987 974
988 if (!OK_STAT(tmp, 0, ATA_BUSY)) { 975 if (!OK_STAT(tmp, 0, ATA_BUSY)) {
989 if (time_before(jiffies, hwgroup->poll_timeout)) { 976 if (time_before(jiffies, hwif->poll_timeout)) {
990 ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); 977 ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
991 /* continue polling */ 978 /* continue polling */
992 return ide_started; 979 return ide_started;
@@ -1007,7 +994,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
1007 } 994 }
1008 } 995 }
1009out: 996out:
1010 hwgroup->polling = 0; /* done polling */ 997 hwif->polling = 0; /* done polling */
1011 ide_complete_drive_reset(drive, err); 998 ide_complete_drive_reset(drive, err);
1012 return ide_stopped; 999 return ide_stopped;
1013} 1000}
@@ -1081,18 +1068,18 @@ static void pre_reset(ide_drive_t *drive)
1081static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) 1068static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1082{ 1069{
1083 ide_hwif_t *hwif = drive->hwif; 1070 ide_hwif_t *hwif = drive->hwif;
1084 ide_hwgroup_t *hwgroup = hwif->hwgroup;
1085 struct ide_io_ports *io_ports = &hwif->io_ports; 1071 struct ide_io_ports *io_ports = &hwif->io_ports;
1086 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 1072 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
1087 const struct ide_port_ops *port_ops; 1073 const struct ide_port_ops *port_ops;
1074 ide_drive_t *tdrive;
1088 unsigned long flags, timeout; 1075 unsigned long flags, timeout;
1089 unsigned int unit; 1076 int i;
1090 DEFINE_WAIT(wait); 1077 DEFINE_WAIT(wait);
1091 1078
1092 spin_lock_irqsave(&hwgroup->lock, flags); 1079 spin_lock_irqsave(&hwif->lock, flags);
1093 1080
1094 /* We must not reset with running handlers */ 1081 /* We must not reset with running handlers */
1095 BUG_ON(hwgroup->handler != NULL); 1082 BUG_ON(hwif->handler != NULL);
1096 1083
1097 /* For an ATAPI device, first try an ATAPI SRST. */ 1084 /* For an ATAPI device, first try an ATAPI SRST. */
1098 if (drive->media != ide_disk && !do_not_try_atapi) { 1085 if (drive->media != ide_disk && !do_not_try_atapi) {
@@ -1101,10 +1088,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1101 udelay (20); 1088 udelay (20);
1102 tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); 1089 tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
1103 ndelay(400); 1090 ndelay(400);
1104 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; 1091 hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
1105 hwgroup->polling = 1; 1092 hwif->polling = 1;
1106 __ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL); 1093 __ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
1107 spin_unlock_irqrestore(&hwgroup->lock, flags); 1094 spin_unlock_irqrestore(&hwif->lock, flags);
1108 return ide_started; 1095 return ide_started;
1109 } 1096 }
1110 1097
@@ -1114,9 +1101,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1114 1101
1115 prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE); 1102 prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE);
1116 timeout = jiffies; 1103 timeout = jiffies;
1117 for (unit = 0; unit < MAX_DRIVES; unit++) { 1104 ide_port_for_each_dev(i, tdrive, hwif) {
1118 ide_drive_t *tdrive = &hwif->drives[unit];
1119
1120 if (tdrive->dev_flags & IDE_DFLAG_PRESENT && 1105 if (tdrive->dev_flags & IDE_DFLAG_PRESENT &&
1121 tdrive->dev_flags & IDE_DFLAG_PARKED && 1106 tdrive->dev_flags & IDE_DFLAG_PARKED &&
1122 time_after(tdrive->sleep, timeout)) 1107 time_after(tdrive->sleep, timeout))
@@ -1127,9 +1112,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1127 if (time_before_eq(timeout, now)) 1112 if (time_before_eq(timeout, now))
1128 break; 1113 break;
1129 1114
1130 spin_unlock_irqrestore(&hwgroup->lock, flags); 1115 spin_unlock_irqrestore(&hwif->lock, flags);
1131 timeout = schedule_timeout_uninterruptible(timeout - now); 1116 timeout = schedule_timeout_uninterruptible(timeout - now);
1132 spin_lock_irqsave(&hwgroup->lock, flags); 1117 spin_lock_irqsave(&hwif->lock, flags);
1133 } while (timeout); 1118 } while (timeout);
1134 finish_wait(&ide_park_wq, &wait); 1119 finish_wait(&ide_park_wq, &wait);
1135 1120
@@ -1137,11 +1122,11 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1137 * First, reset any device state data we were maintaining 1122 * First, reset any device state data we were maintaining
1138 * for any of the drives on this interface. 1123 * for any of the drives on this interface.
1139 */ 1124 */
1140 for (unit = 0; unit < MAX_DRIVES; ++unit) 1125 ide_port_for_each_dev(i, tdrive, hwif)
1141 pre_reset(&hwif->drives[unit]); 1126 pre_reset(tdrive);
1142 1127
1143 if (io_ports->ctl_addr == 0) { 1128 if (io_ports->ctl_addr == 0) {
1144 spin_unlock_irqrestore(&hwgroup->lock, flags); 1129 spin_unlock_irqrestore(&hwif->lock, flags);
1145 ide_complete_drive_reset(drive, -ENXIO); 1130 ide_complete_drive_reset(drive, -ENXIO);
1146 return ide_stopped; 1131 return ide_stopped;
1147 } 1132 }
@@ -1164,8 +1149,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1164 tp_ops->set_irq(hwif, drive->quirk_list == 2); 1149 tp_ops->set_irq(hwif, drive->quirk_list == 2);
1165 /* more than enough time */ 1150 /* more than enough time */
1166 udelay(10); 1151 udelay(10);
1167 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; 1152 hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
1168 hwgroup->polling = 1; 1153 hwif->polling = 1;
1169 __ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); 1154 __ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
1170 1155
1171 /* 1156 /*
@@ -1177,7 +1162,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1177 if (port_ops && port_ops->resetproc) 1162 if (port_ops && port_ops->resetproc)
1178 port_ops->resetproc(drive); 1163 port_ops->resetproc(drive);
1179 1164
1180 spin_unlock_irqrestore(&hwgroup->lock, flags); 1165 spin_unlock_irqrestore(&hwif->lock, flags);
1181 return ide_started; 1166 return ide_started;
1182} 1167}
1183 1168
@@ -1221,6 +1206,3 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout)
1221 } 1206 }
1222 return -EBUSY; 1207 return -EBUSY;
1223} 1208}
1224
1225EXPORT_SYMBOL_GPL(ide_wait_not_busy);
1226
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 9f6e33d8a8b2..09526a0de734 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -273,7 +273,7 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate)
273 273
274static void ide_dump_opcode(ide_drive_t *drive) 274static void ide_dump_opcode(ide_drive_t *drive)
275{ 275{
276 struct request *rq = drive->hwif->hwgroup->rq; 276 struct request *rq = drive->hwif->rq;
277 ide_task_t *task = NULL; 277 ide_task_t *task = NULL;
278 278
279 if (!rq) 279 if (!rq)
@@ -346,10 +346,13 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
346 printk(KERN_CONT "}"); 346 printk(KERN_CONT "}");
347 if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK || 347 if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK ||
348 (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) { 348 (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) {
349 struct request *rq = drive->hwif->rq;
350
349 ide_dump_sector(drive); 351 ide_dump_sector(drive);
350 if (HWGROUP(drive) && HWGROUP(drive)->rq) 352
353 if (rq)
351 printk(KERN_CONT ", sector=%llu", 354 printk(KERN_CONT ", sector=%llu",
352 (unsigned long long)HWGROUP(drive)->rq->sector); 355 (unsigned long long)rq->sector);
353 } 356 }
354 printk(KERN_CONT "\n"); 357 printk(KERN_CONT "\n");
355} 358}
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 678454ac2483..c875a957596c 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -7,22 +7,22 @@ DECLARE_WAIT_QUEUE_HEAD(ide_park_wq);
7 7
8static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) 8static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
9{ 9{
10 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; 10 ide_hwif_t *hwif = drive->hwif;
11 struct request_queue *q = drive->queue; 11 struct request_queue *q = drive->queue;
12 struct request *rq; 12 struct request *rq;
13 int rc; 13 int rc;
14 14
15 timeout += jiffies; 15 timeout += jiffies;
16 spin_lock_irq(&hwgroup->lock); 16 spin_lock_irq(&hwif->lock);
17 if (drive->dev_flags & IDE_DFLAG_PARKED) { 17 if (drive->dev_flags & IDE_DFLAG_PARKED) {
18 int reset_timer = time_before(timeout, drive->sleep); 18 int reset_timer = time_before(timeout, drive->sleep);
19 int start_queue = 0; 19 int start_queue = 0;
20 20
21 drive->sleep = timeout; 21 drive->sleep = timeout;
22 wake_up_all(&ide_park_wq); 22 wake_up_all(&ide_park_wq);
23 if (reset_timer && del_timer(&hwgroup->timer)) 23 if (reset_timer && del_timer(&hwif->timer))
24 start_queue = 1; 24 start_queue = 1;
25 spin_unlock_irq(&hwgroup->lock); 25 spin_unlock_irq(&hwif->lock);
26 26
27 if (start_queue) { 27 if (start_queue) {
28 spin_lock_irq(q->queue_lock); 28 spin_lock_irq(q->queue_lock);
@@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
31 } 31 }
32 return; 32 return;
33 } 33 }
34 spin_unlock_irq(&hwgroup->lock); 34 spin_unlock_irq(&hwif->lock);
35 35
36 rq = blk_get_request(q, READ, __GFP_WAIT); 36 rq = blk_get_request(q, READ, __GFP_WAIT);
37 rq->cmd[0] = REQ_PARK_HEADS; 37 rq->cmd[0] = REQ_PARK_HEADS;
@@ -64,21 +64,21 @@ ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
64 char *buf) 64 char *buf)
65{ 65{
66 ide_drive_t *drive = to_ide_device(dev); 66 ide_drive_t *drive = to_ide_device(dev);
67 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; 67 ide_hwif_t *hwif = drive->hwif;
68 unsigned long now; 68 unsigned long now;
69 unsigned int msecs; 69 unsigned int msecs;
70 70
71 if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD) 71 if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
72 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
73 73
74 spin_lock_irq(&hwgroup->lock); 74 spin_lock_irq(&hwif->lock);
75 now = jiffies; 75 now = jiffies;
76 if (drive->dev_flags & IDE_DFLAG_PARKED && 76 if (drive->dev_flags & IDE_DFLAG_PARKED &&
77 time_after(drive->sleep, now)) 77 time_after(drive->sleep, now))
78 msecs = jiffies_to_msecs(drive->sleep - now); 78 msecs = jiffies_to_msecs(drive->sleep - now);
79 else 79 else
80 msecs = 0; 80 msecs = 0;
81 spin_unlock_irq(&hwgroup->lock); 81 spin_unlock_irq(&hwif->lock);
82 82
83 return snprintf(buf, 20, "%u\n", msecs); 83 return snprintf(buf, 20, "%u\n", msecs);
84} 84}
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 8282c6086e6a..4b3bf6a06b70 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -5,7 +5,7 @@
5int generic_ide_suspend(struct device *dev, pm_message_t mesg) 5int generic_ide_suspend(struct device *dev, pm_message_t mesg)
6{ 6{
7 ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive); 7 ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
8 ide_hwif_t *hwif = HWIF(drive); 8 ide_hwif_t *hwif = drive->hwif;
9 struct request *rq; 9 struct request *rq;
10 struct request_pm_state rqpm; 10 struct request_pm_state rqpm;
11 ide_task_t args; 11 ide_task_t args;
@@ -39,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
39int generic_ide_resume(struct device *dev) 39int generic_ide_resume(struct device *dev)
40{ 40{
41 ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive); 41 ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
42 ide_hwif_t *hwif = HWIF(drive); 42 ide_hwif_t *hwif = drive->hwif;
43 struct request *rq; 43 struct request *rq;
44 struct request_pm_state rqpm; 44 struct request_pm_state rqpm;
45 ide_task_t args; 45 ide_task_t args;
@@ -67,7 +67,7 @@ int generic_ide_resume(struct device *dev)
67 blk_put_request(rq); 67 blk_put_request(rq);
68 68
69 if (err == 0 && dev->driver) { 69 if (err == 0 && dev->driver) {
70 ide_driver_t *drv = to_ide_driver(dev->driver); 70 struct ide_driver *drv = to_ide_driver(dev->driver);
71 71
72 if (drv->resume) 72 if (drv->resume)
73 drv->resume(drive); 73 drv->resume(drive);
@@ -194,7 +194,7 @@ void ide_complete_pm_request(ide_drive_t *drive, struct request *rq)
194 } 194 }
195 spin_unlock_irqrestore(q->queue_lock, flags); 195 spin_unlock_irqrestore(q->queue_lock, flags);
196 196
197 drive->hwif->hwgroup->rq = NULL; 197 drive->hwif->rq = NULL;
198 198
199 if (blk_end_request(rq, 0, 0)) 199 if (blk_end_request(rq, 0, 0))
200 BUG(); 200 BUG();
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c5adb7b9c5b5..0ccbb4459fb9 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -189,7 +189,7 @@ static void ide_classify_atapi_dev(ide_drive_t *drive)
189 189
190static void do_identify(ide_drive_t *drive, u8 cmd) 190static void do_identify(ide_drive_t *drive, u8 cmd)
191{ 191{
192 ide_hwif_t *hwif = HWIF(drive); 192 ide_hwif_t *hwif = drive->hwif;
193 u16 *id = drive->id; 193 u16 *id = drive->id;
194 char *m = (char *)&id[ATA_ID_PROD]; 194 char *m = (char *)&id[ATA_ID_PROD];
195 unsigned long flags; 195 unsigned long flags;
@@ -266,7 +266,7 @@ err_misc:
266 266
267static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) 267static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
268{ 268{
269 ide_hwif_t *hwif = HWIF(drive); 269 ide_hwif_t *hwif = drive->hwif;
270 struct ide_io_ports *io_ports = &hwif->io_ports; 270 struct ide_io_ports *io_ports = &hwif->io_ports;
271 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 271 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
272 int use_altstatus = 0, rc; 272 int use_altstatus = 0, rc;
@@ -341,7 +341,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
341 341
342static int try_to_identify (ide_drive_t *drive, u8 cmd) 342static int try_to_identify (ide_drive_t *drive, u8 cmd)
343{ 343{
344 ide_hwif_t *hwif = HWIF(drive); 344 ide_hwif_t *hwif = drive->hwif;
345 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 345 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
346 int retval; 346 int retval;
347 int autoprobe = 0; 347 int autoprobe = 0;
@@ -438,7 +438,7 @@ static u8 ide_read_device(ide_drive_t *drive)
438 438
439static int do_probe (ide_drive_t *drive, u8 cmd) 439static int do_probe (ide_drive_t *drive, u8 cmd)
440{ 440{
441 ide_hwif_t *hwif = HWIF(drive); 441 ide_hwif_t *hwif = drive->hwif;
442 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 442 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
443 int rc; 443 int rc;
444 u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat; 444 u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat;
@@ -463,7 +463,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
463 if (ide_read_device(drive) != drive->select && present == 0) { 463 if (ide_read_device(drive) != drive->select && present == 0) {
464 if (drive->dn & 1) { 464 if (drive->dn & 1) {
465 /* exit with drive0 selected */ 465 /* exit with drive0 selected */
466 SELECT_DRIVE(&hwif->drives[0]); 466 SELECT_DRIVE(hwif->devices[0]);
467 /* allow ATA_BUSY to assert & clear */ 467 /* allow ATA_BUSY to assert & clear */
468 msleep(50); 468 msleep(50);
469 } 469 }
@@ -509,7 +509,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
509 } 509 }
510 if (drive->dn & 1) { 510 if (drive->dn & 1) {
511 /* exit with drive0 selected */ 511 /* exit with drive0 selected */
512 SELECT_DRIVE(&hwif->drives[0]); 512 SELECT_DRIVE(hwif->devices[0]);
513 msleep(50); 513 msleep(50);
514 /* ensure drive irq is clear */ 514 /* ensure drive irq is clear */
515 (void)tp_ops->read_status(hwif); 515 (void)tp_ops->read_status(hwif);
@@ -522,7 +522,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
522 */ 522 */
523static void enable_nest (ide_drive_t *drive) 523static void enable_nest (ide_drive_t *drive)
524{ 524{
525 ide_hwif_t *hwif = HWIF(drive); 525 ide_hwif_t *hwif = drive->hwif;
526 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 526 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
527 u8 stat; 527 u8 stat;
528 528
@@ -697,7 +697,8 @@ out:
697 697
698static int ide_port_wait_ready(ide_hwif_t *hwif) 698static int ide_port_wait_ready(ide_hwif_t *hwif)
699{ 699{
700 int unit, rc; 700 ide_drive_t *drive;
701 int i, rc;
701 702
702 printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name); 703 printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name);
703 704
@@ -714,9 +715,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
714 return rc; 715 return rc;
715 716
716 /* Now make sure both master & slave are ready */ 717 /* Now make sure both master & slave are ready */
717 for (unit = 0; unit < MAX_DRIVES; unit++) { 718 ide_port_for_each_dev(i, drive, hwif) {
718 ide_drive_t *drive = &hwif->drives[unit];
719
720 /* Ignore disks that we will not probe for later. */ 719 /* Ignore disks that we will not probe for later. */
721 if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 || 720 if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 ||
722 (drive->dev_flags & IDE_DFLAG_PRESENT)) { 721 (drive->dev_flags & IDE_DFLAG_PRESENT)) {
@@ -732,8 +731,8 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
732 } 731 }
733out: 732out:
734 /* Exit function with master reselected (let's be sane) */ 733 /* Exit function with master reselected (let's be sane) */
735 if (unit) 734 if (i)
736 SELECT_DRIVE(&hwif->drives[0]); 735 SELECT_DRIVE(hwif->devices[0]);
737 736
738 return rc; 737 return rc;
739} 738}
@@ -749,7 +748,7 @@ out:
749 748
750void ide_undecoded_slave(ide_drive_t *dev1) 749void ide_undecoded_slave(ide_drive_t *dev1)
751{ 750{
752 ide_drive_t *dev0 = &dev1->hwif->drives[0]; 751 ide_drive_t *dev0 = dev1->hwif->devices[0];
753 752
754 if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0) 753 if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0)
755 return; 754 return;
@@ -778,14 +777,15 @@ EXPORT_SYMBOL_GPL(ide_undecoded_slave);
778 777
779static int ide_probe_port(ide_hwif_t *hwif) 778static int ide_probe_port(ide_hwif_t *hwif)
780{ 779{
780 ide_drive_t *drive;
781 unsigned long flags; 781 unsigned long flags;
782 unsigned int irqd; 782 unsigned int irqd;
783 int unit, rc = -ENODEV; 783 int i, rc = -ENODEV;
784 784
785 BUG_ON(hwif->present); 785 BUG_ON(hwif->present);
786 786
787 if ((hwif->drives[0].dev_flags & IDE_DFLAG_NOPROBE) && 787 if ((hwif->devices[0]->dev_flags & IDE_DFLAG_NOPROBE) &&
788 (hwif->drives[1].dev_flags & IDE_DFLAG_NOPROBE)) 788 (hwif->devices[1]->dev_flags & IDE_DFLAG_NOPROBE))
789 return -EACCES; 789 return -EACCES;
790 790
791 /* 791 /*
@@ -796,7 +796,8 @@ static int ide_probe_port(ide_hwif_t *hwif)
796 if (irqd) 796 if (irqd)
797 disable_irq(hwif->irq); 797 disable_irq(hwif->irq);
798 798
799 local_irq_set(flags); 799 local_irq_save(flags);
800 local_irq_enable_in_hardirq();
800 801
801 if (ide_port_wait_ready(hwif) == -EBUSY) 802 if (ide_port_wait_ready(hwif) == -EBUSY)
802 printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name); 803 printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
@@ -805,9 +806,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
805 * Second drive should only exist if first drive was found, 806 * Second drive should only exist if first drive was found,
806 * but a lot of cdrom drives are configured as single slaves. 807 * but a lot of cdrom drives are configured as single slaves.
807 */ 808 */
808 for (unit = 0; unit < MAX_DRIVES; ++unit) { 809 ide_port_for_each_dev(i, drive, hwif) {
809 ide_drive_t *drive = &hwif->drives[unit];
810
811 (void) probe_for_drive(drive); 810 (void) probe_for_drive(drive);
812 if (drive->dev_flags & IDE_DFLAG_PRESENT) 811 if (drive->dev_flags & IDE_DFLAG_PRESENT)
813 rc = 0; 812 rc = 0;
@@ -828,20 +827,17 @@ static int ide_probe_port(ide_hwif_t *hwif)
828static void ide_port_tune_devices(ide_hwif_t *hwif) 827static void ide_port_tune_devices(ide_hwif_t *hwif)
829{ 828{
830 const struct ide_port_ops *port_ops = hwif->port_ops; 829 const struct ide_port_ops *port_ops = hwif->port_ops;
831 int unit; 830 ide_drive_t *drive;
832 831 int i;
833 for (unit = 0; unit < MAX_DRIVES; unit++) {
834 ide_drive_t *drive = &hwif->drives[unit];
835 832
833 ide_port_for_each_dev(i, drive, hwif) {
836 if (drive->dev_flags & IDE_DFLAG_PRESENT) { 834 if (drive->dev_flags & IDE_DFLAG_PRESENT) {
837 if (port_ops && port_ops->quirkproc) 835 if (port_ops && port_ops->quirkproc)
838 port_ops->quirkproc(drive); 836 port_ops->quirkproc(drive);
839 } 837 }
840 } 838 }
841 839
842 for (unit = 0; unit < MAX_DRIVES; ++unit) { 840 ide_port_for_each_dev(i, drive, hwif) {
843 ide_drive_t *drive = &hwif->drives[unit];
844
845 if (drive->dev_flags & IDE_DFLAG_PRESENT) { 841 if (drive->dev_flags & IDE_DFLAG_PRESENT) {
846 ide_set_max_pio(drive); 842 ide_set_max_pio(drive);
847 843
@@ -852,11 +848,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
852 } 848 }
853 } 849 }
854 850
855 for (unit = 0; unit < MAX_DRIVES; ++unit) { 851 ide_port_for_each_dev(i, drive, hwif) {
856 ide_drive_t *drive = &hwif->drives[unit]; 852 if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT)
857
858 if ((hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) ||
859 drive->id[ATA_ID_DWORD_IO])
860 drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT; 853 drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT;
861 else 854 else
862 drive->dev_flags &= ~IDE_DFLAG_NO_IO_32BIT; 855 drive->dev_flags &= ~IDE_DFLAG_NO_IO_32BIT;
@@ -869,7 +862,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
869static int ide_init_queue(ide_drive_t *drive) 862static int ide_init_queue(ide_drive_t *drive)
870{ 863{
871 struct request_queue *q; 864 struct request_queue *q;
872 ide_hwif_t *hwif = HWIF(drive); 865 ide_hwif_t *hwif = drive->hwif;
873 int max_sectors = 256; 866 int max_sectors = 256;
874 int max_sg_entries = PRD_ENTRIES; 867 int max_sg_entries = PRD_ENTRIES;
875 868
@@ -918,36 +911,19 @@ static int ide_init_queue(ide_drive_t *drive)
918 return 0; 911 return 0;
919} 912}
920 913
921static void ide_add_drive_to_hwgroup(ide_drive_t *drive) 914static DEFINE_MUTEX(ide_cfg_mtx);
922{
923 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
924
925 spin_lock_irq(&hwgroup->lock);
926 if (!hwgroup->drive) {
927 /* first drive for hwgroup. */
928 drive->next = drive;
929 hwgroup->drive = drive;
930 hwgroup->hwif = HWIF(hwgroup->drive);
931 } else {
932 drive->next = hwgroup->drive->next;
933 hwgroup->drive->next = drive;
934 }
935 spin_unlock_irq(&hwgroup->lock);
936}
937 915
938/* 916/*
939 * For any present drive: 917 * For any present drive:
940 * - allocate the block device queue 918 * - allocate the block device queue
941 * - link drive into the hwgroup
942 */ 919 */
943static int ide_port_setup_devices(ide_hwif_t *hwif) 920static int ide_port_setup_devices(ide_hwif_t *hwif)
944{ 921{
922 ide_drive_t *drive;
945 int i, j = 0; 923 int i, j = 0;
946 924
947 mutex_lock(&ide_cfg_mtx); 925 mutex_lock(&ide_cfg_mtx);
948 for (i = 0; i < MAX_DRIVES; i++) { 926 ide_port_for_each_dev(i, drive, hwif) {
949 ide_drive_t *drive = &hwif->drives[i];
950
951 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 927 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
952 continue; 928 continue;
953 929
@@ -961,139 +937,39 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
961 } 937 }
962 938
963 j++; 939 j++;
964
965 ide_add_drive_to_hwgroup(drive);
966 } 940 }
967 mutex_unlock(&ide_cfg_mtx); 941 mutex_unlock(&ide_cfg_mtx);
968 942
969 return j; 943 return j;
970} 944}
971 945
972static ide_hwif_t *ide_ports[MAX_HWIFS];
973
974void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
975{
976 ide_hwgroup_t *hwgroup = hwif->hwgroup;
977
978 ide_ports[hwif->index] = NULL;
979
980 spin_lock_irq(&hwgroup->lock);
981 /*
982 * Remove us from the hwgroup, and free
983 * the hwgroup if we were the only member
984 */
985 if (hwif->next == hwif) {
986 BUG_ON(hwgroup->hwif != hwif);
987 kfree(hwgroup);
988 } else {
989 /* There is another interface in hwgroup.
990 * Unlink us, and set hwgroup->drive and ->hwif to
991 * something sane.
992 */
993 ide_hwif_t *g = hwgroup->hwif;
994
995 while (g->next != hwif)
996 g = g->next;
997 g->next = hwif->next;
998 if (hwgroup->hwif == hwif) {
999 /* Chose a random hwif for hwgroup->hwif.
1000 * It's guaranteed that there are no drives
1001 * left in the hwgroup.
1002 */
1003 BUG_ON(hwgroup->drive != NULL);
1004 hwgroup->hwif = g;
1005 }
1006 BUG_ON(hwgroup->hwif == hwif);
1007 }
1008 spin_unlock_irq(&hwgroup->lock);
1009}
1010
1011/* 946/*
1012 * This routine sets up the irq for an ide interface, and creates a new 947 * This routine sets up the IRQ for an IDE interface.
1013 * hwgroup for the irq/hwif if none was previously assigned.
1014 *
1015 * Much of the code is for correctly detecting/handling irq sharing
1016 * and irq serialization situations. This is somewhat complex because
1017 * it handles static as well as dynamic (PCMCIA) IDE interfaces.
1018 */ 948 */
1019static int init_irq (ide_hwif_t *hwif) 949static int init_irq (ide_hwif_t *hwif)
1020{ 950{
1021 struct ide_io_ports *io_ports = &hwif->io_ports; 951 struct ide_io_ports *io_ports = &hwif->io_ports;
1022 unsigned int index; 952 int sa = 0;
1023 ide_hwgroup_t *hwgroup;
1024 ide_hwif_t *match = NULL;
1025 953
1026 mutex_lock(&ide_cfg_mtx); 954 mutex_lock(&ide_cfg_mtx);
1027 hwif->hwgroup = NULL; 955 spin_lock_init(&hwif->lock);
1028 956
1029 for (index = 0; index < MAX_HWIFS; index++) { 957 init_timer(&hwif->timer);
1030 ide_hwif_t *h = ide_ports[index]; 958 hwif->timer.function = &ide_timer_expiry;
959 hwif->timer.data = (unsigned long)hwif;
1031 960
1032 if (h && h->hwgroup) { /* scan only initialized ports */
1033 if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
1034 if (hwif->host == h->host)
1035 match = h;
1036 }
1037 }
1038 }
1039
1040 /*
1041 * If we are still without a hwgroup, then form a new one
1042 */
1043 if (match) {
1044 hwgroup = match->hwgroup;
1045 hwif->hwgroup = hwgroup;
1046 /*
1047 * Link us into the hwgroup.
1048 * This must be done early, do ensure that unexpected_intr
1049 * can find the hwif and prevent irq storms.
1050 * No drives are attached to the new hwif, choose_drive
1051 * can't do anything stupid (yet).
1052 * Add ourself as the 2nd entry to the hwgroup->hwif
1053 * linked list, the first entry is the hwif that owns
1054 * hwgroup->handler - do not change that.
1055 */
1056 spin_lock_irq(&hwgroup->lock);
1057 hwif->next = hwgroup->hwif->next;
1058 hwgroup->hwif->next = hwif;
1059 BUG_ON(hwif->next == hwif);
1060 spin_unlock_irq(&hwgroup->lock);
1061 } else {
1062 hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
1063 hwif_to_node(hwif));
1064 if (hwgroup == NULL)
1065 goto out_up;
1066
1067 spin_lock_init(&hwgroup->lock);
1068
1069 hwif->hwgroup = hwgroup;
1070 hwgroup->hwif = hwif->next = hwif;
1071
1072 init_timer(&hwgroup->timer);
1073 hwgroup->timer.function = &ide_timer_expiry;
1074 hwgroup->timer.data = (unsigned long) hwgroup;
1075 }
1076
1077 ide_ports[hwif->index] = hwif;
1078
1079 /*
1080 * Allocate the irq, if not already obtained for another hwif
1081 */
1082 if (!match || match->irq != hwif->irq) {
1083 int sa = 0;
1084#if defined(__mc68000__) 961#if defined(__mc68000__)
1085 sa = IRQF_SHARED; 962 sa = IRQF_SHARED;
1086#endif /* __mc68000__ */ 963#endif /* __mc68000__ */
1087 964
1088 if (hwif->chipset == ide_pci) 965 if (hwif->chipset == ide_pci)
1089 sa = IRQF_SHARED; 966 sa = IRQF_SHARED;
1090 967
1091 if (io_ports->ctl_addr) 968 if (io_ports->ctl_addr)
1092 hwif->tp_ops->set_irq(hwif, 1); 969 hwif->tp_ops->set_irq(hwif, 1);
1093 970
1094 if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup)) 971 if (request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwif))
1095 goto out_unlink; 972 goto out_up;
1096 }
1097 973
1098 if (!hwif->rqsize) { 974 if (!hwif->rqsize) {
1099 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) || 975 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
@@ -1111,14 +987,12 @@ static int init_irq (ide_hwif_t *hwif)
1111 printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name, 987 printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name,
1112 io_ports->data_addr, hwif->irq); 988 io_ports->data_addr, hwif->irq);
1113#endif /* __mc68000__ */ 989#endif /* __mc68000__ */
1114 if (match) 990 if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
1115 printk(KERN_CONT " (serialized with %s)", match->name); 991 printk(KERN_CONT " (serialized)");
1116 printk(KERN_CONT "\n"); 992 printk(KERN_CONT "\n");
1117 993
1118 mutex_unlock(&ide_cfg_mtx); 994 mutex_unlock(&ide_cfg_mtx);
1119 return 0; 995 return 0;
1120out_unlink:
1121 ide_remove_port_from_hwgroup(hwif);
1122out_up: 996out_up:
1123 mutex_unlock(&ide_cfg_mtx); 997 mutex_unlock(&ide_cfg_mtx);
1124 return 1; 998 return 1;
@@ -1134,7 +1008,7 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data)
1134{ 1008{
1135 ide_hwif_t *hwif = data; 1009 ide_hwif_t *hwif = data;
1136 int unit = *part >> PARTN_BITS; 1010 int unit = *part >> PARTN_BITS;
1137 ide_drive_t *drive = &hwif->drives[unit]; 1011 ide_drive_t *drive = hwif->devices[unit];
1138 1012
1139 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 1013 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
1140 return NULL; 1014 return NULL;
@@ -1196,47 +1070,23 @@ void ide_init_disk(struct gendisk *disk, ide_drive_t *drive)
1196 1070
1197EXPORT_SYMBOL_GPL(ide_init_disk); 1071EXPORT_SYMBOL_GPL(ide_init_disk);
1198 1072
1199static void ide_remove_drive_from_hwgroup(ide_drive_t *drive)
1200{
1201 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
1202
1203 if (drive == drive->next) {
1204 /* special case: last drive from hwgroup. */
1205 BUG_ON(hwgroup->drive != drive);
1206 hwgroup->drive = NULL;
1207 } else {
1208 ide_drive_t *walk;
1209
1210 walk = hwgroup->drive;
1211 while (walk->next != drive)
1212 walk = walk->next;
1213 walk->next = drive->next;
1214 if (hwgroup->drive == drive) {
1215 hwgroup->drive = drive->next;
1216 hwgroup->hwif = hwgroup->drive->hwif;
1217 }
1218 }
1219 BUG_ON(hwgroup->drive == drive);
1220}
1221
1222static void drive_release_dev (struct device *dev) 1073static void drive_release_dev (struct device *dev)
1223{ 1074{
1224 ide_drive_t *drive = container_of(dev, ide_drive_t, gendev); 1075 ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
1225 ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; 1076 ide_hwif_t *hwif = drive->hwif;
1226 1077
1227 ide_proc_unregister_device(drive); 1078 ide_proc_unregister_device(drive);
1228 1079
1229 spin_lock_irq(&hwgroup->lock); 1080 spin_lock_irq(&hwif->lock);
1230 ide_remove_drive_from_hwgroup(drive);
1231 kfree(drive->id); 1081 kfree(drive->id);
1232 drive->id = NULL; 1082 drive->id = NULL;
1233 drive->dev_flags &= ~IDE_DFLAG_PRESENT; 1083 drive->dev_flags &= ~IDE_DFLAG_PRESENT;
1234 /* Messed up locking ... */ 1084 /* Messed up locking ... */
1235 spin_unlock_irq(&hwgroup->lock); 1085 spin_unlock_irq(&hwif->lock);
1236 blk_cleanup_queue(drive->queue); 1086 blk_cleanup_queue(drive->queue);
1237 spin_lock_irq(&hwgroup->lock); 1087 spin_lock_irq(&hwif->lock);
1238 drive->queue = NULL; 1088 drive->queue = NULL;
1239 spin_unlock_irq(&hwgroup->lock); 1089 spin_unlock_irq(&hwif->lock);
1240 1090
1241 complete(&drive->gendev_rel_comp); 1091 complete(&drive->gendev_rel_comp);
1242} 1092}
@@ -1302,10 +1152,10 @@ out:
1302 1152
1303static void hwif_register_devices(ide_hwif_t *hwif) 1153static void hwif_register_devices(ide_hwif_t *hwif)
1304{ 1154{
1155 ide_drive_t *drive;
1305 unsigned int i; 1156 unsigned int i;
1306 1157
1307 for (i = 0; i < MAX_DRIVES; i++) { 1158 ide_port_for_each_dev(i, drive, hwif) {
1308 ide_drive_t *drive = &hwif->drives[i];
1309 struct device *dev = &drive->gendev; 1159 struct device *dev = &drive->gendev;
1310 int ret; 1160 int ret;
1311 1161
@@ -1328,11 +1178,10 @@ static void hwif_register_devices(ide_hwif_t *hwif)
1328static void ide_port_init_devices(ide_hwif_t *hwif) 1178static void ide_port_init_devices(ide_hwif_t *hwif)
1329{ 1179{
1330 const struct ide_port_ops *port_ops = hwif->port_ops; 1180 const struct ide_port_ops *port_ops = hwif->port_ops;
1181 ide_drive_t *drive;
1331 int i; 1182 int i;
1332 1183
1333 for (i = 0; i < MAX_DRIVES; i++) { 1184 ide_port_for_each_dev(i, drive, hwif) {
1334 ide_drive_t *drive = &hwif->drives[i];
1335
1336 drive->dn = i + hwif->channel * 2; 1185 drive->dn = i + hwif->channel * 2;
1337 1186
1338 if (hwif->host_flags & IDE_HFLAG_IO_32BIT) 1187 if (hwif->host_flags & IDE_HFLAG_IO_32BIT)
@@ -1380,6 +1229,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
1380 if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { 1229 if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) {
1381 int rc; 1230 int rc;
1382 1231
1232 hwif->dma_ops = d->dma_ops;
1233
1383 if (d->init_dma) 1234 if (d->init_dma)
1384 rc = d->init_dma(hwif, d); 1235 rc = d->init_dma(hwif, d);
1385 else 1236 else
@@ -1387,12 +1238,13 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
1387 1238
1388 if (rc < 0) { 1239 if (rc < 0) {
1389 printk(KERN_INFO "%s: DMA disabled\n", hwif->name); 1240 printk(KERN_INFO "%s: DMA disabled\n", hwif->name);
1241
1242 hwif->dma_ops = NULL;
1390 hwif->dma_base = 0; 1243 hwif->dma_base = 0;
1391 hwif->swdma_mask = 0; 1244 hwif->swdma_mask = 0;
1392 hwif->mwdma_mask = 0; 1245 hwif->mwdma_mask = 0;
1393 hwif->ultra_mask = 0; 1246 hwif->ultra_mask = 0;
1394 } else if (d->dma_ops) 1247 }
1395 hwif->dma_ops = d->dma_ops;
1396 } 1248 }
1397 1249
1398 if ((d->host_flags & IDE_HFLAG_SERIALIZE) || 1250 if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
@@ -1417,6 +1269,66 @@ static void ide_port_cable_detect(ide_hwif_t *hwif)
1417 } 1269 }
1418} 1270}
1419 1271
1272static const u8 ide_hwif_to_major[] =
1273 { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR,
1274 IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
1275
1276static void ide_port_init_devices_data(ide_hwif_t *hwif)
1277{
1278 ide_drive_t *drive;
1279 int i;
1280
1281 ide_port_for_each_dev(i, drive, hwif) {
1282 u8 j = (hwif->index * MAX_DRIVES) + i;
1283
1284 memset(drive, 0, sizeof(*drive));
1285
1286 drive->media = ide_disk;
1287 drive->select = (i << 4) | ATA_DEVICE_OBS;
1288 drive->hwif = hwif;
1289 drive->ready_stat = ATA_DRDY;
1290 drive->bad_wstat = BAD_W_STAT;
1291 drive->special.b.recalibrate = 1;
1292 drive->special.b.set_geometry = 1;
1293 drive->name[0] = 'h';
1294 drive->name[1] = 'd';
1295 drive->name[2] = 'a' + j;
1296 drive->max_failures = IDE_DEFAULT_MAX_FAILURES;
1297
1298 INIT_LIST_HEAD(&drive->list);
1299 init_completion(&drive->gendev_rel_comp);
1300 }
1301}
1302
1303static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
1304{
1305 /* fill in any non-zero initial values */
1306 hwif->index = index;
1307 hwif->major = ide_hwif_to_major[index];
1308
1309 hwif->name[0] = 'i';
1310 hwif->name[1] = 'd';
1311 hwif->name[2] = 'e';
1312 hwif->name[3] = '0' + index;
1313
1314 init_completion(&hwif->gendev_rel_comp);
1315
1316 hwif->tp_ops = &default_tp_ops;
1317
1318 ide_port_init_devices_data(hwif);
1319}
1320
1321static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
1322{
1323 memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
1324 hwif->irq = hw->irq;
1325 hwif->chipset = hw->chipset;
1326 hwif->dev = hw->dev;
1327 hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
1328 hwif->ack_intr = hw->ack_intr;
1329 hwif->config_data = hw->config;
1330}
1331
1420static unsigned int ide_indexes; 1332static unsigned int ide_indexes;
1421 1333
1422/** 1334/**
@@ -1466,12 +1378,43 @@ static void ide_free_port_slot(int idx)
1466 mutex_unlock(&ide_cfg_mtx); 1378 mutex_unlock(&ide_cfg_mtx);
1467} 1379}
1468 1380
1381static void ide_port_free_devices(ide_hwif_t *hwif)
1382{
1383 ide_drive_t *drive;
1384 int i;
1385
1386 ide_port_for_each_dev(i, drive, hwif)
1387 kfree(drive);
1388}
1389
1390static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
1391{
1392 int i;
1393
1394 for (i = 0; i < MAX_DRIVES; i++) {
1395 ide_drive_t *drive;
1396
1397 drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
1398 if (drive == NULL)
1399 goto out_nomem;
1400
1401 hwif->devices[i] = drive;
1402 }
1403 return 0;
1404
1405out_nomem:
1406 ide_port_free_devices(hwif);
1407 return -ENOMEM;
1408}
1409
1469struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) 1410struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
1470{ 1411{
1471 struct ide_host *host; 1412 struct ide_host *host;
1413 struct device *dev = hws[0] ? hws[0]->dev : NULL;
1414 int node = dev ? dev_to_node(dev) : -1;
1472 int i; 1415 int i;
1473 1416
1474 host = kzalloc(sizeof(*host), GFP_KERNEL); 1417 host = kzalloc_node(sizeof(*host), GFP_KERNEL, node);
1475 if (host == NULL) 1418 if (host == NULL)
1476 return NULL; 1419 return NULL;
1477 1420
@@ -1482,10 +1425,15 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
1482 if (hws[i] == NULL) 1425 if (hws[i] == NULL)
1483 continue; 1426 continue;
1484 1427
1485 hwif = kzalloc(sizeof(*hwif), GFP_KERNEL); 1428 hwif = kzalloc_node(sizeof(*hwif), GFP_KERNEL, node);
1486 if (hwif == NULL) 1429 if (hwif == NULL)
1487 continue; 1430 continue;
1488 1431
1432 if (ide_port_alloc_devices(hwif, node) < 0) {
1433 kfree(hwif);
1434 continue;
1435 }
1436
1489 idx = ide_find_port_slot(d); 1437 idx = ide_find_port_slot(d);
1490 if (idx < 0) { 1438 if (idx < 0) {
1491 printk(KERN_ERR "%s: no free slot for interface\n", 1439 printk(KERN_ERR "%s: no free slot for interface\n",
@@ -1507,8 +1455,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
1507 return NULL; 1455 return NULL;
1508 } 1456 }
1509 1457
1510 if (hws[0]) 1458 host->dev[0] = dev;
1511 host->dev[0] = hws[0]->dev;
1512 1459
1513 if (d) { 1460 if (d) {
1514 host->init_chipset = d->init_chipset; 1461 host->init_chipset = d->init_chipset;
@@ -1525,9 +1472,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1525 ide_hwif_t *hwif, *mate = NULL; 1472 ide_hwif_t *hwif, *mate = NULL;
1526 int i, j = 0; 1473 int i, j = 0;
1527 1474
1528 for (i = 0; i < MAX_HOST_PORTS; i++) { 1475 ide_host_for_each_port(i, hwif, host) {
1529 hwif = host->ports[i];
1530
1531 if (hwif == NULL) { 1476 if (hwif == NULL) {
1532 mate = NULL; 1477 mate = NULL;
1533 continue; 1478 continue;
@@ -1553,9 +1498,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1553 ide_port_init_devices(hwif); 1498 ide_port_init_devices(hwif);
1554 } 1499 }
1555 1500
1556 for (i = 0; i < MAX_HOST_PORTS; i++) { 1501 ide_host_for_each_port(i, hwif, host) {
1557 hwif = host->ports[i];
1558
1559 if (hwif == NULL) 1502 if (hwif == NULL)
1560 continue; 1503 continue;
1561 1504
@@ -1570,9 +1513,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1570 ide_port_tune_devices(hwif); 1513 ide_port_tune_devices(hwif);
1571 } 1514 }
1572 1515
1573 for (i = 0; i < MAX_HOST_PORTS; i++) { 1516 ide_host_for_each_port(i, hwif, host) {
1574 hwif = host->ports[i];
1575
1576 if (hwif == NULL) 1517 if (hwif == NULL)
1577 continue; 1518 continue;
1578 1519
@@ -1597,9 +1538,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1597 ide_acpi_port_init_devices(hwif); 1538 ide_acpi_port_init_devices(hwif);
1598 } 1539 }
1599 1540
1600 for (i = 0; i < MAX_HOST_PORTS; i++) { 1541 ide_host_for_each_port(i, hwif, host) {
1601 hwif = host->ports[i];
1602
1603 if (hwif == NULL) 1542 if (hwif == NULL)
1604 continue; 1543 continue;
1605 1544
@@ -1607,9 +1546,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1607 hwif_register_devices(hwif); 1546 hwif_register_devices(hwif);
1608 } 1547 }
1609 1548
1610 for (i = 0; i < MAX_HOST_PORTS; i++) { 1549 ide_host_for_each_port(i, hwif, host) {
1611 hwif = host->ports[i];
1612
1613 if (hwif == NULL) 1550 if (hwif == NULL)
1614 continue; 1551 continue;
1615 1552
@@ -1647,17 +1584,85 @@ int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
1647} 1584}
1648EXPORT_SYMBOL_GPL(ide_host_add); 1585EXPORT_SYMBOL_GPL(ide_host_add);
1649 1586
1587static void __ide_port_unregister_devices(ide_hwif_t *hwif)
1588{
1589 ide_drive_t *drive;
1590 int i;
1591
1592 ide_port_for_each_dev(i, drive, hwif) {
1593 if (drive->dev_flags & IDE_DFLAG_PRESENT) {
1594 device_unregister(&drive->gendev);
1595 wait_for_completion(&drive->gendev_rel_comp);
1596 }
1597 }
1598}
1599
1600void ide_port_unregister_devices(ide_hwif_t *hwif)
1601{
1602 mutex_lock(&ide_cfg_mtx);
1603 __ide_port_unregister_devices(hwif);
1604 hwif->present = 0;
1605 ide_port_init_devices_data(hwif);
1606 mutex_unlock(&ide_cfg_mtx);
1607}
1608EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
1609
1610/**
1611 * ide_unregister - free an IDE interface
1612 * @hwif: IDE interface
1613 *
1614 * Perform the final unregister of an IDE interface.
1615 *
1616 * Locking:
1617 * The caller must not hold the IDE locks.
1618 *
1619 * It is up to the caller to be sure there is no pending I/O here,
1620 * and that the interface will not be reopened (present/vanishing
1621 * locking isn't yet done BTW).
1622 */
1623
1624static void ide_unregister(ide_hwif_t *hwif)
1625{
1626 BUG_ON(in_interrupt());
1627 BUG_ON(irqs_disabled());
1628
1629 mutex_lock(&ide_cfg_mtx);
1630
1631 if (hwif->present) {
1632 __ide_port_unregister_devices(hwif);
1633 hwif->present = 0;
1634 }
1635
1636 ide_proc_unregister_port(hwif);
1637
1638 free_irq(hwif->irq, hwif);
1639
1640 device_unregister(hwif->portdev);
1641 device_unregister(&hwif->gendev);
1642 wait_for_completion(&hwif->gendev_rel_comp);
1643
1644 /*
1645 * Remove us from the kernel's knowledge
1646 */
1647 blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
1648 kfree(hwif->sg_table);
1649 unregister_blkdev(hwif->major, hwif->name);
1650
1651 ide_release_dma_engine(hwif);
1652
1653 mutex_unlock(&ide_cfg_mtx);
1654}
1655
1650void ide_host_free(struct ide_host *host) 1656void ide_host_free(struct ide_host *host)
1651{ 1657{
1652 ide_hwif_t *hwif; 1658 ide_hwif_t *hwif;
1653 int i; 1659 int i;
1654 1660
1655 for (i = 0; i < MAX_HOST_PORTS; i++) { 1661 ide_host_for_each_port(i, hwif, host) {
1656 hwif = host->ports[i];
1657
1658 if (hwif == NULL) 1662 if (hwif == NULL)
1659 continue; 1663 continue;
1660 1664
1665 ide_port_free_devices(hwif);
1661 ide_free_port_slot(hwif->index); 1666 ide_free_port_slot(hwif->index);
1662 kfree(hwif); 1667 kfree(hwif);
1663 } 1668 }
@@ -1668,11 +1673,12 @@ EXPORT_SYMBOL_GPL(ide_host_free);
1668 1673
1669void ide_host_remove(struct ide_host *host) 1674void ide_host_remove(struct ide_host *host)
1670{ 1675{
1676 ide_hwif_t *hwif;
1671 int i; 1677 int i;
1672 1678
1673 for (i = 0; i < MAX_HOST_PORTS; i++) { 1679 ide_host_for_each_port(i, hwif, host) {
1674 if (host->ports[i]) 1680 if (hwif)
1675 ide_unregister(host->ports[i]); 1681 ide_unregister(hwif);
1676 } 1682 }
1677 1683
1678 ide_host_free(host); 1684 ide_host_free(host);
@@ -1691,8 +1697,8 @@ void ide_port_scan(ide_hwif_t *hwif)
1691 hwif->present = 1; 1697 hwif->present = 1;
1692 1698
1693 ide_port_tune_devices(hwif); 1699 ide_port_tune_devices(hwif);
1694 ide_acpi_port_init_devices(hwif);
1695 ide_port_setup_devices(hwif); 1700 ide_port_setup_devices(hwif);
1701 ide_acpi_port_init_devices(hwif);
1696 hwif_register_devices(hwif); 1702 hwif_register_devices(hwif);
1697 ide_proc_port_register_devices(hwif); 1703 ide_proc_port_register_devices(hwif);
1698} 1704}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index a14e2938e4f3..1d8978b3314a 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -439,13 +439,13 @@ static int proc_ide_read_dmodel
439static int proc_ide_read_driver 439static int proc_ide_read_driver
440 (char *page, char **start, off_t off, int count, int *eof, void *data) 440 (char *page, char **start, off_t off, int count, int *eof, void *data)
441{ 441{
442 ide_drive_t *drive = (ide_drive_t *) data; 442 ide_drive_t *drive = (ide_drive_t *)data;
443 struct device *dev = &drive->gendev; 443 struct device *dev = &drive->gendev;
444 ide_driver_t *ide_drv; 444 struct ide_driver *ide_drv;
445 int len; 445 int len;
446 446
447 if (dev->driver) { 447 if (dev->driver) {
448 ide_drv = container_of(dev->driver, ide_driver_t, gen_driver); 448 ide_drv = to_ide_driver(dev->driver);
449 len = sprintf(page, "%s version %s\n", 449 len = sprintf(page, "%s version %s\n",
450 dev->driver->name, ide_drv->version); 450 dev->driver->name, ide_drv->version);
451 } else 451 } else
@@ -555,7 +555,7 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
555 } 555 }
556} 556}
557 557
558void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) 558void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver)
559{ 559{
560 mutex_lock(&ide_setting_mtx); 560 mutex_lock(&ide_setting_mtx);
561 drive->settings = driver->proc_devsets(drive); 561 drive->settings = driver->proc_devsets(drive);
@@ -577,7 +577,7 @@ EXPORT_SYMBOL(ide_proc_register_driver);
577 * Takes ide_setting_mtx. 577 * Takes ide_setting_mtx.
578 */ 578 */
579 579
580void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) 580void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver)
581{ 581{
582 ide_remove_proc_entries(drive->proc, driver->proc_entries(drive)); 582 ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
583 583
@@ -593,14 +593,13 @@ EXPORT_SYMBOL(ide_proc_unregister_driver);
593 593
594void ide_proc_port_register_devices(ide_hwif_t *hwif) 594void ide_proc_port_register_devices(ide_hwif_t *hwif)
595{ 595{
596 int d;
597 struct proc_dir_entry *ent; 596 struct proc_dir_entry *ent;
598 struct proc_dir_entry *parent = hwif->proc; 597 struct proc_dir_entry *parent = hwif->proc;
598 ide_drive_t *drive;
599 char name[64]; 599 char name[64];
600 int i;
600 601
601 for (d = 0; d < MAX_DRIVES; d++) { 602 ide_port_for_each_dev(i, drive, hwif) {
602 ide_drive_t *drive = &hwif->drives[d];
603
604 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc) 603 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc)
605 continue; 604 continue;
606 605
@@ -653,7 +652,7 @@ void ide_proc_unregister_port(ide_hwif_t *hwif)
653 652
654static int proc_print_driver(struct device_driver *drv, void *data) 653static int proc_print_driver(struct device_driver *drv, void *data)
655{ 654{
656 ide_driver_t *ide_drv = container_of(drv, ide_driver_t, gen_driver); 655 struct ide_driver *ide_drv = to_ide_driver(drv);
657 struct seq_file *s = data; 656 struct seq_file *s = data;
658 657
659 seq_printf(s, "%s version %s\n", drv->name, ide_drv->version); 658 seq_printf(s, "%s version %s\n", drv->name, ide_drv->version);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 5d2aa22cd6e4..d7ecd3c79757 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -166,10 +166,10 @@ struct idetape_bh {
166 * to an interrupt or a timer event is stored in the struct defined below. 166 * to an interrupt or a timer event is stored in the struct defined below.
167 */ 167 */
168typedef struct ide_tape_obj { 168typedef struct ide_tape_obj {
169 ide_drive_t *drive; 169 ide_drive_t *drive;
170 ide_driver_t *driver; 170 struct ide_driver *driver;
171 struct gendisk *disk; 171 struct gendisk *disk;
172 struct kref kref; 172 struct kref kref;
173 173
174 /* 174 /*
175 * failed_pc points to the last failed packet command, or contains 175 * failed_pc points to the last failed packet command, or contains
@@ -479,7 +479,7 @@ static void ide_tape_kfree_buffer(idetape_tape_t *tape)
479 479
480static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects) 480static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects)
481{ 481{
482 struct request *rq = HWGROUP(drive)->rq; 482 struct request *rq = drive->hwif->rq;
483 idetape_tape_t *tape = drive->driver_data; 483 idetape_tape_t *tape = drive->driver_data;
484 unsigned long flags; 484 unsigned long flags;
485 int error; 485 int error;
@@ -531,7 +531,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc)
531 printk(KERN_ERR "ide-tape: Error in REQUEST SENSE " 531 printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
532 "itself - Aborting request!\n"); 532 "itself - Aborting request!\n");
533 } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) { 533 } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
534 struct request *rq = drive->hwif->hwgroup->rq; 534 struct request *rq = drive->hwif->rq;
535 int blocks = pc->xferred / tape->blk_size; 535 int blocks = pc->xferred / tape->blk_size;
536 536
537 tape->avg_size += blocks * tape->blk_size; 537 tape->avg_size += blocks * tape->blk_size;
@@ -576,7 +576,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc)
576 576
577/* 577/*
578 * Postpone the current request so that ide.c will be able to service requests 578 * Postpone the current request so that ide.c will be able to service requests
579 * from another device on the same hwgroup while we are polling for DSC. 579 * from another device on the same port while we are polling for DSC.
580 */ 580 */
581static void idetape_postpone_request(ide_drive_t *drive) 581static void idetape_postpone_request(ide_drive_t *drive)
582{ 582{
@@ -584,7 +584,8 @@ static void idetape_postpone_request(ide_drive_t *drive)
584 584
585 debug_log(DBG_PROCS, "Enter %s\n", __func__); 585 debug_log(DBG_PROCS, "Enter %s\n", __func__);
586 586
587 tape->postponed_rq = HWGROUP(drive)->rq; 587 tape->postponed_rq = drive->hwif->rq;
588
588 ide_stall_queue(drive, tape->dsc_poll_freq); 589 ide_stall_queue(drive, tape->dsc_poll_freq);
589} 590}
590 591
@@ -2312,7 +2313,7 @@ static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
2312 2313
2313static int ide_tape_probe(ide_drive_t *); 2314static int ide_tape_probe(ide_drive_t *);
2314 2315
2315static ide_driver_t idetape_driver = { 2316static struct ide_driver idetape_driver = {
2316 .gen_driver = { 2317 .gen_driver = {
2317 .owner = THIS_MODULE, 2318 .owner = THIS_MODULE,
2318 .name = "ide-tape", 2319 .name = "ide-tape",
@@ -2323,7 +2324,6 @@ static ide_driver_t idetape_driver = {
2323 .version = IDETAPE_VERSION, 2324 .version = IDETAPE_VERSION,
2324 .do_request = idetape_do_request, 2325 .do_request = idetape_do_request,
2325 .end_request = idetape_end_request, 2326 .end_request = idetape_end_request,
2326 .error = __ide_error,
2327#ifdef CONFIG_IDE_PROC_FS 2327#ifdef CONFIG_IDE_PROC_FS
2328 .proc_entries = ide_tape_proc_entries, 2328 .proc_entries = ide_tape_proc_entries,
2329 .proc_devsets = ide_tape_proc_devsets, 2329 .proc_devsets = ide_tape_proc_devsets,
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index bf4fb9d8d176..16138bce84a7 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -58,7 +58,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *);
58 58
59ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) 59ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
60{ 60{
61 ide_hwif_t *hwif = HWIF(drive); 61 ide_hwif_t *hwif = drive->hwif;
62 struct ide_taskfile *tf = &task->tf; 62 struct ide_taskfile *tf = &task->tf;
63 ide_handler_t *handler = NULL; 63 ide_handler_t *handler = NULL;
64 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 64 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
@@ -309,9 +309,9 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
309 } 309 }
310 310
311 if (sectors > 0) { 311 if (sectors > 0) {
312 ide_driver_t *drv; 312 struct ide_driver *drv;
313 313
314 drv = *(ide_driver_t **)rq->rq_disk->private_data; 314 drv = *(struct ide_driver **)rq->rq_disk->private_data;
315 drv->end_request(drive, 1, sectors); 315 drv->end_request(drive, 1, sectors);
316 } 316 }
317 } 317 }
@@ -328,9 +328,9 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
328 } 328 }
329 329
330 if (rq->rq_disk) { 330 if (rq->rq_disk) {
331 ide_driver_t *drv; 331 struct ide_driver *drv;
332 332
333 drv = *(ide_driver_t **)rq->rq_disk->private_data;; 333 drv = *(struct ide_driver **)rq->rq_disk->private_data;;
334 drv->end_request(drive, 1, rq->nr_sectors); 334 drv->end_request(drive, 1, rq->nr_sectors);
335 } else 335 } else
336 ide_end_request(drive, 1, rq->nr_sectors); 336 ide_end_request(drive, 1, rq->nr_sectors);
@@ -361,7 +361,7 @@ static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq
361static ide_startstop_t task_in_intr(ide_drive_t *drive) 361static ide_startstop_t task_in_intr(ide_drive_t *drive)
362{ 362{
363 ide_hwif_t *hwif = drive->hwif; 363 ide_hwif_t *hwif = drive->hwif;
364 struct request *rq = hwif->hwgroup->rq; 364 struct request *rq = hwif->rq;
365 u8 stat = hwif->tp_ops->read_status(hwif); 365 u8 stat = hwif->tp_ops->read_status(hwif);
366 366
367 /* Error? */ 367 /* Error? */
@@ -395,7 +395,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive)
395static ide_startstop_t task_out_intr (ide_drive_t *drive) 395static ide_startstop_t task_out_intr (ide_drive_t *drive)
396{ 396{
397 ide_hwif_t *hwif = drive->hwif; 397 ide_hwif_t *hwif = drive->hwif;
398 struct request *rq = HWGROUP(drive)->rq; 398 struct request *rq = hwif->rq;
399 u8 stat = hwif->tp_ops->read_status(hwif); 399 u8 stat = hwif->tp_ops->read_status(hwif);
400 400
401 if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat)) 401 if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat))
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 46a2d4ca812b..258805da15c3 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -60,179 +60,8 @@
60#include <linux/completion.h> 60#include <linux/completion.h>
61#include <linux/device.h> 61#include <linux/device.h>
62 62
63
64/* default maximum number of failures */
65#define IDE_DEFAULT_MAX_FAILURES 1
66
67struct class *ide_port_class; 63struct class *ide_port_class;
68 64
69static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
70 IDE2_MAJOR, IDE3_MAJOR,
71 IDE4_MAJOR, IDE5_MAJOR,
72 IDE6_MAJOR, IDE7_MAJOR,
73 IDE8_MAJOR, IDE9_MAJOR };
74
75DEFINE_MUTEX(ide_cfg_mtx);
76
77static void ide_port_init_devices_data(ide_hwif_t *);
78
79/*
80 * Do not even *think* about calling this!
81 */
82void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
83{
84 /* bulk initialize hwif & drive info with zeros */
85 memset(hwif, 0, sizeof(ide_hwif_t));
86
87 /* fill in any non-zero initial values */
88 hwif->index = index;
89 hwif->major = ide_hwif_to_major[index];
90
91 hwif->name[0] = 'i';
92 hwif->name[1] = 'd';
93 hwif->name[2] = 'e';
94 hwif->name[3] = '0' + index;
95
96 init_completion(&hwif->gendev_rel_comp);
97
98 hwif->tp_ops = &default_tp_ops;
99
100 ide_port_init_devices_data(hwif);
101}
102
103static void ide_port_init_devices_data(ide_hwif_t *hwif)
104{
105 int unit;
106
107 for (unit = 0; unit < MAX_DRIVES; ++unit) {
108 ide_drive_t *drive = &hwif->drives[unit];
109 u8 j = (hwif->index * MAX_DRIVES) + unit;
110
111 memset(drive, 0, sizeof(*drive));
112
113 drive->media = ide_disk;
114 drive->select = (unit << 4) | ATA_DEVICE_OBS;
115 drive->hwif = hwif;
116 drive->ready_stat = ATA_DRDY;
117 drive->bad_wstat = BAD_W_STAT;
118 drive->special.b.recalibrate = 1;
119 drive->special.b.set_geometry = 1;
120 drive->name[0] = 'h';
121 drive->name[1] = 'd';
122 drive->name[2] = 'a' + j;
123 drive->max_failures = IDE_DEFAULT_MAX_FAILURES;
124
125 INIT_LIST_HEAD(&drive->list);
126 init_completion(&drive->gendev_rel_comp);
127 }
128}
129
130static void __ide_port_unregister_devices(ide_hwif_t *hwif)
131{
132 int i;
133
134 for (i = 0; i < MAX_DRIVES; i++) {
135 ide_drive_t *drive = &hwif->drives[i];
136
137 if (drive->dev_flags & IDE_DFLAG_PRESENT) {
138 device_unregister(&drive->gendev);
139 wait_for_completion(&drive->gendev_rel_comp);
140 }
141 }
142}
143
144void ide_port_unregister_devices(ide_hwif_t *hwif)
145{
146 mutex_lock(&ide_cfg_mtx);
147 __ide_port_unregister_devices(hwif);
148 hwif->present = 0;
149 ide_port_init_devices_data(hwif);
150 mutex_unlock(&ide_cfg_mtx);
151}
152EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
153
154/**
155 * ide_unregister - free an IDE interface
156 * @hwif: IDE interface
157 *
158 * Perform the final unregister of an IDE interface. At the moment
159 * we don't refcount interfaces so this will also get split up.
160 *
161 * Locking:
162 * The caller must not hold the IDE locks
163 * The drive present/vanishing is not yet properly locked
164 * Take care with the callbacks. These have been split to avoid
165 * deadlocking the IDE layer. The shutdown callback is called
166 * before we take the lock and free resources. It is up to the
167 * caller to be sure there is no pending I/O here, and that
168 * the interface will not be reopened (present/vanishing locking
169 * isn't yet done BTW). After we commit to the final kill we
170 * call the cleanup callback with the ide locks held.
171 *
172 * Unregister restores the hwif structures to the default state.
173 * This is raving bonkers.
174 */
175
176void ide_unregister(ide_hwif_t *hwif)
177{
178 ide_hwif_t *g;
179 ide_hwgroup_t *hwgroup;
180 int irq_count = 0;
181
182 BUG_ON(in_interrupt());
183 BUG_ON(irqs_disabled());
184
185 mutex_lock(&ide_cfg_mtx);
186
187 if (hwif->present) {
188 __ide_port_unregister_devices(hwif);
189 hwif->present = 0;
190 }
191
192 ide_proc_unregister_port(hwif);
193
194 hwgroup = hwif->hwgroup;
195 /*
196 * free the irq if we were the only hwif using it
197 */
198 g = hwgroup->hwif;
199 do {
200 if (g->irq == hwif->irq)
201 ++irq_count;
202 g = g->next;
203 } while (g != hwgroup->hwif);
204 if (irq_count == 1)
205 free_irq(hwif->irq, hwgroup);
206
207 ide_remove_port_from_hwgroup(hwif);
208
209 device_unregister(hwif->portdev);
210 device_unregister(&hwif->gendev);
211 wait_for_completion(&hwif->gendev_rel_comp);
212
213 /*
214 * Remove us from the kernel's knowledge
215 */
216 blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
217 kfree(hwif->sg_table);
218 unregister_blkdev(hwif->major, hwif->name);
219
220 ide_release_dma_engine(hwif);
221
222 mutex_unlock(&ide_cfg_mtx);
223}
224
225void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
226{
227 memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
228 hwif->irq = hw->irq;
229 hwif->chipset = hw->chipset;
230 hwif->dev = hw->dev;
231 hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
232 hwif->ack_intr = hw->ack_intr;
233 hwif->config_data = hw->config;
234}
235
236/* 65/*
237 * Locks for IDE setting functionality 66 * Locks for IDE setting functionality
238 */ 67 */
@@ -330,7 +159,6 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
330static int set_pio_mode(ide_drive_t *drive, int arg) 159static int set_pio_mode(ide_drive_t *drive, int arg)
331{ 160{
332 ide_hwif_t *hwif = drive->hwif; 161 ide_hwif_t *hwif = drive->hwif;
333 ide_hwgroup_t *hwgroup = hwif->hwgroup;
334 const struct ide_port_ops *port_ops = hwif->port_ops; 162 const struct ide_port_ops *port_ops = hwif->port_ops;
335 163
336 if (arg < 0 || arg > 255) 164 if (arg < 0 || arg > 255)
@@ -345,9 +173,9 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
345 unsigned long flags; 173 unsigned long flags;
346 174
347 /* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */ 175 /* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
348 spin_lock_irqsave(&hwgroup->lock, flags); 176 spin_lock_irqsave(&hwif->lock, flags);
349 port_ops->set_pio_mode(drive, arg); 177 port_ops->set_pio_mode(drive, arg);
350 spin_unlock_irqrestore(&hwgroup->lock, flags); 178 spin_unlock_irqrestore(&hwif->lock, flags);
351 } else 179 } else
352 port_ops->set_pio_mode(drive, arg); 180 port_ops->set_pio_mode(drive, arg);
353 } else { 181 } else {
@@ -453,7 +281,7 @@ static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
453static int generic_ide_probe(struct device *dev) 281static int generic_ide_probe(struct device *dev)
454{ 282{
455 ide_drive_t *drive = to_ide_device(dev); 283 ide_drive_t *drive = to_ide_device(dev);
456 ide_driver_t *drv = to_ide_driver(dev->driver); 284 struct ide_driver *drv = to_ide_driver(dev->driver);
457 285
458 return drv->probe ? drv->probe(drive) : -ENODEV; 286 return drv->probe ? drv->probe(drive) : -ENODEV;
459} 287}
@@ -461,7 +289,7 @@ static int generic_ide_probe(struct device *dev)
461static int generic_ide_remove(struct device *dev) 289static int generic_ide_remove(struct device *dev)
462{ 290{
463 ide_drive_t *drive = to_ide_device(dev); 291 ide_drive_t *drive = to_ide_device(dev);
464 ide_driver_t *drv = to_ide_driver(dev->driver); 292 struct ide_driver *drv = to_ide_driver(dev->driver);
465 293
466 if (drv->remove) 294 if (drv->remove)
467 drv->remove(drive); 295 drv->remove(drive);
@@ -472,7 +300,7 @@ static int generic_ide_remove(struct device *dev)
472static void generic_ide_shutdown(struct device *dev) 300static void generic_ide_shutdown(struct device *dev)
473{ 301{
474 ide_drive_t *drive = to_ide_device(dev); 302 ide_drive_t *drive = to_ide_device(dev);
475 ide_driver_t *drv = to_ide_driver(dev->driver); 303 struct ide_driver *drv = to_ide_driver(dev->driver);
476 304
477 if (dev->driver && drv->shutdown) 305 if (dev->driver && drv->shutdown)
478 drv->shutdown(drive); 306 drv->shutdown(drive);
@@ -660,6 +488,7 @@ MODULE_PARM_DESC(ignore_cable, "ignore cable detection");
660 488
661void ide_port_apply_params(ide_hwif_t *hwif) 489void ide_port_apply_params(ide_hwif_t *hwif)
662{ 490{
491 ide_drive_t *drive;
663 int i; 492 int i;
664 493
665 if (ide_ignore_cable & (1 << hwif->index)) { 494 if (ide_ignore_cable & (1 << hwif->index)) {
@@ -668,8 +497,8 @@ void ide_port_apply_params(ide_hwif_t *hwif)
668 hwif->cbl = ATA_CBL_PATA40_SHORT; 497 hwif->cbl = ATA_CBL_PATA40_SHORT;
669 } 498 }
670 499
671 for (i = 0; i < MAX_DRIVES; i++) 500 ide_port_for_each_dev(i, drive, hwif)
672 ide_dev_apply_params(&hwif->drives[i], i); 501 ide_dev_apply_params(drive, i);
673} 502}
674 503
675/* 504/*
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
new file mode 100644
index 000000000000..e021078cd06b
--- /dev/null
+++ b/drivers/ide/it8172.c
@@ -0,0 +1,166 @@
1/*
2 *
3 * BRIEF MODULE DESCRIPTION
4 * IT8172 IDE controller support
5 *
6 * Copyright (C) 2000 MontaVista Software Inc.
7 * Copyright (C) 2008 Shane McDonald
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version.
13 *
14 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
15 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
16 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
17 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
20 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, write to the Free Software Foundation, Inc.,
27 * 675 Mass Ave, Cambridge, MA 02139, USA.
28 */
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/kernel.h>
33#include <linux/ioport.h>
34#include <linux/pci.h>
35#include <linux/ide.h>
36#include <linux/init.h>
37
38#define DRV_NAME "IT8172"
39
40static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
41{
42 ide_hwif_t *hwif = drive->hwif;
43 struct pci_dev *dev = to_pci_dev(hwif->dev);
44 u16 drive_enables;
45 u32 drive_timing;
46
47 /*
48 * The highest value of DIOR/DIOW pulse width and recovery time
49 * that can be set in the IT8172 is 8 PCI clock cycles. As a result,
50 * it cannot be configured for PIO mode 0. This table sets these
51 * parameters to the maximum supported by the IT8172.
52 */
53 static const u8 timings[] = { 0x3f, 0x3c, 0x1b, 0x12, 0x0a };
54
55 pci_read_config_word(dev, 0x40, &drive_enables);
56 pci_read_config_dword(dev, 0x44, &drive_timing);
57
58 /*
59 * Enable port 0x44. The IT8172 spec is confused; it calls
60 * this register the "Slave IDE Timing Register", but in fact,
61 * it controls timing for both master and slave drives.
62 */
63 drive_enables |= 0x4000;
64
65 drive_enables &= drive->dn ? 0xc006 : 0xc060;
66 if (drive->media == ide_disk)
67 /* enable prefetch */
68 drive_enables |= 0x0004 << (drive->dn * 4);
69 if (ata_id_has_iordy(drive->id))
70 /* enable IORDY sample-point */
71 drive_enables |= 0x0002 << (drive->dn * 4);
72
73 drive_timing &= drive->dn ? 0x00003f00 : 0x000fc000;
74 drive_timing |= timings[pio] << (drive->dn * 6 + 8);
75
76 pci_write_config_word(dev, 0x40, drive_enables);
77 pci_write_config_dword(dev, 0x44, drive_timing);
78}
79
80static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
81{
82 ide_hwif_t *hwif = drive->hwif;
83 struct pci_dev *dev = to_pci_dev(hwif->dev);
84 int a_speed = 3 << (drive->dn * 4);
85 int u_flag = 1 << drive->dn;
86 int u_speed = 0;
87 u8 reg48, reg4a;
88
89 pci_read_config_byte(dev, 0x48, &reg48);
90 pci_read_config_byte(dev, 0x4a, &reg4a);
91
92 if (speed >= XFER_UDMA_0) {
93 u8 udma = speed - XFER_UDMA_0;
94 u_speed = udma << (drive->dn * 4);
95
96 pci_write_config_byte(dev, 0x48, reg48 | u_flag);
97 reg4a &= ~a_speed;
98 pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
99 } else {
100 const u8 mwdma_to_pio[] = { 0, 3, 4 };
101 u8 pio;
102
103 pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
104 pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
105
106 pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
107
108 it8172_set_pio_mode(drive, pio);
109 }
110}
111
112
113static const struct ide_port_ops it8172_port_ops = {
114 .set_pio_mode = it8172_set_pio_mode,
115 .set_dma_mode = it8172_set_dma_mode,
116};
117
118static const struct ide_port_info it8172_port_info __devinitdata = {
119 .name = DRV_NAME,
120 .port_ops = &it8172_port_ops,
121 .enablebits = { {0x41, 0x80, 0x80}, {0x00, 0x00, 0x00} },
122 .host_flags = IDE_HFLAG_SINGLE,
123 .pio_mask = ATA_PIO4 & ~ATA_PIO0,
124 .mwdma_mask = ATA_MWDMA2,
125 .udma_mask = ATA_UDMA2,
126};
127
128static int __devinit it8172_init_one(struct pci_dev *dev,
129 const struct pci_device_id *id)
130{
131 if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
132 return -ENODEV; /* IT8172 is more than an IDE controller */
133 return ide_pci_init_one(dev, &it8172_port_info, NULL);
134}
135
136static struct pci_device_id it8172_pci_tbl[] = {
137 { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8172), 0 },
138 { 0, },
139};
140MODULE_DEVICE_TABLE(pci, it8172_pci_tbl);
141
142static struct pci_driver it8172_pci_driver = {
143 .name = "IT8172_IDE",
144 .id_table = it8172_pci_tbl,
145 .probe = it8172_init_one,
146 .remove = ide_pci_remove,
147 .suspend = ide_pci_suspend,
148 .resume = ide_pci_resume,
149};
150
151static int __init it8172_ide_init(void)
152{
153 return ide_pci_register_driver(&it8172_pci_driver);
154}
155
156static void __exit it8172_ide_exit(void)
157{
158 pci_unregister_driver(&it8172_pci_driver);
159}
160
161module_init(it8172_ide_init);
162module_exit(it8172_ide_exit);
163
164MODULE_AUTHOR("Steve Longerbeam");
165MODULE_DESCRIPTION("PCI driver module for ITE 8172 IDE");
166MODULE_LICENSE("GPL");
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 7c2feeb3c5ec..d7969b6d139e 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -25,7 +25,7 @@
25 25
26static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) 26static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
27{ 27{
28 ide_hwif_t *hwif = HWIF(drive); 28 ide_hwif_t *hwif = drive->hwif;
29 struct pci_dev *dev = to_pci_dev(hwif->dev); 29 struct pci_dev *dev = to_pci_dev(hwif->dev);
30 int is_slave = drive->dn & 1; 30 int is_slave = drive->dn & 1;
31 int master_port = 0x40; 31 int master_port = 0x40;
@@ -82,7 +82,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
82 82
83static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) 83static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
84{ 84{
85 ide_hwif_t *hwif = HWIF(drive); 85 ide_hwif_t *hwif = drive->hwif;
86 struct pci_dev *dev = to_pci_dev(hwif->dev); 86 struct pci_dev *dev = to_pci_dev(hwif->dev);
87 u8 maslave = 0x40; 87 u8 maslave = 0x40;
88 int a_speed = 3 << (drive->dn * 4); 88 int a_speed = 3 << (drive->dn * 4);
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index ef004089761b..0be27ac1f077 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -167,12 +167,10 @@ static void it821x_clock_strategy(ide_drive_t *drive)
167 ide_hwif_t *hwif = drive->hwif; 167 ide_hwif_t *hwif = drive->hwif;
168 struct pci_dev *dev = to_pci_dev(hwif->dev); 168 struct pci_dev *dev = to_pci_dev(hwif->dev);
169 struct it821x_dev *itdev = ide_get_hwifdata(hwif); 169 struct it821x_dev *itdev = ide_get_hwifdata(hwif);
170 ide_drive_t *pair; 170 ide_drive_t *pair = ide_get_pair_dev(drive);
171 int clock, altclock, sel = 0; 171 int clock, altclock, sel = 0;
172 u8 unit = drive->dn & 1, v; 172 u8 unit = drive->dn & 1, v;
173 173
174 pair = &hwif->drives[1 - unit];
175
176 if(itdev->want[0][0] > itdev->want[1][0]) { 174 if(itdev->want[0][0] > itdev->want[1][0]) {
177 clock = itdev->want[0][1]; 175 clock = itdev->want[0][1];
178 altclock = itdev->want[1][1]; 176 altclock = itdev->want[1][1];
@@ -239,15 +237,13 @@ static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio)
239{ 237{
240 ide_hwif_t *hwif = drive->hwif; 238 ide_hwif_t *hwif = drive->hwif;
241 struct it821x_dev *itdev = ide_get_hwifdata(hwif); 239 struct it821x_dev *itdev = ide_get_hwifdata(hwif);
242 ide_drive_t *pair; 240 ide_drive_t *pair = ide_get_pair_dev(drive);
243 u8 unit = drive->dn & 1, set_pio = pio; 241 u8 unit = drive->dn & 1, set_pio = pio;
244 242
245 /* Spec says 89 ref driver uses 88 */ 243 /* Spec says 89 ref driver uses 88 */
246 static u16 pio_timings[]= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 }; 244 static u16 pio_timings[]= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
247 static u8 pio_want[] = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY }; 245 static u8 pio_want[] = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
248 246
249 pair = &hwif->drives[1 - unit];
250
251 /* 247 /*
252 * Compute the best PIO mode we can for a given device. We must 248 * Compute the best PIO mode we can for a given device. We must
253 * pick a speed that does not cause problems with the other device 249 * pick a speed that does not cause problems with the other device
@@ -279,7 +275,7 @@ static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio)
279 * the shared MWDMA/PIO timing register. 275 * the shared MWDMA/PIO timing register.
280 */ 276 */
281 277
282static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted) 278static void it821x_tune_mwdma(ide_drive_t *drive, u8 mode_wanted)
283{ 279{
284 ide_hwif_t *hwif = drive->hwif; 280 ide_hwif_t *hwif = drive->hwif;
285 struct pci_dev *dev = to_pci_dev(hwif->dev); 281 struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -316,7 +312,7 @@ static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
316 * controller when doing UDMA modes in pass through. 312 * controller when doing UDMA modes in pass through.
317 */ 313 */
318 314
319static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted) 315static void it821x_tune_udma(ide_drive_t *drive, u8 mode_wanted)
320{ 316{
321 ide_hwif_t *hwif = drive->hwif; 317 ide_hwif_t *hwif = drive->hwif;
322 struct pci_dev *dev = to_pci_dev(hwif->dev); 318 struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -516,6 +512,7 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = {
516 .dma_test_irq = ide_dma_test_irq, 512 .dma_test_irq = ide_dma_test_irq,
517 .dma_timeout = ide_dma_timeout, 513 .dma_timeout = ide_dma_timeout,
518 .dma_lost_irq = ide_dma_lost_irq, 514 .dma_lost_irq = ide_dma_lost_irq,
515 .dma_sff_read_status = ide_dma_sff_read_status,
519}; 516};
520 517
521/** 518/**
diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c
index 13789060f407..83643ed9a426 100644
--- a/drivers/ide/ns87415.c
+++ b/drivers/ide/ns87415.c
@@ -56,7 +56,7 @@ static u8 superio_read_status(ide_hwif_t *hwif)
56 return superio_ide_inb(hwif->io_ports.status_addr); 56 return superio_ide_inb(hwif->io_ports.status_addr);
57} 57}
58 58
59static u8 superio_read_sff_dma_status(ide_hwif_t *hwif) 59static u8 superio_dma_sff_read_status(ide_hwif_t *hwif)
60{ 60{
61 return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS); 61 return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS);
62} 62}
@@ -109,7 +109,6 @@ static const struct ide_tp_ops superio_tp_ops = {
109 .exec_command = ide_exec_command, 109 .exec_command = ide_exec_command,
110 .read_status = superio_read_status, 110 .read_status = superio_read_status,
111 .read_altstatus = ide_read_altstatus, 111 .read_altstatus = ide_read_altstatus,
112 .read_sff_dma_status = superio_read_sff_dma_status,
113 112
114 .set_irq = ide_set_irq, 113 .set_irq = ide_set_irq,
115 114
@@ -132,18 +131,20 @@ static void __devinit superio_init_iops(struct hwif_s *hwif)
132 tmp = superio_ide_inb(dma_stat); 131 tmp = superio_ide_inb(dma_stat);
133 outb(tmp | 0x66, dma_stat); 132 outb(tmp | 0x66, dma_stat);
134} 133}
134#else
135#define superio_dma_sff_read_status ide_dma_sff_read_status
135#endif 136#endif
136 137
137static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 }; 138static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 };
138 139
139/* 140/*
140 * This routine either enables/disables (according to IDE_DFLAG_PRESENT) 141 * This routine either enables/disables (according to IDE_DFLAG_PRESENT)
141 * the IRQ associated with the port (HWIF(drive)), 142 * the IRQ associated with the port,
142 * and selects either PIO or DMA handshaking for the next I/O operation. 143 * and selects either PIO or DMA handshaking for the next I/O operation.
143 */ 144 */
144static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma) 145static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
145{ 146{
146 ide_hwif_t *hwif = HWIF(drive); 147 ide_hwif_t *hwif = drive->hwif;
147 struct pci_dev *dev = to_pci_dev(hwif->dev); 148 struct pci_dev *dev = to_pci_dev(hwif->dev);
148 unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data; 149 unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data;
149 unsigned long flags; 150 unsigned long flags;
@@ -197,11 +198,11 @@ static void ns87415_selectproc (ide_drive_t *drive)
197 198
198static int ns87415_dma_end(ide_drive_t *drive) 199static int ns87415_dma_end(ide_drive_t *drive)
199{ 200{
200 ide_hwif_t *hwif = HWIF(drive); 201 ide_hwif_t *hwif = drive->hwif;
201 u8 dma_stat = 0, dma_cmd = 0; 202 u8 dma_stat = 0, dma_cmd = 0;
202 203
203 drive->waiting_for_dma = 0; 204 drive->waiting_for_dma = 0;
204 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 205 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
205 /* get DMA command mode */ 206 /* get DMA command mode */
206 dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); 207 dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
207 /* stop DMA */ 208 /* stop DMA */
@@ -308,6 +309,7 @@ static const struct ide_dma_ops ns87415_dma_ops = {
308 .dma_test_irq = ide_dma_test_irq, 309 .dma_test_irq = ide_dma_test_irq,
309 .dma_lost_irq = ide_dma_lost_irq, 310 .dma_lost_irq = ide_dma_lost_irq,
310 .dma_timeout = ide_dma_timeout, 311 .dma_timeout = ide_dma_timeout,
312 .dma_sff_read_status = superio_dma_sff_read_status,
311}; 313};
312 314
313static const struct ide_port_info ns87415_chipset __devinitdata = { 315static const struct ide_port_info ns87415_chipset __devinitdata = {
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 122ed3c072fd..a7ac490c9ae3 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -324,8 +324,6 @@ static int __devinit palm_bk3710_init_dma(ide_hwif_t *hwif,
324 324
325 hwif->dma_base = hwif->io_ports.data_addr - IDE_PALM_ATA_PRI_REG_OFFSET; 325 hwif->dma_base = hwif->io_ports.data_addr - IDE_PALM_ATA_PRI_REG_OFFSET;
326 326
327 hwif->dma_ops = &sff_dma_ops;
328
329 return 0; 327 return 0;
330} 328}
331 329
@@ -338,6 +336,7 @@ static const struct ide_port_ops palm_bk3710_ports_ops = {
338static struct ide_port_info __devinitdata palm_bk3710_port_info = { 336static struct ide_port_info __devinitdata palm_bk3710_port_info = {
339 .init_dma = palm_bk3710_init_dma, 337 .init_dma = palm_bk3710_init_dma,
340 .port_ops = &palm_bk3710_ports_ops, 338 .port_ops = &palm_bk3710_ports_ops,
339 .dma_ops = &sff_dma_ops,
341 .host_flags = IDE_HFLAG_MMIO, 340 .host_flags = IDE_HFLAG_MMIO,
342 .pio_mask = ATA_PIO4, 341 .pio_mask = ATA_PIO4,
343 .mwdma_mask = ATA_MWDMA2, 342 .mwdma_mask = ATA_MWDMA2,
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 211ae46e3e0c..f21290c4b447 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -143,7 +143,7 @@ static struct udma_timing {
143 143
144static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) 144static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
145{ 145{
146 ide_hwif_t *hwif = HWIF(drive); 146 ide_hwif_t *hwif = drive->hwif;
147 struct pci_dev *dev = to_pci_dev(hwif->dev); 147 struct pci_dev *dev = to_pci_dev(hwif->dev);
148 u8 adj = (drive->dn & 1) ? 0x08 : 0x00; 148 u8 adj = (drive->dn & 1) ? 0x08 : 0x00;
149 149
@@ -219,7 +219,7 @@ static void pdcnew_reset(ide_drive_t *drive)
219 * Deleted this because it is redundant from the caller. 219 * Deleted this because it is redundant from the caller.
220 */ 220 */
221 printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n", 221 printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n",
222 HWIF(drive)->channel ? "Secondary" : "Primary"); 222 drive->hwif->channel ? "Secondary" : "Primary");
223} 223}
224 224
225/** 225/**
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 624e62e5cc9a..97193323aebf 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -39,7 +39,7 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
39 39
40static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) 40static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
41{ 41{
42 ide_hwif_t *hwif = HWIF(drive); 42 ide_hwif_t *hwif = drive->hwif;
43 struct pci_dev *dev = to_pci_dev(hwif->dev); 43 struct pci_dev *dev = to_pci_dev(hwif->dev);
44 u8 drive_pci = 0x60 + (drive->dn << 2); 44 u8 drive_pci = 0x60 + (drive->dn << 2);
45 45
@@ -169,8 +169,8 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
169 if (drive->current_speed > XFER_UDMA_2) 169 if (drive->current_speed > XFER_UDMA_2)
170 pdc_old_enable_66MHz_clock(drive->hwif); 170 pdc_old_enable_66MHz_clock(drive->hwif);
171 if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) { 171 if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
172 struct request *rq = HWGROUP(drive)->rq; 172 ide_hwif_t *hwif = drive->hwif;
173 ide_hwif_t *hwif = HWIF(drive); 173 struct request *rq = hwif->rq;
174 unsigned long high_16 = hwif->extra_base - 16; 174 unsigned long high_16 = hwif->extra_base - 16;
175 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); 175 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20);
176 u32 word_count = 0; 176 u32 word_count = 0;
@@ -189,7 +189,7 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
189static int pdc202xx_dma_end(ide_drive_t *drive) 189static int pdc202xx_dma_end(ide_drive_t *drive)
190{ 190{
191 if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) { 191 if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
192 ide_hwif_t *hwif = HWIF(drive); 192 ide_hwif_t *hwif = drive->hwif;
193 unsigned long high_16 = hwif->extra_base - 16; 193 unsigned long high_16 = hwif->extra_base - 16;
194 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); 194 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20);
195 u8 clock = 0; 195 u8 clock = 0;
@@ -205,7 +205,7 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
205 205
206static int pdc202xx_dma_test_irq(ide_drive_t *drive) 206static int pdc202xx_dma_test_irq(ide_drive_t *drive)
207{ 207{
208 ide_hwif_t *hwif = HWIF(drive); 208 ide_hwif_t *hwif = drive->hwif;
209 unsigned long high_16 = hwif->extra_base - 16; 209 unsigned long high_16 = hwif->extra_base - 16;
210 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); 210 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
211 u8 sc1d = inb(high_16 + 0x001d); 211 u8 sc1d = inb(high_16 + 0x001d);
@@ -243,7 +243,7 @@ static void pdc202xx_reset_host (ide_hwif_t *hwif)
243 243
244static void pdc202xx_reset (ide_drive_t *drive) 244static void pdc202xx_reset (ide_drive_t *drive)
245{ 245{
246 ide_hwif_t *hwif = HWIF(drive); 246 ide_hwif_t *hwif = drive->hwif;
247 ide_hwif_t *mate = hwif->mate; 247 ide_hwif_t *mate = hwif->mate;
248 248
249 pdc202xx_reset_host(hwif); 249 pdc202xx_reset_host(hwif);
@@ -337,6 +337,7 @@ static const struct ide_dma_ops pdc20246_dma_ops = {
337 .dma_test_irq = pdc202xx_dma_test_irq, 337 .dma_test_irq = pdc202xx_dma_test_irq,
338 .dma_lost_irq = pdc202xx_dma_lost_irq, 338 .dma_lost_irq = pdc202xx_dma_lost_irq,
339 .dma_timeout = pdc202xx_dma_timeout, 339 .dma_timeout = pdc202xx_dma_timeout,
340 .dma_sff_read_status = ide_dma_sff_read_status,
340}; 341};
341 342
342static const struct ide_dma_ops pdc2026x_dma_ops = { 343static const struct ide_dma_ops pdc2026x_dma_ops = {
@@ -348,6 +349,7 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
348 .dma_test_irq = pdc202xx_dma_test_irq, 349 .dma_test_irq = pdc202xx_dma_test_irq,
349 .dma_lost_irq = pdc202xx_dma_lost_irq, 350 .dma_lost_irq = pdc202xx_dma_lost_irq,
350 .dma_timeout = pdc202xx_dma_timeout, 351 .dma_timeout = pdc202xx_dma_timeout,
352 .dma_sff_read_status = ide_dma_sff_read_status,
351}; 353};
352 354
353#define DECLARE_PDC2026X_DEV(udma, sectors) \ 355#define DECLARE_PDC2026X_DEV(udma, sectors) \
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index 61d2d920a5cd..f1e2e4ef0d71 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -67,7 +67,7 @@ static int no_piix_dma;
67 67
68static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) 68static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
69{ 69{
70 ide_hwif_t *hwif = HWIF(drive); 70 ide_hwif_t *hwif = drive->hwif;
71 struct pci_dev *dev = to_pci_dev(hwif->dev); 71 struct pci_dev *dev = to_pci_dev(hwif->dev);
72 int is_slave = drive->dn & 1; 72 int is_slave = drive->dn & 1;
73 int master_port = hwif->channel ? 0x42 : 0x40; 73 int master_port = hwif->channel ? 0x42 : 0x40;
@@ -136,7 +136,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
136 136
137static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) 137static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
138{ 138{
139 ide_hwif_t *hwif = HWIF(drive); 139 ide_hwif_t *hwif = drive->hwif;
140 struct pci_dev *dev = to_pci_dev(hwif->dev); 140 struct pci_dev *dev = to_pci_dev(hwif->dev);
141 u8 maslave = hwif->channel ? 0x42 : 0x40; 141 u8 maslave = hwif->channel ? 0x42 : 0x40;
142 int a_speed = 3 << (drive->dn * 4); 142 int a_speed = 3 << (drive->dn * 4);
@@ -224,7 +224,7 @@ static unsigned int init_chipset_ich(struct pci_dev *dev)
224 */ 224 */
225static void ich_clear_irq(ide_drive_t *drive) 225static void ich_clear_irq(ide_drive_t *drive)
226{ 226{
227 ide_hwif_t *hwif = HWIF(drive); 227 ide_hwif_t *hwif = drive->hwif;
228 u8 dma_stat; 228 u8 dma_stat;
229 229
230 /* 230 /*
@@ -260,6 +260,8 @@ static const struct ich_laptop ich_laptop[] = {
260 { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */ 260 { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */
261 { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */ 261 { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */
262 { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on Acer Aspire 2023WLMi */ 262 { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on Acer Aspire 2023WLMi */
263 { 0x24CA, 0x1025, 0x003d }, /* ICH4 on ACER TM290 */
264 { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */
263 { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ 265 { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */
264 /* end marker */ 266 /* end marker */
265 { 0, } 267 { 0, }
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 7c481bb56fab..74625e821a43 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -955,7 +955,6 @@ static const struct ide_tp_ops pmac_tp_ops = {
955 .exec_command = pmac_exec_command, 955 .exec_command = pmac_exec_command,
956 .read_status = ide_read_status, 956 .read_status = ide_read_status,
957 .read_altstatus = ide_read_altstatus, 957 .read_altstatus = ide_read_altstatus,
958 .read_sff_dma_status = ide_read_sff_dma_status,
959 958
960 .set_irq = pmac_set_irq, 959 .set_irq = pmac_set_irq,
961 960
@@ -1513,10 +1512,10 @@ use_pio_instead:
1513static int 1512static int
1514pmac_ide_dma_setup(ide_drive_t *drive) 1513pmac_ide_dma_setup(ide_drive_t *drive)
1515{ 1514{
1516 ide_hwif_t *hwif = HWIF(drive); 1515 ide_hwif_t *hwif = drive->hwif;
1517 pmac_ide_hwif_t *pmif = 1516 pmac_ide_hwif_t *pmif =
1518 (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); 1517 (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
1519 struct request *rq = HWGROUP(drive)->rq; 1518 struct request *rq = hwif->rq;
1520 u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4); 1519 u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4);
1521 1520
1522 if (!pmac_ide_build_dmatable(drive, rq)) { 1521 if (!pmac_ide_build_dmatable(drive, rq)) {
@@ -1637,7 +1636,7 @@ pmac_ide_dma_test_irq (ide_drive_t *drive)
1637 break; 1636 break;
1638 if (++timeout > 100) { 1637 if (++timeout > 100) {
1639 printk(KERN_WARNING "ide%d, ide_dma_test_irq \ 1638 printk(KERN_WARNING "ide%d, ide_dma_test_irq \
1640 timeout flushing channel\n", HWIF(drive)->index); 1639 timeout flushing channel\n", hwif->index);
1641 break; 1640 break;
1642 } 1641 }
1643 } 1642 }
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index 4af4a8ce4cdf..9f9c0b3cc3a3 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -99,7 +99,6 @@ static const struct ide_tp_ops q40ide_tp_ops = {
99 .exec_command = ide_exec_command, 99 .exec_command = ide_exec_command,
100 .read_status = ide_read_status, 100 .read_status = ide_read_status,
101 .read_altstatus = ide_read_altstatus, 101 .read_altstatus = ide_read_altstatus,
102 .read_sff_dma_status = ide_read_sff_dma_status,
103 102
104 .set_irq = ide_set_irq, 103 .set_irq = ide_set_irq,
105 104
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index bc27c7aba936..5b2e3af43c4b 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -202,7 +202,8 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
202 recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120; 202 recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120;
203 } 203 }
204 204
205 qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time)); 205 qd_set_timing(drive, qd6500_compute_timing(drive->hwif,
206 active_time, recovery_time));
206} 207}
207 208
208static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) 209static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
@@ -245,11 +246,11 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
245 printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio); 246 printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio);
246 } 247 }
247 248
248 if (!HWIF(drive)->channel && drive->media != ide_disk) { 249 if (!hwif->channel && drive->media != ide_disk) {
249 outb(0x5f, QD_CONTROL_PORT); 250 outb(0x5f, QD_CONTROL_PORT);
250 printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO " 251 printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
251 "and post-write buffer on %s.\n", 252 "and post-write buffer on %s.\n",
252 drive->name, HWIF(drive)->name); 253 drive->name, hwif->name);
253 } 254 }
254 255
255 qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time)); 256 qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time));
diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h
index c83dea85e621..6636f9665d16 100644
--- a/drivers/ide/qd65xx.h
+++ b/drivers/ide/qd65xx.h
@@ -31,8 +31,8 @@
31 31
32#define QD_CONFIG(hwif) ((hwif)->config_data & 0x00ff) 32#define QD_CONFIG(hwif) ((hwif)->config_data & 0x00ff)
33 33
34#define QD_TIMING(drive) (byte)(((drive)->drive_data) & 0x00ff) 34#define QD_TIMING(drive) (u8)(((drive)->drive_data) & 0x00ff)
35#define QD_TIMREG(drive) (byte)((((drive)->drive_data) & 0xff00) >> 8) 35#define QD_TIMREG(drive) (u8)((((drive)->drive_data) & 0xff00) >> 8)
36 36
37#define QD6500_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08)) 37#define QD6500_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08))
38#define QD6580_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00)) 38#define QD6580_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index ec7f766ef5e4..dbdd2985a0d8 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -125,7 +125,7 @@ out:
125 125
126static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) 126static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
127{ 127{
128 ide_hwif_t *hwif = HWIF(drive); 128 ide_hwif_t *hwif = drive->hwif;
129 struct pci_dev *dev = to_pci_dev(hwif->dev); 129 struct pci_dev *dev = to_pci_dev(hwif->dev);
130 unsigned int reg, timings; 130 unsigned int reg, timings;
131 unsigned short pci_clock; 131 unsigned short pci_clock;
@@ -170,9 +170,9 @@ static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
170 */ 170 */
171static int sc1200_dma_end(ide_drive_t *drive) 171static int sc1200_dma_end(ide_drive_t *drive)
172{ 172{
173 ide_hwif_t *hwif = HWIF(drive); 173 ide_hwif_t *hwif = drive->hwif;
174 unsigned long dma_base = hwif->dma_base; 174 unsigned long dma_base = hwif->dma_base;
175 byte dma_stat; 175 u8 dma_stat;
176 176
177 dma_stat = inb(dma_base+2); /* get DMA status */ 177 dma_stat = inb(dma_base+2); /* get DMA status */
178 178
@@ -199,7 +199,7 @@ static int sc1200_dma_end(ide_drive_t *drive)
199 199
200static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio) 200static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio)
201{ 201{
202 ide_hwif_t *hwif = HWIF(drive); 202 ide_hwif_t *hwif = drive->hwif;
203 int mode = -1; 203 int mode = -1;
204 204
205 /* 205 /*
@@ -292,6 +292,7 @@ static const struct ide_dma_ops sc1200_dma_ops = {
292 .dma_test_irq = ide_dma_test_irq, 292 .dma_test_irq = ide_dma_test_irq,
293 .dma_lost_irq = ide_dma_lost_irq, 293 .dma_lost_irq = ide_dma_lost_irq,
294 .dma_timeout = ide_dma_timeout, 294 .dma_timeout = ide_dma_timeout,
295 .dma_sff_read_status = ide_dma_sff_read_status,
295}; 296};
296 297
297static const struct ide_port_info sc1200_chipset __devinitdata = { 298static const struct ide_port_info sc1200_chipset __devinitdata = {
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 0f48f9dacfa5..8d2314b6327c 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -143,7 +143,7 @@ static u8 scc_read_altstatus(ide_hwif_t *hwif)
143 return (u8)in_be32((void *)hwif->io_ports.ctl_addr); 143 return (u8)in_be32((void *)hwif->io_ports.ctl_addr);
144} 144}
145 145
146static u8 scc_read_sff_dma_status(ide_hwif_t *hwif) 146static u8 scc_dma_sff_read_status(ide_hwif_t *hwif)
147{ 147{
148 return (u8)in_be32((void *)(hwif->dma_base + 4)); 148 return (u8)in_be32((void *)(hwif->dma_base + 4));
149} 149}
@@ -217,7 +217,7 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count)
217 217
218static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) 218static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
219{ 219{
220 ide_hwif_t *hwif = HWIF(drive); 220 ide_hwif_t *hwif = drive->hwif;
221 struct scc_ports *ports = ide_get_hwifdata(hwif); 221 struct scc_ports *ports = ide_get_hwifdata(hwif);
222 unsigned long ctl_base = ports->ctl; 222 unsigned long ctl_base = ports->ctl;
223 unsigned long cckctrl_port = ctl_base + 0xff0; 223 unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -249,7 +249,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
249 249
250static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) 250static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
251{ 251{
252 ide_hwif_t *hwif = HWIF(drive); 252 ide_hwif_t *hwif = drive->hwif;
253 struct scc_ports *ports = ide_get_hwifdata(hwif); 253 struct scc_ports *ports = ide_get_hwifdata(hwif);
254 unsigned long ctl_base = ports->ctl; 254 unsigned long ctl_base = ports->ctl;
255 unsigned long cckctrl_port = ctl_base + 0xff0; 255 unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -259,7 +259,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
259 unsigned long scrcst_port = ctl_base + 0x014; 259 unsigned long scrcst_port = ctl_base + 0x014;
260 unsigned long udenvt_port = ctl_base + 0x018; 260 unsigned long udenvt_port = ctl_base + 0x018;
261 unsigned long tdvhsel_port = ctl_base + 0x020; 261 unsigned long tdvhsel_port = ctl_base + 0x020;
262 int is_slave = (&hwif->drives[1] == drive); 262 int is_slave = drive->dn & 1;
263 int offset, idx; 263 int offset, idx;
264 unsigned long reg; 264 unsigned long reg;
265 unsigned long jcactsel; 265 unsigned long jcactsel;
@@ -292,7 +292,7 @@ static void scc_dma_host_set(ide_drive_t *drive, int on)
292{ 292{
293 ide_hwif_t *hwif = drive->hwif; 293 ide_hwif_t *hwif = drive->hwif;
294 u8 unit = drive->dn & 1; 294 u8 unit = drive->dn & 1;
295 u8 dma_stat = scc_ide_inb(hwif->dma_base + 4); 295 u8 dma_stat = scc_dma_sff_read_status(hwif);
296 296
297 if (on) 297 if (on)
298 dma_stat |= (1 << (5 + unit)); 298 dma_stat |= (1 << (5 + unit));
@@ -316,7 +316,7 @@ static void scc_dma_host_set(ide_drive_t *drive, int on)
316static int scc_dma_setup(ide_drive_t *drive) 316static int scc_dma_setup(ide_drive_t *drive)
317{ 317{
318 ide_hwif_t *hwif = drive->hwif; 318 ide_hwif_t *hwif = drive->hwif;
319 struct request *rq = HWGROUP(drive)->rq; 319 struct request *rq = hwif->rq;
320 unsigned int reading; 320 unsigned int reading;
321 u8 dma_stat; 321 u8 dma_stat;
322 322
@@ -338,7 +338,7 @@ static int scc_dma_setup(ide_drive_t *drive)
338 out_be32((void __iomem *)hwif->dma_base, reading); 338 out_be32((void __iomem *)hwif->dma_base, reading);
339 339
340 /* read DMA status for INTR & ERROR flags */ 340 /* read DMA status for INTR & ERROR flags */
341 dma_stat = in_be32((void __iomem *)(hwif->dma_base + 4)); 341 dma_stat = scc_dma_sff_read_status(hwif);
342 342
343 /* clear INTR & ERROR flags */ 343 /* clear INTR & ERROR flags */
344 out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6); 344 out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6);
@@ -367,7 +367,7 @@ static int __scc_dma_end(ide_drive_t *drive)
367 /* stop DMA */ 367 /* stop DMA */
368 scc_ide_outb(dma_cmd & ~1, hwif->dma_base); 368 scc_ide_outb(dma_cmd & ~1, hwif->dma_base);
369 /* get DMA status */ 369 /* get DMA status */
370 dma_stat = scc_ide_inb(hwif->dma_base + 4); 370 dma_stat = scc_dma_sff_read_status(hwif);
371 /* clear the INTR & ERROR bits */ 371 /* clear the INTR & ERROR bits */
372 scc_ide_outb(dma_stat | 6, hwif->dma_base + 4); 372 scc_ide_outb(dma_stat | 6, hwif->dma_base + 4);
373 /* purge DMA mappings */ 373 /* purge DMA mappings */
@@ -387,7 +387,7 @@ static int __scc_dma_end(ide_drive_t *drive)
387 387
388static int scc_dma_end(ide_drive_t *drive) 388static int scc_dma_end(ide_drive_t *drive)
389{ 389{
390 ide_hwif_t *hwif = HWIF(drive); 390 ide_hwif_t *hwif = drive->hwif;
391 void __iomem *dma_base = (void __iomem *)hwif->dma_base; 391 void __iomem *dma_base = (void __iomem *)hwif->dma_base;
392 unsigned long intsts_port = hwif->dma_base + 0x014; 392 unsigned long intsts_port = hwif->dma_base + 0x014;
393 u32 reg; 393 u32 reg;
@@ -405,17 +405,18 @@ static int scc_dma_end(ide_drive_t *drive)
405 drive->name); 405 drive->name);
406 data_loss = 1; 406 data_loss = 1;
407 if (retry++) { 407 if (retry++) {
408 struct request *rq = HWGROUP(drive)->rq; 408 struct request *rq = hwif->rq;
409 int unit; 409 ide_drive_t *drive;
410 int i;
411
410 /* ERROR_RESET and drive->crc_count are needed 412 /* ERROR_RESET and drive->crc_count are needed
411 * to reduce DMA transfer mode in retry process. 413 * to reduce DMA transfer mode in retry process.
412 */ 414 */
413 if (rq) 415 if (rq)
414 rq->errors |= ERROR_RESET; 416 rq->errors |= ERROR_RESET;
415 for (unit = 0; unit < MAX_DRIVES; unit++) { 417
416 ide_drive_t *drive = &hwif->drives[unit]; 418 ide_port_for_each_dev(i, drive, hwif)
417 drive->crc_count++; 419 drive->crc_count++;
418 }
419 } 420 }
420 } 421 }
421 } 422 }
@@ -496,7 +497,7 @@ static int scc_dma_end(ide_drive_t *drive)
496/* returns 1 if dma irq issued, 0 otherwise */ 497/* returns 1 if dma irq issued, 0 otherwise */
497static int scc_dma_test_irq(ide_drive_t *drive) 498static int scc_dma_test_irq(ide_drive_t *drive)
498{ 499{
499 ide_hwif_t *hwif = HWIF(drive); 500 ide_hwif_t *hwif = drive->hwif;
500 u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014); 501 u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014);
501 502
502 /* SCC errata A252,A308 workaround: Step4 */ 503 /* SCC errata A252,A308 workaround: Step4 */
@@ -852,7 +853,6 @@ static const struct ide_tp_ops scc_tp_ops = {
852 .exec_command = scc_exec_command, 853 .exec_command = scc_exec_command,
853 .read_status = scc_read_status, 854 .read_status = scc_read_status,
854 .read_altstatus = scc_read_altstatus, 855 .read_altstatus = scc_read_altstatus,
855 .read_sff_dma_status = scc_read_sff_dma_status,
856 856
857 .set_irq = scc_set_irq, 857 .set_irq = scc_set_irq,
858 858
@@ -879,6 +879,7 @@ static const struct ide_dma_ops scc_dma_ops = {
879 .dma_test_irq = scc_dma_test_irq, 879 .dma_test_irq = scc_dma_test_irq,
880 .dma_lost_irq = ide_dma_lost_irq, 880 .dma_lost_irq = ide_dma_lost_irq,
881 .dma_timeout = ide_dma_timeout, 881 .dma_timeout = ide_dma_timeout,
882 .dma_sff_read_status = scc_dma_sff_read_status,
882}; 883};
883 884
884#define DECLARE_SCC_DEV(name_str) \ 885#define DECLARE_SCC_DEV(name_str) \
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index 437bc919dafd..382102ba467b 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -151,7 +151,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
151 static const u8 dma_modes[] = { 0x77, 0x21, 0x20 }; 151 static const u8 dma_modes[] = { 0x77, 0x21, 0x20 };
152 static const u8 drive_pci2[] = { 0x45, 0x44, 0x47, 0x46 }; 152 static const u8 drive_pci2[] = { 0x45, 0x44, 0x47, 0x46 };
153 153
154 ide_hwif_t *hwif = HWIF(drive); 154 ide_hwif_t *hwif = drive->hwif;
155 struct pci_dev *dev = to_pci_dev(hwif->dev); 155 struct pci_dev *dev = to_pci_dev(hwif->dev);
156 u8 unit = drive->dn & 1; 156 u8 unit = drive->dn & 1;
157 157
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 9f1f9163a136..e85d1ed29c2a 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -130,7 +130,7 @@ int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d)
130 * we tune the drive then try to grab DMA ownership if we want to be 130 * we tune the drive then try to grab DMA ownership if we want to be
131 * the DMA end. This has to be become dynamic to handle hot-plug. 131 * the DMA end. This has to be become dynamic to handle hot-plug.
132 */ 132 */
133 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); 133 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
134 if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) { 134 if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) {
135 printk(KERN_INFO "%s %s: simplex device: DMA disabled\n", 135 printk(KERN_INFO "%s %s: simplex device: DMA disabled\n",
136 d->name, pci_name(dev)); 136 d->name, pci_name(dev));
@@ -377,6 +377,9 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
377 377
378 hwif->dma_base = base; 378 hwif->dma_base = base;
379 379
380 if (hwif->dma_ops == NULL)
381 hwif->dma_ops = &sff_dma_ops;
382
380 if (ide_pci_check_simplex(hwif, d) < 0) 383 if (ide_pci_check_simplex(hwif, d) < 0)
381 return -1; 384 return -1;
382 385
@@ -393,8 +396,6 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
393 396
394 if (ide_allocate_dma_engine(hwif)) 397 if (ide_allocate_dma_engine(hwif))
395 return -1; 398 return -1;
396
397 hwif->dma_ops = &sff_dma_ops;
398 } 399 }
399 400
400 return 0; 401 return 0;
@@ -471,7 +472,7 @@ void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
471 */ 472 */
472 473
473 for (port = 0; port < channels; ++port) { 474 for (port = 0; port < channels; ++port) {
474 const ide_pci_enablebit_t *e = &(d->enablebits[port]); 475 const struct ide_pci_enablebit *e = &d->enablebits[port];
475 476
476 if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || 477 if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
477 (tmp & e->mask) != e->val)) { 478 (tmp & e->mask) != e->val)) {
@@ -519,8 +520,7 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
519 if (ret < 0) 520 if (ret < 0)
520 goto out; 521 goto out;
521 522
522 /* Is it an "IDE storage" device in non-PCI mode? */ 523 if (ide_pci_is_in_compatibility_mode(dev)) {
523 if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) {
524 if (noisy) 524 if (noisy)
525 printk(KERN_INFO "%s %s: not 100%% native mode: will " 525 printk(KERN_INFO "%s %s: not 100%% native mode: will "
526 "probe irqs later\n", d->name, pci_name(dev)); 526 "probe irqs later\n", d->name, pci_name(dev));
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index a687a7dfea6f..fdb9d7037694 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -123,7 +123,7 @@ static int
123sgiioc4_clearirq(ide_drive_t * drive) 123sgiioc4_clearirq(ide_drive_t * drive)
124{ 124{
125 u32 intr_reg; 125 u32 intr_reg;
126 ide_hwif_t *hwif = HWIF(drive); 126 ide_hwif_t *hwif = drive->hwif;
127 struct ide_io_ports *io_ports = &hwif->io_ports; 127 struct ide_io_ports *io_ports = &hwif->io_ports;
128 unsigned long other_ir = io_ports->irq_addr + (IOC4_INTR_REG << 2); 128 unsigned long other_ir = io_ports->irq_addr + (IOC4_INTR_REG << 2);
129 129
@@ -181,7 +181,7 @@ sgiioc4_clearirq(ide_drive_t * drive)
181 181
182static void sgiioc4_dma_start(ide_drive_t *drive) 182static void sgiioc4_dma_start(ide_drive_t *drive)
183{ 183{
184 ide_hwif_t *hwif = HWIF(drive); 184 ide_hwif_t *hwif = drive->hwif;
185 unsigned long ioc4_dma_addr = hwif->dma_base + IOC4_DMA_CTRL * 4; 185 unsigned long ioc4_dma_addr = hwif->dma_base + IOC4_DMA_CTRL * 4;
186 unsigned int reg = readl((void __iomem *)ioc4_dma_addr); 186 unsigned int reg = readl((void __iomem *)ioc4_dma_addr);
187 unsigned int temp_reg = reg | IOC4_S_DMA_START; 187 unsigned int temp_reg = reg | IOC4_S_DMA_START;
@@ -209,7 +209,7 @@ sgiioc4_ide_dma_stop(ide_hwif_t *hwif, u64 dma_base)
209static int sgiioc4_dma_end(ide_drive_t *drive) 209static int sgiioc4_dma_end(ide_drive_t *drive)
210{ 210{
211 u32 ioc4_dma, bc_dev, bc_mem, num, valid = 0, cnt = 0; 211 u32 ioc4_dma, bc_dev, bc_mem, num, valid = 0, cnt = 0;
212 ide_hwif_t *hwif = HWIF(drive); 212 ide_hwif_t *hwif = drive->hwif;
213 unsigned long dma_base = hwif->dma_base; 213 unsigned long dma_base = hwif->dma_base;
214 int dma_stat = 0; 214 int dma_stat = 0;
215 unsigned long *ending_dma = ide_get_hwifdata(hwif); 215 unsigned long *ending_dma = ide_get_hwifdata(hwif);
@@ -271,7 +271,7 @@ static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed)
271/* returns 1 if dma irq issued, 0 otherwise */ 271/* returns 1 if dma irq issued, 0 otherwise */
272static int sgiioc4_dma_test_irq(ide_drive_t *drive) 272static int sgiioc4_dma_test_irq(ide_drive_t *drive)
273{ 273{
274 return sgiioc4_checkirq(HWIF(drive)); 274 return sgiioc4_checkirq(drive->hwif);
275} 275}
276 276
277static void sgiioc4_dma_host_set(ide_drive_t *drive, int on) 277static void sgiioc4_dma_host_set(ide_drive_t *drive, int on)
@@ -367,7 +367,7 @@ static void
367sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive) 367sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
368{ 368{
369 u32 ioc4_dma; 369 u32 ioc4_dma;
370 ide_hwif_t *hwif = HWIF(drive); 370 ide_hwif_t *hwif = drive->hwif;
371 unsigned long dma_base = hwif->dma_base; 371 unsigned long dma_base = hwif->dma_base;
372 unsigned long ioc4_dma_addr = dma_base + IOC4_DMA_CTRL * 4; 372 unsigned long ioc4_dma_addr = dma_base + IOC4_DMA_CTRL * 4;
373 u32 dma_addr, ending_dma_addr; 373 u32 dma_addr, ending_dma_addr;
@@ -427,7 +427,7 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
427static unsigned int 427static unsigned int
428sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir) 428sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir)
429{ 429{
430 ide_hwif_t *hwif = HWIF(drive); 430 ide_hwif_t *hwif = drive->hwif;
431 unsigned int *table = hwif->dmatable_cpu; 431 unsigned int *table = hwif->dmatable_cpu;
432 unsigned int count = 0, i = 1; 432 unsigned int count = 0, i = 1;
433 struct scatterlist *sg; 433 struct scatterlist *sg;
@@ -492,7 +492,7 @@ use_pio_instead:
492 492
493static int sgiioc4_dma_setup(ide_drive_t *drive) 493static int sgiioc4_dma_setup(ide_drive_t *drive)
494{ 494{
495 struct request *rq = HWGROUP(drive)->rq; 495 struct request *rq = drive->hwif->rq;
496 unsigned int count = 0; 496 unsigned int count = 0;
497 int ddir; 497 int ddir;
498 498
@@ -523,7 +523,6 @@ static const struct ide_tp_ops sgiioc4_tp_ops = {
523 .exec_command = ide_exec_command, 523 .exec_command = ide_exec_command,
524 .read_status = sgiioc4_read_status, 524 .read_status = sgiioc4_read_status,
525 .read_altstatus = ide_read_altstatus, 525 .read_altstatus = ide_read_altstatus,
526 .read_sff_dma_status = ide_read_sff_dma_status,
527 526
528 .set_irq = ide_set_irq, 527 .set_irq = ide_set_irq,
529 528
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 7d622d20bc4c..cb2b352b876b 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -114,7 +114,7 @@ static unsigned long siimage_selreg(ide_hwif_t *hwif, int r)
114 114
115static inline unsigned long siimage_seldev(ide_drive_t *drive, int r) 115static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
116{ 116{
117 ide_hwif_t *hwif = HWIF(drive); 117 ide_hwif_t *hwif = drive->hwif;
118 unsigned long base = (unsigned long)hwif->hwif_data; 118 unsigned long base = (unsigned long)hwif->hwif_data;
119 u8 unit = drive->dn & 1; 119 u8 unit = drive->dn & 1;
120 120
@@ -243,7 +243,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
243 static const u16 tf_speed[] = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 }; 243 static const u16 tf_speed[] = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
244 static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 }; 244 static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
245 245
246 ide_hwif_t *hwif = HWIF(drive); 246 ide_hwif_t *hwif = drive->hwif;
247 struct pci_dev *dev = to_pci_dev(hwif->dev); 247 struct pci_dev *dev = to_pci_dev(hwif->dev);
248 ide_drive_t *pair = ide_get_pair_dev(drive); 248 ide_drive_t *pair = ide_get_pair_dev(drive);
249 u32 speedt = 0; 249 u32 speedt = 0;
@@ -300,7 +300,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
300 static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 }; 300 static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
301 static const u16 dma[] = { 0x2208, 0x10C2, 0x10C1 }; 301 static const u16 dma[] = { 0x2208, 0x10C2, 0x10C1 };
302 302
303 ide_hwif_t *hwif = HWIF(drive); 303 ide_hwif_t *hwif = drive->hwif;
304 struct pci_dev *dev = to_pci_dev(hwif->dev); 304 struct pci_dev *dev = to_pci_dev(hwif->dev);
305 unsigned long base = (unsigned long)hwif->hwif_data; 305 unsigned long base = (unsigned long)hwif->hwif_data;
306 u16 ultra = 0, multi = 0; 306 u16 ultra = 0, multi = 0;
@@ -340,7 +340,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
340/* returns 1 if dma irq issued, 0 otherwise */ 340/* returns 1 if dma irq issued, 0 otherwise */
341static int siimage_io_dma_test_irq(ide_drive_t *drive) 341static int siimage_io_dma_test_irq(ide_drive_t *drive)
342{ 342{
343 ide_hwif_t *hwif = HWIF(drive); 343 ide_hwif_t *hwif = drive->hwif;
344 struct pci_dev *dev = to_pci_dev(hwif->dev); 344 struct pci_dev *dev = to_pci_dev(hwif->dev);
345 u8 dma_altstat = 0; 345 u8 dma_altstat = 0;
346 unsigned long addr = siimage_selreg(hwif, 1); 346 unsigned long addr = siimage_selreg(hwif, 1);
@@ -367,7 +367,7 @@ static int siimage_io_dma_test_irq(ide_drive_t *drive)
367 367
368static int siimage_mmio_dma_test_irq(ide_drive_t *drive) 368static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
369{ 369{
370 ide_hwif_t *hwif = HWIF(drive); 370 ide_hwif_t *hwif = drive->hwif;
371 unsigned long addr = siimage_selreg(hwif, 0x1); 371 unsigned long addr = siimage_selreg(hwif, 0x1);
372 void __iomem *sata_error_addr 372 void __iomem *sata_error_addr
373 = (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET]; 373 = (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET];
@@ -717,6 +717,7 @@ static const struct ide_dma_ops sil_dma_ops = {
717 .dma_test_irq = siimage_dma_test_irq, 717 .dma_test_irq = siimage_dma_test_irq,
718 .dma_timeout = ide_dma_timeout, 718 .dma_timeout = ide_dma_timeout,
719 .dma_lost_irq = ide_dma_lost_irq, 719 .dma_lost_irq = ide_dma_lost_irq,
720 .dma_sff_read_status = ide_dma_sff_read_status,
720}; 721};
721 722
722#define DECLARE_SII_DEV(p_ops) \ 723#define DECLARE_SII_DEV(p_ops) \
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index ad32e18c5ba3..9ec1a4a4432c 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -274,7 +274,7 @@ static void sis_program_timings(ide_drive_t *drive, const u8 mode)
274 274
275static void config_drive_art_rwp(ide_drive_t *drive) 275static void config_drive_art_rwp(ide_drive_t *drive)
276{ 276{
277 ide_hwif_t *hwif = HWIF(drive); 277 ide_hwif_t *hwif = drive->hwif;
278 struct pci_dev *dev = to_pci_dev(hwif->dev); 278 struct pci_dev *dev = to_pci_dev(hwif->dev);
279 u8 reg4bh = 0; 279 u8 reg4bh = 0;
280 u8 rw_prefetch = 0; 280 u8 rw_prefetch = 0;
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 84dc33602ff8..48cc748c5043 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -140,7 +140,7 @@ static inline void sl82c105_reset_host(struct pci_dev *dev)
140 */ 140 */
141static void sl82c105_dma_lost_irq(ide_drive_t *drive) 141static void sl82c105_dma_lost_irq(ide_drive_t *drive)
142{ 142{
143 ide_hwif_t *hwif = HWIF(drive); 143 ide_hwif_t *hwif = drive->hwif;
144 struct pci_dev *dev = to_pci_dev(hwif->dev); 144 struct pci_dev *dev = to_pci_dev(hwif->dev);
145 u32 val, mask = hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA; 145 u32 val, mask = hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
146 u8 dma_cmd; 146 u8 dma_cmd;
@@ -177,7 +177,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
177 */ 177 */
178static void sl82c105_dma_start(ide_drive_t *drive) 178static void sl82c105_dma_start(ide_drive_t *drive)
179{ 179{
180 ide_hwif_t *hwif = HWIF(drive); 180 ide_hwif_t *hwif = drive->hwif;
181 struct pci_dev *dev = to_pci_dev(hwif->dev); 181 struct pci_dev *dev = to_pci_dev(hwif->dev);
182 int reg = 0x44 + drive->dn * 4; 182 int reg = 0x44 + drive->dn * 4;
183 183
@@ -299,6 +299,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = {
299 .dma_test_irq = ide_dma_test_irq, 299 .dma_test_irq = ide_dma_test_irq,
300 .dma_lost_irq = sl82c105_dma_lost_irq, 300 .dma_lost_irq = sl82c105_dma_lost_irq,
301 .dma_timeout = sl82c105_dma_timeout, 301 .dma_timeout = sl82c105_dma_timeout,
302 .dma_sff_read_status = ide_dma_sff_read_status,
302}; 303};
303 304
304static const struct ide_port_info sl82c105_chipset __devinitdata = { 305static const struct ide_port_info sl82c105_chipset __devinitdata = {
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 0f759e4ed779..40b4b94a4288 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(slc90e66_lock);
20 20
21static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) 21static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
22{ 22{
23 ide_hwif_t *hwif = HWIF(drive); 23 ide_hwif_t *hwif = drive->hwif;
24 struct pci_dev *dev = to_pci_dev(hwif->dev); 24 struct pci_dev *dev = to_pci_dev(hwif->dev);
25 int is_slave = drive->dn & 1; 25 int is_slave = drive->dn & 1;
26 int master_port = hwif->channel ? 0x42 : 0x40; 26 int master_port = hwif->channel ? 0x42 : 0x40;
@@ -73,7 +73,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
73 73
74static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) 74static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
75{ 75{
76 ide_hwif_t *hwif = HWIF(drive); 76 ide_hwif_t *hwif = drive->hwif;
77 struct pci_dev *dev = to_pci_dev(hwif->dev); 77 struct pci_dev *dev = to_pci_dev(hwif->dev);
78 u8 maslave = hwif->channel ? 0x42 : 0x40; 78 u8 maslave = hwif->channel ? 0x42 : 0x40;
79 int sitre = 0, a_speed = 7 << (drive->dn * 4); 79 int sitre = 0, a_speed = 7 << (drive->dn * 4);
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index 93e2cce4b296..84109f5a1632 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -15,7 +15,7 @@
15 15
16static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) 16static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
17{ 17{
18 ide_hwif_t *hwif = HWIF(drive); 18 ide_hwif_t *hwif = drive->hwif;
19 unsigned long scr_port = hwif->config_data + (drive->dn ? 0x02 : 0x00); 19 unsigned long scr_port = hwif->config_data + (drive->dn ? 0x02 : 0x00);
20 u16 mode, scr = inw(scr_port); 20 u16 mode, scr = inw(scr_port);
21 21
@@ -62,13 +62,12 @@ static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio)
62 */ 62 */
63static int tc86c001_timer_expiry(ide_drive_t *drive) 63static int tc86c001_timer_expiry(ide_drive_t *drive)
64{ 64{
65 ide_hwif_t *hwif = HWIF(drive); 65 ide_hwif_t *hwif = drive->hwif;
66 ide_expiry_t *expiry = ide_get_hwifdata(hwif); 66 ide_expiry_t *expiry = ide_get_hwifdata(hwif);
67 ide_hwgroup_t *hwgroup = HWGROUP(drive);
68 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); 67 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
69 68
70 /* Restore a higher level driver's expiry handler first. */ 69 /* Restore a higher level driver's expiry handler first. */
71 hwgroup->expiry = expiry; 70 hwif->expiry = expiry;
72 71
73 if ((dma_stat & 5) == 1) { /* DMA active and no interrupt */ 72 if ((dma_stat & 5) == 1) { /* DMA active and no interrupt */
74 unsigned long sc_base = hwif->config_data; 73 unsigned long sc_base = hwif->config_data;
@@ -110,11 +109,10 @@ static int tc86c001_timer_expiry(ide_drive_t *drive)
110 109
111static void tc86c001_dma_start(ide_drive_t *drive) 110static void tc86c001_dma_start(ide_drive_t *drive)
112{ 111{
113 ide_hwif_t *hwif = HWIF(drive); 112 ide_hwif_t *hwif = drive->hwif;
114 ide_hwgroup_t *hwgroup = HWGROUP(drive);
115 unsigned long sc_base = hwif->config_data; 113 unsigned long sc_base = hwif->config_data;
116 unsigned long twcr_port = sc_base + (drive->dn ? 0x06 : 0x04); 114 unsigned long twcr_port = sc_base + (drive->dn ? 0x06 : 0x04);
117 unsigned long nsectors = hwgroup->rq->nr_sectors; 115 unsigned long nsectors = hwif->rq->nr_sectors;
118 116
119 /* 117 /*
120 * We have to manually load the sector count and size into 118 * We have to manually load the sector count and size into
@@ -125,8 +123,8 @@ static void tc86c001_dma_start(ide_drive_t *drive)
125 outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */ 123 outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */
126 124
127 /* Install our timeout expiry hook, saving the current handler... */ 125 /* Install our timeout expiry hook, saving the current handler... */
128 ide_set_hwifdata(hwif, hwgroup->expiry); 126 ide_set_hwifdata(hwif, hwif->expiry);
129 hwgroup->expiry = &tc86c001_timer_expiry; 127 hwif->expiry = &tc86c001_timer_expiry;
130 128
131 ide_dma_start(drive); 129 ide_dma_start(drive);
132} 130}
@@ -190,6 +188,7 @@ static const struct ide_dma_ops tc86c001_dma_ops = {
190 .dma_test_irq = ide_dma_test_irq, 188 .dma_test_irq = ide_dma_test_irq,
191 .dma_lost_irq = ide_dma_lost_irq, 189 .dma_lost_irq = ide_dma_lost_irq,
192 .dma_timeout = ide_dma_timeout, 190 .dma_timeout = ide_dma_timeout,
191 .dma_sff_read_status = ide_dma_sff_read_status,
193}; 192};
194 193
195static const struct ide_port_info tc86c001_chipset __devinitdata = { 194static const struct ide_port_info tc86c001_chipset __devinitdata = {
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index b6ff40336aa9..8773c3ba7462 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -36,7 +36,7 @@
36 36
37static void triflex_set_mode(ide_drive_t *drive, const u8 speed) 37static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
38{ 38{
39 ide_hwif_t *hwif = HWIF(drive); 39 ide_hwif_t *hwif = drive->hwif;
40 struct pci_dev *dev = to_pci_dev(hwif->dev); 40 struct pci_dev *dev = to_pci_dev(hwif->dev);
41 u32 triflex_timings = 0; 41 u32 triflex_timings = 0;
42 u16 timing = 0; 42 u16 timing = 0;
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 2a5ea90cf8b8..b6a1285a4021 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -144,7 +144,7 @@
144 144
145static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma) 145static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
146{ 146{
147 ide_hwif_t *hwif = HWIF(drive); 147 ide_hwif_t *hwif = drive->hwif;
148 u16 reg = 0; 148 u16 reg = 0;
149 unsigned long flags; 149 unsigned long flags;
150 150
@@ -184,7 +184,7 @@ static void trm290_dma_exec_cmd(ide_drive_t *drive, u8 command)
184static int trm290_dma_setup(ide_drive_t *drive) 184static int trm290_dma_setup(ide_drive_t *drive)
185{ 185{
186 ide_hwif_t *hwif = drive->hwif; 186 ide_hwif_t *hwif = drive->hwif;
187 struct request *rq = hwif->hwgroup->rq; 187 struct request *rq = hwif->rq;
188 unsigned int count, rw; 188 unsigned int count, rw;
189 189
190 if (rq_data_dir(rq)) { 190 if (rq_data_dir(rq)) {
@@ -222,15 +222,15 @@ static int trm290_dma_end(ide_drive_t *drive)
222 drive->waiting_for_dma = 0; 222 drive->waiting_for_dma = 0;
223 /* purge DMA mappings */ 223 /* purge DMA mappings */
224 ide_destroy_dmatable(drive); 224 ide_destroy_dmatable(drive);
225 status = inw(HWIF(drive)->dma_base + 2); 225 status = inw(drive->hwif->dma_base + 2);
226
226 return status != 0x00ff; 227 return status != 0x00ff;
227} 228}
228 229
229static int trm290_dma_test_irq(ide_drive_t *drive) 230static int trm290_dma_test_irq(ide_drive_t *drive)
230{ 231{
231 u16 status; 232 u16 status = inw(drive->hwif->dma_base + 2);
232 233
233 status = inw(HWIF(drive)->dma_base + 2);
234 return status == 0x00ff; 234 return status == 0x00ff;
235} 235}
236 236
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 4a8c5a21bd4c..882f6f07c476 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -293,7 +293,7 @@ static int tx4939ide_dma_setup(ide_drive_t *drive)
293{ 293{
294 ide_hwif_t *hwif = drive->hwif; 294 ide_hwif_t *hwif = drive->hwif;
295 void __iomem *base = TX4939IDE_BASE(hwif); 295 void __iomem *base = TX4939IDE_BASE(hwif);
296 struct request *rq = hwif->hwgroup->rq; 296 struct request *rq = hwif->rq;
297 u8 reading; 297 u8 reading;
298 int nent; 298 int nent;
299 299
@@ -397,6 +397,17 @@ static int tx4939ide_dma_test_irq(ide_drive_t *drive)
397 return found; 397 return found;
398} 398}
399 399
400#ifdef __BIG_ENDIAN
401static u8 tx4939ide_dma_sff_read_status(ide_hwif_t *hwif)
402{
403 void __iomem *base = TX4939IDE_BASE(hwif);
404
405 return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
406}
407#else
408#define tx4939ide_dma_sff_read_status ide_dma_sff_read_status
409#endif
410
400static void tx4939ide_init_hwif(ide_hwif_t *hwif) 411static void tx4939ide_init_hwif(ide_hwif_t *hwif)
401{ 412{
402 void __iomem *base = TX4939IDE_BASE(hwif); 413 void __iomem *base = TX4939IDE_BASE(hwif);
@@ -443,13 +454,6 @@ static void tx4939ide_tf_load_fixup(ide_drive_t *drive, ide_task_t *task)
443 454
444#ifdef __BIG_ENDIAN 455#ifdef __BIG_ENDIAN
445 456
446static u8 tx4939ide_read_sff_dma_status(ide_hwif_t *hwif)
447{
448 void __iomem *base = TX4939IDE_BASE(hwif);
449
450 return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
451}
452
453/* custom iops (independent from SWAP_IO_SPACE) */ 457/* custom iops (independent from SWAP_IO_SPACE) */
454static u8 tx4939ide_inb(unsigned long port) 458static u8 tx4939ide_inb(unsigned long port)
455{ 459{
@@ -585,7 +589,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = {
585 .exec_command = ide_exec_command, 589 .exec_command = ide_exec_command,
586 .read_status = ide_read_status, 590 .read_status = ide_read_status,
587 .read_altstatus = ide_read_altstatus, 591 .read_altstatus = ide_read_altstatus,
588 .read_sff_dma_status = tx4939ide_read_sff_dma_status,
589 592
590 .set_irq = ide_set_irq, 593 .set_irq = ide_set_irq,
591 594
@@ -609,7 +612,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = {
609 .exec_command = ide_exec_command, 612 .exec_command = ide_exec_command,
610 .read_status = ide_read_status, 613 .read_status = ide_read_status,
611 .read_altstatus = ide_read_altstatus, 614 .read_altstatus = ide_read_altstatus,
612 .read_sff_dma_status = ide_read_sff_dma_status,
613 615
614 .set_irq = ide_set_irq, 616 .set_irq = ide_set_irq,
615 617
@@ -638,6 +640,7 @@ static const struct ide_dma_ops tx4939ide_dma_ops = {
638 .dma_test_irq = tx4939ide_dma_test_irq, 640 .dma_test_irq = tx4939ide_dma_test_irq,
639 .dma_lost_irq = ide_dma_lost_irq, 641 .dma_lost_irq = ide_dma_lost_irq,
640 .dma_timeout = ide_dma_timeout, 642 .dma_timeout = ide_dma_timeout,
643 .dma_sff_read_status = tx4939ide_dma_sff_read_status,
641}; 644};
642 645
643static const struct ide_port_info tx4939ide_port_info __initdata = { 646static const struct ide_port_info tx4939ide_port_info __initdata = {
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index e29978cf6197..0608d41fb6d0 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -106,22 +106,21 @@ static void umc_set_speeds(u8 speeds[])
106 106
107static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio) 107static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
108{ 108{
109 ide_hwif_t *hwif = drive->hwif; 109 ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate;
110 ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL;
111 unsigned long uninitialized_var(flags); 110 unsigned long uninitialized_var(flags);
112 111
113 printk("%s: setting umc8672 to PIO mode%d (speed %d)\n", 112 printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
114 drive->name, pio, pio_to_umc[pio]); 113 drive->name, pio, pio_to_umc[pio]);
115 if (mate_hwgroup) 114 if (mate)
116 spin_lock_irqsave(&mate_hwgroup->lock, flags); 115 spin_lock_irqsave(&mate->lock, flags);
117 if (mate_hwgroup && mate_hwgroup->handler) { 116 if (mate && mate->handler) {
118 printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n"); 117 printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
119 } else { 118 } else {
120 current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio]; 119 current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
121 umc_set_speeds(current_speeds); 120 umc_set_speeds(current_speeds);
122 } 121 }
123 if (mate_hwgroup) 122 if (mate)
124 spin_unlock_irqrestore(&mate_hwgroup->lock, flags); 123 spin_unlock_irqrestore(&mate->lock, flags);
125} 124}
126 125
127static const struct ide_port_ops umc8672_port_ops = { 126static const struct ide_port_ops umc8672_port_ops = {
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 2a812d3207e9..fecc0e03c3fc 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -178,7 +178,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
178 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); 178 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
179 } 179 }
180 180
181 via_set_speed(HWIF(drive), drive->dn, &t); 181 via_set_speed(hwif, drive->dn, &t);
182} 182}
183 183
184/** 184/**
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 22bf981d393b..82607add69a9 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -554,7 +554,7 @@ thermostat_init(void)
554 const u32 *prop; 554 const u32 *prop;
555 int i = 0, offset = 0; 555 int i = 0, offset = 0;
556 int err; 556 int err;
557 557
558 np = of_find_node_by_name(NULL, "fan"); 558 np = of_find_node_by_name(NULL, "fan");
559 if (!np) 559 if (!np)
560 return -ENODEV; 560 return -ENODEV;
@@ -613,13 +613,13 @@ thermostat_init(void)
613 } 613 }
614 614
615 of_dev = of_platform_device_create(np, "temperatures", NULL); 615 of_dev = of_platform_device_create(np, "temperatures", NULL);
616 616 of_node_put(np);
617
617 if (of_dev == NULL) { 618 if (of_dev == NULL) {
618 printk(KERN_ERR "Can't register temperatures device !\n"); 619 printk(KERN_ERR "Can't register temperatures device !\n");
619 of_node_put(np);
620 return -ENODEV; 620 return -ENODEV;
621 } 621 }
622 622
623 err = device_create_file(&of_dev->dev, &dev_attr_sensor1_temperature); 623 err = device_create_file(&of_dev->dev, &dev_attr_sensor1_temperature);
624 err |= device_create_file(&of_dev->dev, &dev_attr_sensor2_temperature); 624 err |= device_create_file(&of_dev->dev, &dev_attr_sensor2_temperature);
625 err |= device_create_file(&of_dev->dev, &dev_attr_sensor1_limit); 625 err |= device_create_file(&of_dev->dev, &dev_attr_sensor1_limit);
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index 65d69665f1fc..6a32680dbb1b 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -79,6 +79,10 @@ static int dvb_device_open(struct inode *inode, struct file *file)
79 file->private_data = dvbdev; 79 file->private_data = dvbdev;
80 old_fops = file->f_op; 80 old_fops = file->f_op;
81 file->f_op = fops_get(dvbdev->fops); 81 file->f_op = fops_get(dvbdev->fops);
82 if (file->f_op == NULL) {
83 file->f_op = old_fops;
84 goto fail;
85 }
82 if(file->f_op->open) 86 if(file->f_op->open)
83 err = file->f_op->open(inode,file); 87 err = file->f_op->open(inode,file);
84 if (err) { 88 if (err) {
@@ -90,6 +94,7 @@ static int dvb_device_open(struct inode *inode, struct file *file)
90 unlock_kernel(); 94 unlock_kernel();
91 return err; 95 return err;
92 } 96 }
97fail:
93 up_read(&minor_rwsem); 98 up_read(&minor_rwsem);
94 unlock_kernel(); 99 unlock_kernel();
95 return -ENODEV; 100 return -ENODEV;
diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c
index d450cab20be4..b617bf05e2d7 100644
--- a/drivers/media/video/v4l1-compat.c
+++ b/drivers/media/video/v4l1-compat.c
@@ -203,7 +203,6 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq)
203 table = &pwq->pt; 203 table = &pwq->pt;
204 for (;;) { 204 for (;;) {
205 int mask; 205 int mask;
206 set_current_state(TASK_INTERRUPTIBLE);
207 mask = file->f_op->poll(file, table); 206 mask = file->f_op->poll(file, table);
208 if (mask & POLLIN) 207 if (mask & POLLIN)
209 break; 208 break;
@@ -212,9 +211,8 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq)
212 retval = -ERESTARTSYS; 211 retval = -ERESTARTSYS;
213 break; 212 break;
214 } 213 }
215 schedule(); 214 poll_schedule(pwq, TASK_INTERRUPTIBLE);
216 } 215 }
217 set_current_state(TASK_RUNNING);
218 poll_freewait(pwq); 216 poll_freewait(pwq);
219 return retval; 217 return retval;
220} 218}
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index a7dd03e8d332..0ee4264f5db7 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -52,7 +52,6 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd,
52/** 52/**
53 * i2o_device_claim - claim a device for use by an OSM 53 * i2o_device_claim - claim a device for use by an OSM
54 * @dev: I2O device to claim 54 * @dev: I2O device to claim
55 * @drv: I2O driver which wants to claim the device
56 * 55 *
57 * Do the leg work to assign a device to a given OSM. If the claim succeeds, 56 * Do the leg work to assign a device to a given OSM. If the claim succeeds,
58 * the owner is the primary. If the attempt fails a negative errno code 57 * the owner is the primary. If the attempt fails a negative errno code
@@ -80,7 +79,6 @@ int i2o_device_claim(struct i2o_device *dev)
80/** 79/**
81 * i2o_device_claim_release - release a device that the OSM is using 80 * i2o_device_claim_release - release a device that the OSM is using
82 * @dev: device to release 81 * @dev: device to release
83 * @drv: driver which claimed the device
84 * 82 *
85 * Drop a claim by an OSM on a given I2O device. 83 * Drop a claim by an OSM on a given I2O device.
86 * 84 *
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index e0d474b17433..a0421efe04ca 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -173,7 +173,6 @@ void i2o_driver_unregister(struct i2o_driver *drv)
173 * i2o_driver_dispatch - dispatch an I2O reply message 173 * i2o_driver_dispatch - dispatch an I2O reply message
174 * @c: I2O controller of the message 174 * @c: I2O controller of the message
175 * @m: I2O message number 175 * @m: I2O message number
176 * @msg: I2O message to be delivered
177 * 176 *
178 * The reply is delivered to the driver from which the original message 177 * The reply is delivered to the driver from which the original message
179 * was. This function is only called from interrupt context. 178 * was. This function is only called from interrupt context.
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
index b5f6add34b0b..dc14b0b9cbfa 100644
--- a/drivers/misc/ibmasm/module.c
+++ b/drivers/misc/ibmasm/module.c
@@ -104,8 +104,7 @@ static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_devi
104 } 104 }
105 105
106 sp->irq = pdev->irq; 106 sp->irq = pdev->irq;
107 sp->base_address = ioremap(pci_resource_start(pdev, 0), 107 sp->base_address = pci_ioremap_bar(pdev, 0);
108 pci_resource_len(pdev, 0));
109 if (!sp->base_address) { 108 if (!sp->base_address) {
110 dev_err(sp->dev, "Failed to ioremap pci memory\n"); 109 dev_err(sp->dev, "Failed to ioremap pci memory\n");
111 result = -ENODEV; 110 result = -ENODEV;
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
index 6f76573e7c8a..60b0b1a4fb3a 100644
--- a/drivers/misc/ioc4.c
+++ b/drivers/misc/ioc4.c
@@ -269,6 +269,16 @@ ioc4_variant(struct ioc4_driver_data *idd)
269 return IOC4_VARIANT_PCI_RT; 269 return IOC4_VARIANT_PCI_RT;
270} 270}
271 271
272static void
273ioc4_load_modules(struct work_struct *work)
274{
275 /* arg just has to be freed */
276
277 request_module("sgiioc4");
278
279 kfree(work);
280}
281
272/* Adds a new instance of an IOC4 card */ 282/* Adds a new instance of an IOC4 card */
273static int 283static int
274ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) 284ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
@@ -378,6 +388,30 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
378 } 388 }
379 mutex_unlock(&ioc4_mutex); 389 mutex_unlock(&ioc4_mutex);
380 390
391 /* Request sgiioc4 IDE driver on boards that bring that functionality
392 * off of IOC4. The root filesystem may be hosted on a drive connected
393 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
394 * won't be picked up by modprobes due to the ioc4 module owning the
395 * PCI device.
396 */
397 if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
398 struct work_struct *work;
399 work = kzalloc(sizeof(struct work_struct), GFP_KERNEL);
400 if (!work) {
401 printk(KERN_WARNING
402 "%s: IOC4 unable to allocate memory for "
403 "load of sub-modules.\n", __func__);
404 } else {
405 /* Request the module from a work procedure as the
406 * modprobe goes out to a userland helper and that
407 * will hang if done directly from ioc4_probe().
408 */
409 printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
410 INIT_WORK(work, ioc4_load_modules);
411 schedule_work(work);
412 }
413 }
414
381 return 0; 415 return 0;
382 416
383out_misc_region: 417out_misc_region:
@@ -462,6 +496,8 @@ ioc4_init(void)
462static void __devexit 496static void __devexit
463ioc4_exit(void) 497ioc4_exit(void)
464{ 498{
499 /* Ensure ioc4_load_modules() has completed before exiting */
500 flush_scheduled_work();
465 pci_unregister_driver(&ioc4_driver); 501 pci_unregister_driver(&ioc4_driver);
466} 502}
467 503
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index e71eba31decb..be5672a98702 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -354,8 +354,7 @@ static int tifm_7xx1_probe(struct pci_dev *dev,
354 fm->has_ms_pif = tifm_7xx1_has_ms_pif; 354 fm->has_ms_pif = tifm_7xx1_has_ms_pif;
355 pci_set_drvdata(dev, fm); 355 pci_set_drvdata(dev, fm);
356 356
357 fm->addr = ioremap(pci_resource_start(dev, 0), 357 fm->addr = pci_ioremap_bar(dev, 0);
358 pci_resource_len(dev, 0));
359 if (!fm->addr) 358 if (!fm->addr)
360 goto err_out_free; 359 goto err_out_free;
361 360
diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c
index ac2a805ac7ea..8901ecf6e037 100644
--- a/drivers/parport/ieee1284.c
+++ b/drivers/parport/ieee1284.c
@@ -84,7 +84,7 @@ int parport_wait_event (struct parport *port, signed long timeout)
84 84
85 add_timer (&timer); 85 add_timer (&timer);
86 ret = down_interruptible (&port->physport->ieee1284.irq); 86 ret = down_interruptible (&port->physport->ieee1284.irq);
87 if (!del_timer (&timer) && !ret) 87 if (!del_timer_sync(&timer) && !ret)
88 /* Timed out. */ 88 /* Timed out. */
89 ret = 1; 89 ret = 1;
90 90
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 956d3e79f6aa..addb87cf44d9 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -79,7 +79,6 @@ void rio_dev_put(struct rio_dev *rdev)
79 79
80/** 80/**
81 * rio_device_probe - Tell if a RIO device structure has a matching RIO device id structure 81 * rio_device_probe - Tell if a RIO device structure has a matching RIO device id structure
82 * @id: the RIO device id structure to match against
83 * @dev: the RIO device structure to match against 82 * @dev: the RIO device structure to match against
84 * 83 *
85 * return 0 and set rio_dev->driver when drv claims rio_dev, else error 84 * return 0 and set rio_dev->driver when drv claims rio_dev, else error
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 165a81843357..4ad831de41ad 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -35,8 +35,8 @@ config RTC_HCTOSYS_DEVICE
35 default "rtc0" 35 default "rtc0"
36 help 36 help
37 The RTC device that will be used to (re)initialize the system 37 The RTC device that will be used to (re)initialize the system
38 clock, usually rtc0. Initialization is done when the system 38 clock, usually rtc0. Initialization is done when the system
39 starts up, and when it resumes from a low power state. This 39 starts up, and when it resumes from a low power state. This
40 device should record time in UTC, since the kernel won't do 40 device should record time in UTC, since the kernel won't do
41 timezone correction. 41 timezone correction.
42 42
@@ -44,7 +44,7 @@ config RTC_HCTOSYS_DEVICE
44 functions run, so it must usually be statically linked. 44 functions run, so it must usually be statically linked.
45 45
46 This clock should be battery-backed, so that it reads the correct 46 This clock should be battery-backed, so that it reads the correct
47 time when the system boots from a power-off state. Otherwise, your 47 time when the system boots from a power-off state. Otherwise, your
48 system will need an external clock source (like an NTP server). 48 system will need an external clock source (like an NTP server).
49 49
50 If the clock you specify here is not battery backed, it may still 50 If the clock you specify here is not battery backed, it may still
@@ -69,8 +69,7 @@ config RTC_INTF_SYSFS
69 Say yes here if you want to use your RTCs using sysfs interfaces, 69 Say yes here if you want to use your RTCs using sysfs interfaces,
70 /sys/class/rtc/rtc0 through /sys/.../rtcN. 70 /sys/class/rtc/rtc0 through /sys/.../rtcN.
71 71
72 This driver can also be built as a module. If so, the module 72 If unsure, say Y.
73 will be called rtc-sysfs.
74 73
75config RTC_INTF_PROC 74config RTC_INTF_PROC
76 boolean "/proc/driver/rtc (procfs for rtc0)" 75 boolean "/proc/driver/rtc (procfs for rtc0)"
@@ -78,11 +77,10 @@ config RTC_INTF_PROC
78 default RTC_CLASS 77 default RTC_CLASS
79 help 78 help
80 Say yes here if you want to use your first RTC through the proc 79 Say yes here if you want to use your first RTC through the proc
81 interface, /proc/driver/rtc. Other RTCs will not be available 80 interface, /proc/driver/rtc. Other RTCs will not be available
82 through that API. 81 through that API.
83 82
84 This driver can also be built as a module. If so, the module 83 If unsure, say Y.
85 will be called rtc-proc.
86 84
87config RTC_INTF_DEV 85config RTC_INTF_DEV
88 boolean "/dev/rtcN (character devices)" 86 boolean "/dev/rtcN (character devices)"
@@ -90,12 +88,14 @@ config RTC_INTF_DEV
90 help 88 help
91 Say yes here if you want to use your RTCs using the /dev 89 Say yes here if you want to use your RTCs using the /dev
92 interfaces, which "udev" sets up as /dev/rtc0 through 90 interfaces, which "udev" sets up as /dev/rtc0 through
93 /dev/rtcN. You may want to set up a symbolic link so one 91 /dev/rtcN.
94 of these can be accessed as /dev/rtc, which is a name
95 expected by "hwclock" and some other programs.
96 92
97 This driver can also be built as a module. If so, the module 93 You may want to set up a symbolic link so one of these
98 will be called rtc-dev. 94 can be accessed as /dev/rtc, which is a name
95 expected by "hwclock" and some other programs. Recent
96 versions of "udev" are known to set up the symlink for you.
97
98 If unsure, say Y.
99 99
100config RTC_INTF_DEV_UIE_EMUL 100config RTC_INTF_DEV_UIE_EMUL
101 bool "RTC UIE emulation on dev interface" 101 bool "RTC UIE emulation on dev interface"
@@ -132,14 +132,14 @@ config RTC_DRV_DS1307
132 tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00" 132 tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00"
133 help 133 help
134 If you say yes here you get support for various compatible RTC 134 If you say yes here you get support for various compatible RTC
135 chips (often with battery backup) connected with I2C. This driver 135 chips (often with battery backup) connected with I2C. This driver
136 should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00, 136 should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00,
137 and probably other chips. In some cases the RTC must already 137 and probably other chips. In some cases the RTC must already
138 have been initialized (by manufacturing or a bootloader). 138 have been initialized (by manufacturing or a bootloader).
139 139
140 The first seven registers on these chips hold an RTC, and other 140 The first seven registers on these chips hold an RTC, and other
141 registers may add features such as NVRAM, a trickle charger for 141 registers may add features such as NVRAM, a trickle charger for
142 the RTC/NVRAM backup power, and alarms. NVRAM is visible in 142 the RTC/NVRAM backup power, and alarms. NVRAM is visible in
143 sysfs, but other chip features may not be available. 143 sysfs, but other chip features may not be available.
144 144
145 This driver can also be built as a module. If so, the module 145 This driver can also be built as a module. If so, the module
@@ -150,10 +150,10 @@ config RTC_DRV_DS1374
150 depends on RTC_CLASS && I2C 150 depends on RTC_CLASS && I2C
151 help 151 help
152 If you say yes here you get support for Dallas Semiconductor 152 If you say yes here you get support for Dallas Semiconductor
153 DS1374 real-time clock chips. If an interrupt is associated 153 DS1374 real-time clock chips. If an interrupt is associated
154 with the device, the alarm functionality is supported. 154 with the device, the alarm functionality is supported.
155 155
156 This driver can also be built as a module. If so, the module 156 This driver can also be built as a module. If so, the module
157 will be called rtc-ds1374. 157 will be called rtc-ds1374.
158 158
159config RTC_DRV_DS1672 159config RTC_DRV_DS1672
@@ -247,7 +247,7 @@ config RTC_DRV_TWL92330
247 help 247 help
248 If you say yes here you get support for the RTC on the 248 If you say yes here you get support for the RTC on the
249 TWL92330 "Menelaus" power management chip, used with OMAP2 249 TWL92330 "Menelaus" power management chip, used with OMAP2
250 platforms. The support is integrated with the rest of 250 platforms. The support is integrated with the rest of
251 the Menelaus driver; it's not separate module. 251 the Menelaus driver; it's not separate module.
252 252
253config RTC_DRV_TWL4030 253config RTC_DRV_TWL4030
@@ -308,7 +308,7 @@ config RTC_DRV_DS1305
308 tristate "Dallas/Maxim DS1305/DS1306" 308 tristate "Dallas/Maxim DS1305/DS1306"
309 help 309 help
310 Select this driver to get support for the Dallas/Maxim DS1305 310 Select this driver to get support for the Dallas/Maxim DS1305
311 and DS1306 real time clock chips. These support a trickle 311 and DS1306 real time clock chips. These support a trickle
312 charger, alarms, and NVRAM in addition to the clock. 312 charger, alarms, and NVRAM in addition to the clock.
313 313
314 This driver can also be built as a module. If so, the module 314 This driver can also be built as a module. If so, the module
@@ -317,7 +317,8 @@ config RTC_DRV_DS1305
317config RTC_DRV_DS1390 317config RTC_DRV_DS1390
318 tristate "Dallas/Maxim DS1390/93/94" 318 tristate "Dallas/Maxim DS1390/93/94"
319 help 319 help
320 If you say yes here you get support for the DS1390/93/94 chips. 320 If you say yes here you get support for the
321 Dallas/Maxim DS1390/93/94 chips.
321 322
322 This driver only supports the RTC feature, and not other chip 323 This driver only supports the RTC feature, and not other chip
323 features such as alarms and trickle charging. 324 features such as alarms and trickle charging.
@@ -381,7 +382,7 @@ config RTC_DRV_CMOS
381 or LPC bus chips, and so on. 382 or LPC bus chips, and so on.
382 383
383 Your system will need to define the platform device used by 384 Your system will need to define the platform device used by
384 this driver, otherwise it won't be accessible. This means 385 this driver, otherwise it won't be accessible. This means
385 you can safely enable this driver if you don't know whether 386 you can safely enable this driver if you don't know whether
386 or not your board has this kind of hardware. 387 or not your board has this kind of hardware.
387 388
@@ -598,7 +599,7 @@ config RTC_DRV_AT91RM9200
598 depends on ARCH_AT91RM9200 || ARCH_AT91SAM9RL 599 depends on ARCH_AT91RM9200 || ARCH_AT91SAM9RL
599 help 600 help
600 Driver for the internal RTC (Realtime Clock) module found on 601 Driver for the internal RTC (Realtime Clock) module found on
601 Atmel AT91RM9200's and AT91SAM9RL chips. On SAM9RL chips 602 Atmel AT91RM9200's and AT91SAM9RL chips. On SAM9RL chips
602 this is powered by the backup power supply. 603 this is powered by the backup power supply.
603 604
604config RTC_DRV_AT91SAM9 605config RTC_DRV_AT91SAM9
@@ -620,8 +621,8 @@ config RTC_DRV_AT91SAM9_RTT
620 prompt "RTT module Number" if ARCH_AT91SAM9263 621 prompt "RTT module Number" if ARCH_AT91SAM9263
621 depends on RTC_DRV_AT91SAM9 622 depends on RTC_DRV_AT91SAM9
622 help 623 help
623 More than one RTT module is available. You can choose which 624 More than one RTT module is available. You can choose which
624 one will be used as an RTC. The default of zero is normally 625 one will be used as an RTC. The default of zero is normally
625 OK to use, though some systems use that for non-RTC purposes. 626 OK to use, though some systems use that for non-RTC purposes.
626 627
627config RTC_DRV_AT91SAM9_GPBR 628config RTC_DRV_AT91SAM9_GPBR
@@ -633,10 +634,20 @@ config RTC_DRV_AT91SAM9_GPBR
633 depends on RTC_DRV_AT91SAM9 634 depends on RTC_DRV_AT91SAM9
634 help 635 help
635 The RTC driver needs to use one of the General Purpose Backup 636 The RTC driver needs to use one of the General Purpose Backup
636 Registers (GPBRs) as well as the RTT. You can choose which one 637 Registers (GPBRs) as well as the RTT. You can choose which one
637 will be used. The default of zero is normally OK to use, but 638 will be used. The default of zero is normally OK to use, but
638 on some systems other software needs to use that register. 639 on some systems other software needs to use that register.
639 640
641config RTC_DRV_AU1XXX
642 tristate "Au1xxx Counter0 RTC support"
643 depends on SOC_AU1X00
644 help
645 This is a driver for the Au1xxx on-chip Counter0 (Time-Of-Year
646 counter) to be used as a RTC.
647
648 This driver can also be built as a module. If so, the module
649 will be called rtc-au1xxx.
650
640config RTC_DRV_BFIN 651config RTC_DRV_BFIN
641 tristate "Blackfin On-Chip RTC" 652 tristate "Blackfin On-Chip RTC"
642 depends on BLACKFIN && !BF561 653 depends on BLACKFIN && !BF561
@@ -669,6 +680,17 @@ config RTC_DRV_PPC
669 the RTC. This exposes that functionality through the generic RTC 680 the RTC. This exposes that functionality through the generic RTC
670 class. 681 class.
671 682
683config RTC_DRV_PXA
684 tristate "PXA27x/PXA3xx"
685 depends on ARCH_PXA
686 help
687 If you say Y here you will get access to the real time clock
688 built into your PXA27x or PXA3xx CPU.
689
690 This RTC driver uses PXA RTC registers available since pxa27x
691 series (RDxR, RYxR) instead of legacy RCNR, RTAR.
692
693
672config RTC_DRV_SUN4V 694config RTC_DRV_SUN4V
673 bool "SUN4V Hypervisor RTC" 695 bool "SUN4V Hypervisor RTC"
674 depends on SPARC64 696 depends on SPARC64
@@ -683,4 +705,22 @@ config RTC_DRV_STARFIRE
683 If you say Y here you will get support for the RTC found on 705 If you say Y here you will get support for the RTC found on
684 Starfire systems. 706 Starfire systems.
685 707
708config RTC_DRV_TX4939
709 tristate "TX4939 SoC"
710 depends on SOC_TX4939
711 help
712 Driver for the internal RTC (Realtime Clock) module found on
713 Toshiba TX4939 SoC.
714
715config RTC_DRV_MV
716 tristate "Marvell SoC RTC"
717 depends on ARCH_KIRKWOOD
718 help
719 If you say yes here you will get support for the in-chip RTC
720 that can be found in some of Marvell's SoC devices, such as
721 the Kirkwood 88F6281 and 88F6192.
722
723 This driver can also be built as a module. If so, the module
724 will be called rtc-mv.
725
686endif # RTC_CLASS 726endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 6e79c912bf9e..9a4340d48f26 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -20,6 +20,7 @@ rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
20obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o 20obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
21obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o 21obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
22obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o 22obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o
23obj-$(CONFIG_RTC_DRV_AU1XXX) += rtc-au1xxx.o
23obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o 24obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o
24obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o 25obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
25obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o 26obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o
@@ -47,6 +48,7 @@ obj-$(CONFIG_RTC_DRV_SUN4V) += rtc-sun4v.o
47obj-$(CONFIG_RTC_DRV_STARFIRE) += rtc-starfire.o 48obj-$(CONFIG_RTC_DRV_STARFIRE) += rtc-starfire.o
48obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o 49obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
49obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o 50obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
51obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o
50obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o 52obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
51obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o 53obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
52obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 54obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
@@ -54,6 +56,7 @@ obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o
54obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o 56obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
55obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o 57obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o
56obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o 58obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o
59obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o
57obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o 60obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
58obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o 61obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
59obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o 62obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
@@ -66,6 +69,7 @@ obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o
66obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o 69obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
67obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o 70obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
68obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl4030.o 71obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl4030.o
72obj-$(CONFIG_RTC_DRV_TX4939) += rtc-tx4939.o
69obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o 73obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o
70obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o 74obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
71obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o 75obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 4dfdf019fccc..be5a6b73e601 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -48,9 +48,7 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg)
48 struct rtc_time tm; 48 struct rtc_time tm;
49 struct timespec ts = current_kernel_time(); 49 struct timespec ts = current_kernel_time();
50 50
51 if (strncmp(rtc->dev.bus_id, 51 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
52 CONFIG_RTC_HCTOSYS_DEVICE,
53 BUS_ID_SIZE) != 0)
54 return 0; 52 return 0;
55 53
56 rtc_read_time(rtc, &tm); 54 rtc_read_time(rtc, &tm);
@@ -71,20 +69,18 @@ static int rtc_resume(struct device *dev)
71 time_t newtime; 69 time_t newtime;
72 struct timespec time; 70 struct timespec time;
73 71
74 if (strncmp(rtc->dev.bus_id, 72 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
75 CONFIG_RTC_HCTOSYS_DEVICE,
76 BUS_ID_SIZE) != 0)
77 return 0; 73 return 0;
78 74
79 rtc_read_time(rtc, &tm); 75 rtc_read_time(rtc, &tm);
80 if (rtc_valid_tm(&tm) != 0) { 76 if (rtc_valid_tm(&tm) != 0) {
81 pr_debug("%s: bogus resume time\n", rtc->dev.bus_id); 77 pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev));
82 return 0; 78 return 0;
83 } 79 }
84 rtc_tm_to_time(&tm, &newtime); 80 rtc_tm_to_time(&tm, &newtime);
85 if (newtime <= oldtime) { 81 if (newtime <= oldtime) {
86 if (newtime < oldtime) 82 if (newtime < oldtime)
87 pr_debug("%s: time travel!\n", rtc->dev.bus_id); 83 pr_debug("%s: time travel!\n", dev_name(&rtc->dev));
88 return 0; 84 return 0;
89 } 85 }
90 86
@@ -156,7 +152,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
156 init_waitqueue_head(&rtc->irq_queue); 152 init_waitqueue_head(&rtc->irq_queue);
157 153
158 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); 154 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
159 snprintf(rtc->dev.bus_id, BUS_ID_SIZE, "rtc%d", id); 155 dev_set_name(&rtc->dev, "rtc%d", id);
160 156
161 rtc_dev_prepare(rtc); 157 rtc_dev_prepare(rtc);
162 158
@@ -169,7 +165,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
169 rtc_proc_add_device(rtc); 165 rtc_proc_add_device(rtc);
170 166
171 dev_info(dev, "rtc core: registered %s as %s\n", 167 dev_info(dev, "rtc core: registered %s as %s\n",
172 rtc->name, rtc->dev.bus_id); 168 rtc->name, dev_name(&rtc->dev));
173 169
174 return rtc; 170 return rtc;
175 171
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index fd2c652504ff..4348c4b0d453 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -50,10 +50,15 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
50 50
51 if (!rtc->ops) 51 if (!rtc->ops)
52 err = -ENODEV; 52 err = -ENODEV;
53 else if (!rtc->ops->set_time) 53 else if (rtc->ops->set_time)
54 err = -EINVAL;
55 else
56 err = rtc->ops->set_time(rtc->dev.parent, tm); 54 err = rtc->ops->set_time(rtc->dev.parent, tm);
55 else if (rtc->ops->set_mmss) {
56 unsigned long secs;
57 err = rtc_tm_to_time(tm, &secs);
58 if (err == 0)
59 err = rtc->ops->set_mmss(rtc->dev.parent, secs);
60 } else
61 err = -EINVAL;
57 62
58 mutex_unlock(&rtc->ops_lock); 63 mutex_unlock(&rtc->ops_lock);
59 return err; 64 return err;
@@ -389,7 +394,7 @@ static int __rtc_match(struct device *dev, void *data)
389{ 394{
390 char *name = (char *)data; 395 char *name = (char *)data;
391 396
392 if (strncmp(dev->bus_id, name, BUS_ID_SIZE) == 0) 397 if (strcmp(dev_name(dev), name) == 0)
393 return 1; 398 return 1;
394 return 0; 399 return 0;
395} 400}
@@ -504,9 +509,6 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
504 if (rtc->ops->irq_set_freq == NULL) 509 if (rtc->ops->irq_set_freq == NULL)
505 return -ENXIO; 510 return -ENXIO;
506 511
507 if (!is_power_of_2(freq))
508 return -EINVAL;
509
510 spin_lock_irqsave(&rtc->irq_task_lock, flags); 512 spin_lock_irqsave(&rtc->irq_task_lock, flags);
511 if (rtc->irq_task != NULL && task == NULL) 513 if (rtc->irq_task != NULL && task == NULL)
512 err = -EBUSY; 514 err = -EBUSY;
diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c
index 90b9a6503e15..e1ec33e40e38 100644
--- a/drivers/rtc/rtc-at32ap700x.c
+++ b/drivers/rtc/rtc-at32ap700x.c
@@ -205,7 +205,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev)
205{ 205{
206 struct resource *regs; 206 struct resource *regs;
207 struct rtc_at32ap700x *rtc; 207 struct rtc_at32ap700x *rtc;
208 int irq = -1; 208 int irq;
209 int ret; 209 int ret;
210 210
211 rtc = kzalloc(sizeof(struct rtc_at32ap700x), GFP_KERNEL); 211 rtc = kzalloc(sizeof(struct rtc_at32ap700x), GFP_KERNEL);
@@ -222,7 +222,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev)
222 } 222 }
223 223
224 irq = platform_get_irq(pdev, 0); 224 irq = platform_get_irq(pdev, 0);
225 if (irq < 0) { 225 if (irq <= 0) {
226 dev_dbg(&pdev->dev, "could not get irq\n"); 226 dev_dbg(&pdev->dev, "could not get irq\n");
227 ret = -ENXIO; 227 ret = -ENXIO;
228 goto out; 228 goto out;
diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c
new file mode 100644
index 000000000000..8906a688e6a6
--- /dev/null
+++ b/drivers/rtc/rtc-au1xxx.c
@@ -0,0 +1,153 @@
1/*
2 * Au1xxx counter0 (aka Time-Of-Year counter) RTC interface driver.
3 *
4 * Copyright (C) 2008 Manuel Lauss <mano@roarinelk.homelinux.net>
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 */
10
11/* All current Au1xxx SoCs have 2 counters fed by an external 32.768 kHz
12 * crystal. Counter 0, which keeps counting during sleep/powerdown, is
13 * used to count seconds since the beginning of the unix epoch.
14 *
15 * The counters must be configured and enabled by bootloader/board code;
16 * no checks as to whether they really get a proper 32.768kHz clock are
17 * made as this would take far too long.
18 */
19
20#include <linux/module.h>
21#include <linux/kernel.h>
22#include <linux/rtc.h>
23#include <linux/init.h>
24#include <linux/platform_device.h>
25#include <linux/io.h>
26#include <asm/mach-au1x00/au1000.h>
27
28/* 32kHz clock enabled and detected */
29#define CNTR_OK (SYS_CNTRL_E0 | SYS_CNTRL_32S)
30
31static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm)
32{
33 unsigned long t;
34
35 t = au_readl(SYS_TOYREAD);
36
37 rtc_time_to_tm(t, tm);
38
39 return rtc_valid_tm(tm);
40}
41
42static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
43{
44 unsigned long t;
45
46 rtc_tm_to_time(tm, &t);
47
48 au_writel(t, SYS_TOYWRITE);
49 au_sync();
50
51 /* wait for the pending register write to succeed. This can
52 * take up to 6 seconds...
53 */
54 while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S)
55 msleep(1);
56
57 return 0;
58}
59
60static struct rtc_class_ops au1xtoy_rtc_ops = {
61 .read_time = au1xtoy_rtc_read_time,
62 .set_time = au1xtoy_rtc_set_time,
63};
64
65static int __devinit au1xtoy_rtc_probe(struct platform_device *pdev)
66{
67 struct rtc_device *rtcdev;
68 unsigned long t;
69 int ret;
70
71 t = au_readl(SYS_COUNTER_CNTRL);
72 if (!(t & CNTR_OK)) {
73 dev_err(&pdev->dev, "counters not working; aborting.\n");
74 ret = -ENODEV;
75 goto out_err;
76 }
77
78 ret = -ETIMEDOUT;
79
80 /* set counter0 tickrate to 1Hz if necessary */
81 if (au_readl(SYS_TOYTRIM) != 32767) {
82 /* wait until hardware gives access to TRIM register */
83 t = 0x00100000;
84 while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_T0S) && t--)
85 msleep(1);
86
87 if (!t) {
88 /* timed out waiting for register access; assume
89 * counters are unusable.
90 */
91 dev_err(&pdev->dev, "timeout waiting for access\n");
92 goto out_err;
93 }
94
95 /* set 1Hz TOY tick rate */
96 au_writel(32767, SYS_TOYTRIM);
97 au_sync();
98 }
99
100 /* wait until the hardware allows writes to the counter reg */
101 while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S)
102 msleep(1);
103
104 rtcdev = rtc_device_register("rtc-au1xxx", &pdev->dev,
105 &au1xtoy_rtc_ops, THIS_MODULE);
106 if (IS_ERR(rtcdev)) {
107 ret = PTR_ERR(rtcdev);
108 goto out_err;
109 }
110
111 platform_set_drvdata(pdev, rtcdev);
112
113 return 0;
114
115out_err:
116 return ret;
117}
118
119static int __devexit au1xtoy_rtc_remove(struct platform_device *pdev)
120{
121 struct rtc_device *rtcdev = platform_get_drvdata(pdev);
122
123 rtc_device_unregister(rtcdev);
124 platform_set_drvdata(pdev, NULL);
125
126 return 0;
127}
128
129static struct platform_driver au1xrtc_driver = {
130 .driver = {
131 .name = "rtc-au1xxx",
132 .owner = THIS_MODULE,
133 },
134 .remove = __devexit_p(au1xtoy_rtc_remove),
135};
136
137static int __init au1xtoy_rtc_init(void)
138{
139 return platform_driver_probe(&au1xrtc_driver, au1xtoy_rtc_probe);
140}
141
142static void __exit au1xtoy_rtc_exit(void)
143{
144 platform_driver_unregister(&au1xrtc_driver);
145}
146
147module_init(au1xtoy_rtc_init);
148module_exit(au1xtoy_rtc_exit);
149
150MODULE_DESCRIPTION("Au1xxx TOY-counter-based RTC driver");
151MODULE_AUTHOR("Manuel Lauss <manuel.lauss@gmail.com>");
152MODULE_LICENSE("GPL");
153MODULE_ALIAS("platform:rtc-au1xxx");
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 34439ce3967e..aafd3e6ebb0d 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -390,7 +390,7 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev)
390 390
391 /* Register our RTC with the RTC framework */ 391 /* Register our RTC with the RTC framework */
392 rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, THIS_MODULE); 392 rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, THIS_MODULE);
393 if (unlikely(IS_ERR(rtc))) { 393 if (unlikely(IS_ERR(rtc->rtc_dev))) {
394 ret = PTR_ERR(rtc->rtc_dev); 394 ret = PTR_ERR(rtc->rtc_dev);
395 goto err_irq; 395 goto err_irq;
396 } 396 }
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 6cf8e282338f..b6d35f50e404 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -35,6 +35,7 @@
35#include <linux/spinlock.h> 35#include <linux/spinlock.h>
36#include <linux/platform_device.h> 36#include <linux/platform_device.h>
37#include <linux/mod_devicetable.h> 37#include <linux/mod_devicetable.h>
38#include <linux/log2.h>
38 39
39/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ 40/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
40#include <asm-generic/rtc.h> 41#include <asm-generic/rtc.h>
@@ -58,7 +59,7 @@ struct cmos_rtc {
58}; 59};
59 60
60/* both platform and pnp busses use negative numbers for invalid irqs */ 61/* both platform and pnp busses use negative numbers for invalid irqs */
61#define is_valid_irq(n) ((n) >= 0) 62#define is_valid_irq(n) ((n) > 0)
62 63
63static const char driver_name[] = "rtc_cmos"; 64static const char driver_name[] = "rtc_cmos";
64 65
@@ -384,6 +385,8 @@ static int cmos_irq_set_freq(struct device *dev, int freq)
384 if (!is_valid_irq(cmos->irq)) 385 if (!is_valid_irq(cmos->irq))
385 return -ENXIO; 386 return -ENXIO;
386 387
388 if (!is_power_of_2(freq))
389 return -EINVAL;
387 /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */ 390 /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
388 f = ffs(freq); 391 f = ffs(freq);
389 if (f-- > 16) 392 if (f-- > 16)
@@ -729,7 +732,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
729 732
730 cmos_rtc.dev = dev; 733 cmos_rtc.dev = dev;
731 dev_set_drvdata(dev, &cmos_rtc); 734 dev_set_drvdata(dev, &cmos_rtc);
732 rename_region(ports, cmos_rtc.rtc->dev.bus_id); 735 rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
733 736
734 spin_lock_irq(&rtc_lock); 737 spin_lock_irq(&rtc_lock);
735 738
@@ -777,7 +780,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
777 rtc_cmos_int_handler = cmos_interrupt; 780 rtc_cmos_int_handler = cmos_interrupt;
778 781
779 retval = request_irq(rtc_irq, rtc_cmos_int_handler, 782 retval = request_irq(rtc_irq, rtc_cmos_int_handler,
780 IRQF_DISABLED, cmos_rtc.rtc->dev.bus_id, 783 IRQF_DISABLED, dev_name(&cmos_rtc.rtc->dev),
781 cmos_rtc.rtc); 784 cmos_rtc.rtc);
782 if (retval < 0) { 785 if (retval < 0) {
783 dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); 786 dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq);
@@ -795,7 +798,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
795 } 798 }
796 799
797 pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n", 800 pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n",
798 cmos_rtc.rtc->dev.bus_id, 801 dev_name(&cmos_rtc.rtc->dev),
799 is_valid_irq(rtc_irq) 802 is_valid_irq(rtc_irq)
800 ? (cmos_rtc.mon_alrm 803 ? (cmos_rtc.mon_alrm
801 ? "year" 804 ? "year"
@@ -885,7 +888,7 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg)
885 } 888 }
886 889
887 pr_debug("%s: suspend%s, ctrl %02x\n", 890 pr_debug("%s: suspend%s, ctrl %02x\n",
888 cmos_rtc.rtc->dev.bus_id, 891 dev_name(&cmos_rtc.rtc->dev),
889 (tmp & RTC_AIE) ? ", alarm may wake" : "", 892 (tmp & RTC_AIE) ? ", alarm may wake" : "",
890 tmp); 893 tmp);
891 894
@@ -941,7 +944,7 @@ static int cmos_resume(struct device *dev)
941 } 944 }
942 945
943 pr_debug("%s: resume, ctrl %02x\n", 946 pr_debug("%s: resume, ctrl %02x\n",
944 cmos_rtc.rtc->dev.bus_id, 947 dev_name(&cmos_rtc.rtc->dev),
945 tmp); 948 tmp);
946 949
947 return 0; 950 return 0;
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 9a234a4ec06d..4aedc705518c 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -10,7 +10,7 @@
10#include <linux/platform_device.h> 10#include <linux/platform_device.h>
11#include <linux/bcd.h> 11#include <linux/bcd.h>
12 12
13#define DRV_VERSION "0.1" 13#define DRV_VERSION "0.2"
14 14
15struct ds1216_regs { 15struct ds1216_regs {
16 u8 tsec; 16 u8 tsec;
@@ -101,7 +101,8 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
101 tm->tm_year = bcd2bin(regs.year); 101 tm->tm_year = bcd2bin(regs.year);
102 if (tm->tm_year < 70) 102 if (tm->tm_year < 70)
103 tm->tm_year += 100; 103 tm->tm_year += 100;
104 return 0; 104
105 return rtc_valid_tm(tm);
105} 106}
106 107
107static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm) 108static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -138,9 +139,8 @@ static const struct rtc_class_ops ds1216_rtc_ops = {
138 .set_time = ds1216_rtc_set_time, 139 .set_time = ds1216_rtc_set_time,
139}; 140};
140 141
141static int __devinit ds1216_rtc_probe(struct platform_device *pdev) 142static int __init ds1216_rtc_probe(struct platform_device *pdev)
142{ 143{
143 struct rtc_device *rtc;
144 struct resource *res; 144 struct resource *res;
145 struct ds1216_priv *priv; 145 struct ds1216_priv *priv;
146 int ret = 0; 146 int ret = 0;
@@ -152,7 +152,10 @@ static int __devinit ds1216_rtc_probe(struct platform_device *pdev)
152 priv = kzalloc(sizeof *priv, GFP_KERNEL); 152 priv = kzalloc(sizeof *priv, GFP_KERNEL);
153 if (!priv) 153 if (!priv)
154 return -ENOMEM; 154 return -ENOMEM;
155 priv->size = res->end - res->start + 1; 155
156 platform_set_drvdata(pdev, priv);
157
158 priv->size = resource_size(res);
156 if (!request_mem_region(res->start, priv->size, pdev->name)) { 159 if (!request_mem_region(res->start, priv->size, pdev->name)) {
157 ret = -EBUSY; 160 ret = -EBUSY;
158 goto out; 161 goto out;
@@ -163,22 +166,18 @@ static int __devinit ds1216_rtc_probe(struct platform_device *pdev)
163 ret = -ENOMEM; 166 ret = -ENOMEM;
164 goto out; 167 goto out;
165 } 168 }
166 rtc = rtc_device_register("ds1216", &pdev->dev, 169 priv->rtc = rtc_device_register("ds1216", &pdev->dev,
167 &ds1216_rtc_ops, THIS_MODULE); 170 &ds1216_rtc_ops, THIS_MODULE);
168 if (IS_ERR(rtc)) { 171 if (IS_ERR(priv->rtc)) {
169 ret = PTR_ERR(rtc); 172 ret = PTR_ERR(priv->rtc);
170 goto out; 173 goto out;
171 } 174 }
172 priv->rtc = rtc;
173 platform_set_drvdata(pdev, priv);
174 175
175 /* dummy read to get clock into a known state */ 176 /* dummy read to get clock into a known state */
176 ds1216_read(priv->ioaddr, dummy); 177 ds1216_read(priv->ioaddr, dummy);
177 return 0; 178 return 0;
178 179
179out: 180out:
180 if (priv->rtc)
181 rtc_device_unregister(priv->rtc);
182 if (priv->ioaddr) 181 if (priv->ioaddr)
183 iounmap(priv->ioaddr); 182 iounmap(priv->ioaddr);
184 if (priv->baseaddr) 183 if (priv->baseaddr)
@@ -187,7 +186,7 @@ out:
187 return ret; 186 return ret;
188} 187}
189 188
190static int __devexit ds1216_rtc_remove(struct platform_device *pdev) 189static int __exit ds1216_rtc_remove(struct platform_device *pdev)
191{ 190{
192 struct ds1216_priv *priv = platform_get_drvdata(pdev); 191 struct ds1216_priv *priv = platform_get_drvdata(pdev);
193 192
@@ -203,13 +202,12 @@ static struct platform_driver ds1216_rtc_platform_driver = {
203 .name = "rtc-ds1216", 202 .name = "rtc-ds1216",
204 .owner = THIS_MODULE, 203 .owner = THIS_MODULE,
205 }, 204 },
206 .probe = ds1216_rtc_probe, 205 .remove = __exit_p(ds1216_rtc_remove),
207 .remove = __devexit_p(ds1216_rtc_remove),
208}; 206};
209 207
210static int __init ds1216_rtc_init(void) 208static int __init ds1216_rtc_init(void)
211{ 209{
212 return platform_driver_register(&ds1216_rtc_platform_driver); 210 return platform_driver_probe(&ds1216_rtc_platform_driver, ds1216_rtc_probe);
213} 211}
214 212
215static void __exit ds1216_rtc_exit(void) 213static void __exit ds1216_rtc_exit(void)
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index 599e976bf014..e54b5c619bdf 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * rtc-ds1390.c -- driver for DS1390/93/94 2 * rtc-ds1390.c -- driver for the Dallas/Maxim DS1390/93/94 SPI RTC
3 * 3 *
4 * Copyright (C) 2008 Mercury IMC Ltd 4 * Copyright (C) 2008 Mercury IMC Ltd
5 * Written by Mark Jackson <mpfj@mimc.co.uk> 5 * Written by Mark Jackson <mpfj@mimc.co.uk>
@@ -8,11 +8,13 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 * 10 *
11 * NOTE : Currently this driver only supports the bare minimum for read 11 * NOTE: Currently this driver only supports the bare minimum for read
12 * and write the RTC. The extra features provided by the chip family 12 * and write the RTC. The extra features provided by the chip family
13 * (alarms, trickle charger, different control registers) are unavailable. 13 * (alarms, trickle charger, different control registers) are unavailable.
14 */ 14 */
15 15
16#include <linux/init.h>
17#include <linux/module.h>
16#include <linux/platform_device.h> 18#include <linux/platform_device.h>
17#include <linux/rtc.h> 19#include <linux/rtc.h>
18#include <linux/spi/spi.h> 20#include <linux/spi/spi.h>
@@ -42,20 +44,6 @@ struct ds1390 {
42 u8 txrx_buf[9]; /* cmd + 8 registers */ 44 u8 txrx_buf[9]; /* cmd + 8 registers */
43}; 45};
44 46
45static void ds1390_set_reg(struct device *dev, unsigned char address,
46 unsigned char data)
47{
48 struct spi_device *spi = to_spi_device(dev);
49 struct ds1390 *chip = dev_get_drvdata(dev);
50
51 /* Set MSB to indicate write */
52 chip->txrx_buf[0] = address | 0x80;
53 chip->txrx_buf[1] = data;
54
55 /* do the i/o */
56 spi_write_then_read(spi, chip->txrx_buf, 2, NULL, 0);
57}
58
59static int ds1390_get_reg(struct device *dev, unsigned char address, 47static int ds1390_get_reg(struct device *dev, unsigned char address,
60 unsigned char *data) 48 unsigned char *data)
61{ 49{
@@ -78,7 +66,7 @@ static int ds1390_get_reg(struct device *dev, unsigned char address,
78 return 0; 66 return 0;
79} 67}
80 68
81static int ds1390_get_datetime(struct device *dev, struct rtc_time *dt) 69static int ds1390_read_time(struct device *dev, struct rtc_time *dt)
82{ 70{
83 struct spi_device *spi = to_spi_device(dev); 71 struct spi_device *spi = to_spi_device(dev);
84 struct ds1390 *chip = dev_get_drvdata(dev); 72 struct ds1390 *chip = dev_get_drvdata(dev);
@@ -107,7 +95,7 @@ static int ds1390_get_datetime(struct device *dev, struct rtc_time *dt)
107 return rtc_valid_tm(dt); 95 return rtc_valid_tm(dt);
108} 96}
109 97
110static int ds1390_set_datetime(struct device *dev, struct rtc_time *dt) 98static int ds1390_set_time(struct device *dev, struct rtc_time *dt)
111{ 99{
112 struct spi_device *spi = to_spi_device(dev); 100 struct spi_device *spi = to_spi_device(dev);
113 struct ds1390 *chip = dev_get_drvdata(dev); 101 struct ds1390 *chip = dev_get_drvdata(dev);
@@ -127,16 +115,6 @@ static int ds1390_set_datetime(struct device *dev, struct rtc_time *dt)
127 return spi_write_then_read(spi, chip->txrx_buf, 8, NULL, 0); 115 return spi_write_then_read(spi, chip->txrx_buf, 8, NULL, 0);
128} 116}
129 117
130static int ds1390_read_time(struct device *dev, struct rtc_time *tm)
131{
132 return ds1390_get_datetime(dev, tm);
133}
134
135static int ds1390_set_time(struct device *dev, struct rtc_time *tm)
136{
137 return ds1390_set_datetime(dev, tm);
138}
139
140static const struct rtc_class_ops ds1390_rtc_ops = { 118static const struct rtc_class_ops ds1390_rtc_ops = {
141 .read_time = ds1390_read_time, 119 .read_time = ds1390_read_time,
142 .set_time = ds1390_set_time, 120 .set_time = ds1390_set_time,
@@ -149,46 +127,40 @@ static int __devinit ds1390_probe(struct spi_device *spi)
149 struct ds1390 *chip; 127 struct ds1390 *chip;
150 int res; 128 int res;
151 129
152 printk(KERN_DEBUG "DS1390 SPI RTC driver\n");
153
154 rtc = rtc_device_register("ds1390",
155 &spi->dev, &ds1390_rtc_ops, THIS_MODULE);
156 if (IS_ERR(rtc)) {
157 printk(KERN_ALERT "RTC : unable to register device\n");
158 return PTR_ERR(rtc);
159 }
160
161 spi->mode = SPI_MODE_3; 130 spi->mode = SPI_MODE_3;
162 spi->bits_per_word = 8; 131 spi->bits_per_word = 8;
163 spi_setup(spi); 132 spi_setup(spi);
164 133
165 chip = kzalloc(sizeof *chip, GFP_KERNEL); 134 chip = kzalloc(sizeof *chip, GFP_KERNEL);
166 if (!chip) { 135 if (!chip) {
167 printk(KERN_ALERT "RTC : unable to allocate device memory\n"); 136 dev_err(&spi->dev, "unable to allocate device memory\n");
168 rtc_device_unregister(rtc);
169 return -ENOMEM; 137 return -ENOMEM;
170 } 138 }
171 chip->rtc = rtc;
172 dev_set_drvdata(&spi->dev, chip); 139 dev_set_drvdata(&spi->dev, chip);
173 140
174 res = ds1390_get_reg(&spi->dev, DS1390_REG_SECONDS, &tmp); 141 res = ds1390_get_reg(&spi->dev, DS1390_REG_SECONDS, &tmp);
175 if (res) { 142 if (res != 0) {
176 printk(KERN_ALERT "RTC : unable to read device\n"); 143 dev_err(&spi->dev, "unable to read device\n");
177 rtc_device_unregister(rtc); 144 kfree(chip);
178 return res; 145 return res;
179 } 146 }
180 147
181 return 0; 148 chip->rtc = rtc_device_register("ds1390",
149 &spi->dev, &ds1390_rtc_ops, THIS_MODULE);
150 if (IS_ERR(chip->rtc)) {
151 dev_err(&spi->dev, "unable to register device\n");
152 res = PTR_ERR(chip->rtc);
153 kfree(chip);
154 }
155
156 return res;
182} 157}
183 158
184static int __devexit ds1390_remove(struct spi_device *spi) 159static int __devexit ds1390_remove(struct spi_device *spi)
185{ 160{
186 struct ds1390 *chip = platform_get_drvdata(spi); 161 struct ds1390 *chip = platform_get_drvdata(spi);
187 struct rtc_device *rtc = chip->rtc;
188
189 if (rtc)
190 rtc_device_unregister(rtc);
191 162
163 rtc_device_unregister(chip->rtc);
192 kfree(chip); 164 kfree(chip);
193 165
194 return 0; 166 return 0;
@@ -215,6 +187,6 @@ static __exit void ds1390_exit(void)
215} 187}
216module_exit(ds1390_exit); 188module_exit(ds1390_exit);
217 189
218MODULE_DESCRIPTION("DS1390/93/94 SPI RTC driver"); 190MODULE_DESCRIPTION("Dallas/Maxim DS1390/93/94 SPI RTC driver");
219MODULE_AUTHOR("Mark Jackson <mpfj@mimc.co.uk>"); 191MODULE_AUTHOR("Mark Jackson <mpfj@mimc.co.uk>");
220MODULE_LICENSE("GPL"); 192MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 25caada78398..23a07fe15a2c 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -326,9 +326,9 @@ ds1511_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
326 struct platform_device *pdev = to_platform_device(dev); 326 struct platform_device *pdev = to_platform_device(dev);
327 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 327 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
328 328
329 if (pdata->irq < 0) { 329 if (pdata->irq <= 0)
330 return -EINVAL; 330 return -EINVAL;
331 } 331
332 pdata->alrm_mday = alrm->time.tm_mday; 332 pdata->alrm_mday = alrm->time.tm_mday;
333 pdata->alrm_hour = alrm->time.tm_hour; 333 pdata->alrm_hour = alrm->time.tm_hour;
334 pdata->alrm_min = alrm->time.tm_min; 334 pdata->alrm_min = alrm->time.tm_min;
@@ -346,9 +346,9 @@ ds1511_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
346 struct platform_device *pdev = to_platform_device(dev); 346 struct platform_device *pdev = to_platform_device(dev);
347 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 347 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
348 348
349 if (pdata->irq < 0) { 349 if (pdata->irq <= 0)
350 return -EINVAL; 350 return -EINVAL;
351 } 351
352 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; 352 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
353 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; 353 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
354 alrm->time.tm_min = pdata->alrm_min < 0 ? 0 : pdata->alrm_min; 354 alrm->time.tm_min = pdata->alrm_min < 0 ? 0 : pdata->alrm_min;
@@ -385,7 +385,7 @@ ds1511_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
385 struct platform_device *pdev = to_platform_device(dev); 385 struct platform_device *pdev = to_platform_device(dev);
386 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 386 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
387 387
388 if (pdata->irq < 0) { 388 if (pdata->irq <= 0) {
389 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ 389 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
390 } 390 }
391 switch (cmd) { 391 switch (cmd) {
@@ -503,7 +503,6 @@ ds1511_rtc_probe(struct platform_device *pdev)
503 if (!pdata) { 503 if (!pdata) {
504 return -ENOMEM; 504 return -ENOMEM;
505 } 505 }
506 pdata->irq = -1;
507 pdata->size = res->end - res->start + 1; 506 pdata->size = res->end - res->start + 1;
508 if (!request_mem_region(res->start, pdata->size, pdev->name)) { 507 if (!request_mem_region(res->start, pdata->size, pdev->name)) {
509 ret = -EBUSY; 508 ret = -EBUSY;
@@ -545,13 +544,13 @@ ds1511_rtc_probe(struct platform_device *pdev)
545 * if the platform has an interrupt in mind for this device, 544 * if the platform has an interrupt in mind for this device,
546 * then by all means, set it 545 * then by all means, set it
547 */ 546 */
548 if (pdata->irq >= 0) { 547 if (pdata->irq > 0) {
549 rtc_read(RTC_CMD1); 548 rtc_read(RTC_CMD1);
550 if (request_irq(pdata->irq, ds1511_interrupt, 549 if (request_irq(pdata->irq, ds1511_interrupt,
551 IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) { 550 IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) {
552 551
553 dev_warn(&pdev->dev, "interrupt not available.\n"); 552 dev_warn(&pdev->dev, "interrupt not available.\n");
554 pdata->irq = -1; 553 pdata->irq = 0;
555 } 554 }
556 } 555 }
557 556
@@ -572,7 +571,7 @@ ds1511_rtc_probe(struct platform_device *pdev)
572 if (pdata->rtc) { 571 if (pdata->rtc) {
573 rtc_device_unregister(pdata->rtc); 572 rtc_device_unregister(pdata->rtc);
574 } 573 }
575 if (pdata->irq >= 0) { 574 if (pdata->irq > 0) {
576 free_irq(pdata->irq, pdev); 575 free_irq(pdata->irq, pdev);
577 } 576 }
578 if (ds1511_base) { 577 if (ds1511_base) {
@@ -595,7 +594,7 @@ ds1511_rtc_remove(struct platform_device *pdev)
595 sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr); 594 sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr);
596 rtc_device_unregister(pdata->rtc); 595 rtc_device_unregister(pdata->rtc);
597 pdata->rtc = NULL; 596 pdata->rtc = NULL;
598 if (pdata->irq >= 0) { 597 if (pdata->irq > 0) {
599 /* 598 /*
600 * disable the alarm interrupt 599 * disable the alarm interrupt
601 */ 600 */
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index b9475cd20210..38d472b63406 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -162,7 +162,7 @@ static int ds1553_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
162 struct platform_device *pdev = to_platform_device(dev); 162 struct platform_device *pdev = to_platform_device(dev);
163 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 163 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
164 164
165 if (pdata->irq < 0) 165 if (pdata->irq <= 0)
166 return -EINVAL; 166 return -EINVAL;
167 pdata->alrm_mday = alrm->time.tm_mday; 167 pdata->alrm_mday = alrm->time.tm_mday;
168 pdata->alrm_hour = alrm->time.tm_hour; 168 pdata->alrm_hour = alrm->time.tm_hour;
@@ -179,7 +179,7 @@ static int ds1553_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
179 struct platform_device *pdev = to_platform_device(dev); 179 struct platform_device *pdev = to_platform_device(dev);
180 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 180 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
181 181
182 if (pdata->irq < 0) 182 if (pdata->irq <= 0)
183 return -EINVAL; 183 return -EINVAL;
184 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; 184 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
185 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; 185 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
@@ -213,7 +213,7 @@ static int ds1553_rtc_ioctl(struct device *dev, unsigned int cmd,
213 struct platform_device *pdev = to_platform_device(dev); 213 struct platform_device *pdev = to_platform_device(dev);
214 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 214 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
215 215
216 if (pdata->irq < 0) 216 if (pdata->irq <= 0)
217 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ 217 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
218 switch (cmd) { 218 switch (cmd) {
219 case RTC_AIE_OFF: 219 case RTC_AIE_OFF:
@@ -301,7 +301,6 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev)
301 pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); 301 pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
302 if (!pdata) 302 if (!pdata)
303 return -ENOMEM; 303 return -ENOMEM;
304 pdata->irq = -1;
305 if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) { 304 if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) {
306 ret = -EBUSY; 305 ret = -EBUSY;
307 goto out; 306 goto out;
@@ -327,13 +326,13 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev)
327 if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_BLF) 326 if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_BLF)
328 dev_warn(&pdev->dev, "voltage-low detected.\n"); 327 dev_warn(&pdev->dev, "voltage-low detected.\n");
329 328
330 if (pdata->irq >= 0) { 329 if (pdata->irq > 0) {
331 writeb(0, ioaddr + RTC_INTERRUPTS); 330 writeb(0, ioaddr + RTC_INTERRUPTS);
332 if (request_irq(pdata->irq, ds1553_rtc_interrupt, 331 if (request_irq(pdata->irq, ds1553_rtc_interrupt,
333 IRQF_DISABLED | IRQF_SHARED, 332 IRQF_DISABLED | IRQF_SHARED,
334 pdev->name, pdev) < 0) { 333 pdev->name, pdev) < 0) {
335 dev_warn(&pdev->dev, "interrupt not available.\n"); 334 dev_warn(&pdev->dev, "interrupt not available.\n");
336 pdata->irq = -1; 335 pdata->irq = 0;
337 } 336 }
338 } 337 }
339 338
@@ -353,7 +352,7 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev)
353 out: 352 out:
354 if (pdata->rtc) 353 if (pdata->rtc)
355 rtc_device_unregister(pdata->rtc); 354 rtc_device_unregister(pdata->rtc);
356 if (pdata->irq >= 0) 355 if (pdata->irq > 0)
357 free_irq(pdata->irq, pdev); 356 free_irq(pdata->irq, pdev);
358 if (ioaddr) 357 if (ioaddr)
359 iounmap(ioaddr); 358 iounmap(ioaddr);
@@ -369,7 +368,7 @@ static int __devexit ds1553_rtc_remove(struct platform_device *pdev)
369 368
370 sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr); 369 sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
371 rtc_device_unregister(pdata->rtc); 370 rtc_device_unregister(pdata->rtc);
372 if (pdata->irq >= 0) { 371 if (pdata->irq > 0) {
373 writeb(0, pdata->ioaddr + RTC_INTERRUPTS); 372 writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
374 free_irq(pdata->irq, pdev); 373 free_irq(pdata->irq, pdev);
375 } 374 }
diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
index 4e91419e8911..06dfb54f99b6 100644
--- a/drivers/rtc/rtc-ds1672.c
+++ b/drivers/rtc/rtc-ds1672.c
@@ -83,32 +83,11 @@ static int ds1672_set_mmss(struct i2c_client *client, unsigned long secs)
83 return 0; 83 return 0;
84} 84}
85 85
86static int ds1672_set_datetime(struct i2c_client *client, struct rtc_time *tm)
87{
88 unsigned long secs;
89
90 dev_dbg(&client->dev,
91 "%s: secs=%d, mins=%d, hours=%d, "
92 "mday=%d, mon=%d, year=%d, wday=%d\n",
93 __func__,
94 tm->tm_sec, tm->tm_min, tm->tm_hour,
95 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
96
97 rtc_tm_to_time(tm, &secs);
98
99 return ds1672_set_mmss(client, secs);
100}
101
102static int ds1672_rtc_read_time(struct device *dev, struct rtc_time *tm) 86static int ds1672_rtc_read_time(struct device *dev, struct rtc_time *tm)
103{ 87{
104 return ds1672_get_datetime(to_i2c_client(dev), tm); 88 return ds1672_get_datetime(to_i2c_client(dev), tm);
105} 89}
106 90
107static int ds1672_rtc_set_time(struct device *dev, struct rtc_time *tm)
108{
109 return ds1672_set_datetime(to_i2c_client(dev), tm);
110}
111
112static int ds1672_rtc_set_mmss(struct device *dev, unsigned long secs) 91static int ds1672_rtc_set_mmss(struct device *dev, unsigned long secs)
113{ 92{
114 return ds1672_set_mmss(to_i2c_client(dev), secs); 93 return ds1672_set_mmss(to_i2c_client(dev), secs);
@@ -152,7 +131,6 @@ static DEVICE_ATTR(control, S_IRUGO, show_control, NULL);
152 131
153static const struct rtc_class_ops ds1672_rtc_ops = { 132static const struct rtc_class_ops ds1672_rtc_ops = {
154 .read_time = ds1672_rtc_read_time, 133 .read_time = ds1672_rtc_read_time,
155 .set_time = ds1672_rtc_set_time,
156 .set_mmss = ds1672_rtc_set_mmss, 134 .set_mmss = ds1672_rtc_set_mmss,
157}; 135};
158 136
diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c
index 45e5b106af73..c51589ede5b7 100644
--- a/drivers/rtc/rtc-ds3234.c
+++ b/drivers/rtc/rtc-ds3234.c
@@ -1,4 +1,4 @@
1/* drivers/rtc/rtc-ds3234.c 1/* rtc-ds3234.c
2 * 2 *
3 * Driver for Dallas Semiconductor (DS3234) SPI RTC with Integrated Crystal 3 * Driver for Dallas Semiconductor (DS3234) SPI RTC with Integrated Crystal
4 * and SRAM. 4 * and SRAM.
@@ -9,13 +9,10 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 * 11 *
12 * Changelog:
13 *
14 * 07-May-2008: Dennis Aberilla <denzzzhome@yahoo.com>
15 * - Created based on the max6902 code. Only implements the
16 * date/time keeping functions; no SRAM yet.
17 */ 12 */
18 13
14#include <linux/init.h>
15#include <linux/module.h>
19#include <linux/device.h> 16#include <linux/device.h>
20#include <linux/platform_device.h> 17#include <linux/platform_device.h>
21#include <linux/rtc.h> 18#include <linux/rtc.h>
@@ -34,16 +31,7 @@
34#define DS3234_REG_CONTROL 0x0E 31#define DS3234_REG_CONTROL 0x0E
35#define DS3234_REG_CONT_STAT 0x0F 32#define DS3234_REG_CONT_STAT 0x0F
36 33
37#undef DS3234_DEBUG 34static int ds3234_set_reg(struct device *dev, unsigned char address,
38
39struct ds3234 {
40 struct rtc_device *rtc;
41 u8 buf[8]; /* Burst read: addr + 7 regs */
42 u8 tx_buf[2];
43 u8 rx_buf[2];
44};
45
46static void ds3234_set_reg(struct device *dev, unsigned char address,
47 unsigned char data) 35 unsigned char data)
48{ 36{
49 struct spi_device *spi = to_spi_device(dev); 37 struct spi_device *spi = to_spi_device(dev);
@@ -53,107 +41,45 @@ static void ds3234_set_reg(struct device *dev, unsigned char address,
53 buf[0] = address | 0x80; 41 buf[0] = address | 0x80;
54 buf[1] = data; 42 buf[1] = data;
55 43
56 spi_write(spi, buf, 2); 44 return spi_write_then_read(spi, buf, 2, NULL, 0);
57} 45}
58 46
59static int ds3234_get_reg(struct device *dev, unsigned char address, 47static int ds3234_get_reg(struct device *dev, unsigned char address,
60 unsigned char *data) 48 unsigned char *data)
61{ 49{
62 struct spi_device *spi = to_spi_device(dev); 50 struct spi_device *spi = to_spi_device(dev);
63 struct ds3234 *chip = dev_get_drvdata(dev);
64 struct spi_message message;
65 struct spi_transfer xfer;
66 int status;
67
68 if (!data)
69 return -EINVAL;
70
71 /* Build our spi message */
72 spi_message_init(&message);
73 memset(&xfer, 0, sizeof(xfer));
74
75 /* Address + dummy tx byte */
76 xfer.len = 2;
77 xfer.tx_buf = chip->tx_buf;
78 xfer.rx_buf = chip->rx_buf;
79
80 chip->tx_buf[0] = address;
81 chip->tx_buf[1] = 0xff;
82 51
83 spi_message_add_tail(&xfer, &message); 52 *data = address & 0x7f;
84 53
85 /* do the i/o */ 54 return spi_write_then_read(spi, data, 1, data, 1);
86 status = spi_sync(spi, &message);
87 if (status == 0)
88 status = message.status;
89 else
90 return status;
91
92 *data = chip->rx_buf[1];
93
94 return status;
95} 55}
96 56
97static int ds3234_get_datetime(struct device *dev, struct rtc_time *dt) 57static int ds3234_read_time(struct device *dev, struct rtc_time *dt)
98{ 58{
59 int err;
60 unsigned char buf[8];
99 struct spi_device *spi = to_spi_device(dev); 61 struct spi_device *spi = to_spi_device(dev);
100 struct ds3234 *chip = dev_get_drvdata(dev);
101 struct spi_message message;
102 struct spi_transfer xfer;
103 int status;
104
105 /* build the message */
106 spi_message_init(&message);
107 memset(&xfer, 0, sizeof(xfer));
108 xfer.len = 1 + 7; /* Addr + 7 registers */
109 xfer.tx_buf = chip->buf;
110 xfer.rx_buf = chip->buf;
111 chip->buf[0] = 0x00; /* Start address */
112 spi_message_add_tail(&xfer, &message);
113
114 /* do the i/o */
115 status = spi_sync(spi, &message);
116 if (status == 0)
117 status = message.status;
118 else
119 return status;
120 62
121 /* Seconds, Minutes, Hours, Day, Date, Month, Year */ 63 buf[0] = 0x00; /* Start address */
122 dt->tm_sec = bcd2bin(chip->buf[1]);
123 dt->tm_min = bcd2bin(chip->buf[2]);
124 dt->tm_hour = bcd2bin(chip->buf[3] & 0x3f);
125 dt->tm_wday = bcd2bin(chip->buf[4]) - 1; /* 0 = Sun */
126 dt->tm_mday = bcd2bin(chip->buf[5]);
127 dt->tm_mon = bcd2bin(chip->buf[6] & 0x1f) - 1; /* 0 = Jan */
128 dt->tm_year = bcd2bin(chip->buf[7] & 0xff) + 100; /* Assume 20YY */
129
130#ifdef DS3234_DEBUG
131 dev_dbg(dev, "\n%s : Read RTC values\n", __func__);
132 dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour);
133 dev_dbg(dev, "tm_min : %i\n", dt->tm_min);
134 dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec);
135 dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday);
136 dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday);
137 dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon);
138 dev_dbg(dev, "tm_year: %i\n", dt->tm_year);
139#endif
140 64
141 return 0; 65 err = spi_write_then_read(spi, buf, 1, buf, 8);
66 if (err != 0)
67 return err;
68
69 /* Seconds, Minutes, Hours, Day, Date, Month, Year */
70 dt->tm_sec = bcd2bin(buf[0]);
71 dt->tm_min = bcd2bin(buf[1]);
72 dt->tm_hour = bcd2bin(buf[2] & 0x3f);
73 dt->tm_wday = bcd2bin(buf[3]) - 1; /* 0 = Sun */
74 dt->tm_mday = bcd2bin(buf[4]);
75 dt->tm_mon = bcd2bin(buf[5] & 0x1f) - 1; /* 0 = Jan */
76 dt->tm_year = bcd2bin(buf[6] & 0xff) + 100; /* Assume 20YY */
77
78 return rtc_valid_tm(dt);
142} 79}
143 80
144static int ds3234_set_datetime(struct device *dev, struct rtc_time *dt) 81static int ds3234_set_time(struct device *dev, struct rtc_time *dt)
145{ 82{
146#ifdef DS3234_DEBUG
147 dev_dbg(dev, "\n%s : Setting RTC values\n", __func__);
148 dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec);
149 dev_dbg(dev, "tm_min : %i\n", dt->tm_min);
150 dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour);
151 dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday);
152 dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday);
153 dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon);
154 dev_dbg(dev, "tm_year: %i\n", dt->tm_year);
155#endif
156
157 ds3234_set_reg(dev, DS3234_REG_SECONDS, bin2bcd(dt->tm_sec)); 83 ds3234_set_reg(dev, DS3234_REG_SECONDS, bin2bcd(dt->tm_sec));
158 ds3234_set_reg(dev, DS3234_REG_MINUTES, bin2bcd(dt->tm_min)); 84 ds3234_set_reg(dev, DS3234_REG_MINUTES, bin2bcd(dt->tm_min));
159 ds3234_set_reg(dev, DS3234_REG_HOURS, bin2bcd(dt->tm_hour) & 0x3f); 85 ds3234_set_reg(dev, DS3234_REG_HOURS, bin2bcd(dt->tm_hour) & 0x3f);
@@ -174,16 +100,6 @@ static int ds3234_set_datetime(struct device *dev, struct rtc_time *dt)
174 return 0; 100 return 0;
175} 101}
176 102
177static int ds3234_read_time(struct device *dev, struct rtc_time *tm)
178{
179 return ds3234_get_datetime(dev, tm);
180}
181
182static int ds3234_set_time(struct device *dev, struct rtc_time *tm)
183{
184 return ds3234_set_datetime(dev, tm);
185}
186
187static const struct rtc_class_ops ds3234_rtc_ops = { 103static const struct rtc_class_ops ds3234_rtc_ops = {
188 .read_time = ds3234_read_time, 104 .read_time = ds3234_read_time,
189 .set_time = ds3234_set_time, 105 .set_time = ds3234_set_time,
@@ -193,31 +109,15 @@ static int __devinit ds3234_probe(struct spi_device *spi)
193{ 109{
194 struct rtc_device *rtc; 110 struct rtc_device *rtc;
195 unsigned char tmp; 111 unsigned char tmp;
196 struct ds3234 *chip;
197 int res; 112 int res;
198 113
199 rtc = rtc_device_register("ds3234",
200 &spi->dev, &ds3234_rtc_ops, THIS_MODULE);
201 if (IS_ERR(rtc))
202 return PTR_ERR(rtc);
203
204 spi->mode = SPI_MODE_3; 114 spi->mode = SPI_MODE_3;
205 spi->bits_per_word = 8; 115 spi->bits_per_word = 8;
206 spi_setup(spi); 116 spi_setup(spi);
207 117
208 chip = kzalloc(sizeof(struct ds3234), GFP_KERNEL);
209 if (!chip) {
210 rtc_device_unregister(rtc);
211 return -ENOMEM;
212 }
213 chip->rtc = rtc;
214 dev_set_drvdata(&spi->dev, chip);
215
216 res = ds3234_get_reg(&spi->dev, DS3234_REG_SECONDS, &tmp); 118 res = ds3234_get_reg(&spi->dev, DS3234_REG_SECONDS, &tmp);
217 if (res) { 119 if (res != 0)
218 rtc_device_unregister(rtc);
219 return res; 120 return res;
220 }
221 121
222 /* Control settings 122 /* Control settings
223 * 123 *
@@ -246,26 +146,27 @@ static int __devinit ds3234_probe(struct spi_device *spi)
246 ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp); 146 ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp);
247 dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp); 147 dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp);
248 148
149 rtc = rtc_device_register("ds3234",
150 &spi->dev, &ds3234_rtc_ops, THIS_MODULE);
151 if (IS_ERR(rtc))
152 return PTR_ERR(rtc);
153
154 dev_set_drvdata(&spi->dev, rtc);
155
249 return 0; 156 return 0;
250} 157}
251 158
252static int __devexit ds3234_remove(struct spi_device *spi) 159static int __devexit ds3234_remove(struct spi_device *spi)
253{ 160{
254 struct ds3234 *chip = platform_get_drvdata(spi); 161 struct rtc_device *rtc = platform_get_drvdata(spi);
255 struct rtc_device *rtc = chip->rtc;
256
257 if (rtc)
258 rtc_device_unregister(rtc);
259
260 kfree(chip);
261 162
163 rtc_device_unregister(rtc);
262 return 0; 164 return 0;
263} 165}
264 166
265static struct spi_driver ds3234_driver = { 167static struct spi_driver ds3234_driver = {
266 .driver = { 168 .driver = {
267 .name = "ds3234", 169 .name = "ds3234",
268 .bus = &spi_bus_type,
269 .owner = THIS_MODULE, 170 .owner = THIS_MODULE,
270 }, 171 },
271 .probe = ds3234_probe, 172 .probe = ds3234_probe,
@@ -274,7 +175,6 @@ static struct spi_driver ds3234_driver = {
274 175
275static __init int ds3234_init(void) 176static __init int ds3234_init(void)
276{ 177{
277 printk(KERN_INFO "DS3234 SPI RTC Driver\n");
278 return spi_register_driver(&ds3234_driver); 178 return spi_register_driver(&ds3234_driver);
279} 179}
280module_init(ds3234_init); 180module_init(ds3234_init);
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 36e4ac0bd69c..f7a3283dd029 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -49,18 +49,6 @@ static int ep93xx_rtc_set_mmss(struct device *dev, unsigned long secs)
49 return 0; 49 return 0;
50} 50}
51 51
52static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm)
53{
54 int err;
55 unsigned long secs;
56
57 err = rtc_tm_to_time(tm, &secs);
58 if (err != 0)
59 return err;
60
61 return ep93xx_rtc_set_mmss(dev, secs);
62}
63
64static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq) 52static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq)
65{ 53{
66 unsigned short preload, delete; 54 unsigned short preload, delete;
@@ -75,7 +63,6 @@ static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq)
75 63
76static const struct rtc_class_ops ep93xx_rtc_ops = { 64static const struct rtc_class_ops ep93xx_rtc_ops = {
77 .read_time = ep93xx_rtc_read_time, 65 .read_time = ep93xx_rtc_read_time,
78 .set_time = ep93xx_rtc_set_time,
79 .set_mmss = ep93xx_rtc_set_mmss, 66 .set_mmss = ep93xx_rtc_set_mmss,
80 .proc = ep93xx_rtc_proc, 67 .proc = ep93xx_rtc_proc,
81}; 68};
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index 43afb7ab5289..33921a6b1707 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -450,7 +450,7 @@ static int __devinit m48t59_rtc_probe(struct platform_device *pdev)
450 * the mode without IRQ. 450 * the mode without IRQ.
451 */ 451 */
452 m48t59->irq = platform_get_irq(pdev, 0); 452 m48t59->irq = platform_get_irq(pdev, 0);
453 if (m48t59->irq < 0) 453 if (m48t59->irq <= 0)
454 m48t59->irq = NO_IRQ; 454 m48t59->irq = NO_IRQ;
455 455
456 if (m48t59->irq != NO_IRQ) { 456 if (m48t59->irq != NO_IRQ) {
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 2f6507df7b49..36a8ea9ed8ba 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -9,14 +9,6 @@
9 * 9 *
10 * Driver for MAX6902 spi RTC 10 * Driver for MAX6902 spi RTC
11 * 11 *
12 * Changelog:
13 *
14 * 24-May-2006: Raphael Assenat <raph@8d.com>
15 * - Major rework
16 * Converted to rtc_device and uses the SPI layer.
17 *
18 * ??-???-2005: Someone at Compulab
19 * - Initial driver creation.
20 */ 12 */
21 13
22#include <linux/module.h> 14#include <linux/module.h>
@@ -26,7 +18,6 @@
26#include <linux/rtc.h> 18#include <linux/rtc.h>
27#include <linux/spi/spi.h> 19#include <linux/spi/spi.h>
28#include <linux/bcd.h> 20#include <linux/bcd.h>
29#include <linux/delay.h>
30 21
31#define MAX6902_REG_SECONDS 0x01 22#define MAX6902_REG_SECONDS 0x01
32#define MAX6902_REG_MINUTES 0x03 23#define MAX6902_REG_MINUTES 0x03
@@ -38,16 +29,7 @@
38#define MAX6902_REG_CONTROL 0x0F 29#define MAX6902_REG_CONTROL 0x0F
39#define MAX6902_REG_CENTURY 0x13 30#define MAX6902_REG_CENTURY 0x13
40 31
41#undef MAX6902_DEBUG 32static int max6902_set_reg(struct device *dev, unsigned char address,
42
43struct max6902 {
44 struct rtc_device *rtc;
45 u8 buf[9]; /* Burst read cmd + 8 registers */
46 u8 tx_buf[2];
47 u8 rx_buf[2];
48};
49
50static void max6902_set_reg(struct device *dev, unsigned char address,
51 unsigned char data) 33 unsigned char data)
52{ 34{
53 struct spi_device *spi = to_spi_device(dev); 35 struct spi_device *spi = to_spi_device(dev);
@@ -57,113 +39,58 @@ static void max6902_set_reg(struct device *dev, unsigned char address,
57 buf[0] = address & 0x7f; 39 buf[0] = address & 0x7f;
58 buf[1] = data; 40 buf[1] = data;
59 41
60 spi_write(spi, buf, 2); 42 return spi_write_then_read(spi, buf, 2, NULL, 0);
61} 43}
62 44
63static int max6902_get_reg(struct device *dev, unsigned char address, 45static int max6902_get_reg(struct device *dev, unsigned char address,
64 unsigned char *data) 46 unsigned char *data)
65{ 47{
66 struct spi_device *spi = to_spi_device(dev); 48 struct spi_device *spi = to_spi_device(dev);
67 struct max6902 *chip = dev_get_drvdata(dev);
68 struct spi_message message;
69 struct spi_transfer xfer;
70 int status;
71
72 if (!data)
73 return -EINVAL;
74
75 /* Build our spi message */
76 spi_message_init(&message);
77 memset(&xfer, 0, sizeof(xfer));
78 xfer.len = 2;
79 /* Can tx_buf and rx_buf be equal? The doc in spi.h is not sure... */
80 xfer.tx_buf = chip->tx_buf;
81 xfer.rx_buf = chip->rx_buf;
82 49
83 /* Set MSB to indicate read */ 50 /* Set MSB to indicate read */
84 chip->tx_buf[0] = address | 0x80; 51 *data = address | 0x80;
85
86 spi_message_add_tail(&xfer, &message);
87 52
88 /* do the i/o */ 53 return spi_write_then_read(spi, data, 1, data, 1);
89 status = spi_sync(spi, &message);
90
91 if (status == 0)
92 *data = chip->rx_buf[1];
93 return status;
94} 54}
95 55
96static int max6902_get_datetime(struct device *dev, struct rtc_time *dt) 56static int max6902_read_time(struct device *dev, struct rtc_time *dt)
97{ 57{
98 unsigned char tmp; 58 int err, century;
99 int century;
100 int err;
101 struct spi_device *spi = to_spi_device(dev); 59 struct spi_device *spi = to_spi_device(dev);
102 struct max6902 *chip = dev_get_drvdata(dev); 60 unsigned char buf[8];
103 struct spi_message message;
104 struct spi_transfer xfer;
105 int status;
106 61
107 err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &tmp); 62 buf[0] = 0xbf; /* Burst read */
108 if (err)
109 return err;
110
111 /* build the message */
112 spi_message_init(&message);
113 memset(&xfer, 0, sizeof(xfer));
114 xfer.len = 1 + 7; /* Burst read command + 7 registers */
115 xfer.tx_buf = chip->buf;
116 xfer.rx_buf = chip->buf;
117 chip->buf[0] = 0xbf; /* Burst read */
118 spi_message_add_tail(&xfer, &message);
119 63
120 /* do the i/o */ 64 err = spi_write_then_read(spi, buf, 1, buf, 8);
121 status = spi_sync(spi, &message); 65 if (err != 0)
122 if (status) 66 return err;
123 return status;
124 67
125 /* The chip sends data in this order: 68 /* The chip sends data in this order:
126 * Seconds, Minutes, Hours, Date, Month, Day, Year */ 69 * Seconds, Minutes, Hours, Date, Month, Day, Year */
127 dt->tm_sec = bcd2bin(chip->buf[1]); 70 dt->tm_sec = bcd2bin(buf[0]);
128 dt->tm_min = bcd2bin(chip->buf[2]); 71 dt->tm_min = bcd2bin(buf[1]);
129 dt->tm_hour = bcd2bin(chip->buf[3]); 72 dt->tm_hour = bcd2bin(buf[2]);
130 dt->tm_mday = bcd2bin(chip->buf[4]); 73 dt->tm_mday = bcd2bin(buf[3]);
131 dt->tm_mon = bcd2bin(chip->buf[5]) - 1; 74 dt->tm_mon = bcd2bin(buf[4]) - 1;
132 dt->tm_wday = bcd2bin(chip->buf[6]); 75 dt->tm_wday = bcd2bin(buf[5]);
133 dt->tm_year = bcd2bin(chip->buf[7]); 76 dt->tm_year = bcd2bin(buf[6]);
77
78 /* Read century */
79 err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &buf[0]);
80 if (err != 0)
81 return err;
134 82
135 century = bcd2bin(tmp) * 100; 83 century = bcd2bin(buf[0]) * 100;
136 84
137 dt->tm_year += century; 85 dt->tm_year += century;
138 dt->tm_year -= 1900; 86 dt->tm_year -= 1900;
139 87
140#ifdef MAX6902_DEBUG 88 return rtc_valid_tm(dt);
141 printk("\n%s : Read RTC values\n",__func__);
142 printk("tm_hour: %i\n",dt->tm_hour);
143 printk("tm_min : %i\n",dt->tm_min);
144 printk("tm_sec : %i\n",dt->tm_sec);
145 printk("tm_year: %i\n",dt->tm_year);
146 printk("tm_mon : %i\n",dt->tm_mon);
147 printk("tm_mday: %i\n",dt->tm_mday);
148 printk("tm_wday: %i\n",dt->tm_wday);
149#endif
150
151 return 0;
152} 89}
153 90
154static int max6902_set_datetime(struct device *dev, struct rtc_time *dt) 91static int max6902_set_time(struct device *dev, struct rtc_time *dt)
155{ 92{
156 dt->tm_year = dt->tm_year+1900; 93 dt->tm_year = dt->tm_year + 1900;
157
158#ifdef MAX6902_DEBUG
159 printk("\n%s : Setting RTC values\n",__func__);
160 printk("tm_sec : %i\n",dt->tm_sec);
161 printk("tm_min : %i\n",dt->tm_min);
162 printk("tm_hour: %i\n",dt->tm_hour);
163 printk("tm_mday: %i\n",dt->tm_mday);
164 printk("tm_wday: %i\n",dt->tm_wday);
165 printk("tm_year: %i\n",dt->tm_year);
166#endif
167 94
168 /* Remove write protection */ 95 /* Remove write protection */
169 max6902_set_reg(dev, 0xF, 0); 96 max6902_set_reg(dev, 0xF, 0);
@@ -173,10 +100,10 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
173 max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour)); 100 max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
174 101
175 max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday)); 102 max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
176 max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1)); 103 max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon + 1));
177 max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday)); 104 max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
178 max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100)); 105 max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year % 100));
179 max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100)); 106 max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year / 100));
180 107
181 /* Compulab used a delay here. However, the datasheet 108 /* Compulab used a delay here. However, the datasheet
182 * does not mention a delay being required anywhere... */ 109 * does not mention a delay being required anywhere... */
@@ -188,16 +115,6 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
188 return 0; 115 return 0;
189} 116}
190 117
191static int max6902_read_time(struct device *dev, struct rtc_time *tm)
192{
193 return max6902_get_datetime(dev, tm);
194}
195
196static int max6902_set_time(struct device *dev, struct rtc_time *tm)
197{
198 return max6902_set_datetime(dev, tm);
199}
200
201static const struct rtc_class_ops max6902_rtc_ops = { 118static const struct rtc_class_ops max6902_rtc_ops = {
202 .read_time = max6902_read_time, 119 .read_time = max6902_read_time,
203 .set_time = max6902_set_time, 120 .set_time = max6902_set_time,
@@ -207,45 +124,29 @@ static int __devinit max6902_probe(struct spi_device *spi)
207{ 124{
208 struct rtc_device *rtc; 125 struct rtc_device *rtc;
209 unsigned char tmp; 126 unsigned char tmp;
210 struct max6902 *chip;
211 int res; 127 int res;
212 128
213 rtc = rtc_device_register("max6902",
214 &spi->dev, &max6902_rtc_ops, THIS_MODULE);
215 if (IS_ERR(rtc))
216 return PTR_ERR(rtc);
217
218 spi->mode = SPI_MODE_3; 129 spi->mode = SPI_MODE_3;
219 spi->bits_per_word = 8; 130 spi->bits_per_word = 8;
220 spi_setup(spi); 131 spi_setup(spi);
221 132
222 chip = kzalloc(sizeof *chip, GFP_KERNEL);
223 if (!chip) {
224 rtc_device_unregister(rtc);
225 return -ENOMEM;
226 }
227 chip->rtc = rtc;
228 dev_set_drvdata(&spi->dev, chip);
229
230 res = max6902_get_reg(&spi->dev, MAX6902_REG_SECONDS, &tmp); 133 res = max6902_get_reg(&spi->dev, MAX6902_REG_SECONDS, &tmp);
231 if (res) { 134 if (res != 0)
232 rtc_device_unregister(rtc);
233 return res; 135 return res;
234 } 136
137 rtc = rtc_device_register("max6902",
138 &spi->dev, &max6902_rtc_ops, THIS_MODULE);
139 if (IS_ERR(rtc))
140 return PTR_ERR(rtc);
235 141
236 return 0; 142 return 0;
237} 143}
238 144
239static int __devexit max6902_remove(struct spi_device *spi) 145static int __devexit max6902_remove(struct spi_device *spi)
240{ 146{
241 struct max6902 *chip = platform_get_drvdata(spi); 147 struct rtc_device *rtc = platform_get_drvdata(spi);
242 struct rtc_device *rtc = chip->rtc;
243
244 if (rtc)
245 rtc_device_unregister(rtc);
246
247 kfree(chip);
248 148
149 rtc_device_unregister(rtc);
249 return 0; 150 return 0;
250} 151}
251 152
@@ -261,7 +162,6 @@ static struct spi_driver max6902_driver = {
261 162
262static __init int max6902_init(void) 163static __init int max6902_init(void)
263{ 164{
264 printk("max6902 spi driver\n");
265 return spi_register_driver(&max6902_driver); 165 return spi_register_driver(&max6902_driver);
266} 166}
267module_init(max6902_init); 167module_init(max6902_init);
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
new file mode 100644
index 000000000000..45f12dcd3716
--- /dev/null
+++ b/drivers/rtc/rtc-mv.c
@@ -0,0 +1,163 @@
1/*
2 * Driver for the RTC in Marvell SoCs.
3 *
4 * This file is licensed under the terms of the GNU General Public
5 * License version 2. This program is licensed "as is" without any
6 * warranty of any kind, whether express or implied.
7 */
8
9#include <linux/init.h>
10#include <linux/kernel.h>
11#include <linux/rtc.h>
12#include <linux/bcd.h>
13#include <linux/io.h>
14#include <linux/platform_device.h>
15
16
17#define RTC_TIME_REG_OFFS 0
18#define RTC_SECONDS_OFFS 0
19#define RTC_MINUTES_OFFS 8
20#define RTC_HOURS_OFFS 16
21#define RTC_WDAY_OFFS 24
22#define RTC_HOURS_12H_MODE (1 << 22) /* 12 hours mode */
23
24#define RTC_DATE_REG_OFFS 4
25#define RTC_MDAY_OFFS 0
26#define RTC_MONTH_OFFS 8
27#define RTC_YEAR_OFFS 16
28
29
30struct rtc_plat_data {
31 struct rtc_device *rtc;
32 void __iomem *ioaddr;
33};
34
35static int mv_rtc_set_time(struct device *dev, struct rtc_time *tm)
36{
37 struct rtc_plat_data *pdata = dev_get_drvdata(dev);
38 void __iomem *ioaddr = pdata->ioaddr;
39 u32 rtc_reg;
40
41 rtc_reg = (bin2bcd(tm->tm_sec) << RTC_SECONDS_OFFS) |
42 (bin2bcd(tm->tm_min) << RTC_MINUTES_OFFS) |
43 (bin2bcd(tm->tm_hour) << RTC_HOURS_OFFS) |
44 (bin2bcd(tm->tm_wday) << RTC_WDAY_OFFS);
45 writel(rtc_reg, ioaddr + RTC_TIME_REG_OFFS);
46
47 rtc_reg = (bin2bcd(tm->tm_mday) << RTC_MDAY_OFFS) |
48 (bin2bcd(tm->tm_mon + 1) << RTC_MONTH_OFFS) |
49 (bin2bcd(tm->tm_year % 100) << RTC_YEAR_OFFS);
50 writel(rtc_reg, ioaddr + RTC_DATE_REG_OFFS);
51
52 return 0;
53}
54
55static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm)
56{
57 struct rtc_plat_data *pdata = dev_get_drvdata(dev);
58 void __iomem *ioaddr = pdata->ioaddr;
59 u32 rtc_time, rtc_date;
60 unsigned int year, month, day, hour, minute, second, wday;
61
62 rtc_time = readl(ioaddr + RTC_TIME_REG_OFFS);
63 rtc_date = readl(ioaddr + RTC_DATE_REG_OFFS);
64
65 second = rtc_time & 0x7f;
66 minute = (rtc_time >> RTC_MINUTES_OFFS) & 0x7f;
67 hour = (rtc_time >> RTC_HOURS_OFFS) & 0x3f; /* assume 24 hours mode */
68 wday = (rtc_time >> RTC_WDAY_OFFS) & 0x7;
69
70 day = rtc_date & 0x3f;
71 month = (rtc_date >> RTC_MONTH_OFFS) & 0x3f;
72 year = (rtc_date >> RTC_YEAR_OFFS) & 0xff;
73
74 tm->tm_sec = bcd2bin(second);
75 tm->tm_min = bcd2bin(minute);
76 tm->tm_hour = bcd2bin(hour);
77 tm->tm_mday = bcd2bin(day);
78 tm->tm_wday = bcd2bin(wday);
79 tm->tm_mon = bcd2bin(month) - 1;
80 /* hw counts from year 2000, but tm_year is relative to 1900 */
81 tm->tm_year = bcd2bin(year) + 100;
82
83 return rtc_valid_tm(tm);
84}
85
86static const struct rtc_class_ops mv_rtc_ops = {
87 .read_time = mv_rtc_read_time,
88 .set_time = mv_rtc_set_time,
89};
90
91static int __init mv_rtc_probe(struct platform_device *pdev)
92{
93 struct resource *res;
94 struct rtc_plat_data *pdata;
95 resource_size_t size;
96 u32 rtc_time;
97
98 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
99 if (!res)
100 return -ENODEV;
101
102 pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
103 if (!pdata)
104 return -ENOMEM;
105
106 size = resource_size(res);
107 if (!devm_request_mem_region(&pdev->dev, res->start, size,
108 pdev->name))
109 return -EBUSY;
110
111 pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, size);
112 if (!pdata->ioaddr)
113 return -ENOMEM;
114
115 /* make sure the 24 hours mode is enabled */
116 rtc_time = readl(pdata->ioaddr + RTC_TIME_REG_OFFS);
117 if (rtc_time & RTC_HOURS_12H_MODE) {
118 dev_err(&pdev->dev, "24 Hours mode not supported.\n");
119 return -EINVAL;
120 }
121
122 platform_set_drvdata(pdev, pdata);
123 pdata->rtc = rtc_device_register(pdev->name, &pdev->dev,
124 &mv_rtc_ops, THIS_MODULE);
125 if (IS_ERR(pdata->rtc))
126 return PTR_ERR(pdata->rtc);
127
128 return 0;
129}
130
131static int __exit mv_rtc_remove(struct platform_device *pdev)
132{
133 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
134
135 rtc_device_unregister(pdata->rtc);
136 return 0;
137}
138
139static struct platform_driver mv_rtc_driver = {
140 .remove = __exit_p(mv_rtc_remove),
141 .driver = {
142 .name = "rtc-mv",
143 .owner = THIS_MODULE,
144 },
145};
146
147static __init int mv_init(void)
148{
149 return platform_driver_probe(&mv_rtc_driver, mv_rtc_probe);
150}
151
152static __exit void mv_exit(void)
153{
154 platform_driver_unregister(&mv_rtc_driver);
155}
156
157module_init(mv_init);
158module_exit(mv_exit);
159
160MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
161MODULE_DESCRIPTION("Marvell RTC driver");
162MODULE_LICENSE("GPL");
163MODULE_ALIAS("platform:rtc-mv");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
new file mode 100644
index 000000000000..cc7eb8767b82
--- /dev/null
+++ b/drivers/rtc/rtc-pxa.c
@@ -0,0 +1,489 @@
1/*
2 * Real Time Clock interface for XScale PXA27x and PXA3xx
3 *
4 * Copyright (C) 2008 Robert Jarzmik
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include <linux/init.h>
23#include <linux/platform_device.h>
24#include <linux/module.h>
25#include <linux/rtc.h>
26#include <linux/seq_file.h>
27#include <linux/interrupt.h>
28#include <linux/io.h>
29
30#define TIMER_FREQ CLOCK_TICK_RATE
31#define RTC_DEF_DIVIDER (32768 - 1)
32#define RTC_DEF_TRIM 0
33#define MAXFREQ_PERIODIC 1000
34
35/*
36 * PXA Registers and bits definitions
37 */
38#define RTSR_PICE (1 << 15) /* Periodic interrupt count enable */
39#define RTSR_PIALE (1 << 14) /* Periodic interrupt Alarm enable */
40#define RTSR_PIAL (1 << 13) /* Periodic interrupt detected */
41#define RTSR_SWALE2 (1 << 11) /* RTC stopwatch alarm2 enable */
42#define RTSR_SWAL2 (1 << 10) /* RTC stopwatch alarm2 detected */
43#define RTSR_SWALE1 (1 << 9) /* RTC stopwatch alarm1 enable */
44#define RTSR_SWAL1 (1 << 8) /* RTC stopwatch alarm1 detected */
45#define RTSR_RDALE2 (1 << 7) /* RTC alarm2 enable */
46#define RTSR_RDAL2 (1 << 6) /* RTC alarm2 detected */
47#define RTSR_RDALE1 (1 << 5) /* RTC alarm1 enable */
48#define RTSR_RDAL1 (1 << 4) /* RTC alarm1 detected */
49#define RTSR_HZE (1 << 3) /* HZ interrupt enable */
50#define RTSR_ALE (1 << 2) /* RTC alarm interrupt enable */
51#define RTSR_HZ (1 << 1) /* HZ rising-edge detected */
52#define RTSR_AL (1 << 0) /* RTC alarm detected */
53#define RTSR_TRIG_MASK (RTSR_AL | RTSR_HZ | RTSR_RDAL1 | RTSR_RDAL2\
54 | RTSR_SWAL1 | RTSR_SWAL2)
55#define RYxR_YEAR_S 9
56#define RYxR_YEAR_MASK (0xfff << RYxR_YEAR_S)
57#define RYxR_MONTH_S 5
58#define RYxR_MONTH_MASK (0xf << RYxR_MONTH_S)
59#define RYxR_DAY_MASK 0x1f
60#define RDxR_HOUR_S 12
61#define RDxR_HOUR_MASK (0x1f << RDxR_HOUR_S)
62#define RDxR_MIN_S 6
63#define RDxR_MIN_MASK (0x3f << RDxR_MIN_S)
64#define RDxR_SEC_MASK 0x3f
65
66#define RTSR 0x08
67#define RTTR 0x0c
68#define RDCR 0x10
69#define RYCR 0x14
70#define RDAR1 0x18
71#define RYAR1 0x1c
72#define RTCPICR 0x34
73#define PIAR 0x38
74
75#define rtc_readl(pxa_rtc, reg) \
76 __raw_readl((pxa_rtc)->base + (reg))
77#define rtc_writel(pxa_rtc, reg, value) \
78 __raw_writel((value), (pxa_rtc)->base + (reg))
79
80struct pxa_rtc {
81 struct resource *ress;
82 void __iomem *base;
83 int irq_1Hz;
84 int irq_Alrm;
85 struct rtc_device *rtc;
86 spinlock_t lock; /* Protects this structure */
87 struct rtc_time rtc_alarm;
88};
89
90static u32 ryxr_calc(struct rtc_time *tm)
91{
92 return ((tm->tm_year + 1900) << RYxR_YEAR_S)
93 | ((tm->tm_mon + 1) << RYxR_MONTH_S)
94 | tm->tm_mday;
95}
96
97static u32 rdxr_calc(struct rtc_time *tm)
98{
99 return (tm->tm_hour << RDxR_HOUR_S) | (tm->tm_min << RDxR_MIN_S)
100 | tm->tm_sec;
101}
102
103static void tm_calc(u32 rycr, u32 rdcr, struct rtc_time *tm)
104{
105 tm->tm_year = ((rycr & RYxR_YEAR_MASK) >> RYxR_YEAR_S) - 1900;
106 tm->tm_mon = (((rycr & RYxR_MONTH_MASK) >> RYxR_MONTH_S)) - 1;
107 tm->tm_mday = (rycr & RYxR_DAY_MASK);
108 tm->tm_hour = (rdcr & RDxR_HOUR_MASK) >> RDxR_HOUR_S;
109 tm->tm_min = (rdcr & RDxR_MIN_MASK) >> RDxR_MIN_S;
110 tm->tm_sec = rdcr & RDxR_SEC_MASK;
111}
112
113static void rtsr_clear_bits(struct pxa_rtc *pxa_rtc, u32 mask)
114{
115 u32 rtsr;
116
117 rtsr = rtc_readl(pxa_rtc, RTSR);
118 rtsr &= ~RTSR_TRIG_MASK;
119 rtsr &= ~mask;
120 rtc_writel(pxa_rtc, RTSR, rtsr);
121}
122
123static void rtsr_set_bits(struct pxa_rtc *pxa_rtc, u32 mask)
124{
125 u32 rtsr;
126
127 rtsr = rtc_readl(pxa_rtc, RTSR);
128 rtsr &= ~RTSR_TRIG_MASK;
129 rtsr |= mask;
130 rtc_writel(pxa_rtc, RTSR, rtsr);
131}
132
133static irqreturn_t pxa_rtc_irq(int irq, void *dev_id)
134{
135 struct platform_device *pdev = to_platform_device(dev_id);
136 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
137 u32 rtsr;
138 unsigned long events = 0;
139
140 spin_lock(&pxa_rtc->lock);
141
142 /* clear interrupt sources */
143 rtsr = rtc_readl(pxa_rtc, RTSR);
144 rtc_writel(pxa_rtc, RTSR, rtsr);
145
146 /* temporary disable rtc interrupts */
147 rtsr_clear_bits(pxa_rtc, RTSR_RDALE1 | RTSR_PIALE | RTSR_HZE);
148
149 /* clear alarm interrupt if it has occurred */
150 if (rtsr & RTSR_RDAL1)
151 rtsr &= ~RTSR_RDALE1;
152
153 /* update irq data & counter */
154 if (rtsr & RTSR_RDAL1)
155 events |= RTC_AF | RTC_IRQF;
156 if (rtsr & RTSR_HZ)
157 events |= RTC_UF | RTC_IRQF;
158 if (rtsr & RTSR_PIAL)
159 events |= RTC_PF | RTC_IRQF;
160
161 rtc_update_irq(pxa_rtc->rtc, 1, events);
162
163 /* enable back rtc interrupts */
164 rtc_writel(pxa_rtc, RTSR, rtsr & ~RTSR_TRIG_MASK);
165
166 spin_unlock(&pxa_rtc->lock);
167 return IRQ_HANDLED;
168}
169
170static int pxa_rtc_open(struct device *dev)
171{
172 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
173 int ret;
174
175 ret = request_irq(pxa_rtc->irq_1Hz, pxa_rtc_irq, IRQF_DISABLED,
176 "rtc 1Hz", dev);
177 if (ret < 0) {
178 dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_1Hz,
179 ret);
180 goto err_irq_1Hz;
181 }
182 ret = request_irq(pxa_rtc->irq_Alrm, pxa_rtc_irq, IRQF_DISABLED,
183 "rtc Alrm", dev);
184 if (ret < 0) {
185 dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_Alrm,
186 ret);
187 goto err_irq_Alrm;
188 }
189
190 return 0;
191
192err_irq_Alrm:
193 free_irq(pxa_rtc->irq_1Hz, dev);
194err_irq_1Hz:
195 return ret;
196}
197
198static void pxa_rtc_release(struct device *dev)
199{
200 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
201
202 spin_lock_irq(&pxa_rtc->lock);
203 rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE);
204 spin_unlock_irq(&pxa_rtc->lock);
205
206 free_irq(pxa_rtc->irq_Alrm, dev);
207 free_irq(pxa_rtc->irq_1Hz, dev);
208}
209
210static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
211{
212 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
213 int period_ms;
214
215 if (freq < 1 || freq > MAXFREQ_PERIODIC)
216 return -EINVAL;
217
218 period_ms = 1000 / freq;
219 rtc_writel(pxa_rtc, PIAR, period_ms);
220
221 return 0;
222}
223
224static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
225{
226 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
227
228 if (enabled)
229 rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
230 else
231 rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
232
233 return 0;
234}
235
236static int pxa_rtc_ioctl(struct device *dev, unsigned int cmd,
237 unsigned long arg)
238{
239 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
240 int ret = 0;
241
242 spin_lock_irq(&pxa_rtc->lock);
243 switch (cmd) {
244 case RTC_AIE_OFF:
245 rtsr_clear_bits(pxa_rtc, RTSR_RDALE1);
246 break;
247 case RTC_AIE_ON:
248 rtsr_set_bits(pxa_rtc, RTSR_RDALE1);
249 break;
250 case RTC_UIE_OFF:
251 rtsr_clear_bits(pxa_rtc, RTSR_HZE);
252 break;
253 case RTC_UIE_ON:
254 rtsr_set_bits(pxa_rtc, RTSR_HZE);
255 break;
256 default:
257 ret = -ENOIOCTLCMD;
258 }
259
260 spin_unlock_irq(&pxa_rtc->lock);
261 return ret;
262}
263
264static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
265{
266 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
267 u32 rycr, rdcr;
268
269 rycr = rtc_readl(pxa_rtc, RYCR);
270 rdcr = rtc_readl(pxa_rtc, RDCR);
271
272 tm_calc(rycr, rdcr, tm);
273 return 0;
274}
275
276static int pxa_rtc_set_time(struct device *dev, struct rtc_time *tm)
277{
278 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
279
280 rtc_writel(pxa_rtc, RYCR, ryxr_calc(tm));
281 rtc_writel(pxa_rtc, RDCR, rdxr_calc(tm));
282
283 return 0;
284}
285
286static int pxa_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
287{
288 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
289 u32 rtsr, ryar, rdar;
290
291 ryar = rtc_readl(pxa_rtc, RYAR1);
292 rdar = rtc_readl(pxa_rtc, RDAR1);
293 tm_calc(ryar, rdar, &alrm->time);
294
295 rtsr = rtc_readl(pxa_rtc, RTSR);
296 alrm->enabled = (rtsr & RTSR_RDALE1) ? 1 : 0;
297 alrm->pending = (rtsr & RTSR_RDAL1) ? 1 : 0;
298 return 0;
299}
300
301static int pxa_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
302{
303 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
304 u32 rtsr;
305
306 spin_lock_irq(&pxa_rtc->lock);
307
308 rtc_writel(pxa_rtc, RYAR1, ryxr_calc(&alrm->time));
309 rtc_writel(pxa_rtc, RDAR1, rdxr_calc(&alrm->time));
310
311 rtsr = rtc_readl(pxa_rtc, RTSR);
312 if (alrm->enabled)
313 rtsr |= RTSR_RDALE1;
314 else
315 rtsr &= ~RTSR_RDALE1;
316 rtc_writel(pxa_rtc, RTSR, rtsr);
317
318 spin_unlock_irq(&pxa_rtc->lock);
319
320 return 0;
321}
322
323static int pxa_rtc_proc(struct device *dev, struct seq_file *seq)
324{
325 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
326
327 seq_printf(seq, "trim/divider\t: 0x%08x\n", rtc_readl(pxa_rtc, RTTR));
328 seq_printf(seq, "update_IRQ\t: %s\n",
329 (rtc_readl(pxa_rtc, RTSR) & RTSR_HZE) ? "yes" : "no");
330 seq_printf(seq, "periodic_IRQ\t: %s\n",
331 (rtc_readl(pxa_rtc, RTSR) & RTSR_PIALE) ? "yes" : "no");
332 seq_printf(seq, "periodic_freq\t: %u\n", rtc_readl(pxa_rtc, PIAR));
333
334 return 0;
335}
336
337static const struct rtc_class_ops pxa_rtc_ops = {
338 .open = pxa_rtc_open,
339 .release = pxa_rtc_release,
340 .ioctl = pxa_rtc_ioctl,
341 .read_time = pxa_rtc_read_time,
342 .set_time = pxa_rtc_set_time,
343 .read_alarm = pxa_rtc_read_alarm,
344 .set_alarm = pxa_rtc_set_alarm,
345 .proc = pxa_rtc_proc,
346 .irq_set_state = pxa_periodic_irq_set_state,
347 .irq_set_freq = pxa_periodic_irq_set_freq,
348};
349
350static int __init pxa_rtc_probe(struct platform_device *pdev)
351{
352 struct device *dev = &pdev->dev;
353 struct pxa_rtc *pxa_rtc;
354 int ret;
355 u32 rttr;
356
357 pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL);
358 if (!pxa_rtc)
359 return -ENOMEM;
360
361 spin_lock_init(&pxa_rtc->lock);
362 platform_set_drvdata(pdev, pxa_rtc);
363
364 ret = -ENXIO;
365 pxa_rtc->ress = platform_get_resource(pdev, IORESOURCE_MEM, 0);
366 if (!pxa_rtc->ress) {
367 dev_err(dev, "No I/O memory resource defined\n");
368 goto err_ress;
369 }
370
371 pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0);
372 if (pxa_rtc->irq_1Hz < 0) {
373 dev_err(dev, "No 1Hz IRQ resource defined\n");
374 goto err_ress;
375 }
376 pxa_rtc->irq_Alrm = platform_get_irq(pdev, 1);
377 if (pxa_rtc->irq_Alrm < 0) {
378 dev_err(dev, "No alarm IRQ resource defined\n");
379 goto err_ress;
380 }
381
382 ret = -ENOMEM;
383 pxa_rtc->base = ioremap(pxa_rtc->ress->start,
384 resource_size(pxa_rtc->ress));
385 if (!pxa_rtc->base) {
386 dev_err(&pdev->dev, "Unable to map pxa RTC I/O memory\n");
387 goto err_map;
388 }
389
390 /*
391 * If the clock divider is uninitialized then reset it to the
392 * default value to get the 1Hz clock.
393 */
394 if (rtc_readl(pxa_rtc, RTTR) == 0) {
395 rttr = RTC_DEF_DIVIDER + (RTC_DEF_TRIM << 16);
396 rtc_writel(pxa_rtc, RTTR, rttr);
397 dev_warn(dev, "warning: initializing default clock"
398 " divider/trim value\n");
399 }
400
401 rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE);
402
403 pxa_rtc->rtc = rtc_device_register("pxa-rtc", &pdev->dev, &pxa_rtc_ops,
404 THIS_MODULE);
405 ret = PTR_ERR(pxa_rtc->rtc);
406 if (IS_ERR(pxa_rtc->rtc)) {
407 dev_err(dev, "Failed to register RTC device -> %d\n", ret);
408 goto err_rtc_reg;
409 }
410
411 device_init_wakeup(dev, 1);
412
413 return 0;
414
415err_rtc_reg:
416 iounmap(pxa_rtc->base);
417err_ress:
418err_map:
419 kfree(pxa_rtc);
420 return ret;
421}
422
423static int __exit pxa_rtc_remove(struct platform_device *pdev)
424{
425 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
426
427 rtc_device_unregister(pxa_rtc->rtc);
428
429 spin_lock_irq(&pxa_rtc->lock);
430 iounmap(pxa_rtc->base);
431 spin_unlock_irq(&pxa_rtc->lock);
432
433 kfree(pxa_rtc);
434
435 return 0;
436}
437
438#ifdef CONFIG_PM
439static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state)
440{
441 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
442
443 if (device_may_wakeup(&pdev->dev))
444 enable_irq_wake(pxa_rtc->irq_Alrm);
445 return 0;
446}
447
448static int pxa_rtc_resume(struct platform_device *pdev)
449{
450 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
451
452 if (device_may_wakeup(&pdev->dev))
453 disable_irq_wake(pxa_rtc->irq_Alrm);
454 return 0;
455}
456#else
457#define pxa_rtc_suspend NULL
458#define pxa_rtc_resume NULL
459#endif
460
461static struct platform_driver pxa_rtc_driver = {
462 .remove = __exit_p(pxa_rtc_remove),
463 .suspend = pxa_rtc_suspend,
464 .resume = pxa_rtc_resume,
465 .driver = {
466 .name = "pxa-rtc",
467 },
468};
469
470static int __init pxa_rtc_init(void)
471{
472 if (cpu_is_pxa27x() || cpu_is_pxa3xx())
473 return platform_driver_probe(&pxa_rtc_driver, pxa_rtc_probe);
474
475 return -ENODEV;
476}
477
478static void __exit pxa_rtc_exit(void)
479{
480 platform_driver_unregister(&pxa_rtc_driver);
481}
482
483module_init(pxa_rtc_init);
484module_exit(pxa_rtc_exit);
485
486MODULE_AUTHOR("Robert Jarzmik");
487MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)");
488MODULE_LICENSE("GPL");
489MODULE_ALIAS("platform:pxa-rtc");
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 7a568beba3f0..e0d7b9991505 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -94,6 +94,9 @@ static int s3c_rtc_setfreq(struct device *dev, int freq)
94{ 94{
95 unsigned int tmp; 95 unsigned int tmp;
96 96
97 if (!is_power_of_2(freq))
98 return -EINVAL;
99
97 spin_lock_irq(&s3c_rtc_pie_lock); 100 spin_lock_irq(&s3c_rtc_pie_lock);
98 101
99 tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE; 102 tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index aaf9d6a337cc..1c3fc6b428e9 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -24,6 +24,7 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/io.h> 26#include <linux/io.h>
27#include <linux/log2.h>
27#include <asm/rtc.h> 28#include <asm/rtc.h>
28 29
29#define DRV_NAME "sh-rtc" 30#define DRV_NAME "sh-rtc"
@@ -89,7 +90,9 @@ struct sh_rtc {
89 void __iomem *regbase; 90 void __iomem *regbase;
90 unsigned long regsize; 91 unsigned long regsize;
91 struct resource *res; 92 struct resource *res;
92 unsigned int alarm_irq, periodic_irq, carry_irq; 93 int alarm_irq;
94 int periodic_irq;
95 int carry_irq;
93 struct rtc_device *rtc_dev; 96 struct rtc_device *rtc_dev;
94 spinlock_t lock; 97 spinlock_t lock;
95 unsigned long capabilities; /* See asm-sh/rtc.h for cap bits */ 98 unsigned long capabilities; /* See asm-sh/rtc.h for cap bits */
@@ -549,6 +552,8 @@ static int sh_rtc_irq_set_state(struct device *dev, int enabled)
549 552
550static int sh_rtc_irq_set_freq(struct device *dev, int freq) 553static int sh_rtc_irq_set_freq(struct device *dev, int freq)
551{ 554{
555 if (!is_power_of_2(freq))
556 return -EINVAL;
552 return sh_rtc_ioctl(dev, RTC_IRQP_SET, freq); 557 return sh_rtc_ioctl(dev, RTC_IRQP_SET, freq);
553} 558}
554 559
@@ -578,7 +583,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
578 583
579 /* get periodic/carry/alarm irqs */ 584 /* get periodic/carry/alarm irqs */
580 ret = platform_get_irq(pdev, 0); 585 ret = platform_get_irq(pdev, 0);
581 if (unlikely(ret < 0)) { 586 if (unlikely(ret <= 0)) {
582 ret = -ENOENT; 587 ret = -ENOENT;
583 dev_err(&pdev->dev, "No IRQ for period\n"); 588 dev_err(&pdev->dev, "No IRQ for period\n");
584 goto err_badres; 589 goto err_badres;
@@ -586,7 +591,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
586 rtc->periodic_irq = ret; 591 rtc->periodic_irq = ret;
587 592
588 ret = platform_get_irq(pdev, 1); 593 ret = platform_get_irq(pdev, 1);
589 if (unlikely(ret < 0)) { 594 if (unlikely(ret <= 0)) {
590 ret = -ENOENT; 595 ret = -ENOENT;
591 dev_err(&pdev->dev, "No IRQ for carry\n"); 596 dev_err(&pdev->dev, "No IRQ for carry\n");
592 goto err_badres; 597 goto err_badres;
@@ -594,7 +599,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
594 rtc->carry_irq = ret; 599 rtc->carry_irq = ret;
595 600
596 ret = platform_get_irq(pdev, 2); 601 ret = platform_get_irq(pdev, 2);
597 if (unlikely(ret < 0)) { 602 if (unlikely(ret <= 0)) {
598 ret = -ENOENT; 603 ret = -ENOENT;
599 dev_err(&pdev->dev, "No IRQ for alarm\n"); 604 dev_err(&pdev->dev, "No IRQ for alarm\n");
600 goto err_badres; 605 goto err_badres;
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index f4cd46e15af9..dc0b6224ad9b 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -170,7 +170,7 @@ static int stk17ta8_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
170 struct platform_device *pdev = to_platform_device(dev); 170 struct platform_device *pdev = to_platform_device(dev);
171 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 171 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
172 172
173 if (pdata->irq < 0) 173 if (pdata->irq <= 0)
174 return -EINVAL; 174 return -EINVAL;
175 pdata->alrm_mday = alrm->time.tm_mday; 175 pdata->alrm_mday = alrm->time.tm_mday;
176 pdata->alrm_hour = alrm->time.tm_hour; 176 pdata->alrm_hour = alrm->time.tm_hour;
@@ -187,7 +187,7 @@ static int stk17ta8_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
187 struct platform_device *pdev = to_platform_device(dev); 187 struct platform_device *pdev = to_platform_device(dev);
188 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 188 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
189 189
190 if (pdata->irq < 0) 190 if (pdata->irq <= 0)
191 return -EINVAL; 191 return -EINVAL;
192 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; 192 alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
193 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; 193 alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
@@ -221,7 +221,7 @@ static int stk17ta8_rtc_ioctl(struct device *dev, unsigned int cmd,
221 struct platform_device *pdev = to_platform_device(dev); 221 struct platform_device *pdev = to_platform_device(dev);
222 struct rtc_plat_data *pdata = platform_get_drvdata(pdev); 222 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
223 223
224 if (pdata->irq < 0) 224 if (pdata->irq <= 0)
225 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ 225 return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
226 switch (cmd) { 226 switch (cmd) {
227 case RTC_AIE_OFF: 227 case RTC_AIE_OFF:
@@ -303,7 +303,6 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev)
303 pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); 303 pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
304 if (!pdata) 304 if (!pdata)
305 return -ENOMEM; 305 return -ENOMEM;
306 pdata->irq = -1;
307 if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) { 306 if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) {
308 ret = -EBUSY; 307 ret = -EBUSY;
309 goto out; 308 goto out;
@@ -329,13 +328,13 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev)
329 if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_PF) 328 if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_PF)
330 dev_warn(&pdev->dev, "voltage-low detected.\n"); 329 dev_warn(&pdev->dev, "voltage-low detected.\n");
331 330
332 if (pdata->irq >= 0) { 331 if (pdata->irq > 0) {
333 writeb(0, ioaddr + RTC_INTERRUPTS); 332 writeb(0, ioaddr + RTC_INTERRUPTS);
334 if (request_irq(pdata->irq, stk17ta8_rtc_interrupt, 333 if (request_irq(pdata->irq, stk17ta8_rtc_interrupt,
335 IRQF_DISABLED | IRQF_SHARED, 334 IRQF_DISABLED | IRQF_SHARED,
336 pdev->name, pdev) < 0) { 335 pdev->name, pdev) < 0) {
337 dev_warn(&pdev->dev, "interrupt not available.\n"); 336 dev_warn(&pdev->dev, "interrupt not available.\n");
338 pdata->irq = -1; 337 pdata->irq = 0;
339 } 338 }
340 } 339 }
341 340
@@ -355,7 +354,7 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev)
355 out: 354 out:
356 if (pdata->rtc) 355 if (pdata->rtc)
357 rtc_device_unregister(pdata->rtc); 356 rtc_device_unregister(pdata->rtc);
358 if (pdata->irq >= 0) 357 if (pdata->irq > 0)
359 free_irq(pdata->irq, pdev); 358 free_irq(pdata->irq, pdev);
360 if (ioaddr) 359 if (ioaddr)
361 iounmap(ioaddr); 360 iounmap(ioaddr);
@@ -371,7 +370,7 @@ static int __devexit stk17ta8_rtc_remove(struct platform_device *pdev)
371 370
372 sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr); 371 sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
373 rtc_device_unregister(pdata->rtc); 372 rtc_device_unregister(pdata->rtc);
374 if (pdata->irq >= 0) { 373 if (pdata->irq > 0) {
375 writeb(0, pdata->ioaddr + RTC_INTERRUPTS); 374 writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
376 free_irq(pdata->irq, pdev); 375 free_irq(pdata->irq, pdev);
377 } 376 }
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index bc930022004a..e478280ff628 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -34,14 +34,9 @@ static int test_rtc_read_time(struct device *dev,
34 return 0; 34 return 0;
35} 35}
36 36
37static int test_rtc_set_time(struct device *dev,
38 struct rtc_time *tm)
39{
40 return 0;
41}
42
43static int test_rtc_set_mmss(struct device *dev, unsigned long secs) 37static int test_rtc_set_mmss(struct device *dev, unsigned long secs)
44{ 38{
39 dev_info(dev, "%s, secs = %lu\n", __func__, secs);
45 return 0; 40 return 0;
46} 41}
47 42
@@ -78,7 +73,6 @@ static int test_rtc_ioctl(struct device *dev, unsigned int cmd,
78static const struct rtc_class_ops test_rtc_ops = { 73static const struct rtc_class_ops test_rtc_ops = {
79 .proc = test_rtc_proc, 74 .proc = test_rtc_proc,
80 .read_time = test_rtc_read_time, 75 .read_time = test_rtc_read_time,
81 .set_time = test_rtc_set_time,
82 .read_alarm = test_rtc_read_alarm, 76 .read_alarm = test_rtc_read_alarm,
83 .set_alarm = test_rtc_set_alarm, 77 .set_alarm = test_rtc_set_alarm,
84 .set_mmss = test_rtc_set_mmss, 78 .set_mmss = test_rtc_set_mmss,
diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c
index 01d8da9afdc8..8ce5f74ee45b 100644
--- a/drivers/rtc/rtc-twl4030.c
+++ b/drivers/rtc/rtc-twl4030.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/errno.h>
22#include <linux/init.h> 23#include <linux/init.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/types.h> 25#include <linux/types.h>
@@ -415,8 +416,8 @@ static int __devinit twl4030_rtc_probe(struct platform_device *pdev)
415 int irq = platform_get_irq(pdev, 0); 416 int irq = platform_get_irq(pdev, 0);
416 u8 rd_reg; 417 u8 rd_reg;
417 418
418 if (irq < 0) 419 if (irq <= 0)
419 return irq; 420 return -EINVAL;
420 421
421 rtc = rtc_device_register(pdev->name, 422 rtc = rtc_device_register(pdev->name,
422 &pdev->dev, &twl4030_rtc_ops, THIS_MODULE); 423 &pdev->dev, &twl4030_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
new file mode 100644
index 000000000000..4ee4857ff207
--- /dev/null
+++ b/drivers/rtc/rtc-tx4939.c
@@ -0,0 +1,317 @@
1/*
2 * TX4939 internal RTC driver
3 * Based on RBTX49xx patch from CELF patch archive.
4 *
5 * This file is subject to the terms and conditions of the GNU General Public
6 * License. See the file "COPYING" in the main directory of this archive
7 * for more details.
8 *
9 * (C) Copyright TOSHIBA CORPORATION 2005-2007
10 */
11#include <linux/rtc.h>
12#include <linux/platform_device.h>
13#include <linux/interrupt.h>
14#include <linux/io.h>
15#include <asm/txx9/tx4939.h>
16
17struct tx4939rtc_plat_data {
18 struct rtc_device *rtc;
19 struct tx4939_rtc_reg __iomem *rtcreg;
20};
21
22static struct tx4939rtc_plat_data *get_tx4939rtc_plat_data(struct device *dev)
23{
24 return platform_get_drvdata(to_platform_device(dev));
25}
26
27static int tx4939_rtc_cmd(struct tx4939_rtc_reg __iomem *rtcreg, int cmd)
28{
29 int i = 0;
30
31 __raw_writel(cmd, &rtcreg->ctl);
32 /* This might take 30us (next 32.768KHz clock) */
33 while (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_BUSY) {
34 /* timeout on approx. 100us (@ GBUS200MHz) */
35 if (i++ > 200 * 100)
36 return -EBUSY;
37 cpu_relax();
38 }
39 return 0;
40}
41
42static int tx4939_rtc_set_mmss(struct device *dev, unsigned long secs)
43{
44 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
45 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
46 int i, ret;
47 unsigned char buf[6];
48
49 buf[0] = 0;
50 buf[1] = 0;
51 buf[2] = secs;
52 buf[3] = secs >> 8;
53 buf[4] = secs >> 16;
54 buf[5] = secs >> 24;
55 spin_lock_irq(&pdata->rtc->irq_lock);
56 __raw_writel(0, &rtcreg->adr);
57 for (i = 0; i < 6; i++)
58 __raw_writel(buf[i], &rtcreg->dat);
59 ret = tx4939_rtc_cmd(rtcreg,
60 TX4939_RTCCTL_COMMAND_SETTIME |
61 (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
62 spin_unlock_irq(&pdata->rtc->irq_lock);
63 return ret;
64}
65
66static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
67{
68 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
69 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
70 int i, ret;
71 unsigned long sec;
72 unsigned char buf[6];
73
74 spin_lock_irq(&pdata->rtc->irq_lock);
75 ret = tx4939_rtc_cmd(rtcreg,
76 TX4939_RTCCTL_COMMAND_GETTIME |
77 (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
78 if (ret) {
79 spin_unlock_irq(&pdata->rtc->irq_lock);
80 return ret;
81 }
82 __raw_writel(2, &rtcreg->adr);
83 for (i = 2; i < 6; i++)
84 buf[i] = __raw_readl(&rtcreg->dat);
85 spin_unlock_irq(&pdata->rtc->irq_lock);
86 sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
87 rtc_time_to_tm(sec, tm);
88 return rtc_valid_tm(tm);
89}
90
91static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
92{
93 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
94 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
95 int i, ret;
96 unsigned long sec;
97 unsigned char buf[6];
98
99 if (alrm->time.tm_sec < 0 ||
100 alrm->time.tm_min < 0 ||
101 alrm->time.tm_hour < 0 ||
102 alrm->time.tm_mday < 0 ||
103 alrm->time.tm_mon < 0 ||
104 alrm->time.tm_year < 0)
105 return -EINVAL;
106 rtc_tm_to_time(&alrm->time, &sec);
107 buf[0] = 0;
108 buf[1] = 0;
109 buf[2] = sec;
110 buf[3] = sec >> 8;
111 buf[4] = sec >> 16;
112 buf[5] = sec >> 24;
113 spin_lock_irq(&pdata->rtc->irq_lock);
114 __raw_writel(0, &rtcreg->adr);
115 for (i = 0; i < 6; i++)
116 __raw_writel(buf[i], &rtcreg->dat);
117 ret = tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_SETALARM |
118 (alrm->enabled ? TX4939_RTCCTL_ALME : 0));
119 spin_unlock_irq(&pdata->rtc->irq_lock);
120 return ret;
121}
122
123static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
124{
125 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
126 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
127 int i, ret;
128 unsigned long sec;
129 unsigned char buf[6];
130 u32 ctl;
131
132 spin_lock_irq(&pdata->rtc->irq_lock);
133 ret = tx4939_rtc_cmd(rtcreg,
134 TX4939_RTCCTL_COMMAND_GETALARM |
135 (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
136 if (ret) {
137 spin_unlock_irq(&pdata->rtc->irq_lock);
138 return ret;
139 }
140 __raw_writel(2, &rtcreg->adr);
141 for (i = 2; i < 6; i++)
142 buf[i] = __raw_readl(&rtcreg->dat);
143 ctl = __raw_readl(&rtcreg->ctl);
144 alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
145 alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
146 spin_unlock_irq(&pdata->rtc->irq_lock);
147 sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
148 rtc_time_to_tm(sec, &alrm->time);
149 return rtc_valid_tm(&alrm->time);
150}
151
152static int tx4939_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
153{
154 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
155
156 spin_lock_irq(&pdata->rtc->irq_lock);
157 tx4939_rtc_cmd(pdata->rtcreg,
158 TX4939_RTCCTL_COMMAND_NOP |
159 (enabled ? TX4939_RTCCTL_ALME : 0));
160 spin_unlock_irq(&pdata->rtc->irq_lock);
161 return 0;
162}
163
164static irqreturn_t tx4939_rtc_interrupt(int irq, void *dev_id)
165{
166 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev_id);
167 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
168 unsigned long events = RTC_IRQF;
169
170 spin_lock(&pdata->rtc->irq_lock);
171 if (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALMD) {
172 events |= RTC_AF;
173 tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_NOP);
174 }
175 spin_unlock(&pdata->rtc->irq_lock);
176 rtc_update_irq(pdata->rtc, 1, events);
177 return IRQ_HANDLED;
178}
179
180static const struct rtc_class_ops tx4939_rtc_ops = {
181 .read_time = tx4939_rtc_read_time,
182 .read_alarm = tx4939_rtc_read_alarm,
183 .set_alarm = tx4939_rtc_set_alarm,
184 .set_mmss = tx4939_rtc_set_mmss,
185 .alarm_irq_enable = tx4939_rtc_alarm_irq_enable,
186};
187
188static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
189 struct bin_attribute *bin_attr,
190 char *buf, loff_t pos, size_t size)
191{
192 struct device *dev = container_of(kobj, struct device, kobj);
193 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
194 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
195 ssize_t count;
196
197 spin_lock_irq(&pdata->rtc->irq_lock);
198 for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE;
199 count++, size--) {
200 __raw_writel(pos++, &rtcreg->adr);
201 *buf++ = __raw_readl(&rtcreg->dat);
202 }
203 spin_unlock_irq(&pdata->rtc->irq_lock);
204 return count;
205}
206
207static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj,
208 struct bin_attribute *bin_attr,
209 char *buf, loff_t pos, size_t size)
210{
211 struct device *dev = container_of(kobj, struct device, kobj);
212 struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
213 struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
214 ssize_t count;
215
216 spin_lock_irq(&pdata->rtc->irq_lock);
217 for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE;
218 count++, size--) {
219 __raw_writel(pos++, &rtcreg->adr);
220 __raw_writel(*buf++, &rtcreg->dat);
221 }
222 spin_unlock_irq(&pdata->rtc->irq_lock);
223 return count;
224}
225
226static struct bin_attribute tx4939_rtc_nvram_attr = {
227 .attr = {
228 .name = "nvram",
229 .mode = S_IRUGO | S_IWUSR,
230 },
231 .size = TX4939_RTC_REG_RAMSIZE,
232 .read = tx4939_rtc_nvram_read,
233 .write = tx4939_rtc_nvram_write,
234};
235
236static int __init tx4939_rtc_probe(struct platform_device *pdev)
237{
238 struct rtc_device *rtc;
239 struct tx4939rtc_plat_data *pdata;
240 struct resource *res;
241 int irq, ret;
242
243 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
244 if (!res)
245 return -ENODEV;
246 irq = platform_get_irq(pdev, 0);
247 if (irq < 0)
248 return -ENODEV;
249 pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
250 if (!pdata)
251 return -ENOMEM;
252 platform_set_drvdata(pdev, pdata);
253
254 if (!devm_request_mem_region(&pdev->dev, res->start,
255 resource_size(res), pdev->name))
256 return -EBUSY;
257 pdata->rtcreg = devm_ioremap(&pdev->dev, res->start,
258 resource_size(res));
259 if (!pdata->rtcreg)
260 return -EBUSY;
261
262 tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
263 if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
264 IRQF_DISABLED | IRQF_SHARED,
265 pdev->name, &pdev->dev) < 0) {
266 return -EBUSY;
267 }
268 rtc = rtc_device_register(pdev->name, &pdev->dev,
269 &tx4939_rtc_ops, THIS_MODULE);
270 if (IS_ERR(rtc))
271 return PTR_ERR(rtc);
272 pdata->rtc = rtc;
273 ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
274 if (ret)
275 rtc_device_unregister(rtc);
276 return ret;
277}
278
279static int __exit tx4939_rtc_remove(struct platform_device *pdev)
280{
281 struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
282 struct rtc_device *rtc = pdata->rtc;
283
284 spin_lock_irq(&rtc->irq_lock);
285 tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
286 spin_unlock_irq(&rtc->irq_lock);
287 sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
288 rtc_device_unregister(rtc);
289 platform_set_drvdata(pdev, NULL);
290 return 0;
291}
292
293static struct platform_driver tx4939_rtc_driver = {
294 .remove = __exit_p(tx4939_rtc_remove),
295 .driver = {
296 .name = "tx4939rtc",
297 .owner = THIS_MODULE,
298 },
299};
300
301static int __init tx4939rtc_init(void)
302{
303 return platform_driver_probe(&tx4939_rtc_driver, tx4939_rtc_probe);
304}
305
306static void __exit tx4939rtc_exit(void)
307{
308 platform_driver_unregister(&tx4939_rtc_driver);
309}
310
311module_init(tx4939rtc_init);
312module_exit(tx4939rtc_exit);
313
314MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
315MODULE_DESCRIPTION("TX4939 internal RTC driver");
316MODULE_LICENSE("GPL");
317MODULE_ALIAS("platform:tx4939rtc");
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 834dcc6d785f..f11297aff854 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -27,6 +27,7 @@
27#include <linux/rtc.h> 27#include <linux/rtc.h>
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <linux/types.h> 29#include <linux/types.h>
30#include <linux/log2.h>
30 31
31#include <asm/div64.h> 32#include <asm/div64.h>
32#include <asm/io.h> 33#include <asm/io.h>
@@ -84,8 +85,8 @@ static DEFINE_SPINLOCK(rtc_lock);
84static char rtc_name[] = "RTC"; 85static char rtc_name[] = "RTC";
85static unsigned long periodic_count; 86static unsigned long periodic_count;
86static unsigned int alarm_enabled; 87static unsigned int alarm_enabled;
87static int aie_irq = -1; 88static int aie_irq;
88static int pie_irq = -1; 89static int pie_irq;
89 90
90static inline unsigned long read_elapsed_second(void) 91static inline unsigned long read_elapsed_second(void)
91{ 92{
@@ -210,6 +211,8 @@ static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
210{ 211{
211 unsigned long count; 212 unsigned long count;
212 213
214 if (!is_power_of_2(freq))
215 return -EINVAL;
213 count = RTC_FREQUENCY; 216 count = RTC_FREQUENCY;
214 do_div(count, freq); 217 do_div(count, freq);
215 218
@@ -360,7 +363,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
360 spin_unlock_irq(&rtc_lock); 363 spin_unlock_irq(&rtc_lock);
361 364
362 aie_irq = platform_get_irq(pdev, 0); 365 aie_irq = platform_get_irq(pdev, 0);
363 if (aie_irq < 0 || aie_irq >= nr_irqs) { 366 if (aie_irq <= 0) {
364 retval = -EBUSY; 367 retval = -EBUSY;
365 goto err_device_unregister; 368 goto err_device_unregister;
366 } 369 }
@@ -371,7 +374,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
371 goto err_device_unregister; 374 goto err_device_unregister;
372 375
373 pie_irq = platform_get_irq(pdev, 1); 376 pie_irq = platform_get_irq(pdev, 1);
374 if (pie_irq < 0 || pie_irq >= nr_irqs) 377 if (pie_irq <= 0)
375 goto err_free_irq; 378 goto err_free_irq;
376 379
377 retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED, 380 retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b9d0efb6803f..4a6fe01831a8 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -78,7 +78,7 @@ config SPI_AU1550
78 will be called au1550_spi. 78 will be called au1550_spi.
79 79
80config SPI_BITBANG 80config SPI_BITBANG
81 tristate "Bitbanging SPI master" 81 tristate "Utilities for Bitbanging SPI masters"
82 help 82 help
83 With a few GPIO pins, your system can bitbang the SPI protocol. 83 With a few GPIO pins, your system can bitbang the SPI protocol.
84 Select this to get SPI support through I/O pins (GPIO, parallel 84 Select this to get SPI support through I/O pins (GPIO, parallel
@@ -100,6 +100,22 @@ config SPI_BUTTERFLY
100 inexpensive battery powered microcontroller evaluation board. 100 inexpensive battery powered microcontroller evaluation board.
101 This same cable can be used to flash new firmware. 101 This same cable can be used to flash new firmware.
102 102
103config SPI_GPIO
104 tristate "GPIO-based bitbanging SPI Master"
105 depends on GENERIC_GPIO
106 select SPI_BITBANG
107 help
108 This simple GPIO bitbanging SPI master uses the arch-neutral GPIO
109 interface to manage MOSI, MISO, SCK, and chipselect signals. SPI
110 slaves connected to a bus using this driver are configured as usual,
111 except that the spi_board_info.controller_data holds the GPIO number
112 for the chipselect used by this controller driver.
113
114 Note that this driver often won't achieve even 1 Mbit/sec speeds,
115 making it unusually slow for SPI. If your platform can inline
116 GPIO operations, you should be able to leverage that for better
117 speed with a custom version of this driver; see the source code.
118
103config SPI_IMX 119config SPI_IMX
104 tristate "Freescale iMX SPI controller" 120 tristate "Freescale iMX SPI controller"
105 depends on ARCH_IMX && EXPERIMENTAL 121 depends on ARCH_IMX && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ccf18de34e1e..5e9f521b8844 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_SPI_BFIN) += spi_bfin5xx.o
16obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o 16obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o
17obj-$(CONFIG_SPI_AU1550) += au1550_spi.o 17obj-$(CONFIG_SPI_AU1550) += au1550_spi.o
18obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o 18obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o
19obj-$(CONFIG_SPI_GPIO) += spi_gpio.o
19obj-$(CONFIG_SPI_IMX) += spi_imx.o 20obj-$(CONFIG_SPI_IMX) += spi_imx.o
20obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o 21obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o
21obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o 22obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 8abae4ad0fa5..5e39bac9c51b 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -30,13 +30,6 @@
30 * The core SPI transfer engine just talks to a register bank to set up 30 * The core SPI transfer engine just talks to a register bank to set up
31 * DMA transfers; transfer queue progress is driven by IRQs. The clock 31 * DMA transfers; transfer queue progress is driven by IRQs. The clock
32 * framework provides the base clock, subdivided for each spi_device. 32 * framework provides the base clock, subdivided for each spi_device.
33 *
34 * Newer controllers, marked with "new_1" flag, have:
35 * - CR.LASTXFER
36 * - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero)
37 * - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs)
38 * - SPI_CSRx.CSAAT
39 * - SPI_CSRx.SBCR allows faster clocking
40 */ 33 */
41struct atmel_spi { 34struct atmel_spi {
42 spinlock_t lock; 35 spinlock_t lock;
@@ -45,7 +38,6 @@ struct atmel_spi {
45 int irq; 38 int irq;
46 struct clk *clk; 39 struct clk *clk;
47 struct platform_device *pdev; 40 struct platform_device *pdev;
48 unsigned new_1:1;
49 struct spi_device *stay; 41 struct spi_device *stay;
50 42
51 u8 stopping; 43 u8 stopping;
@@ -59,10 +51,33 @@ struct atmel_spi {
59 dma_addr_t buffer_dma; 51 dma_addr_t buffer_dma;
60}; 52};
61 53
54/* Controller-specific per-slave state */
55struct atmel_spi_device {
56 unsigned int npcs_pin;
57 u32 csr;
58};
59
62#define BUFFER_SIZE PAGE_SIZE 60#define BUFFER_SIZE PAGE_SIZE
63#define INVALID_DMA_ADDRESS 0xffffffff 61#define INVALID_DMA_ADDRESS 0xffffffff
64 62
65/* 63/*
64 * Version 2 of the SPI controller has
65 * - CR.LASTXFER
66 * - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero)
67 * - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs)
68 * - SPI_CSRx.CSAAT
69 * - SPI_CSRx.SBCR allows faster clocking
70 *
71 * We can determine the controller version by reading the VERSION
72 * register, but I haven't checked that it exists on all chips, and
73 * this is cheaper anyway.
74 */
75static bool atmel_spi_is_v2(void)
76{
77 return !cpu_is_at91rm9200();
78}
79
80/*
66 * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby 81 * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby
67 * they assume that spi slave device state will not change on deselect, so 82 * they assume that spi slave device state will not change on deselect, so
68 * that automagic deselection is OK. ("NPCSx rises if no data is to be 83 * that automagic deselection is OK. ("NPCSx rises if no data is to be
@@ -80,39 +95,58 @@ struct atmel_spi {
80 * Master on Chip Select 0.") No workaround exists for that ... so for 95 * Master on Chip Select 0.") No workaround exists for that ... so for
81 * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH, 96 * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH,
82 * and (c) will trigger that first erratum in some cases. 97 * and (c) will trigger that first erratum in some cases.
98 *
99 * TODO: Test if the atmel_spi_is_v2() branch below works on
100 * AT91RM9200 if we use some other register than CSR0. However, don't
101 * do this unconditionally since AP7000 has an errata where the BITS
102 * field in CSR0 overrides all other CSRs.
83 */ 103 */
84 104
85static void cs_activate(struct atmel_spi *as, struct spi_device *spi) 105static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
86{ 106{
87 unsigned gpio = (unsigned) spi->controller_data; 107 struct atmel_spi_device *asd = spi->controller_state;
88 unsigned active = spi->mode & SPI_CS_HIGH; 108 unsigned active = spi->mode & SPI_CS_HIGH;
89 u32 mr; 109 u32 mr;
90 int i;
91 u32 csr;
92 u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
93
94 /* Make sure clock polarity is correct */
95 for (i = 0; i < spi->master->num_chipselect; i++) {
96 csr = spi_readl(as, CSR0 + 4 * i);
97 if ((csr ^ cpol) & SPI_BIT(CPOL))
98 spi_writel(as, CSR0 + 4 * i, csr ^ SPI_BIT(CPOL));
99 }
100 110
101 mr = spi_readl(as, MR); 111 if (atmel_spi_is_v2()) {
102 mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr); 112 /*
113 * Always use CSR0. This ensures that the clock
114 * switches to the correct idle polarity before we
115 * toggle the CS.
116 */
117 spi_writel(as, CSR0, asd->csr);
118 spi_writel(as, MR, SPI_BF(PCS, 0x0e) | SPI_BIT(MODFDIS)
119 | SPI_BIT(MSTR));
120 mr = spi_readl(as, MR);
121 gpio_set_value(asd->npcs_pin, active);
122 } else {
123 u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
124 int i;
125 u32 csr;
126
127 /* Make sure clock polarity is correct */
128 for (i = 0; i < spi->master->num_chipselect; i++) {
129 csr = spi_readl(as, CSR0 + 4 * i);
130 if ((csr ^ cpol) & SPI_BIT(CPOL))
131 spi_writel(as, CSR0 + 4 * i,
132 csr ^ SPI_BIT(CPOL));
133 }
134
135 mr = spi_readl(as, MR);
136 mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr);
137 if (spi->chip_select != 0)
138 gpio_set_value(asd->npcs_pin, active);
139 spi_writel(as, MR, mr);
140 }
103 141
104 dev_dbg(&spi->dev, "activate %u%s, mr %08x\n", 142 dev_dbg(&spi->dev, "activate %u%s, mr %08x\n",
105 gpio, active ? " (high)" : "", 143 asd->npcs_pin, active ? " (high)" : "",
106 mr); 144 mr);
107
108 if (!(cpu_is_at91rm9200() && spi->chip_select == 0))
109 gpio_set_value(gpio, active);
110 spi_writel(as, MR, mr);
111} 145}
112 146
113static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi) 147static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
114{ 148{
115 unsigned gpio = (unsigned) spi->controller_data; 149 struct atmel_spi_device *asd = spi->controller_state;
116 unsigned active = spi->mode & SPI_CS_HIGH; 150 unsigned active = spi->mode & SPI_CS_HIGH;
117 u32 mr; 151 u32 mr;
118 152
@@ -126,11 +160,11 @@ static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
126 } 160 }
127 161
128 dev_dbg(&spi->dev, "DEactivate %u%s, mr %08x\n", 162 dev_dbg(&spi->dev, "DEactivate %u%s, mr %08x\n",
129 gpio, active ? " (low)" : "", 163 asd->npcs_pin, active ? " (low)" : "",
130 mr); 164 mr);
131 165
132 if (!(cpu_is_at91rm9200() && spi->chip_select == 0)) 166 if (atmel_spi_is_v2() || spi->chip_select != 0)
133 gpio_set_value(gpio, !active); 167 gpio_set_value(asd->npcs_pin, !active);
134} 168}
135 169
136static inline int atmel_spi_xfer_is_last(struct spi_message *msg, 170static inline int atmel_spi_xfer_is_last(struct spi_message *msg,
@@ -502,6 +536,7 @@ atmel_spi_interrupt(int irq, void *dev_id)
502static int atmel_spi_setup(struct spi_device *spi) 536static int atmel_spi_setup(struct spi_device *spi)
503{ 537{
504 struct atmel_spi *as; 538 struct atmel_spi *as;
539 struct atmel_spi_device *asd;
505 u32 scbr, csr; 540 u32 scbr, csr;
506 unsigned int bits = spi->bits_per_word; 541 unsigned int bits = spi->bits_per_word;
507 unsigned long bus_hz; 542 unsigned long bus_hz;
@@ -536,19 +571,16 @@ static int atmel_spi_setup(struct spi_device *spi)
536 } 571 }
537 572
538 /* see notes above re chipselect */ 573 /* see notes above re chipselect */
539 if (cpu_is_at91rm9200() 574 if (!atmel_spi_is_v2()
540 && spi->chip_select == 0 575 && spi->chip_select == 0
541 && (spi->mode & SPI_CS_HIGH)) { 576 && (spi->mode & SPI_CS_HIGH)) {
542 dev_dbg(&spi->dev, "setup: can't be active-high\n"); 577 dev_dbg(&spi->dev, "setup: can't be active-high\n");
543 return -EINVAL; 578 return -EINVAL;
544 } 579 }
545 580
546 /* 581 /* v1 chips start out at half the peripheral bus speed. */
547 * Pre-new_1 chips start out at half the peripheral
548 * bus speed.
549 */
550 bus_hz = clk_get_rate(as->clk); 582 bus_hz = clk_get_rate(as->clk);
551 if (!as->new_1) 583 if (!atmel_spi_is_v2())
552 bus_hz /= 2; 584 bus_hz /= 2;
553 585
554 if (spi->max_speed_hz) { 586 if (spi->max_speed_hz) {
@@ -589,11 +621,20 @@ static int atmel_spi_setup(struct spi_device *spi)
589 621
590 /* chipselect must have been muxed as GPIO (e.g. in board setup) */ 622 /* chipselect must have been muxed as GPIO (e.g. in board setup) */
591 npcs_pin = (unsigned int)spi->controller_data; 623 npcs_pin = (unsigned int)spi->controller_data;
592 if (!spi->controller_state) { 624 asd = spi->controller_state;
625 if (!asd) {
626 asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL);
627 if (!asd)
628 return -ENOMEM;
629
593 ret = gpio_request(npcs_pin, spi->dev.bus_id); 630 ret = gpio_request(npcs_pin, spi->dev.bus_id);
594 if (ret) 631 if (ret) {
632 kfree(asd);
595 return ret; 633 return ret;
596 spi->controller_state = (void *)npcs_pin; 634 }
635
636 asd->npcs_pin = npcs_pin;
637 spi->controller_state = asd;
597 gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH)); 638 gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH));
598 } else { 639 } else {
599 unsigned long flags; 640 unsigned long flags;
@@ -605,11 +646,14 @@ static int atmel_spi_setup(struct spi_device *spi)
605 spin_unlock_irqrestore(&as->lock, flags); 646 spin_unlock_irqrestore(&as->lock, flags);
606 } 647 }
607 648
649 asd->csr = csr;
650
608 dev_dbg(&spi->dev, 651 dev_dbg(&spi->dev,
609 "setup: %lu Hz bpw %u mode 0x%x -> csr%d %08x\n", 652 "setup: %lu Hz bpw %u mode 0x%x -> csr%d %08x\n",
610 bus_hz / scbr, bits, spi->mode, spi->chip_select, csr); 653 bus_hz / scbr, bits, spi->mode, spi->chip_select, csr);
611 654
612 spi_writel(as, CSR0 + 4 * spi->chip_select, csr); 655 if (!atmel_spi_is_v2())
656 spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
613 657
614 return 0; 658 return 0;
615} 659}
@@ -684,10 +728,11 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
684static void atmel_spi_cleanup(struct spi_device *spi) 728static void atmel_spi_cleanup(struct spi_device *spi)
685{ 729{
686 struct atmel_spi *as = spi_master_get_devdata(spi->master); 730 struct atmel_spi *as = spi_master_get_devdata(spi->master);
731 struct atmel_spi_device *asd = spi->controller_state;
687 unsigned gpio = (unsigned) spi->controller_data; 732 unsigned gpio = (unsigned) spi->controller_data;
688 unsigned long flags; 733 unsigned long flags;
689 734
690 if (!spi->controller_state) 735 if (!asd)
691 return; 736 return;
692 737
693 spin_lock_irqsave(&as->lock, flags); 738 spin_lock_irqsave(&as->lock, flags);
@@ -697,7 +742,9 @@ static void atmel_spi_cleanup(struct spi_device *spi)
697 } 742 }
698 spin_unlock_irqrestore(&as->lock, flags); 743 spin_unlock_irqrestore(&as->lock, flags);
699 744
745 spi->controller_state = NULL;
700 gpio_free(gpio); 746 gpio_free(gpio);
747 kfree(asd);
701} 748}
702 749
703/*-------------------------------------------------------------------------*/ 750/*-------------------------------------------------------------------------*/
@@ -755,8 +802,6 @@ static int __init atmel_spi_probe(struct platform_device *pdev)
755 goto out_free_buffer; 802 goto out_free_buffer;
756 as->irq = irq; 803 as->irq = irq;
757 as->clk = clk; 804 as->clk = clk;
758 if (!cpu_is_at91rm9200())
759 as->new_1 = 1;
760 805
761 ret = request_irq(irq, atmel_spi_interrupt, 0, 806 ret = request_irq(irq, atmel_spi_interrupt, 0,
762 pdev->dev.bus_id, master); 807 pdev->dev.bus_id, master);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 6104f461a3cd..d0fc4ca2f656 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1561,11 +1561,12 @@ out_error_master_alloc:
1561static int pxa2xx_spi_remove(struct platform_device *pdev) 1561static int pxa2xx_spi_remove(struct platform_device *pdev)
1562{ 1562{
1563 struct driver_data *drv_data = platform_get_drvdata(pdev); 1563 struct driver_data *drv_data = platform_get_drvdata(pdev);
1564 struct ssp_device *ssp = drv_data->ssp; 1564 struct ssp_device *ssp;
1565 int status = 0; 1565 int status = 0;
1566 1566
1567 if (!drv_data) 1567 if (!drv_data)
1568 return 0; 1568 return 0;
1569 ssp = drv_data->ssp;
1569 1570
1570 /* Remove the queue */ 1571 /* Remove the queue */
1571 status = destroy_queue(drv_data); 1572 status = destroy_queue(drv_data);
diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c
new file mode 100644
index 000000000000..49698cabc30d
--- /dev/null
+++ b/drivers/spi/spi_gpio.c
@@ -0,0 +1,360 @@
1/*
2 * spi_gpio.c - SPI master driver using generic bitbanged GPIO
3 *
4 * Copyright (C) 2006,2008 David Brownell
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/kernel.h>
21#include <linux/init.h>
22#include <linux/platform_device.h>
23#include <linux/gpio.h>
24
25#include <linux/spi/spi.h>
26#include <linux/spi/spi_bitbang.h>
27#include <linux/spi/spi_gpio.h>
28
29
30/*
31 * This bitbanging SPI master driver should help make systems usable
32 * when a native hardware SPI engine is not available, perhaps because
33 * its driver isn't yet working or because the I/O pins it requires
34 * are used for other purposes.
35 *
36 * platform_device->driver_data ... points to spi_gpio
37 *
38 * spi->controller_state ... reserved for bitbang framework code
39 * spi->controller_data ... holds chipselect GPIO
40 *
41 * spi->master->dev.driver_data ... points to spi_gpio->bitbang
42 */
43
44struct spi_gpio {
45 struct spi_bitbang bitbang;
46 struct spi_gpio_platform_data pdata;
47 struct platform_device *pdev;
48};
49
50/*----------------------------------------------------------------------*/
51
52/*
53 * Because the overhead of going through four GPIO procedure calls
54 * per transferred bit can make performance a problem, this code
55 * is set up so that you can use it in either of two ways:
56 *
57 * - The slow generic way: set up platform_data to hold the GPIO
58 * numbers used for MISO/MOSI/SCK, and issue procedure calls for
59 * each of them. This driver can handle several such busses.
60 *
61 * - The quicker inlined way: only helps with platform GPIO code
62 * that inlines operations for constant GPIOs. This can give
63 * you tight (fast!) inner loops, but each such bus needs a
64 * new driver. You'll define a new C file, with Makefile and
65 * Kconfig support; the C code can be a total of six lines:
66 *
67 * #define DRIVER_NAME "myboard_spi2"
68 * #define SPI_MISO_GPIO 119
69 * #define SPI_MOSI_GPIO 120
70 * #define SPI_SCK_GPIO 121
71 * #define SPI_N_CHIPSEL 4
72 * #include "spi_gpio.c"
73 */
74
75#ifndef DRIVER_NAME
76#define DRIVER_NAME "spi_gpio"
77
78#define GENERIC_BITBANG /* vs tight inlines */
79
80/* all functions referencing these symbols must define pdata */
81#define SPI_MISO_GPIO ((pdata)->miso)
82#define SPI_MOSI_GPIO ((pdata)->mosi)
83#define SPI_SCK_GPIO ((pdata)->sck)
84
85#define SPI_N_CHIPSEL ((pdata)->num_chipselect)
86
87#endif
88
89/*----------------------------------------------------------------------*/
90
91static inline const struct spi_gpio_platform_data * __pure
92spi_to_pdata(const struct spi_device *spi)
93{
94 const struct spi_bitbang *bang;
95 const struct spi_gpio *spi_gpio;
96
97 bang = spi_master_get_devdata(spi->master);
98 spi_gpio = container_of(bang, struct spi_gpio, bitbang);
99 return &spi_gpio->pdata;
100}
101
102/* this is #defined to avoid unused-variable warnings when inlining */
103#define pdata spi_to_pdata(spi)
104
105static inline void setsck(const struct spi_device *spi, int is_on)
106{
107 gpio_set_value(SPI_SCK_GPIO, is_on);
108}
109
110static inline void setmosi(const struct spi_device *spi, int is_on)
111{
112 gpio_set_value(SPI_MOSI_GPIO, is_on);
113}
114
115static inline int getmiso(const struct spi_device *spi)
116{
117 return gpio_get_value(SPI_MISO_GPIO);
118}
119
120#undef pdata
121
122/*
123 * NOTE: this clocks "as fast as we can". It "should" be a function of the
124 * requested device clock. Software overhead means we usually have trouble
125 * reaching even one Mbit/sec (except when we can inline bitops), so for now
126 * we'll just assume we never need additional per-bit slowdowns.
127 */
128#define spidelay(nsecs) do {} while (0)
129
130#define EXPAND_BITBANG_TXRX
131#include <linux/spi/spi_bitbang.h>
132
133/*
134 * These functions can leverage inline expansion of GPIO calls to shrink
135 * costs for a txrx bit, often by factors of around ten (by instruction
136 * count). That is particularly visible for larger word sizes, but helps
137 * even with default 8-bit words.
138 *
139 * REVISIT overheads calling these functions for each word also have
140 * significant performance costs. Having txrx_bufs() calls that inline
141 * the txrx_word() logic would help performance, e.g. on larger blocks
142 * used with flash storage or MMC/SD. There should also be ways to make
143 * GCC be less stupid about reloading registers inside the I/O loops,
144 * even without inlined GPIO calls; __attribute__((hot)) on GCC 4.3?
145 */
146
147static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
148 unsigned nsecs, u32 word, u8 bits)
149{
150 return bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits);
151}
152
153static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
154 unsigned nsecs, u32 word, u8 bits)
155{
156 return bitbang_txrx_be_cpha1(spi, nsecs, 0, word, bits);
157}
158
159static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
160 unsigned nsecs, u32 word, u8 bits)
161{
162 return bitbang_txrx_be_cpha0(spi, nsecs, 1, word, bits);
163}
164
165static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
166 unsigned nsecs, u32 word, u8 bits)
167{
168 return bitbang_txrx_be_cpha1(spi, nsecs, 1, word, bits);
169}
170
171/*----------------------------------------------------------------------*/
172
173static void spi_gpio_chipselect(struct spi_device *spi, int is_active)
174{
175 unsigned long cs = (unsigned long) spi->controller_data;
176
177 /* set initial clock polarity */
178 if (is_active)
179 setsck(spi, spi->mode & SPI_CPOL);
180
181 /* SPI is normally active-low */
182 gpio_set_value(cs, (spi->mode & SPI_CS_HIGH) ? is_active : !is_active);
183}
184
185static int spi_gpio_setup(struct spi_device *spi)
186{
187 unsigned long cs = (unsigned long) spi->controller_data;
188 int status = 0;
189
190 if (spi->bits_per_word > 32)
191 return -EINVAL;
192
193 if (!spi->controller_state) {
194 status = gpio_request(cs, spi->dev.bus_id);
195 if (status)
196 return status;
197 status = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH);
198 }
199 if (!status)
200 status = spi_bitbang_setup(spi);
201 if (status) {
202 if (!spi->controller_state)
203 gpio_free(cs);
204 }
205 return status;
206}
207
208static void spi_gpio_cleanup(struct spi_device *spi)
209{
210 unsigned long cs = (unsigned long) spi->controller_data;
211
212 gpio_free(cs);
213 spi_bitbang_cleanup(spi);
214}
215
216static int __init spi_gpio_alloc(unsigned pin, const char *label, bool is_in)
217{
218 int value;
219
220 value = gpio_request(pin, label);
221 if (value == 0) {
222 if (is_in)
223 value = gpio_direction_input(pin);
224 else
225 value = gpio_direction_output(pin, 0);
226 }
227 return value;
228}
229
230static int __init
231spi_gpio_request(struct spi_gpio_platform_data *pdata, const char *label)
232{
233 int value;
234
235 /* NOTE: SPI_*_GPIO symbols may reference "pdata" */
236
237 value = spi_gpio_alloc(SPI_MOSI_GPIO, label, false);
238 if (value)
239 goto done;
240
241 value = spi_gpio_alloc(SPI_MISO_GPIO, label, true);
242 if (value)
243 goto free_mosi;
244
245 value = spi_gpio_alloc(SPI_SCK_GPIO, label, false);
246 if (value)
247 goto free_miso;
248
249 goto done;
250
251free_miso:
252 gpio_free(SPI_MISO_GPIO);
253free_mosi:
254 gpio_free(SPI_MOSI_GPIO);
255done:
256 return value;
257}
258
259static int __init spi_gpio_probe(struct platform_device *pdev)
260{
261 int status;
262 struct spi_master *master;
263 struct spi_gpio *spi_gpio;
264 struct spi_gpio_platform_data *pdata;
265
266 pdata = pdev->dev.platform_data;
267#ifdef GENERIC_BITBANG
268 if (!pdata || !pdata->num_chipselect)
269 return -ENODEV;
270#endif
271
272 status = spi_gpio_request(pdata, dev_name(&pdev->dev));
273 if (status < 0)
274 return status;
275
276 master = spi_alloc_master(&pdev->dev, sizeof *spi_gpio);
277 if (!master) {
278 status = -ENOMEM;
279 goto gpio_free;
280 }
281 spi_gpio = spi_master_get_devdata(master);
282 platform_set_drvdata(pdev, spi_gpio);
283
284 spi_gpio->pdev = pdev;
285 if (pdata)
286 spi_gpio->pdata = *pdata;
287
288 master->bus_num = pdev->id;
289 master->num_chipselect = SPI_N_CHIPSEL;
290 master->setup = spi_gpio_setup;
291 master->cleanup = spi_gpio_cleanup;
292
293 spi_gpio->bitbang.master = spi_master_get(master);
294 spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
295 spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
296 spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1;
297 spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2;
298 spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3;
299 spi_gpio->bitbang.setup_transfer = spi_bitbang_setup_transfer;
300 spi_gpio->bitbang.flags = SPI_CS_HIGH;
301
302 status = spi_bitbang_start(&spi_gpio->bitbang);
303 if (status < 0) {
304 spi_master_put(spi_gpio->bitbang.master);
305gpio_free:
306 gpio_free(SPI_MISO_GPIO);
307 gpio_free(SPI_MOSI_GPIO);
308 gpio_free(SPI_SCK_GPIO);
309 spi_master_put(master);
310 }
311
312 return status;
313}
314
315static int __exit spi_gpio_remove(struct platform_device *pdev)
316{
317 struct spi_gpio *spi_gpio;
318 struct spi_gpio_platform_data *pdata;
319 int status;
320
321 spi_gpio = platform_get_drvdata(pdev);
322 pdata = pdev->dev.platform_data;
323
324 /* stop() unregisters child devices too */
325 status = spi_bitbang_stop(&spi_gpio->bitbang);
326 spi_master_put(spi_gpio->bitbang.master);
327
328 platform_set_drvdata(pdev, NULL);
329
330 gpio_free(SPI_MISO_GPIO);
331 gpio_free(SPI_MOSI_GPIO);
332 gpio_free(SPI_SCK_GPIO);
333
334 return status;
335}
336
337MODULE_ALIAS("platform:" DRIVER_NAME);
338
339static struct platform_driver spi_gpio_driver = {
340 .driver.name = DRIVER_NAME,
341 .driver.owner = THIS_MODULE,
342 .remove = __exit_p(spi_gpio_remove),
343};
344
345static int __init spi_gpio_init(void)
346{
347 return platform_driver_probe(&spi_gpio_driver, spi_gpio_probe);
348}
349module_init(spi_gpio_init);
350
351static void __exit spi_gpio_exit(void)
352{
353 platform_driver_unregister(&spi_gpio_driver);
354}
355module_exit(spi_gpio_exit);
356
357
358MODULE_DESCRIPTION("SPI master driver using generic bitbanged GPIO ");
359MODULE_AUTHOR("David Brownell");
360MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 256d18395a23..b3ebc1d0f85f 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -19,6 +19,7 @@
19#include <linux/err.h> 19#include <linux/err.h>
20#include <linux/clk.h> 20#include <linux/clk.h>
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/gpio.h>
22 23
23#include <linux/spi/spi.h> 24#include <linux/spi/spi.h>
24#include <linux/spi/spi_bitbang.h> 25#include <linux/spi/spi_bitbang.h>
@@ -27,7 +28,6 @@
27#include <asm/dma.h> 28#include <asm/dma.h>
28#include <mach/hardware.h> 29#include <mach/hardware.h>
29 30
30#include <mach/regs-gpio.h>
31#include <plat/regs-spi.h> 31#include <plat/regs-spi.h>
32#include <mach/spi.h> 32#include <mach/spi.h>
33 33
@@ -66,7 +66,7 @@ static inline struct s3c24xx_spi *to_hw(struct spi_device *sdev)
66 66
67static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol) 67static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol)
68{ 68{
69 s3c2410_gpio_setpin(spi->pin_cs, pol); 69 gpio_set_value(spi->pin_cs, pol);
70} 70}
71 71
72static void s3c24xx_spi_chipsel(struct spi_device *spi, int value) 72static void s3c24xx_spi_chipsel(struct spi_device *spi, int value)
@@ -248,8 +248,13 @@ static void s3c24xx_spi_initialsetup(struct s3c24xx_spi *hw)
248 writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN); 248 writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN);
249 writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON); 249 writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON);
250 250
251 if (hw->pdata && hw->pdata->gpio_setup) 251 if (hw->pdata) {
252 hw->pdata->gpio_setup(hw->pdata, 1); 252 if (hw->set_cs == s3c24xx_spi_gpiocs)
253 gpio_direction_output(hw->pdata->pin_cs, 1);
254
255 if (hw->pdata->gpio_setup)
256 hw->pdata->gpio_setup(hw->pdata, 1);
257 }
253} 258}
254 259
255static int __init s3c24xx_spi_probe(struct platform_device *pdev) 260static int __init s3c24xx_spi_probe(struct platform_device *pdev)
@@ -343,18 +348,27 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
343 goto err_no_clk; 348 goto err_no_clk;
344 } 349 }
345 350
346 s3c24xx_spi_initialsetup(hw);
347
348 /* setup any gpio we can */ 351 /* setup any gpio we can */
349 352
350 if (!pdata->set_cs) { 353 if (!pdata->set_cs) {
351 hw->set_cs = s3c24xx_spi_gpiocs; 354 if (pdata->pin_cs < 0) {
355 dev_err(&pdev->dev, "No chipselect pin\n");
356 goto err_register;
357 }
352 358
353 s3c2410_gpio_setpin(pdata->pin_cs, 1); 359 err = gpio_request(pdata->pin_cs, dev_name(&pdev->dev));
354 s3c2410_gpio_cfgpin(pdata->pin_cs, S3C2410_GPIO_OUTPUT); 360 if (err) {
361 dev_err(&pdev->dev, "Failed to get gpio for cs\n");
362 goto err_register;
363 }
364
365 hw->set_cs = s3c24xx_spi_gpiocs;
366 gpio_direction_output(pdata->pin_cs, 1);
355 } else 367 } else
356 hw->set_cs = pdata->set_cs; 368 hw->set_cs = pdata->set_cs;
357 369
370 s3c24xx_spi_initialsetup(hw);
371
358 /* register our spi controller */ 372 /* register our spi controller */
359 373
360 err = spi_bitbang_start(&hw->bitbang); 374 err = spi_bitbang_start(&hw->bitbang);
@@ -366,6 +380,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
366 return 0; 380 return 0;
367 381
368 err_register: 382 err_register:
383 if (hw->set_cs == s3c24xx_spi_gpiocs)
384 gpio_free(pdata->pin_cs);
385
369 clk_disable(hw->clk); 386 clk_disable(hw->clk);
370 clk_put(hw->clk); 387 clk_put(hw->clk);
371 388
@@ -401,6 +418,9 @@ static int __exit s3c24xx_spi_remove(struct platform_device *dev)
401 free_irq(hw->irq, hw); 418 free_irq(hw->irq, hw);
402 iounmap(hw->regs); 419 iounmap(hw->regs);
403 420
421 if (hw->set_cs == s3c24xx_spi_gpiocs)
422 gpio_free(hw->pdata->pin_cs);
423
404 release_resource(hw->ioarea); 424 release_resource(hw->ioarea);
405 kfree(hw->ioarea); 425 kfree(hw->ioarea);
406 426
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 243ea4ab20c8..db16112cf197 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -2051,7 +2051,7 @@ static int __devinit aty128_probe(struct pci_dev *pdev, const struct pci_device_
2051 2051
2052 /* Virtualize mmio region */ 2052 /* Virtualize mmio region */
2053 info->fix.mmio_start = reg_addr; 2053 info->fix.mmio_start = reg_addr;
2054 par->regbase = ioremap(reg_addr, pci_resource_len(pdev, 2)); 2054 par->regbase = pci_ioremap_bar(pdev, 2);
2055 if (!par->regbase) 2055 if (!par->regbase)
2056 goto err_free_info; 2056 goto err_free_info;
2057 2057
diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c
index 7d1b819e501c..a9b3ada05d99 100644
--- a/drivers/video/bfin-t350mcqb-fb.c
+++ b/drivers/video/bfin-t350mcqb-fb.c
@@ -255,7 +255,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var,
255{ 255{
256 256
257 if (var->bits_per_pixel != LCD_BPP) { 257 if (var->bits_per_pixel != LCD_BPP) {
258 pr_debug("%s: depth not supported: %u BPP\n", __FUNCTION__, 258 pr_debug("%s: depth not supported: %u BPP\n", __func__,
259 var->bits_per_pixel); 259 var->bits_per_pixel);
260 return -EINVAL; 260 return -EINVAL;
261 } 261 }
@@ -264,7 +264,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var,
264 info->var.xres_virtual != var->xres_virtual || 264 info->var.xres_virtual != var->xres_virtual ||
265 info->var.yres_virtual != var->yres_virtual) { 265 info->var.yres_virtual != var->yres_virtual) {
266 pr_debug("%s: Resolution not supported: X%u x Y%u \n", 266 pr_debug("%s: Resolution not supported: X%u x Y%u \n",
267 __FUNCTION__, var->xres, var->yres); 267 __func__, var->xres, var->yres);
268 return -EINVAL; 268 return -EINVAL;
269 } 269 }
270 270
@@ -274,7 +274,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var,
274 274
275 if ((info->fix.line_length * var->yres_virtual) > info->fix.smem_len) { 275 if ((info->fix.line_length * var->yres_virtual) > info->fix.smem_len) {
276 pr_debug("%s: Memory Limit requested yres_virtual = %u\n", 276 pr_debug("%s: Memory Limit requested yres_virtual = %u\n",
277 __FUNCTION__, var->yres_virtual); 277 __func__, var->yres_virtual);
278 return -ENOMEM; 278 return -ENOMEM;
279 } 279 }
280 280
diff --git a/drivers/video/carminefb.c b/drivers/video/carminefb.c
index c9b191319a9a..c7ff3c1a266a 100644
--- a/drivers/video/carminefb.c
+++ b/drivers/video/carminefb.c
@@ -168,7 +168,7 @@ static int carmine_setcolreg(unsigned regno, unsigned red, unsigned green,
168 blue >>= 8; 168 blue >>= 8;
169 transp >>= 8; 169 transp >>= 8;
170 170
171 ((u32 *)info->pseudo_palette)[regno] = be32_to_cpu(transp << 24 | 171 ((__be32 *)info->pseudo_palette)[regno] = cpu_to_be32(transp << 24 |
172 red << 0 | green << 8 | blue << 16); 172 red << 0 | green << 8 | blue << 16);
173 return 0; 173 return 0;
174} 174}
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 39d5d643a50b..7a9e42e3a9a9 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -1583,8 +1583,7 @@ cyberpro_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
1583 goto failed_release; 1583 goto failed_release;
1584 1584
1585 cfb->dev = dev; 1585 cfb->dev = dev;
1586 cfb->region = ioremap(pci_resource_start(dev, 0), 1586 cfb->region = pci_ioremap_bar(dev, 0);
1587 pci_resource_len(dev, 0));
1588 if (!cfb->region) 1587 if (!cfb->region)
1589 goto failed_ioremap; 1588 goto failed_ioremap;
1590 1589
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 3c65b0d67617..756efeb91abc 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -510,6 +510,10 @@ static int fb_prepare_extra_logos(struct fb_info *info, unsigned int height,
510 fb_logo_ex_num = 0; 510 fb_logo_ex_num = 0;
511 511
512 for (i = 0; i < fb_logo_ex_num; i++) { 512 for (i = 0; i < fb_logo_ex_num; i++) {
513 if (fb_logo_ex[i].logo->type != fb_logo.logo->type) {
514 fb_logo_ex[i].logo = NULL;
515 continue;
516 }
513 height += fb_logo_ex[i].logo->height; 517 height += fb_logo_ex[i].logo->height;
514 if (height > yres) { 518 if (height > yres) {
515 height -= fb_logo_ex[i].logo->height; 519 height -= fb_logo_ex[i].logo->height;
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index f89c3cce1e0c..fe5b519860b1 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -912,6 +912,7 @@ static int gbefb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
912{ 912{
913 unsigned int line_length; 913 unsigned int line_length;
914 struct gbe_timing_info timing; 914 struct gbe_timing_info timing;
915 int ret;
915 916
916 /* Limit bpp to 8, 16, and 32 */ 917 /* Limit bpp to 8, 16, and 32 */
917 if (var->bits_per_pixel <= 8) 918 if (var->bits_per_pixel <= 8)
@@ -930,8 +931,10 @@ static int gbefb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
930 931
931 var->grayscale = 0; /* No grayscale for now */ 932 var->grayscale = 0; /* No grayscale for now */
932 933
933 if ((var->pixclock = compute_gbe_timing(var, &timing)) < 0) 934 ret = compute_gbe_timing(var, &timing);
934 return(-EINVAL); 935 var->pixclock = ret;
936 if (ret < 0)
937 return -EINVAL;
935 938
936 /* Adjust virtual resolution, if necessary */ 939 /* Adjust virtual resolution, if necessary */
937 if (var->xres > var->xres_virtual || (!ywrap && !ypan)) 940 if (var->xres > var->xres_virtual || (!ywrap && !ypan))
diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c
index bb20a2289760..751e491ca8c8 100644
--- a/drivers/video/geode/gx1fb_core.c
+++ b/drivers/video/geode/gx1fb_core.c
@@ -217,8 +217,7 @@ static int __init gx1fb_map_video_memory(struct fb_info *info, struct pci_dev *d
217 ret = pci_request_region(dev, 0, "gx1fb (video)"); 217 ret = pci_request_region(dev, 0, "gx1fb (video)");
218 if (ret < 0) 218 if (ret < 0)
219 return ret; 219 return ret;
220 par->vid_regs = ioremap(pci_resource_start(dev, 0), 220 par->vid_regs = pci_ioremap_bar(dev, 0);
221 pci_resource_len(dev, 0));
222 if (!par->vid_regs) 221 if (!par->vid_regs)
223 return -ENOMEM; 222 return -ENOMEM;
224 223
diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c
index de2b8f9876a5..484118926318 100644
--- a/drivers/video/geode/gxfb_core.c
+++ b/drivers/video/geode/gxfb_core.c
@@ -242,23 +242,21 @@ static int __init gxfb_map_video_memory(struct fb_info *info, struct pci_dev *de
242 ret = pci_request_region(dev, 3, "gxfb (video processor)"); 242 ret = pci_request_region(dev, 3, "gxfb (video processor)");
243 if (ret < 0) 243 if (ret < 0)
244 return ret; 244 return ret;
245 par->vid_regs = ioremap(pci_resource_start(dev, 3), 245 par->vid_regs = pci_ioremap_bar(dev, 3);
246 pci_resource_len(dev, 3));
247 if (!par->vid_regs) 246 if (!par->vid_regs)
248 return -ENOMEM; 247 return -ENOMEM;
249 248
250 ret = pci_request_region(dev, 2, "gxfb (display controller)"); 249 ret = pci_request_region(dev, 2, "gxfb (display controller)");
251 if (ret < 0) 250 if (ret < 0)
252 return ret; 251 return ret;
253 par->dc_regs = ioremap(pci_resource_start(dev, 2), pci_resource_len(dev, 2)); 252 par->dc_regs = pci_ioremap_bar(dev, 2);
254 if (!par->dc_regs) 253 if (!par->dc_regs)
255 return -ENOMEM; 254 return -ENOMEM;
256 255
257 ret = pci_request_region(dev, 1, "gxfb (graphics processor)"); 256 ret = pci_request_region(dev, 1, "gxfb (graphics processor)");
258 if (ret < 0) 257 if (ret < 0)
259 return ret; 258 return ret;
260 par->gp_regs = ioremap(pci_resource_start(dev, 1), 259 par->gp_regs = pci_ioremap_bar(dev, 1);
261 pci_resource_len(dev, 1));
262 260
263 if (!par->gp_regs) 261 if (!par->gp_regs)
264 return -ENOMEM; 262 return -ENOMEM;
diff --git a/drivers/video/geode/lxfb_core.c b/drivers/video/geode/lxfb_core.c
index 2cd9b74d2225..b965ecdbc604 100644
--- a/drivers/video/geode/lxfb_core.c
+++ b/drivers/video/geode/lxfb_core.c
@@ -379,20 +379,17 @@ static int __init lxfb_map_video_memory(struct fb_info *info,
379 if (info->screen_base == NULL) 379 if (info->screen_base == NULL)
380 return ret; 380 return ret;
381 381
382 par->gp_regs = ioremap(pci_resource_start(dev, 1), 382 par->gp_regs = pci_ioremap_bar(dev, 1);
383 pci_resource_len(dev, 1));
384 383
385 if (par->gp_regs == NULL) 384 if (par->gp_regs == NULL)
386 return ret; 385 return ret;
387 386
388 par->dc_regs = ioremap(pci_resource_start(dev, 2), 387 par->dc_regs = pci_ioremap_bar(dev, 2);
389 pci_resource_len(dev, 2));
390 388
391 if (par->dc_regs == NULL) 389 if (par->dc_regs == NULL)
392 return ret; 390 return ret;
393 391
394 par->vp_regs = ioremap(pci_resource_start(dev, 3), 392 par->vp_regs = pci_ioremap_bar(dev, 3);
395 pci_resource_len(dev, 3));
396 393
397 if (par->vp_regs == NULL) 394 if (par->vp_regs == NULL)
398 return ret; 395 return ret;
diff --git a/drivers/video/gxt4500.c b/drivers/video/gxt4500.c
index 564557792bed..896e53dea906 100644
--- a/drivers/video/gxt4500.c
+++ b/drivers/video/gxt4500.c
@@ -648,7 +648,7 @@ static int __devinit gxt4500_probe(struct pci_dev *pdev,
648 info->pseudo_palette = par->pseudo_palette; 648 info->pseudo_palette = par->pseudo_palette;
649 649
650 info->fix.mmio_start = reg_phys; 650 info->fix.mmio_start = reg_phys;
651 par->regs = ioremap(reg_phys, pci_resource_len(pdev, 0)); 651 par->regs = pci_ioremap_bar(pdev, 0);
652 if (!par->regs) { 652 if (!par->regs) {
653 dev_err(&pdev->dev, "gxt4500: cannot map registers\n"); 653 dev_err(&pdev->dev, "gxt4500: cannot map registers\n");
654 goto err_free_all; 654 goto err_free_all;
@@ -656,7 +656,7 @@ static int __devinit gxt4500_probe(struct pci_dev *pdev,
656 656
657 info->fix.smem_start = fb_phys; 657 info->fix.smem_start = fb_phys;
658 info->fix.smem_len = pci_resource_len(pdev, 1); 658 info->fix.smem_len = pci_resource_len(pdev, 1);
659 info->screen_base = ioremap(fb_phys, pci_resource_len(pdev, 1)); 659 info->screen_base = pci_ioremap_bar(pdev, 1);
660 if (!info->screen_base) { 660 if (!info->screen_base) {
661 dev_err(&pdev->dev, "gxt4500: cannot map framebuffer\n"); 661 dev_err(&pdev->dev, "gxt4500: cannot map framebuffer\n");
662 goto err_unmap_regs; 662 goto err_unmap_regs;
diff --git a/drivers/video/i810/i810_accel.c b/drivers/video/i810/i810_accel.c
index 76764ea3486a..f5bedee4310a 100644
--- a/drivers/video/i810/i810_accel.c
+++ b/drivers/video/i810/i810_accel.c
@@ -301,8 +301,10 @@ void i810fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
301 u32 dx, dy, width, height, dest, rop = 0, color = 0; 301 u32 dx, dy, width, height, dest, rop = 0, color = 0;
302 302
303 if (!info->var.accel_flags || par->dev_flags & LOCKUP || 303 if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
304 par->depth == 4) 304 par->depth == 4) {
305 return cfb_fillrect(info, rect); 305 cfb_fillrect(info, rect);
306 return;
307 }
306 308
307 if (par->depth == 1) 309 if (par->depth == 1)
308 color = rect->color; 310 color = rect->color;
@@ -327,8 +329,10 @@ void i810fb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
327 u32 sx, sy, dx, dy, pitch, width, height, src, dest, xdir; 329 u32 sx, sy, dx, dy, pitch, width, height, src, dest, xdir;
328 330
329 if (!info->var.accel_flags || par->dev_flags & LOCKUP || 331 if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
330 par->depth == 4) 332 par->depth == 4) {
331 return cfb_copyarea(info, region); 333 cfb_copyarea(info, region);
334 return;
335 }
332 336
333 dx = region->dx * par->depth; 337 dx = region->dx * par->depth;
334 sx = region->sx * par->depth; 338 sx = region->sx * par->depth;
@@ -366,8 +370,10 @@ void i810fb_imageblit(struct fb_info *info, const struct fb_image *image)
366 u32 fg = 0, bg = 0, size, dst; 370 u32 fg = 0, bg = 0, size, dst;
367 371
368 if (!info->var.accel_flags || par->dev_flags & LOCKUP || 372 if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
369 par->depth == 4 || image->depth != 1) 373 par->depth == 4 || image->depth != 1) {
370 return cfb_imageblit(info, image); 374 cfb_imageblit(info, image);
375 return;
376 }
371 377
372 switch (info->var.bits_per_pixel) { 378 switch (info->var.bits_per_pixel) {
373 case 8: 379 case 8:
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index a09e23649357..6d8e5415c809 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -1493,8 +1493,10 @@ static void intelfb_fillrect (struct fb_info *info,
1493 DBG_MSG("intelfb_fillrect\n"); 1493 DBG_MSG("intelfb_fillrect\n");
1494#endif 1494#endif
1495 1495
1496 if (!ACCEL(dinfo, info) || dinfo->depth == 4) 1496 if (!ACCEL(dinfo, info) || dinfo->depth == 4) {
1497 return cfb_fillrect(info, rect); 1497 cfb_fillrect(info, rect);
1498 return;
1499 }
1498 1500
1499 if (rect->rop == ROP_COPY) 1501 if (rect->rop == ROP_COPY)
1500 rop = PAT_ROP_GXCOPY; 1502 rop = PAT_ROP_GXCOPY;
@@ -1521,8 +1523,10 @@ static void intelfb_copyarea(struct fb_info *info,
1521 DBG_MSG("intelfb_copyarea\n"); 1523 DBG_MSG("intelfb_copyarea\n");
1522#endif 1524#endif
1523 1525
1524 if (!ACCEL(dinfo, info) || dinfo->depth == 4) 1526 if (!ACCEL(dinfo, info) || dinfo->depth == 4) {
1525 return cfb_copyarea(info, region); 1527 cfb_copyarea(info, region);
1528 return;
1529 }
1526 1530
1527 intelfbhw_do_bitblt(dinfo, region->sx, region->sy, region->dx, 1531 intelfbhw_do_bitblt(dinfo, region->sx, region->sy, region->dx,
1528 region->dy, region->width, region->height, 1532 region->dy, region->width, region->height,
@@ -1540,8 +1544,10 @@ static void intelfb_imageblit(struct fb_info *info,
1540#endif 1544#endif
1541 1545
1542 if (!ACCEL(dinfo, info) || dinfo->depth == 4 1546 if (!ACCEL(dinfo, info) || dinfo->depth == 4
1543 || image->depth != 1) 1547 || image->depth != 1) {
1544 return cfb_imageblit(info, image); 1548 cfb_imageblit(info, image);
1549 return;
1550 }
1545 1551
1546 if (dinfo->depth != 8) { 1552 if (dinfo->depth != 8) {
1547 fgcolor = dinfo->pseudo_palette[image->fg_color]; 1553 fgcolor = dinfo->pseudo_palette[image->fg_color];
@@ -1554,8 +1560,10 @@ static void intelfb_imageblit(struct fb_info *info,
1554 if (!intelfbhw_do_drawglyph(dinfo, fgcolor, bgcolor, image->width, 1560 if (!intelfbhw_do_drawglyph(dinfo, fgcolor, bgcolor, image->width,
1555 image->height, image->data, 1561 image->height, image->data,
1556 image->dx, image->dy, 1562 image->dx, image->dy,
1557 dinfo->pitch, info->var.bits_per_pixel)) 1563 dinfo->pitch, info->var.bits_per_pixel)) {
1558 return cfb_imageblit(info, image); 1564 cfb_imageblit(info, image);
1565 return;
1566 }
1559} 1567}
1560 1568
1561static int intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor) 1569static int intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index d3c3af53a290..16186240c5f2 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -329,7 +329,7 @@ const struct fb_videomode vesa_modes[] = {
329 FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, 329 FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
330 FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA }, 330 FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
331 /* 17 1152x864-75 VESA */ 331 /* 17 1152x864-75 VESA */
332 { NULL, 75, 1153, 864, 9259, 256, 64, 32, 1, 128, 3, 332 { NULL, 75, 1152, 864, 9259, 256, 64, 32, 1, 128, 3,
333 FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, 333 FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
334 FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA }, 334 FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
335 /* 18 1280x960-60 VESA */ 335 /* 18 1280x960-60 VESA */
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index bfb802d26d5a..588527a254c2 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -1453,7 +1453,8 @@ neo2200_imageblit(struct fb_info *info, const struct fb_image *image)
1453 * is less than 16 bits wide. This is due to insufficient 1453 * is less than 16 bits wide. This is due to insufficient
1454 * padding when writing the image. We need to adjust 1454 * padding when writing the image. We need to adjust
1455 * struct fb_pixmap. Not yet done. */ 1455 * struct fb_pixmap. Not yet done. */
1456 return cfb_imageblit(info, image); 1456 cfb_imageblit(info, image);
1457 return;
1457 } 1458 }
1458 bltCntl_flags = NEO_BC0_SRC_MONO; 1459 bltCntl_flags = NEO_BC0_SRC_MONO;
1459 } else if (image->depth == info->var.bits_per_pixel) { 1460 } else if (image->depth == info->var.bits_per_pixel) {
@@ -1461,7 +1462,8 @@ neo2200_imageblit(struct fb_info *info, const struct fb_image *image)
1461 } else { 1462 } else {
1462 /* We don't currently support hardware acceleration if image 1463 /* We don't currently support hardware acceleration if image
1463 * depth is different from display */ 1464 * depth is different from display */
1464 return cfb_imageblit(info, image); 1465 cfb_imageblit(info, image);
1466 return;
1465 } 1467 }
1466 1468
1467 switch (info->var.bits_per_pixel) { 1469 switch (info->var.bits_per_pixel) {
diff --git a/drivers/video/nvidia/nv_accel.c b/drivers/video/nvidia/nv_accel.c
index fa4821c5572b..ad6472a894ea 100644
--- a/drivers/video/nvidia/nv_accel.c
+++ b/drivers/video/nvidia/nv_accel.c
@@ -300,8 +300,10 @@ void nvidiafb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
300 if (info->state != FBINFO_STATE_RUNNING) 300 if (info->state != FBINFO_STATE_RUNNING)
301 return; 301 return;
302 302
303 if (par->lockup) 303 if (par->lockup) {
304 return cfb_copyarea(info, region); 304 cfb_copyarea(info, region);
305 return;
306 }
305 307
306 NVDmaStart(info, par, BLIT_POINT_SRC, 3); 308 NVDmaStart(info, par, BLIT_POINT_SRC, 3);
307 NVDmaNext(par, (region->sy << 16) | region->sx); 309 NVDmaNext(par, (region->sy << 16) | region->sx);
@@ -319,8 +321,10 @@ void nvidiafb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
319 if (info->state != FBINFO_STATE_RUNNING) 321 if (info->state != FBINFO_STATE_RUNNING)
320 return; 322 return;
321 323
322 if (par->lockup) 324 if (par->lockup) {
323 return cfb_fillrect(info, rect); 325 cfb_fillrect(info, rect);
326 return;
327 }
324 328
325 if (info->var.bits_per_pixel == 8) 329 if (info->var.bits_per_pixel == 8)
326 color = rect->color; 330 color = rect->color;
diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c
index 68089d1456c2..6666f45a2f8c 100644
--- a/drivers/video/pm3fb.c
+++ b/drivers/video/pm3fb.c
@@ -539,8 +539,10 @@ static void pm3fb_imageblit(struct fb_info *info, const struct fb_image *image)
539 bgx = par->palette[image->bg_color]; 539 bgx = par->palette[image->bg_color];
540 break; 540 break;
541 } 541 }
542 if (image->depth != 1) 542 if (image->depth != 1) {
543 return cfb_imageblit(info, image); 543 cfb_imageblit(info, image);
544 return;
545 }
544 546
545 if (info->var.bits_per_pixel == 8) { 547 if (info->var.bits_per_pixel == 8) {
546 fgx |= fgx << 8; 548 fgx |= fgx << 8;
diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index f94ae84a58cd..dcd98793d568 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -159,6 +159,9 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
159 break; 159 break;
160 160
161 case SM501_MEMF_PANEL: 161 case SM501_MEMF_PANEL:
162 if (size > inf->fbmem_len)
163 return -ENOMEM;
164
162 ptr = inf->fbmem_len - size; 165 ptr = inf->fbmem_len - size;
163 fbi = inf->fb[HEAD_CRT]; 166 fbi = inf->fb[HEAD_CRT];
164 167
@@ -172,9 +175,6 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem,
172 if (fbi && ptr < fbi->fix.smem_len) 175 if (fbi && ptr < fbi->fix.smem_len)
173 return -ENOMEM; 176 return -ENOMEM;
174 177
175 if (ptr < 0)
176 return -ENOMEM;
177
178 break; 178 break;
179 179
180 case SM501_MEMF_CRT: 180 case SM501_MEMF_CRT:
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index e21fe5b6f9ff..37b433a08ce8 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -870,8 +870,10 @@ static void viafb_fillrect(struct fb_info *info,
870 u32 col = 0, rop = 0; 870 u32 col = 0, rop = 0;
871 int pitch; 871 int pitch;
872 872
873 if (!viafb_accel) 873 if (!viafb_accel) {
874 return cfb_fillrect(info, rect); 874 cfb_fillrect(info, rect);
875 return;
876 }
875 877
876 if (!rect->width || !rect->height) 878 if (!rect->width || !rect->height)
877 return; 879 return;
@@ -937,8 +939,10 @@ static void viafb_copyarea(struct fb_info *info,
937 939
938 DEBUG_MSG(KERN_INFO "viafb_copyarea!!\n"); 940 DEBUG_MSG(KERN_INFO "viafb_copyarea!!\n");
939 941
940 if (!viafb_accel) 942 if (!viafb_accel) {
941 return cfb_copyarea(info, area); 943 cfb_copyarea(info, area);
944 return;
945 }
942 946
943 if (!area->width || !area->height) 947 if (!area->width || !area->height)
944 return; 948 return;
@@ -994,8 +998,10 @@ static void viafb_imageblit(struct fb_info *info,
994 int i; 998 int i;
995 int pitch; 999 int pitch;
996 1000
997 if (!viafb_accel) 1001 if (!viafb_accel) {
998 return cfb_imageblit(info, image); 1002 cfb_imageblit(info, image);
1003 return;
1004 }
999 1005
1000 udata = (u32 *) image->data; 1006 udata = (u32 *) image->data;
1001 1007
diff --git a/firmware/dsp56k/bootstrap.asm b/firmware/dsp56k/bootstrap.asm
index 10d891929cd3..a411047e6db3 100644
--- a/firmware/dsp56k/bootstrap.asm
+++ b/firmware/dsp56k/bootstrap.asm
@@ -51,19 +51,19 @@ start jmp <$40
51 ; Copy DSP program control 51 ; Copy DSP program control
52 move #real,r0 52 move #real,r0
53 move #upload,r1 53 move #upload,r1
54 do #upload_end-upload,<_copy 54 do #upload_end-upload,_copy
55 move P:(r0)+,x0 55 movem P:(r0)+,x0
56 move x0,P:(r1)+ 56 movem x0,P:(r1)+
57_copy movep #>4,X:<<M_HCR 57_copy movep #4,X:<<M_HCR
58 movep #>$c00,X:<<M_IPR 58 movep #$c00,X:<<M_IPR
59 and #<$fe,mr 59 and #<$fe,mr
60 jmp upload 60 jmp upload
61 61
62real 62real
63 org P:$7ea9 63 org P:$7ea9
64upload 64upload
65 movep #>1,X:<<M_PBC 65 movep #1,X:<<M_PBC
66 movep #>0,X:<<M_BCR 66 movep #0,X:<<M_BCR
67 67
68next jclr #0,X:<<M_HSR,* 68next jclr #0,X:<<M_HSR,*
69 movep X:<<M_HRX,A 69 movep X:<<M_HRX,A
@@ -81,18 +81,18 @@ _get_length
81 cmp x0,A 81 cmp x0,A
82 jeq load_Y 82 jeq load_Y
83 83
84load_P do y0,_load 84load_P do y0,_load_P
85 jclr #0,X:<<M_HSR,* 85 jclr #0,X:<<M_HSR,*
86 movep X:<<M_HRX,P:(r0)+ 86 movep X:<<M_HRX,P:(r0)+
87_load jmp next 87_load_P jmp next
88load_X do y0,_load 88load_X do y0,_load_X
89 jclr #0,X:<<M_HSR,* 89 jclr #0,X:<<M_HSR,*
90 movep X:<<M_HRX,X:(r0)+ 90 movep X:<<M_HRX,X:(r0)+
91_load jmp next 91_load_X jmp next
92load_Y do y0,_load 92load_Y do y0,_load_Y
93 jclr #0,X:<<M_HSR,* 93 jclr #0,X:<<M_HSR,*
94 movep X:<<M_HRX,Y:(r0)+ 94 movep X:<<M_HRX,Y:(r0)+
95_load jmp next 95_load_Y jmp next
96 96
97upload_end 97upload_end
98 end 98 end
diff --git a/fs/Kconfig b/fs/Kconfig
index f9b6e2979aaa..32883589ee54 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -721,7 +721,20 @@ config CONFIGFS_FS
721 721
722endmenu 722endmenu
723 723
724menu "Miscellaneous filesystems" 724menuconfig MISC_FILESYSTEMS
725 bool "Miscellaneous filesystems"
726 default y
727 ---help---
728 Say Y here to get to see options for various miscellaneous
729 filesystems, such as filesystems that came from other
730 operating systems.
731
732 This option alone does not add any kernel code.
733
734 If you say N, all options in this submenu will be skipped and
735 disabled; if unsure, say Y here.
736
737if MISC_FILESYSTEMS
725 738
726config ADFS_FS 739config ADFS_FS
727 tristate "ADFS file system support (EXPERIMENTAL)" 740 tristate "ADFS file system support (EXPERIMENTAL)"
@@ -1091,7 +1104,7 @@ config UFS_DEBUG
1091 Y here. This will result in _many_ additional debugging messages to be 1104 Y here. This will result in _many_ additional debugging messages to be
1092 written to the system log. 1105 written to the system log.
1093 1106
1094endmenu 1107endif # MISC_FILESYSTEMS
1095 1108
1096menuconfig NETWORK_FILESYSTEMS 1109menuconfig NETWORK_FILESYSTEMS
1097 bool "Network File Systems" 1110 bool "Network File Systems"
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index e0f16da00e54..a76803108d06 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -25,8 +25,6 @@
25#define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) 25#define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION)
26#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) 26#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11)
27 27
28#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
29
30#include <linux/kernel.h> 28#include <linux/kernel.h>
31#include <linux/slab.h> 29#include <linux/slab.h>
32#include <linux/time.h> 30#include <linux/time.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 63b7c7afe8df..025e105bffea 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -124,7 +124,7 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
124 124
125/* 125/*
126 * Check sanity of parameter control fields and if a path is present 126 * Check sanity of parameter control fields and if a path is present
127 * check that it has a "/" and is terminated. 127 * check that it is terminated and contains at least one "/".
128 */ 128 */
129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) 129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
130{ 130{
@@ -138,15 +138,16 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
138 } 138 }
139 139
140 if (param->size > sizeof(*param)) { 140 if (param->size > sizeof(*param)) {
141 err = check_name(param->path); 141 err = invalid_str(param->path,
142 (void *) ((size_t) param + param->size));
142 if (err) { 143 if (err) {
143 AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", 144 AUTOFS_WARN(
144 cmd); 145 "path string terminator missing for cmd(0x%08x)",
146 cmd);
145 goto out; 147 goto out;
146 } 148 }
147 149
148 err = invalid_str(param->path, 150 err = check_name(param->path);
149 (void *) ((size_t) param + param->size));
150 if (err) { 151 if (err) {
151 AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", 152 AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
152 cmd); 153 cmd);
@@ -180,7 +181,7 @@ static int autofs_dev_ioctl_protover(struct file *fp,
180 struct autofs_sb_info *sbi, 181 struct autofs_sb_info *sbi,
181 struct autofs_dev_ioctl *param) 182 struct autofs_dev_ioctl *param)
182{ 183{
183 param->arg1 = sbi->version; 184 param->protover.version = sbi->version;
184 return 0; 185 return 0;
185} 186}
186 187
@@ -189,7 +190,7 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
189 struct autofs_sb_info *sbi, 190 struct autofs_sb_info *sbi,
190 struct autofs_dev_ioctl *param) 191 struct autofs_dev_ioctl *param)
191{ 192{
192 param->arg1 = sbi->sub_version; 193 param->protosubver.sub_version = sbi->sub_version;
193 return 0; 194 return 0;
194} 195}
195 196
@@ -335,13 +336,13 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
335 int err, fd; 336 int err, fd;
336 337
337 /* param->path has already been checked */ 338 /* param->path has already been checked */
338 if (!param->arg1) 339 if (!param->openmount.devid)
339 return -EINVAL; 340 return -EINVAL;
340 341
341 param->ioctlfd = -1; 342 param->ioctlfd = -1;
342 343
343 path = param->path; 344 path = param->path;
344 devid = param->arg1; 345 devid = param->openmount.devid;
345 346
346 err = 0; 347 err = 0;
347 fd = autofs_dev_ioctl_open_mountpoint(path, devid); 348 fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -373,7 +374,7 @@ static int autofs_dev_ioctl_ready(struct file *fp,
373{ 374{
374 autofs_wqt_t token; 375 autofs_wqt_t token;
375 376
376 token = (autofs_wqt_t) param->arg1; 377 token = (autofs_wqt_t) param->ready.token;
377 return autofs4_wait_release(sbi, token, 0); 378 return autofs4_wait_release(sbi, token, 0);
378} 379}
379 380
@@ -388,8 +389,8 @@ static int autofs_dev_ioctl_fail(struct file *fp,
388 autofs_wqt_t token; 389 autofs_wqt_t token;
389 int status; 390 int status;
390 391
391 token = (autofs_wqt_t) param->arg1; 392 token = (autofs_wqt_t) param->fail.token;
392 status = param->arg2 ? param->arg2 : -ENOENT; 393 status = param->fail.status ? param->fail.status : -ENOENT;
393 return autofs4_wait_release(sbi, token, status); 394 return autofs4_wait_release(sbi, token, status);
394} 395}
395 396
@@ -412,10 +413,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
412 int pipefd; 413 int pipefd;
413 int err = 0; 414 int err = 0;
414 415
415 if (param->arg1 == -1) 416 if (param->setpipefd.pipefd == -1)
416 return -EINVAL; 417 return -EINVAL;
417 418
418 pipefd = param->arg1; 419 pipefd = param->setpipefd.pipefd;
419 420
420 mutex_lock(&sbi->wq_mutex); 421 mutex_lock(&sbi->wq_mutex);
421 if (!sbi->catatonic) { 422 if (!sbi->catatonic) {
@@ -457,8 +458,8 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
457{ 458{
458 unsigned long timeout; 459 unsigned long timeout;
459 460
460 timeout = param->arg1; 461 timeout = param->timeout.timeout;
461 param->arg1 = sbi->exp_timeout / HZ; 462 param->timeout.timeout = sbi->exp_timeout / HZ;
462 sbi->exp_timeout = timeout * HZ; 463 sbi->exp_timeout = timeout * HZ;
463 return 0; 464 return 0;
464} 465}
@@ -489,7 +490,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
489 path = param->path; 490 path = param->path;
490 devid = sbi->sb->s_dev; 491 devid = sbi->sb->s_dev;
491 492
492 param->arg1 = param->arg2 = -1; 493 param->requester.uid = param->requester.gid = -1;
493 494
494 /* Get nameidata of the parent directory */ 495 /* Get nameidata of the parent directory */
495 err = path_lookup(path, LOOKUP_PARENT, &nd); 496 err = path_lookup(path, LOOKUP_PARENT, &nd);
@@ -505,8 +506,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
505 err = 0; 506 err = 0;
506 autofs4_expire_wait(nd.path.dentry); 507 autofs4_expire_wait(nd.path.dentry);
507 spin_lock(&sbi->fs_lock); 508 spin_lock(&sbi->fs_lock);
508 param->arg1 = ino->uid; 509 param->requester.uid = ino->uid;
509 param->arg2 = ino->gid; 510 param->requester.gid = ino->gid;
510 spin_unlock(&sbi->fs_lock); 511 spin_unlock(&sbi->fs_lock);
511 } 512 }
512 513
@@ -529,10 +530,10 @@ static int autofs_dev_ioctl_expire(struct file *fp,
529 int err = -EAGAIN; 530 int err = -EAGAIN;
530 int how; 531 int how;
531 532
532 how = param->arg1; 533 how = param->expire.how;
533 mnt = fp->f_path.mnt; 534 mnt = fp->f_path.mnt;
534 535
535 if (sbi->type & AUTOFS_TYPE_TRIGGER) 536 if (autofs_type_trigger(sbi->type))
536 dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); 537 dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
537 else 538 else
538 dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); 539 dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
@@ -565,9 +566,9 @@ static int autofs_dev_ioctl_askumount(struct file *fp,
565 struct autofs_sb_info *sbi, 566 struct autofs_sb_info *sbi,
566 struct autofs_dev_ioctl *param) 567 struct autofs_dev_ioctl *param)
567{ 568{
568 param->arg1 = 0; 569 param->askumount.may_umount = 0;
569 if (may_umount(fp->f_path.mnt)) 570 if (may_umount(fp->f_path.mnt))
570 param->arg1 = 1; 571 param->askumount.may_umount = 1;
571 return 0; 572 return 0;
572} 573}
573 574
@@ -600,6 +601,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
600 struct nameidata nd; 601 struct nameidata nd;
601 const char *path; 602 const char *path;
602 unsigned int type; 603 unsigned int type;
604 unsigned int devid, magic;
603 int err = -ENOENT; 605 int err = -ENOENT;
604 606
605 if (param->size <= sizeof(*param)) { 607 if (param->size <= sizeof(*param)) {
@@ -608,13 +610,13 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
608 } 610 }
609 611
610 path = param->path; 612 path = param->path;
611 type = param->arg1; 613 type = param->ismountpoint.in.type;
612 614
613 param->arg1 = 0; 615 param->ismountpoint.out.devid = devid = 0;
614 param->arg2 = 0; 616 param->ismountpoint.out.magic = magic = 0;
615 617
616 if (!fp || param->ioctlfd == -1) { 618 if (!fp || param->ioctlfd == -1) {
617 if (type == AUTOFS_TYPE_ANY) { 619 if (autofs_type_any(type)) {
618 struct super_block *sb; 620 struct super_block *sb;
619 621
620 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 622 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
@@ -622,7 +624,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
622 goto out; 624 goto out;
623 625
624 sb = nd.path.dentry->d_sb; 626 sb = nd.path.dentry->d_sb;
625 param->arg1 = new_encode_dev(sb->s_dev); 627 devid = new_encode_dev(sb->s_dev);
626 } else { 628 } else {
627 struct autofs_info *ino; 629 struct autofs_info *ino;
628 630
@@ -635,38 +637,41 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
635 goto out_release; 637 goto out_release;
636 638
637 ino = autofs4_dentry_ino(nd.path.dentry); 639 ino = autofs4_dentry_ino(nd.path.dentry);
638 param->arg1 = autofs4_get_dev(ino->sbi); 640 devid = autofs4_get_dev(ino->sbi);
639 } 641 }
640 642
641 err = 0; 643 err = 0;
642 if (nd.path.dentry->d_inode && 644 if (nd.path.dentry->d_inode &&
643 nd.path.mnt->mnt_root == nd.path.dentry) { 645 nd.path.mnt->mnt_root == nd.path.dentry) {
644 err = 1; 646 err = 1;
645 param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic; 647 magic = nd.path.dentry->d_inode->i_sb->s_magic;
646 } 648 }
647 } else { 649 } else {
648 dev_t devid = new_encode_dev(sbi->sb->s_dev); 650 dev_t dev = autofs4_get_dev(sbi);
649 651
650 err = path_lookup(path, LOOKUP_PARENT, &nd); 652 err = path_lookup(path, LOOKUP_PARENT, &nd);
651 if (err) 653 if (err)
652 goto out; 654 goto out;
653 655
654 err = autofs_dev_ioctl_find_super(&nd, devid); 656 err = autofs_dev_ioctl_find_super(&nd, dev);
655 if (err) 657 if (err)
656 goto out_release; 658 goto out_release;
657 659
658 param->arg1 = autofs4_get_dev(sbi); 660 devid = dev;
659 661
660 err = have_submounts(nd.path.dentry); 662 err = have_submounts(nd.path.dentry);
661 663
662 if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { 664 if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
663 if (follow_down(&nd.path.mnt, &nd.path.dentry)) { 665 if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
664 struct inode *inode = nd.path.dentry->d_inode; 666 struct inode *inode = nd.path.dentry->d_inode;
665 param->arg2 = inode->i_sb->s_magic; 667 magic = inode->i_sb->s_magic;
666 } 668 }
667 } 669 }
668 } 670 }
669 671
672 param->ismountpoint.out.devid = devid;
673 param->ismountpoint.out.magic = magic;
674
670out_release: 675out_release:
671 path_put(&nd.path); 676 path_put(&nd.path);
672out: 677out:
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4b6fb3f628c0..e3bd50776f9e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -63,7 +63,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
63 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 63 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
64 64
65 /* This is an autofs submount, we can't expire it */ 65 /* This is an autofs submount, we can't expire it */
66 if (sbi->type == AUTOFS_TYPE_INDIRECT) 66 if (autofs_type_indirect(sbi->type))
67 goto done; 67 goto done;
68 68
69 /* 69 /*
@@ -490,7 +490,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
490 if (arg && get_user(do_now, arg)) 490 if (arg && get_user(do_now, arg))
491 return -EFAULT; 491 return -EFAULT;
492 492
493 if (sbi->type & AUTOFS_TYPE_TRIGGER) 493 if (autofs_type_trigger(sbi->type))
494 dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); 494 dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
495 else 495 else
496 dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); 496 dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cfc23e53b6f4..716e12b627b2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -197,9 +197,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
197 seq_printf(m, ",minproto=%d", sbi->min_proto); 197 seq_printf(m, ",minproto=%d", sbi->min_proto);
198 seq_printf(m, ",maxproto=%d", sbi->max_proto); 198 seq_printf(m, ",maxproto=%d", sbi->max_proto);
199 199
200 if (sbi->type & AUTOFS_TYPE_OFFSET) 200 if (autofs_type_offset(sbi->type))
201 seq_printf(m, ",offset"); 201 seq_printf(m, ",offset");
202 else if (sbi->type & AUTOFS_TYPE_DIRECT) 202 else if (autofs_type_direct(sbi->type))
203 seq_printf(m, ",direct"); 203 seq_printf(m, ",direct");
204 else 204 else
205 seq_printf(m, ",indirect"); 205 seq_printf(m, ",indirect");
@@ -284,13 +284,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
284 *maxproto = option; 284 *maxproto = option;
285 break; 285 break;
286 case Opt_indirect: 286 case Opt_indirect:
287 *type = AUTOFS_TYPE_INDIRECT; 287 set_autofs_type_indirect(type);
288 break; 288 break;
289 case Opt_direct: 289 case Opt_direct:
290 *type = AUTOFS_TYPE_DIRECT; 290 set_autofs_type_direct(type);
291 break; 291 break;
292 case Opt_offset: 292 case Opt_offset:
293 *type = AUTOFS_TYPE_OFFSET; 293 set_autofs_type_offset(type);
294 break; 294 break;
295 default: 295 default:
296 return 1; 296 return 1;
@@ -338,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
338 sbi->sb = s; 338 sbi->sb = s;
339 sbi->version = 0; 339 sbi->version = 0;
340 sbi->sub_version = 0; 340 sbi->sub_version = 0;
341 sbi->type = AUTOFS_TYPE_INDIRECT; 341 set_autofs_type_indirect(&sbi->type);
342 sbi->min_proto = 0; 342 sbi->min_proto = 0;
343 sbi->max_proto = 0; 343 sbi->max_proto = 0;
344 mutex_init(&sbi->wq_mutex); 344 mutex_init(&sbi->wq_mutex);
@@ -380,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
380 } 380 }
381 381
382 root_inode->i_fop = &autofs4_root_operations; 382 root_inode->i_fop = &autofs4_root_operations;
383 root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ? 383 root_inode->i_op = autofs_type_trigger(sbi->type) ?
384 &autofs4_direct_root_inode_operations : 384 &autofs4_direct_root_inode_operations :
385 &autofs4_indirect_root_inode_operations; 385 &autofs4_indirect_root_inode_operations;
386 386
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e02cc8ae5eb3..eeb246845909 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
337 * is very similar for indirect mounts except only dentrys 337 * is very similar for indirect mounts except only dentrys
338 * in the root of the autofs file system may be negative. 338 * in the root of the autofs file system may be negative.
339 */ 339 */
340 if (sbi->type & AUTOFS_TYPE_TRIGGER) 340 if (autofs_type_trigger(sbi->type))
341 return -ENOENT; 341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent)) 342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT; 343 return -ENOENT;
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
348 return -ENOMEM; 348 return -ENOMEM;
349 349
350 /* If this is a direct mount request create a dummy name */ 350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER) 351 if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type))
352 qstr.len = sprintf(name, "%p", dentry); 352 qstr.len = sprintf(name, "%p", dentry);
353 else { 353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name); 354 qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
406 type = autofs_ptype_expire_multi; 406 type = autofs_ptype_expire_multi;
407 } else { 407 } else {
408 if (notify == NFY_MOUNT) 408 if (notify == NFY_MOUNT)
409 type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? 409 type = autofs_type_trigger(sbi->type) ?
410 autofs_ptype_missing_direct : 410 autofs_ptype_missing_direct :
411 autofs_ptype_missing_indirect; 411 autofs_ptype_missing_indirect;
412 else 412 else
413 type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? 413 type = autofs_type_trigger(sbi->type) ?
414 autofs_ptype_expire_direct : 414 autofs_ptype_expire_direct :
415 autofs_ptype_expire_indirect; 415 autofs_ptype_expire_indirect;
416 } 416 }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 0ed57b5ee012..cc4062d12ca2 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -213,6 +213,9 @@ static void bfs_put_super(struct super_block *s)
213{ 213{
214 struct bfs_sb_info *info = BFS_SB(s); 214 struct bfs_sb_info *info = BFS_SB(s);
215 215
216 if (!info)
217 return;
218
216 brelse(info->si_sbh); 219 brelse(info->si_sbh);
217 mutex_destroy(&info->bfs_lock); 220 mutex_destroy(&info->bfs_lock);
218 kfree(info->si_imap); 221 kfree(info->si_imap);
@@ -327,6 +330,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
327 unsigned i, imap_len; 330 unsigned i, imap_len;
328 struct bfs_sb_info *info; 331 struct bfs_sb_info *info;
329 long ret = -EINVAL; 332 long ret = -EINVAL;
333 unsigned long i_sblock, i_eblock, i_eoff, s_size;
330 334
331 info = kzalloc(sizeof(*info), GFP_KERNEL); 335 info = kzalloc(sizeof(*info), GFP_KERNEL);
332 if (!info) 336 if (!info)
@@ -350,6 +354,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
350 354
351 s->s_magic = BFS_MAGIC; 355 s->s_magic = BFS_MAGIC;
352 info->si_sbh = bh; 356 info->si_sbh = bh;
357
358 if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
359 printf("Superblock is corrupted\n");
360 goto out;
361 }
362
353 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / 363 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
354 sizeof(struct bfs_inode) 364 sizeof(struct bfs_inode)
355 + BFS_ROOT_INO - 1; 365 + BFS_ROOT_INO - 1;
@@ -380,6 +390,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
380 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; 390 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
381 info->si_freei = 0; 391 info->si_freei = 0;
382 info->si_lf_eblk = 0; 392 info->si_lf_eblk = 0;
393
394 /* can we read the last block? */
395 bh = sb_bread(s, info->si_blocks - 1);
396 if (!bh) {
397 printf("Last block not available: %lu\n", info->si_blocks - 1);
398 iput(inode);
399 ret = -EIO;
400 kfree(info->si_imap);
401 goto out;
402 }
403 brelse(bh);
404
383 bh = NULL; 405 bh = NULL;
384 for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) { 406 for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) {
385 struct bfs_inode *di; 407 struct bfs_inode *di;
@@ -397,6 +419,29 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
397 419
398 di = (struct bfs_inode *)bh->b_data + off; 420 di = (struct bfs_inode *)bh->b_data + off;
399 421
422 /* test if filesystem is not corrupted */
423
424 i_eoff = le32_to_cpu(di->i_eoffset);
425 i_sblock = le32_to_cpu(di->i_sblock);
426 i_eblock = le32_to_cpu(di->i_eblock);
427 s_size = le32_to_cpu(bfs_sb->s_end);
428
429 if (i_sblock > info->si_blocks ||
430 i_eblock > info->si_blocks ||
431 i_sblock > i_eblock ||
432 i_eoff > s_size ||
433 i_sblock * BFS_BSIZE > i_eoff) {
434
435 printf("Inode 0x%08x corrupted\n", i);
436
437 brelse(bh);
438 s->s_root = NULL;
439 kfree(info->si_imap);
440 kfree(info);
441 s->s_fs_info = NULL;
442 return -EIO;
443 }
444
400 if (!di->i_ino) { 445 if (!di->i_ino) {
401 info->si_freei++; 446 info->si_freei++;
402 continue; 447 continue;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1158cb4fbd6..c4e83537ead7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -649,7 +649,7 @@ static const struct file_operations bm_register_operations = {
649static ssize_t 649static ssize_t
650bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) 650bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
651{ 651{
652 char *s = enabled ? "enabled" : "disabled"; 652 char *s = enabled ? "enabled\n" : "disabled\n";
653 653
654 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); 654 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
655} 655}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 349a26c10001..b957717e25ab 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1262 1262
1263/** 1263/**
1264 * lookup_bdev - lookup a struct block_device by name 1264 * lookup_bdev - lookup a struct block_device by name
1265 * @path: special file representing the block device 1265 * @pathname: special file representing the block device
1266 * 1266 *
1267 * Get a reference to the blockdevice at @pathname in the current 1267 * Get a reference to the blockdevice at @pathname in the current
1268 * namespace if possible and return it. Return ERR_PTR(error) 1268 * namespace if possible and return it. Return ERR_PTR(error)
diff --git a/fs/buffer.c b/fs/buffer.c
index a13f09b696f7..c26da785938a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2022,7 +2022,6 @@ int block_write_begin(struct file *file, struct address_space *mapping,
2022 if (pos + len > inode->i_size) 2022 if (pos + len > inode->i_size)
2023 vmtruncate(inode, inode->i_size); 2023 vmtruncate(inode, inode->i_size);
2024 } 2024 }
2025 goto out;
2026 } 2025 }
2027 2026
2028out: 2027out:
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 700697a72618..38f71222a552 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -120,7 +120,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
120 cd->major = major; 120 cd->major = major;
121 cd->baseminor = baseminor; 121 cd->baseminor = baseminor;
122 cd->minorct = minorct; 122 cd->minorct = minorct;
123 strncpy(cd->name,name, 64); 123 strlcpy(cd->name, name, sizeof(cd->name));
124 124
125 i = major_to_index(major); 125 i = major_to_index(major);
126 126
diff --git a/fs/compat.c b/fs/compat.c
index d1ece79b6411..30f2faa22f5c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1187,6 +1187,9 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsign
1187 ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); 1187 ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
1188 1188
1189out: 1189out:
1190 if (ret > 0)
1191 add_rchar(current, ret);
1192 inc_syscr(current);
1190 fput(file); 1193 fput(file);
1191 return ret; 1194 return ret;
1192} 1195}
@@ -1210,6 +1213,9 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsig
1210 ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); 1213 ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
1211 1214
1212out: 1215out:
1216 if (ret > 0)
1217 add_wchar(current, ret);
1218 inc_syscw(current);
1213 fput(file); 1219 fput(file);
1214 return ret; 1220 return ret;
1215} 1221}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index af0558dbe8b7..b6d43908ff7a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1209,6 +1209,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1209 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1209 retval = direct_io_worker(rw, iocb, inode, iov, offset,
1210 nr_segs, blkbits, get_block, end_io, dio); 1210 nr_segs, blkbits, get_block, end_io, dio);
1211 1211
1212 /*
1213 * In case of error extending write may have instantiated a few
1214 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1215 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
1216 * it's own meaner.
1217 */
1218 if (unlikely(retval < 0 && (rw & WRITE))) {
1219 loff_t isize = i_size_read(inode);
1220
1221 if (end > isize && dio_lock_type == DIO_LOCKING)
1222 vmtruncate(inode, isize);
1223 }
1224
1212 if (rw == READ && dio_lock_type == DIO_LOCKING) 1225 if (rw == READ && dio_lock_type == DIO_LOCKING)
1213 release_i_mutex = 0; 1226 release_i_mutex = 0;
1214 1227
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 6046239465a1..c01e043670e2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -175,8 +175,8 @@ out:
175 * 175 *
176 * Returns zero on success; non-zero on error. 176 * Returns zero on success; non-zero on error.
177 */ 177 */
178static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, 178int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
179 loff_t offset) 179 loff_t offset)
180{ 180{
181 int rc = 0; 181 int rc = 0;
182 char dst[MD5_DIGEST_SIZE]; 182 char dst[MD5_DIGEST_SIZE];
@@ -924,6 +924,15 @@ static void ecryptfs_copy_mount_wide_flags_to_inode_flags(
924 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; 924 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
925 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 925 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
926 crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED; 926 crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
927 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
928 crypt_stat->flags |= ECRYPTFS_ENCRYPT_FILENAMES;
929 if (mount_crypt_stat->flags
930 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)
931 crypt_stat->flags |= ECRYPTFS_ENCFN_USE_MOUNT_FNEK;
932 else if (mount_crypt_stat->flags
933 & ECRYPTFS_GLOBAL_ENCFN_USE_FEK)
934 crypt_stat->flags |= ECRYPTFS_ENCFN_USE_FEK;
935 }
927} 936}
928 937
929static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( 938static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
@@ -1060,7 +1069,8 @@ struct ecryptfs_flag_map_elem {
1060static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { 1069static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
1061 {0x00000001, ECRYPTFS_ENABLE_HMAC}, 1070 {0x00000001, ECRYPTFS_ENABLE_HMAC},
1062 {0x00000002, ECRYPTFS_ENCRYPTED}, 1071 {0x00000002, ECRYPTFS_ENCRYPTED},
1063 {0x00000004, ECRYPTFS_METADATA_IN_XATTR} 1072 {0x00000004, ECRYPTFS_METADATA_IN_XATTR},
1073 {0x00000008, ECRYPTFS_ENCRYPT_FILENAMES}
1064}; 1074};
1065 1075
1066/** 1076/**
@@ -1149,19 +1159,20 @@ ecryptfs_cipher_code_str_map[] = {
1149 1159
1150/** 1160/**
1151 * ecryptfs_code_for_cipher_string 1161 * ecryptfs_code_for_cipher_string
1152 * @crypt_stat: The cryptographic context 1162 * @cipher_name: The string alias for the cipher
1163 * @key_bytes: Length of key in bytes; used for AES code selection
1153 * 1164 *
1154 * Returns zero on no match, or the cipher code on match 1165 * Returns zero on no match, or the cipher code on match
1155 */ 1166 */
1156u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) 1167u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes)
1157{ 1168{
1158 int i; 1169 int i;
1159 u8 code = 0; 1170 u8 code = 0;
1160 struct ecryptfs_cipher_code_str_map_elem *map = 1171 struct ecryptfs_cipher_code_str_map_elem *map =
1161 ecryptfs_cipher_code_str_map; 1172 ecryptfs_cipher_code_str_map;
1162 1173
1163 if (strcmp(crypt_stat->cipher, "aes") == 0) { 1174 if (strcmp(cipher_name, "aes") == 0) {
1164 switch (crypt_stat->key_size) { 1175 switch (key_bytes) {
1165 case 16: 1176 case 16:
1166 code = RFC2440_CIPHER_AES_128; 1177 code = RFC2440_CIPHER_AES_128;
1167 break; 1178 break;
@@ -1173,7 +1184,7 @@ u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
1173 } 1184 }
1174 } else { 1185 } else {
1175 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) 1186 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
1176 if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){ 1187 if (strcmp(cipher_name, map[i].cipher_str) == 0) {
1177 code = map[i].cipher_code; 1188 code = map[i].cipher_code;
1178 break; 1189 break;
1179 } 1190 }
@@ -1212,6 +1223,8 @@ int ecryptfs_read_and_validate_header_region(char *data,
1212 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 1223 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
1213 int rc; 1224 int rc;
1214 1225
1226 if (crypt_stat->extent_size == 0)
1227 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
1215 rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, 1228 rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
1216 ecryptfs_inode); 1229 ecryptfs_inode);
1217 if (rc) { 1230 if (rc) {
@@ -1221,7 +1234,6 @@ int ecryptfs_read_and_validate_header_region(char *data,
1221 } 1234 }
1222 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1235 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
1223 rc = -EINVAL; 1236 rc = -EINVAL;
1224 ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n");
1225 } 1237 }
1226out: 1238out:
1227 return rc; 1239 return rc;
@@ -1628,95 +1640,95 @@ out:
1628} 1640}
1629 1641
1630/** 1642/**
1631 * ecryptfs_encode_filename - converts a plaintext file name to cipher text 1643 * ecryptfs_encrypt_filename - encrypt filename
1632 * @crypt_stat: The crypt_stat struct associated with the file anem to encode
1633 * @name: The plaintext name
1634 * @length: The length of the plaintext
1635 * @encoded_name: The encypted name
1636 * 1644 *
1637 * Encrypts and encodes a filename into something that constitutes a 1645 * CBC-encrypts the filename. We do not want to encrypt the same
1638 * valid filename for a filesystem, with printable characters. 1646 * filename with the same key and IV, which may happen with hard
1647 * links, so we prepend random bits to each filename.
1639 * 1648 *
1640 * We assume that we have a properly initialized crypto context, 1649 * Returns zero on success; non-zero otherwise
1641 * pointed to by crypt_stat->tfm.
1642 *
1643 * TODO: Implement filename decoding and decryption here, in place of
1644 * memcpy. We are keeping the framework around for now to (1)
1645 * facilitate testing of the components needed to implement filename
1646 * encryption and (2) to provide a code base from which other
1647 * developers in the community can easily implement this feature.
1648 *
1649 * Returns the length of encoded filename; negative if error
1650 */ 1650 */
1651int 1651static int
1652ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, 1652ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
1653 const char *name, int length, char **encoded_name) 1653 struct ecryptfs_crypt_stat *crypt_stat,
1654 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
1654{ 1655{
1655 int error = 0; 1656 int rc = 0;
1656 1657
1657 (*encoded_name) = kmalloc(length + 2, GFP_KERNEL); 1658 filename->encrypted_filename = NULL;
1658 if (!(*encoded_name)) { 1659 filename->encrypted_filename_size = 0;
1659 error = -ENOMEM; 1660 if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
1661 || (mount_crypt_stat && (mount_crypt_stat->flags
1662 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
1663 size_t packet_size;
1664 size_t remaining_bytes;
1665
1666 rc = ecryptfs_write_tag_70_packet(
1667 NULL, NULL,
1668 &filename->encrypted_filename_size,
1669 mount_crypt_stat, NULL,
1670 filename->filename_size);
1671 if (rc) {
1672 printk(KERN_ERR "%s: Error attempting to get packet "
1673 "size for tag 72; rc = [%d]\n", __func__,
1674 rc);
1675 filename->encrypted_filename_size = 0;
1676 goto out;
1677 }
1678 filename->encrypted_filename =
1679 kmalloc(filename->encrypted_filename_size, GFP_KERNEL);
1680 if (!filename->encrypted_filename) {
1681 printk(KERN_ERR "%s: Out of memory whilst attempting "
1682 "to kmalloc [%zd] bytes\n", __func__,
1683 filename->encrypted_filename_size);
1684 rc = -ENOMEM;
1685 goto out;
1686 }
1687 remaining_bytes = filename->encrypted_filename_size;
1688 rc = ecryptfs_write_tag_70_packet(filename->encrypted_filename,
1689 &remaining_bytes,
1690 &packet_size,
1691 mount_crypt_stat,
1692 filename->filename,
1693 filename->filename_size);
1694 if (rc) {
1695 printk(KERN_ERR "%s: Error attempting to generate "
1696 "tag 70 packet; rc = [%d]\n", __func__,
1697 rc);
1698 kfree(filename->encrypted_filename);
1699 filename->encrypted_filename = NULL;
1700 filename->encrypted_filename_size = 0;
1701 goto out;
1702 }
1703 filename->encrypted_filename_size = packet_size;
1704 } else {
1705 printk(KERN_ERR "%s: No support for requested filename "
1706 "encryption method in this release\n", __func__);
1707 rc = -ENOTSUPP;
1660 goto out; 1708 goto out;
1661 } 1709 }
1662 /* TODO: Filename encryption is a scheduled feature for a
1663 * future version of eCryptfs. This function is here only for
1664 * the purpose of providing a framework for other developers
1665 * to easily implement filename encryption. Hint: Replace this
1666 * memcpy() with a call to encrypt and encode the
1667 * filename, the set the length accordingly. */
1668 memcpy((void *)(*encoded_name), (void *)name, length);
1669 (*encoded_name)[length] = '\0';
1670 error = length + 1;
1671out: 1710out:
1672 return error; 1711 return rc;
1673} 1712}
1674 1713
1675/** 1714static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size,
1676 * ecryptfs_decode_filename - converts the cipher text name to plaintext 1715 const char *name, size_t name_size)
1677 * @crypt_stat: The crypt_stat struct associated with the file
1678 * @name: The filename in cipher text
1679 * @length: The length of the cipher text name
1680 * @decrypted_name: The plaintext name
1681 *
1682 * Decodes and decrypts the filename.
1683 *
1684 * We assume that we have a properly initialized crypto context,
1685 * pointed to by crypt_stat->tfm.
1686 *
1687 * TODO: Implement filename decoding and decryption here, in place of
1688 * memcpy. We are keeping the framework around for now to (1)
1689 * facilitate testing of the components needed to implement filename
1690 * encryption and (2) to provide a code base from which other
1691 * developers in the community can easily implement this feature.
1692 *
1693 * Returns the length of decoded filename; negative if error
1694 */
1695int
1696ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
1697 const char *name, int length, char **decrypted_name)
1698{ 1716{
1699 int error = 0; 1717 int rc = 0;
1700 1718
1701 (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL); 1719 (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL);
1702 if (!(*decrypted_name)) { 1720 if (!(*copied_name)) {
1703 error = -ENOMEM; 1721 rc = -ENOMEM;
1704 goto out; 1722 goto out;
1705 } 1723 }
1706 /* TODO: Filename encryption is a scheduled feature for a 1724 memcpy((void *)(*copied_name), (void *)name, name_size);
1707 * future version of eCryptfs. This function is here only for 1725 (*copied_name)[(name_size)] = '\0'; /* Only for convenience
1708 * the purpose of providing a framework for other developers
1709 * to easily implement filename encryption. Hint: Replace this
1710 * memcpy() with a call to decode and decrypt the
1711 * filename, the set the length accordingly. */
1712 memcpy((void *)(*decrypted_name), (void *)name, length);
1713 (*decrypted_name)[length + 1] = '\0'; /* Only for convenience
1714 * in printing out the 1726 * in printing out the
1715 * string in debug 1727 * string in debug
1716 * messages */ 1728 * messages */
1717 error = length; 1729 (*copied_name_size) = (name_size + 1);
1718out: 1730out:
1719 return error; 1731 return rc;
1720} 1732}
1721 1733
1722/** 1734/**
@@ -1740,7 +1752,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1740 *key_tfm = NULL; 1752 *key_tfm = NULL;
1741 if (*key_size > ECRYPTFS_MAX_KEY_BYTES) { 1753 if (*key_size > ECRYPTFS_MAX_KEY_BYTES) {
1742 rc = -EINVAL; 1754 rc = -EINVAL;
1743 printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum " 1755 printk(KERN_ERR "Requested key size is [%zd] bytes; maximum "
1744 "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES); 1756 "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
1745 goto out; 1757 goto out;
1746 } 1758 }
@@ -1765,7 +1777,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1765 get_random_bytes(dummy_key, *key_size); 1777 get_random_bytes(dummy_key, *key_size);
1766 rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); 1778 rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
1767 if (rc) { 1779 if (rc) {
1768 printk(KERN_ERR "Error attempting to set key of size [%Zd] for " 1780 printk(KERN_ERR "Error attempting to set key of size [%zd] for "
1769 "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); 1781 "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
1770 rc = -EINVAL; 1782 rc = -EINVAL;
1771 goto out; 1783 goto out;
@@ -1910,3 +1922,341 @@ out:
1910 mutex_unlock(&key_tfm_list_mutex); 1922 mutex_unlock(&key_tfm_list_mutex);
1911 return rc; 1923 return rc;
1912} 1924}
1925
1926/* 64 characters forming a 6-bit target field */
1927static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
1928 "EFGHIJKLMNOPQRST"
1929 "UVWXYZabcdefghij"
1930 "klmnopqrstuvwxyz");
1931
1932/* We could either offset on every reverse map or just pad some 0x00's
1933 * at the front here */
1934static const unsigned char filename_rev_map[] = {
1935 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
1936 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
1937 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
1938 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 31 */
1939 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 39 */
1940 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* 47 */
1941 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, /* 55 */
1942 0x0A, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 63 */
1943 0x00, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, /* 71 */
1944 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, /* 79 */
1945 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, /* 87 */
1946 0x23, 0x24, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */
1947 0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
1948 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
1949 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
1950 0x3D, 0x3E, 0x3F
1951};
1952
1953/**
1954 * ecryptfs_encode_for_filename
1955 * @dst: Destination location for encoded filename
1956 * @dst_size: Size of the encoded filename in bytes
1957 * @src: Source location for the filename to encode
1958 * @src_size: Size of the source in bytes
1959 */
1960void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
1961 unsigned char *src, size_t src_size)
1962{
1963 size_t num_blocks;
1964 size_t block_num = 0;
1965 size_t dst_offset = 0;
1966 unsigned char last_block[3];
1967
1968 if (src_size == 0) {
1969 (*dst_size) = 0;
1970 goto out;
1971 }
1972 num_blocks = (src_size / 3);
1973 if ((src_size % 3) == 0) {
1974 memcpy(last_block, (&src[src_size - 3]), 3);
1975 } else {
1976 num_blocks++;
1977 last_block[2] = 0x00;
1978 switch (src_size % 3) {
1979 case 1:
1980 last_block[0] = src[src_size - 1];
1981 last_block[1] = 0x00;
1982 break;
1983 case 2:
1984 last_block[0] = src[src_size - 2];
1985 last_block[1] = src[src_size - 1];
1986 }
1987 }
1988 (*dst_size) = (num_blocks * 4);
1989 if (!dst)
1990 goto out;
1991 while (block_num < num_blocks) {
1992 unsigned char *src_block;
1993 unsigned char dst_block[4];
1994
1995 if (block_num == (num_blocks - 1))
1996 src_block = last_block;
1997 else
1998 src_block = &src[block_num * 3];
1999 dst_block[0] = ((src_block[0] >> 2) & 0x3F);
2000 dst_block[1] = (((src_block[0] << 4) & 0x30)
2001 | ((src_block[1] >> 4) & 0x0F));
2002 dst_block[2] = (((src_block[1] << 2) & 0x3C)
2003 | ((src_block[2] >> 6) & 0x03));
2004 dst_block[3] = (src_block[2] & 0x3F);
2005 dst[dst_offset++] = portable_filename_chars[dst_block[0]];
2006 dst[dst_offset++] = portable_filename_chars[dst_block[1]];
2007 dst[dst_offset++] = portable_filename_chars[dst_block[2]];
2008 dst[dst_offset++] = portable_filename_chars[dst_block[3]];
2009 block_num++;
2010 }
2011out:
2012 return;
2013}
2014
2015/**
2016 * ecryptfs_decode_from_filename
2017 * @dst: If NULL, this function only sets @dst_size and returns. If
2018 * non-NULL, this function decodes the encoded octets in @src
2019 * into the memory that @dst points to.
2020 * @dst_size: Set to the size of the decoded string.
2021 * @src: The encoded set of octets to decode.
2022 * @src_size: The size of the encoded set of octets to decode.
2023 */
2024static void
2025ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
2026 const unsigned char *src, size_t src_size)
2027{
2028 u8 current_bit_offset = 0;
2029 size_t src_byte_offset = 0;
2030 size_t dst_byte_offset = 0;
2031
2032 if (dst == NULL) {
2033 /* Not exact; conservatively long. Every block of 4
2034 * encoded characters decodes into a block of 3
2035 * decoded characters. This segment of code provides
2036 * the caller with the maximum amount of allocated
2037 * space that @dst will need to point to in a
2038 * subsequent call. */
2039 (*dst_size) = (((src_size + 1) * 3) / 4);
2040 goto out;
2041 }
2042 while (src_byte_offset < src_size) {
2043 unsigned char src_byte =
2044 filename_rev_map[(int)src[src_byte_offset]];
2045
2046 switch (current_bit_offset) {
2047 case 0:
2048 dst[dst_byte_offset] = (src_byte << 2);
2049 current_bit_offset = 6;
2050 break;
2051 case 6:
2052 dst[dst_byte_offset++] |= (src_byte >> 4);
2053 dst[dst_byte_offset] = ((src_byte & 0xF)
2054 << 4);
2055 current_bit_offset = 4;
2056 break;
2057 case 4:
2058 dst[dst_byte_offset++] |= (src_byte >> 2);
2059 dst[dst_byte_offset] = (src_byte << 6);
2060 current_bit_offset = 2;
2061 break;
2062 case 2:
2063 dst[dst_byte_offset++] |= (src_byte);
2064 dst[dst_byte_offset] = 0;
2065 current_bit_offset = 0;
2066 break;
2067 }
2068 src_byte_offset++;
2069 }
2070 (*dst_size) = dst_byte_offset;
2071out:
2072 return;
2073}
2074
2075/**
2076 * ecryptfs_encrypt_and_encode_filename - converts a plaintext file name to cipher text
2077 * @crypt_stat: The crypt_stat struct associated with the file anem to encode
2078 * @name: The plaintext name
2079 * @length: The length of the plaintext
2080 * @encoded_name: The encypted name
2081 *
2082 * Encrypts and encodes a filename into something that constitutes a
2083 * valid filename for a filesystem, with printable characters.
2084 *
2085 * We assume that we have a properly initialized crypto context,
2086 * pointed to by crypt_stat->tfm.
2087 *
2088 * Returns zero on success; non-zero on otherwise
2089 */
2090int ecryptfs_encrypt_and_encode_filename(
2091 char **encoded_name,
2092 size_t *encoded_name_size,
2093 struct ecryptfs_crypt_stat *crypt_stat,
2094 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
2095 const char *name, size_t name_size)
2096{
2097 size_t encoded_name_no_prefix_size;
2098 int rc = 0;
2099
2100 (*encoded_name) = NULL;
2101 (*encoded_name_size) = 0;
2102 if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
2103 || (mount_crypt_stat && (mount_crypt_stat->flags
2104 & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) {
2105 struct ecryptfs_filename *filename;
2106
2107 filename = kzalloc(sizeof(*filename), GFP_KERNEL);
2108 if (!filename) {
2109 printk(KERN_ERR "%s: Out of memory whilst attempting "
2110 "to kzalloc [%zd] bytes\n", __func__,
2111 sizeof(*filename));
2112 rc = -ENOMEM;
2113 goto out;
2114 }
2115 filename->filename = (char *)name;
2116 filename->filename_size = name_size;
2117 rc = ecryptfs_encrypt_filename(filename, crypt_stat,
2118 mount_crypt_stat);
2119 if (rc) {
2120 printk(KERN_ERR "%s: Error attempting to encrypt "
2121 "filename; rc = [%d]\n", __func__, rc);
2122 kfree(filename);
2123 goto out;
2124 }
2125 ecryptfs_encode_for_filename(
2126 NULL, &encoded_name_no_prefix_size,
2127 filename->encrypted_filename,
2128 filename->encrypted_filename_size);
2129 if ((crypt_stat && (crypt_stat->flags
2130 & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
2131 || (mount_crypt_stat
2132 && (mount_crypt_stat->flags
2133 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)))
2134 (*encoded_name_size) =
2135 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
2136 + encoded_name_no_prefix_size);
2137 else
2138 (*encoded_name_size) =
2139 (ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE
2140 + encoded_name_no_prefix_size);
2141 (*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL);
2142 if (!(*encoded_name)) {
2143 printk(KERN_ERR "%s: Out of memory whilst attempting "
2144 "to kzalloc [%zd] bytes\n", __func__,
2145 (*encoded_name_size));
2146 rc = -ENOMEM;
2147 kfree(filename->encrypted_filename);
2148 kfree(filename);
2149 goto out;
2150 }
2151 if ((crypt_stat && (crypt_stat->flags
2152 & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
2153 || (mount_crypt_stat
2154 && (mount_crypt_stat->flags
2155 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
2156 memcpy((*encoded_name),
2157 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
2158 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE);
2159 ecryptfs_encode_for_filename(
2160 ((*encoded_name)
2161 + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE),
2162 &encoded_name_no_prefix_size,
2163 filename->encrypted_filename,
2164 filename->encrypted_filename_size);
2165 (*encoded_name_size) =
2166 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
2167 + encoded_name_no_prefix_size);
2168 (*encoded_name)[(*encoded_name_size)] = '\0';
2169 (*encoded_name_size)++;
2170 } else {
2171 rc = -ENOTSUPP;
2172 }
2173 if (rc) {
2174 printk(KERN_ERR "%s: Error attempting to encode "
2175 "encrypted filename; rc = [%d]\n", __func__,
2176 rc);
2177 kfree((*encoded_name));
2178 (*encoded_name) = NULL;
2179 (*encoded_name_size) = 0;
2180 }
2181 kfree(filename->encrypted_filename);
2182 kfree(filename);
2183 } else {
2184 rc = ecryptfs_copy_filename(encoded_name,
2185 encoded_name_size,
2186 name, name_size);
2187 }
2188out:
2189 return rc;
2190}
2191
2192/**
2193 * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
2194 * @plaintext_name: The plaintext name
2195 * @plaintext_name_size: The plaintext name size
2196 * @ecryptfs_dir_dentry: eCryptfs directory dentry
2197 * @name: The filename in cipher text
2198 * @name_size: The cipher text name size
2199 *
2200 * Decrypts and decodes the filename.
2201 *
2202 * Returns zero on error; non-zero otherwise
2203 */
2204int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
2205 size_t *plaintext_name_size,
2206 struct dentry *ecryptfs_dir_dentry,
2207 const char *name, size_t name_size)
2208{
2209 char *decoded_name;
2210 size_t decoded_name_size;
2211 size_t packet_size;
2212 int rc = 0;
2213
2214 if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
2215 && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
2216 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
2217 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
2218 &ecryptfs_superblock_to_private(
2219 ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
2220 const char *orig_name = name;
2221 size_t orig_name_size = name_size;
2222
2223 name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
2224 name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
2225 ecryptfs_decode_from_filename(NULL, &decoded_name_size,
2226 name, name_size);
2227 decoded_name = kmalloc(decoded_name_size, GFP_KERNEL);
2228 if (!decoded_name) {
2229 printk(KERN_ERR "%s: Out of memory whilst attempting "
2230 "to kmalloc [%zd] bytes\n", __func__,
2231 decoded_name_size);
2232 rc = -ENOMEM;
2233 goto out;
2234 }
2235 ecryptfs_decode_from_filename(decoded_name, &decoded_name_size,
2236 name, name_size);
2237 rc = ecryptfs_parse_tag_70_packet(plaintext_name,
2238 plaintext_name_size,
2239 &packet_size,
2240 mount_crypt_stat,
2241 decoded_name,
2242 decoded_name_size);
2243 if (rc) {
2244 printk(KERN_INFO "%s: Could not parse tag 70 packet "
2245 "from filename; copying through filename "
2246 "as-is\n", __func__);
2247 rc = ecryptfs_copy_filename(plaintext_name,
2248 plaintext_name_size,
2249 orig_name, orig_name_size);
2250 goto out_free;
2251 }
2252 } else {
2253 rc = ecryptfs_copy_filename(plaintext_name,
2254 plaintext_name_size,
2255 name, name_size);
2256 goto out;
2257 }
2258out_free:
2259 kfree(decoded_name);
2260out:
2261 return rc;
2262}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index a75026d35d16..c11fc95714ab 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -51,12 +51,16 @@
51#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
54#define ECRYPTFS_VERSIONING_HMAC 0x00000080
55#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100
56#define ECRYPTFS_VERSIONING_GCM 0x00000200
54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 57#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 58 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
56 | ECRYPTFS_VERSIONING_PUBKEY \ 59 | ECRYPTFS_VERSIONING_PUBKEY \
57 | ECRYPTFS_VERSIONING_XATTR \ 60 | ECRYPTFS_VERSIONING_XATTR \
58 | ECRYPTFS_VERSIONING_MULTKEY \ 61 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC) 62 | ECRYPTFS_VERSIONING_DEVMISC \
63 | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 64#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 65#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
62#define ECRYPTFS_SALT_SIZE 8 66#define ECRYPTFS_SALT_SIZE 8
@@ -199,6 +203,7 @@ ecryptfs_get_key_payload_data(struct key *key)
199#define ECRYPTFS_DEFAULT_CIPHER "aes" 203#define ECRYPTFS_DEFAULT_CIPHER "aes"
200#define ECRYPTFS_DEFAULT_KEY_BYTES 16 204#define ECRYPTFS_DEFAULT_KEY_BYTES 16
201#define ECRYPTFS_DEFAULT_HASH "md5" 205#define ECRYPTFS_DEFAULT_HASH "md5"
206#define ECRYPTFS_TAG_70_DIGEST ECRYPTFS_DEFAULT_HASH
202#define ECRYPTFS_TAG_1_PACKET_TYPE 0x01 207#define ECRYPTFS_TAG_1_PACKET_TYPE 0x01
203#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C 208#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
204#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED 209#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
@@ -206,30 +211,64 @@ ecryptfs_get_key_payload_data(struct key *key)
206#define ECRYPTFS_TAG_65_PACKET_TYPE 0x41 211#define ECRYPTFS_TAG_65_PACKET_TYPE 0x41
207#define ECRYPTFS_TAG_66_PACKET_TYPE 0x42 212#define ECRYPTFS_TAG_66_PACKET_TYPE 0x42
208#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43 213#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
214#define ECRYPTFS_TAG_70_PACKET_TYPE 0x46 /* FNEK-encrypted filename
215 * as dentry name */
216#define ECRYPTFS_TAG_71_PACKET_TYPE 0x47 /* FNEK-encrypted filename in
217 * metadata */
218#define ECRYPTFS_TAG_72_PACKET_TYPE 0x48 /* FEK-encrypted filename as
219 * dentry name */
220#define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as
221 * metadata */
222/* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >=
223 * ECRYPTFS_MAX_IV_BYTES */
224#define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16
225#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
209#define MD5_DIGEST_SIZE 16 226#define MD5_DIGEST_SIZE 16
227#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
228#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED."
229#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23
230#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED."
231#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24
232#define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32)
210 233
211struct ecryptfs_key_sig { 234struct ecryptfs_key_sig {
212 struct list_head crypt_stat_list; 235 struct list_head crypt_stat_list;
213 char keysig[ECRYPTFS_SIG_SIZE_HEX]; 236 char keysig[ECRYPTFS_SIG_SIZE_HEX];
214}; 237};
215 238
239struct ecryptfs_filename {
240 struct list_head crypt_stat_list;
241#define ECRYPTFS_FILENAME_CONTAINS_DECRYPTED 0x00000001
242 u32 flags;
243 u32 seq_no;
244 char *filename;
245 char *encrypted_filename;
246 size_t filename_size;
247 size_t encrypted_filename_size;
248 char fnek_sig[ECRYPTFS_SIG_SIZE_HEX];
249 char dentry_name[ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN + 1];
250};
251
216/** 252/**
217 * This is the primary struct associated with each encrypted file. 253 * This is the primary struct associated with each encrypted file.
218 * 254 *
219 * TODO: cache align/pack? 255 * TODO: cache align/pack?
220 */ 256 */
221struct ecryptfs_crypt_stat { 257struct ecryptfs_crypt_stat {
222#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 258#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
223#define ECRYPTFS_POLICY_APPLIED 0x00000002 259#define ECRYPTFS_POLICY_APPLIED 0x00000002
224#define ECRYPTFS_NEW_FILE 0x00000004 260#define ECRYPTFS_NEW_FILE 0x00000004
225#define ECRYPTFS_ENCRYPTED 0x00000008 261#define ECRYPTFS_ENCRYPTED 0x00000008
226#define ECRYPTFS_SECURITY_WARNING 0x00000010 262#define ECRYPTFS_SECURITY_WARNING 0x00000010
227#define ECRYPTFS_ENABLE_HMAC 0x00000020 263#define ECRYPTFS_ENABLE_HMAC 0x00000020
228#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 264#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
229#define ECRYPTFS_KEY_VALID 0x00000080 265#define ECRYPTFS_KEY_VALID 0x00000080
230#define ECRYPTFS_METADATA_IN_XATTR 0x00000100 266#define ECRYPTFS_METADATA_IN_XATTR 0x00000100
231#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 267#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200
232#define ECRYPTFS_KEY_SET 0x00000400 268#define ECRYPTFS_KEY_SET 0x00000400
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000
233 u32 flags; 272 u32 flags;
234 unsigned int file_version; 273 unsigned int file_version;
235 size_t iv_bytes; 274 size_t iv_bytes;
@@ -332,13 +371,20 @@ struct ecryptfs_mount_crypt_stat {
332#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002 371#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002
333#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004 372#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004
334#define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED 0x00000008 373#define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED 0x00000008
374#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
375#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
376#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
335 u32 flags; 377 u32 flags;
336 struct list_head global_auth_tok_list; 378 struct list_head global_auth_tok_list;
337 struct mutex global_auth_tok_list_mutex; 379 struct mutex global_auth_tok_list_mutex;
338 size_t num_global_auth_toks; 380 size_t num_global_auth_toks;
339 size_t global_default_cipher_key_size; 381 size_t global_default_cipher_key_size;
382 size_t global_default_fn_cipher_key_bytes;
340 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE 383 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
341 + 1]; 384 + 1];
385 unsigned char global_default_fn_cipher_name[
386 ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
387 char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
342}; 388};
343 389
344/* superblock private data. */ 390/* superblock private data. */
@@ -571,13 +617,22 @@ struct ecryptfs_open_req {
571int ecryptfs_interpose(struct dentry *hidden_dentry, 617int ecryptfs_interpose(struct dentry *hidden_dentry,
572 struct dentry *this_dentry, struct super_block *sb, 618 struct dentry *this_dentry, struct super_block *sb,
573 u32 flags); 619 u32 flags);
620int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
621 struct dentry *lower_dentry,
622 struct ecryptfs_crypt_stat *crypt_stat,
623 struct inode *ecryptfs_dir_inode,
624 struct nameidata *ecryptfs_nd);
625int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
626 size_t *decrypted_name_size,
627 struct dentry *ecryptfs_dentry,
628 const char *name, size_t name_size);
574int ecryptfs_fill_zeros(struct file *file, loff_t new_length); 629int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
575int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, 630int ecryptfs_encrypt_and_encode_filename(
576 const char *name, int length, 631 char **encoded_name,
577 char **decrypted_name); 632 size_t *encoded_name_size,
578int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, 633 struct ecryptfs_crypt_stat *crypt_stat,
579 const char *name, int length, 634 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
580 char **encoded_name); 635 const char *name, size_t name_size);
581struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); 636struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
582void ecryptfs_dump_hex(char *data, int bytes); 637void ecryptfs_dump_hex(char *data, int bytes);
583int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, 638int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
@@ -599,7 +654,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
599 struct inode *ecryptfs_inode); 654 struct inode *ecryptfs_inode);
600int ecryptfs_read_and_validate_xattr_region(char *page_virt, 655int ecryptfs_read_and_validate_xattr_region(char *page_virt,
601 struct dentry *ecryptfs_dentry); 656 struct dentry *ecryptfs_dentry);
602u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); 657u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes);
603int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); 658int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code);
604void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); 659void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
605int ecryptfs_generate_key_packet_set(char *dest_base, 660int ecryptfs_generate_key_packet_set(char *dest_base,
@@ -694,5 +749,17 @@ int ecryptfs_privileged_open(struct file **lower_file,
694 struct vfsmount *lower_mnt, 749 struct vfsmount *lower_mnt,
695 const struct cred *cred); 750 const struct cred *cred);
696int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); 751int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
752int
753ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
754 size_t *packet_size,
755 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
756 char *filename, size_t filename_size);
757int
758ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
759 size_t *packet_size,
760 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
761 char *data, size_t max_packet_size);
762int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
763 loff_t offset);
697 764
698#endif /* #ifndef ECRYPTFS_KERNEL_H */ 765#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 713834371229..9e944057001b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -77,27 +77,27 @@ struct ecryptfs_getdents_callback {
77 77
78/* Inspired by generic filldir in fs/readdir.c */ 78/* Inspired by generic filldir in fs/readdir.c */
79static int 79static int
80ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, 80ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen,
81 u64 ino, unsigned int d_type) 81 loff_t offset, u64 ino, unsigned int d_type)
82{ 82{
83 struct ecryptfs_crypt_stat *crypt_stat;
84 struct ecryptfs_getdents_callback *buf = 83 struct ecryptfs_getdents_callback *buf =
85 (struct ecryptfs_getdents_callback *)dirent; 84 (struct ecryptfs_getdents_callback *)dirent;
85 size_t name_size;
86 char *name;
86 int rc; 87 int rc;
87 int decoded_length;
88 char *decoded_name;
89 88
90 crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat;
91 buf->filldir_called++; 89 buf->filldir_called++;
92 decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen, 90 rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size,
93 &decoded_name); 91 buf->dentry, lower_name,
94 if (decoded_length < 0) { 92 lower_namelen);
95 rc = decoded_length; 93 if (rc) {
94 printk(KERN_ERR "%s: Error attempting to decode and decrypt "
95 "filename [%s]; rc = [%d]\n", __func__, lower_name,
96 rc);
96 goto out; 97 goto out;
97 } 98 }
98 rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset, 99 rc = buf->filldir(buf->dirent, name, name_size, offset, ino, d_type);
99 ino, d_type); 100 kfree(name);
100 kfree(decoded_name);
101 if (rc >= 0) 101 if (rc >= 0)
102 buf->entries_written++; 102 buf->entries_written++;
103out: 103out:
@@ -106,8 +106,8 @@ out:
106 106
107/** 107/**
108 * ecryptfs_readdir 108 * ecryptfs_readdir
109 * @file: The ecryptfs file struct 109 * @file: The eCryptfs directory file
110 * @dirent: Directory entry 110 * @dirent: Directory entry handle
111 * @filldir: The filldir callback function 111 * @filldir: The filldir callback function
112 */ 112 */
113static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) 113static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0111906a8877..5697899a168d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -52,8 +52,7 @@ static void unlock_dir(struct dentry *dir)
52/** 52/**
53 * ecryptfs_create_underlying_file 53 * ecryptfs_create_underlying_file
54 * @lower_dir_inode: inode of the parent in the lower fs of the new file 54 * @lower_dir_inode: inode of the parent in the lower fs of the new file
55 * @lower_dentry: New file's dentry in the lower fs 55 * @dentry: New file's dentry
56 * @ecryptfs_dentry: New file's dentry in ecryptfs
57 * @mode: The mode of the new file 56 * @mode: The mode of the new file
58 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount 57 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
59 * 58 *
@@ -228,8 +227,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
228{ 227{
229 int rc; 228 int rc;
230 229
231 /* ecryptfs_do_create() calls ecryptfs_interpose(), which opens 230 /* ecryptfs_do_create() calls ecryptfs_interpose() */
232 * the crypt_stat->lower_file (persistent file) */
233 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); 231 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
234 if (unlikely(rc)) { 232 if (unlikely(rc)) {
235 ecryptfs_printk(KERN_WARNING, "Failed to create file in" 233 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
@@ -244,141 +242,91 @@ out:
244} 242}
245 243
246/** 244/**
247 * ecryptfs_lookup 245 * ecryptfs_lookup_and_interpose_lower - Perform a lookup
248 * @dir: inode
249 * @dentry: The dentry
250 * @nd: nameidata, may be NULL
251 *
252 * Find a file on disk. If the file does not exist, then we'll add it to the
253 * dentry cache and continue on to read it from the disk.
254 */ 246 */
255static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, 247int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
256 struct nameidata *nd) 248 struct dentry *lower_dentry,
249 struct ecryptfs_crypt_stat *crypt_stat,
250 struct inode *ecryptfs_dir_inode,
251 struct nameidata *ecryptfs_nd)
257{ 252{
258 int rc = 0;
259 struct dentry *lower_dir_dentry; 253 struct dentry *lower_dir_dentry;
260 struct dentry *lower_dentry;
261 struct vfsmount *lower_mnt; 254 struct vfsmount *lower_mnt;
262 char *encoded_name; 255 struct inode *lower_inode;
263 int encoded_namelen;
264 struct ecryptfs_crypt_stat *crypt_stat = NULL;
265 struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 256 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
266 char *page_virt = NULL; 257 char *page_virt = NULL;
267 struct inode *lower_inode;
268 u64 file_size; 258 u64 file_size;
259 int rc = 0;
269 260
270 lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); 261 lower_dir_dentry = lower_dentry->d_parent;
271 dentry->d_op = &ecryptfs_dops; 262 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
272 if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) 263 ecryptfs_dentry->d_parent));
273 || (dentry->d_name.len == 2
274 && !strcmp(dentry->d_name.name, ".."))) {
275 d_drop(dentry);
276 goto out;
277 }
278 encoded_namelen = ecryptfs_encode_filename(crypt_stat,
279 dentry->d_name.name,
280 dentry->d_name.len,
281 &encoded_name);
282 if (encoded_namelen < 0) {
283 rc = encoded_namelen;
284 d_drop(dentry);
285 goto out;
286 }
287 ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
288 "= [%d]\n", encoded_name, encoded_namelen);
289 lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
290 encoded_namelen - 1);
291 kfree(encoded_name);
292 if (IS_ERR(lower_dentry)) {
293 ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
294 rc = PTR_ERR(lower_dentry);
295 d_drop(dentry);
296 goto out;
297 }
298 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
299 ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
300 "d_name.name = [%s]\n", lower_dentry,
301 lower_dentry->d_name.name);
302 lower_inode = lower_dentry->d_inode; 264 lower_inode = lower_dentry->d_inode;
303 fsstack_copy_attr_atime(dir, lower_dir_dentry->d_inode); 265 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
304 BUG_ON(!atomic_read(&lower_dentry->d_count)); 266 BUG_ON(!atomic_read(&lower_dentry->d_count));
305 ecryptfs_set_dentry_private(dentry, 267 ecryptfs_set_dentry_private(ecryptfs_dentry,
306 kmem_cache_alloc(ecryptfs_dentry_info_cache, 268 kmem_cache_alloc(ecryptfs_dentry_info_cache,
307 GFP_KERNEL)); 269 GFP_KERNEL));
308 if (!ecryptfs_dentry_to_private(dentry)) { 270 if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) {
309 rc = -ENOMEM; 271 rc = -ENOMEM;
310 ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " 272 printk(KERN_ERR "%s: Out of memory whilst attempting "
311 "to allocate ecryptfs_dentry_info struct\n"); 273 "to allocate ecryptfs_dentry_info struct\n",
274 __func__);
312 goto out_dput; 275 goto out_dput;
313 } 276 }
314 ecryptfs_set_dentry_lower(dentry, lower_dentry); 277 ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
315 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); 278 ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
316 if (!lower_dentry->d_inode) { 279 if (!lower_dentry->d_inode) {
317 /* We want to add because we couldn't find in lower */ 280 /* We want to add because we couldn't find in lower */
318 d_add(dentry, NULL); 281 d_add(ecryptfs_dentry, NULL);
319 goto out; 282 goto out;
320 } 283 }
321 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
322 ECRYPTFS_INTERPOSE_FLAG_D_ADD); 285 ecryptfs_dir_inode->i_sb, 1);
323 if (rc) { 286 if (rc) {
324 ecryptfs_printk(KERN_ERR, "Error interposing\n"); 287 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
288 __func__, rc);
325 goto out; 289 goto out;
326 } 290 }
327 if (S_ISDIR(lower_inode->i_mode)) { 291 if (S_ISDIR(lower_inode->i_mode))
328 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
329 goto out; 292 goto out;
330 } 293 if (S_ISLNK(lower_inode->i_mode))
331 if (S_ISLNK(lower_inode->i_mode)) {
332 ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
333 goto out; 294 goto out;
334 } 295 if (special_file(lower_inode->i_mode))
335 if (special_file(lower_inode->i_mode)) {
336 ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
337 goto out; 296 goto out;
338 } 297 if (!ecryptfs_nd)
339 if (!nd) {
340 ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
341 "as we *think* we are about to unlink\n");
342 goto out; 298 goto out;
343 }
344 /* Released in this function */ 299 /* Released in this function */
345 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, 300 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
346 GFP_USER);
347 if (!page_virt) { 301 if (!page_virt) {
302 printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n",
303 __func__);
348 rc = -ENOMEM; 304 rc = -ENOMEM;
349 ecryptfs_printk(KERN_ERR,
350 "Cannot ecryptfs_kmalloc a page\n");
351 goto out; 305 goto out;
352 } 306 }
353 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 307 if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
354 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 308 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
355 ecryptfs_set_default_sizes(crypt_stat);
356 if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
357 rc = ecryptfs_init_persistent_file(dentry);
358 if (rc) { 309 if (rc) {
359 printk(KERN_ERR "%s: Error attempting to initialize " 310 printk(KERN_ERR "%s: Error attempting to initialize "
360 "the persistent file for the dentry with name " 311 "the persistent file for the dentry with name "
361 "[%s]; rc = [%d]\n", __func__, 312 "[%s]; rc = [%d]\n", __func__,
362 dentry->d_name.name, rc); 313 ecryptfs_dentry->d_name.name, rc);
363 goto out; 314 goto out_free_kmem;
364 } 315 }
365 } 316 }
366 rc = ecryptfs_read_and_validate_header_region(page_virt, 317 rc = ecryptfs_read_and_validate_header_region(page_virt,
367 dentry->d_inode); 318 ecryptfs_dentry->d_inode);
368 if (rc) { 319 if (rc) {
369 rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry); 320 rc = ecryptfs_read_and_validate_xattr_region(page_virt,
321 ecryptfs_dentry);
370 if (rc) { 322 if (rc) {
371 printk(KERN_DEBUG "Valid metadata not found in header "
372 "region or xattr region; treating file as "
373 "unencrypted\n");
374 rc = 0; 323 rc = 0;
375 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 324 goto out_free_kmem;
376 goto out;
377 } 325 }
378 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; 326 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
379 } 327 }
380 mount_crypt_stat = &ecryptfs_superblock_to_private( 328 mount_crypt_stat = &ecryptfs_superblock_to_private(
381 dentry->d_sb)->mount_crypt_stat; 329 ecryptfs_dentry->d_sb)->mount_crypt_stat;
382 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { 330 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
383 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 331 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
384 file_size = (crypt_stat->num_header_bytes_at_front 332 file_size = (crypt_stat->num_header_bytes_at_front
@@ -388,14 +336,103 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
388 } else { 336 } else {
389 file_size = get_unaligned_be64(page_virt); 337 file_size = get_unaligned_be64(page_virt);
390 } 338 }
391 i_size_write(dentry->d_inode, (loff_t)file_size); 339 i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
340out_free_kmem:
392 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 341 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
393 goto out; 342 goto out;
394
395out_dput: 343out_dput:
396 dput(lower_dentry); 344 dput(lower_dentry);
397 d_drop(dentry); 345 d_drop(ecryptfs_dentry);
346out:
347 return rc;
348}
349
350/**
351 * ecryptfs_lookup
352 * @ecryptfs_dir_inode: The eCryptfs directory inode
353 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
354 * @ecryptfs_nd: nameidata; may be NULL
355 *
356 * Find a file on disk. If the file does not exist, then we'll add it to the
357 * dentry cache and continue on to read it from the disk.
358 */
359static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
360 struct dentry *ecryptfs_dentry,
361 struct nameidata *ecryptfs_nd)
362{
363 char *encrypted_and_encoded_name = NULL;
364 size_t encrypted_and_encoded_name_size;
365 struct ecryptfs_crypt_stat *crypt_stat = NULL;
366 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
367 struct ecryptfs_inode_info *inode_info;
368 struct dentry *lower_dir_dentry, *lower_dentry;
369 int rc = 0;
370
371 ecryptfs_dentry->d_op = &ecryptfs_dops;
372 if ((ecryptfs_dentry->d_name.len == 1
373 && !strcmp(ecryptfs_dentry->d_name.name, "."))
374 || (ecryptfs_dentry->d_name.len == 2
375 && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
376 goto out_d_drop;
377 }
378 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
379 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
380 lower_dir_dentry,
381 ecryptfs_dentry->d_name.len);
382 if (IS_ERR(lower_dentry)) {
383 rc = PTR_ERR(lower_dentry);
384 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
385 "lower_dentry = [%s]\n", __func__, rc,
386 ecryptfs_dentry->d_name.name);
387 goto out_d_drop;
388 }
389 if (lower_dentry->d_inode)
390 goto lookup_and_interpose;
391 inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
392 if (inode_info) {
393 crypt_stat = &inode_info->crypt_stat;
394 /* TODO: lock for crypt_stat comparison */
395 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
396 ecryptfs_set_default_sizes(crypt_stat);
397 }
398 if (crypt_stat)
399 mount_crypt_stat = crypt_stat->mount_crypt_stat;
400 else
401 mount_crypt_stat = &ecryptfs_superblock_to_private(
402 ecryptfs_dentry->d_sb)->mount_crypt_stat;
403 if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
404 && !(mount_crypt_stat && (mount_crypt_stat->flags
405 & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
406 goto lookup_and_interpose;
407 dput(lower_dentry);
408 rc = ecryptfs_encrypt_and_encode_filename(
409 &encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
410 crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name,
411 ecryptfs_dentry->d_name.len);
412 if (rc) {
413 printk(KERN_ERR "%s: Error attempting to encrypt and encode "
414 "filename; rc = [%d]\n", __func__, rc);
415 goto out_d_drop;
416 }
417 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
418 lower_dir_dentry,
419 encrypted_and_encoded_name_size - 1);
420 if (IS_ERR(lower_dentry)) {
421 rc = PTR_ERR(lower_dentry);
422 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
423 "lower_dentry = [%s]\n", __func__, rc,
424 encrypted_and_encoded_name);
425 goto out_d_drop;
426 }
427lookup_and_interpose:
428 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
429 crypt_stat, ecryptfs_dir_inode,
430 ecryptfs_nd);
431 goto out;
432out_d_drop:
433 d_drop(ecryptfs_dentry);
398out: 434out:
435 kfree(encrypted_and_encoded_name);
399 return ERR_PTR(rc); 436 return ERR_PTR(rc);
400} 437}
401 438
@@ -466,19 +503,21 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
466 struct dentry *lower_dentry; 503 struct dentry *lower_dentry;
467 struct dentry *lower_dir_dentry; 504 struct dentry *lower_dir_dentry;
468 char *encoded_symname; 505 char *encoded_symname;
469 int encoded_symlen; 506 size_t encoded_symlen;
470 struct ecryptfs_crypt_stat *crypt_stat = NULL; 507 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
471 508
472 lower_dentry = ecryptfs_dentry_to_lower(dentry); 509 lower_dentry = ecryptfs_dentry_to_lower(dentry);
473 dget(lower_dentry); 510 dget(lower_dentry);
474 lower_dir_dentry = lock_parent(lower_dentry); 511 lower_dir_dentry = lock_parent(lower_dentry);
475 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, 512 mount_crypt_stat = &ecryptfs_superblock_to_private(
476 strlen(symname), 513 dir->i_sb)->mount_crypt_stat;
477 &encoded_symname); 514 rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname,
478 if (encoded_symlen < 0) { 515 &encoded_symlen,
479 rc = encoded_symlen; 516 NULL,
517 mount_crypt_stat, symname,
518 strlen(symname));
519 if (rc)
480 goto out_lock; 520 goto out_lock;
481 }
482 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, 521 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
483 encoded_symname); 522 encoded_symname);
484 kfree(encoded_symname); 523 kfree(encoded_symname);
@@ -602,52 +641,54 @@ out_lock:
602} 641}
603 642
604static int 643static int
605ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) 644ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
606{ 645{
607 int rc;
608 struct dentry *lower_dentry;
609 char *decoded_name;
610 char *lower_buf; 646 char *lower_buf;
611 mm_segment_t old_fs; 647 struct dentry *lower_dentry;
612 struct ecryptfs_crypt_stat *crypt_stat; 648 struct ecryptfs_crypt_stat *crypt_stat;
649 char *plaintext_name;
650 size_t plaintext_name_size;
651 mm_segment_t old_fs;
652 int rc;
613 653
614 lower_dentry = ecryptfs_dentry_to_lower(dentry); 654 lower_dentry = ecryptfs_dentry_to_lower(dentry);
615 if (!lower_dentry->d_inode->i_op->readlink) { 655 if (!lower_dentry->d_inode->i_op->readlink) {
616 rc = -EINVAL; 656 rc = -EINVAL;
617 goto out; 657 goto out;
618 } 658 }
659 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
619 /* Released in this function */ 660 /* Released in this function */
620 lower_buf = kmalloc(bufsiz, GFP_KERNEL); 661 lower_buf = kmalloc(bufsiz, GFP_KERNEL);
621 if (lower_buf == NULL) { 662 if (lower_buf == NULL) {
622 ecryptfs_printk(KERN_ERR, "Out of memory\n"); 663 printk(KERN_ERR "%s: Out of memory whilst attempting to "
664 "kmalloc [%d] bytes\n", __func__, bufsiz);
623 rc = -ENOMEM; 665 rc = -ENOMEM;
624 goto out; 666 goto out;
625 } 667 }
626 old_fs = get_fs(); 668 old_fs = get_fs();
627 set_fs(get_ds()); 669 set_fs(get_ds());
628 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
629 "lower_dentry->d_name.name = [%s]\n",
630 lower_dentry->d_name.name);
631 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, 670 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
632 (char __user *)lower_buf, 671 (char __user *)lower_buf,
633 bufsiz); 672 bufsiz);
634 set_fs(old_fs); 673 set_fs(old_fs);
635 if (rc >= 0) { 674 if (rc >= 0) {
636 crypt_stat = NULL; 675 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
637 rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc, 676 &plaintext_name_size,
638 &decoded_name); 677 dentry, lower_buf,
639 if (rc == -ENOMEM) 678 rc);
679 if (rc) {
680 printk(KERN_ERR "%s: Error attempting to decode and "
681 "decrypt filename; rc = [%d]\n", __func__,
682 rc);
640 goto out_free_lower_buf; 683 goto out_free_lower_buf;
641 if (rc > 0) {
642 ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes "
643 "to userspace: [%*s]\n", rc,
644 decoded_name);
645 if (copy_to_user(buf, decoded_name, rc))
646 rc = -EFAULT;
647 } 684 }
648 kfree(decoded_name); 685 rc = copy_to_user(buf, plaintext_name, plaintext_name_size);
649 fsstack_copy_attr_atime(dentry->d_inode, 686 if (rc)
650 lower_dentry->d_inode); 687 rc = -EFAULT;
688 else
689 rc = plaintext_name_size;
690 kfree(plaintext_name);
691 fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
651 } 692 }
652out_free_lower_buf: 693out_free_lower_buf:
653 kfree(lower_buf); 694 kfree(lower_buf);
@@ -669,8 +710,6 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
669 } 710 }
670 old_fs = get_fs(); 711 old_fs = get_fs();
671 set_fs(get_ds()); 712 set_fs(get_ds());
672 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
673 "dentry->d_name.name = [%s]\n", dentry->d_name.name);
674 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 713 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
675 set_fs(old_fs); 714 set_fs(old_fs);
676 if (rc < 0) 715 if (rc < 0)
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 0d713b691941..ff539420cc6f 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -358,7 +358,7 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
358 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
359 if (message_len < 4) { 359 if (message_len < 4) {
360 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable " 361 printk(KERN_ERR "%s: message_len is [%zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4); 362 "message length is [%d]\n", __func__, message_len, 4);
363 goto out; 363 goto out;
364 } 364 }
@@ -385,13 +385,13 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
385 i += data_len; 385 i += data_len;
386 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
387 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n", 388 printk(KERN_ERR "%s: message_len [%zd]; max len is [%zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size)); 389 __func__, message_len, (i + key_rec->enc_key_size));
390 goto out; 390 goto out;
391 } 391 }
392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
393 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than " 394 printk(KERN_ERR "%s: Encrypted key_size [%zd] larger than "
395 "the maximum key size [%d]\n", __func__, 395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size, 396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); 397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
@@ -403,6 +403,580 @@ out:
403} 403}
404 404
405static int 405static int
406ecryptfs_find_global_auth_tok_for_sig(
407 struct ecryptfs_global_auth_tok **global_auth_tok,
408 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
409{
410 struct ecryptfs_global_auth_tok *walker;
411 int rc = 0;
412
413 (*global_auth_tok) = NULL;
414 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
415 list_for_each_entry(walker,
416 &mount_crypt_stat->global_auth_tok_list,
417 mount_crypt_stat_list) {
418 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
419 (*global_auth_tok) = walker;
420 goto out;
421 }
422 }
423 rc = -EINVAL;
424out:
425 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
426 return rc;
427}
428
429/**
430 * ecryptfs_find_auth_tok_for_sig
431 * @auth_tok: Set to the matching auth_tok; NULL if not found
432 * @crypt_stat: inode crypt_stat crypto context
433 * @sig: Sig of auth_tok to find
434 *
435 * For now, this function simply looks at the registered auth_tok's
436 * linked off the mount_crypt_stat, so all the auth_toks that can be
437 * used must be registered at mount time. This function could
438 * potentially try a lot harder to find auth_tok's (e.g., by calling
439 * out to ecryptfsd to dynamically retrieve an auth_tok object) so
440 * that static registration of auth_tok's will no longer be necessary.
441 *
442 * Returns zero on no error; non-zero on error
443 */
444static int
445ecryptfs_find_auth_tok_for_sig(
446 struct ecryptfs_auth_tok **auth_tok,
447 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
448 char *sig)
449{
450 struct ecryptfs_global_auth_tok *global_auth_tok;
451 int rc = 0;
452
453 (*auth_tok) = NULL;
454 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
455 mount_crypt_stat, sig)) {
456 struct key *auth_tok_key;
457
458 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
459 sig);
460 } else
461 (*auth_tok) = global_auth_tok->global_auth_tok;
462 return rc;
463}
464
465/**
466 * write_tag_70_packet can gobble a lot of stack space. We stuff most
467 * of the function's parameters in a kmalloc'd struct to help reduce
468 * eCryptfs' overall stack usage.
469 */
470struct ecryptfs_write_tag_70_packet_silly_stack {
471 u8 cipher_code;
472 size_t max_packet_size;
473 size_t packet_size_len;
474 size_t block_aligned_filename_size;
475 size_t block_size;
476 size_t i;
477 size_t j;
478 size_t num_rand_bytes;
479 struct mutex *tfm_mutex;
480 char *block_aligned_filename;
481 struct ecryptfs_auth_tok *auth_tok;
482 struct scatterlist src_sg;
483 struct scatterlist dst_sg;
484 struct blkcipher_desc desc;
485 char iv[ECRYPTFS_MAX_IV_BYTES];
486 char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
487 char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
488 struct hash_desc hash_desc;
489 struct scatterlist hash_sg;
490};
491
492/**
493 * write_tag_70_packet - Write encrypted filename (EFN) packet against FNEK
494 * @filename: NULL-terminated filename string
495 *
496 * This is the simplest mechanism for achieving filename encryption in
497 * eCryptfs. It encrypts the given filename with the mount-wide
498 * filename encryption key (FNEK) and stores it in a packet to @dest,
499 * which the callee will encode and write directly into the dentry
500 * name.
501 */
502int
503ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
504 size_t *packet_size,
505 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
506 char *filename, size_t filename_size)
507{
508 struct ecryptfs_write_tag_70_packet_silly_stack *s;
509 int rc = 0;
510
511 s = kmalloc(sizeof(*s), GFP_KERNEL);
512 if (!s) {
513 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
514 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
515 goto out;
516 }
517 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
518 (*packet_size) = 0;
519 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
520 &s->desc.tfm,
521 &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
522 if (unlikely(rc)) {
523 printk(KERN_ERR "Internal error whilst attempting to get "
524 "tfm and mutex for cipher name [%s]; rc = [%d]\n",
525 mount_crypt_stat->global_default_fn_cipher_name, rc);
526 goto out;
527 }
528 mutex_lock(s->tfm_mutex);
529 s->block_size = crypto_blkcipher_blocksize(s->desc.tfm);
530 /* Plus one for the \0 separator between the random prefix
531 * and the plaintext filename */
532 s->num_rand_bytes = (ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + 1);
533 s->block_aligned_filename_size = (s->num_rand_bytes + filename_size);
534 if ((s->block_aligned_filename_size % s->block_size) != 0) {
535 s->num_rand_bytes += (s->block_size
536 - (s->block_aligned_filename_size
537 % s->block_size));
538 s->block_aligned_filename_size = (s->num_rand_bytes
539 + filename_size);
540 }
541 /* Octet 0: Tag 70 identifier
542 * Octets 1-N1: Tag 70 packet size (includes cipher identifier
543 * and block-aligned encrypted filename size)
544 * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE)
545 * Octet N2-N3: Cipher identifier (1 octet)
546 * Octets N3-N4: Block-aligned encrypted filename
547 * - Consists of a minimum number of random characters, a \0
548 * separator, and then the filename */
549 s->max_packet_size = (1 /* Tag 70 identifier */
550 + 3 /* Max Tag 70 packet size */
551 + ECRYPTFS_SIG_SIZE /* FNEK sig */
552 + 1 /* Cipher identifier */
553 + s->block_aligned_filename_size);
554 if (dest == NULL) {
555 (*packet_size) = s->max_packet_size;
556 goto out_unlock;
557 }
558 if (s->max_packet_size > (*remaining_bytes)) {
559 printk(KERN_WARNING "%s: Require [%zd] bytes to write; only "
560 "[%zd] available\n", __func__, s->max_packet_size,
561 (*remaining_bytes));
562 rc = -EINVAL;
563 goto out_unlock;
564 }
565 s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
566 GFP_KERNEL);
567 if (!s->block_aligned_filename) {
568 printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
569 "kzalloc [%zd] bytes\n", __func__,
570 s->block_aligned_filename_size);
571 rc = -ENOMEM;
572 goto out_unlock;
573 }
574 s->i = 0;
575 dest[s->i++] = ECRYPTFS_TAG_70_PACKET_TYPE;
576 rc = ecryptfs_write_packet_length(&dest[s->i],
577 (ECRYPTFS_SIG_SIZE
578 + 1 /* Cipher code */
579 + s->block_aligned_filename_size),
580 &s->packet_size_len);
581 if (rc) {
582 printk(KERN_ERR "%s: Error generating tag 70 packet "
583 "header; cannot generate packet length; rc = [%d]\n",
584 __func__, rc);
585 goto out_free_unlock;
586 }
587 s->i += s->packet_size_len;
588 ecryptfs_from_hex(&dest[s->i],
589 mount_crypt_stat->global_default_fnek_sig,
590 ECRYPTFS_SIG_SIZE);
591 s->i += ECRYPTFS_SIG_SIZE;
592 s->cipher_code = ecryptfs_code_for_cipher_string(
593 mount_crypt_stat->global_default_fn_cipher_name,
594 mount_crypt_stat->global_default_fn_cipher_key_bytes);
595 if (s->cipher_code == 0) {
596 printk(KERN_WARNING "%s: Unable to generate code for "
597 "cipher [%s] with key bytes [%zd]\n", __func__,
598 mount_crypt_stat->global_default_fn_cipher_name,
599 mount_crypt_stat->global_default_fn_cipher_key_bytes);
600 rc = -EINVAL;
601 goto out_free_unlock;
602 }
603 dest[s->i++] = s->cipher_code;
604 rc = ecryptfs_find_auth_tok_for_sig(
605 &s->auth_tok, mount_crypt_stat,
606 mount_crypt_stat->global_default_fnek_sig);
607 if (rc) {
608 printk(KERN_ERR "%s: Error attempting to find auth tok for "
609 "fnek sig [%s]; rc = [%d]\n", __func__,
610 mount_crypt_stat->global_default_fnek_sig, rc);
611 goto out_free_unlock;
612 }
613 /* TODO: Support other key modules than passphrase for
614 * filename encryption */
615 BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
616 sg_init_one(
617 &s->hash_sg,
618 (u8 *)s->auth_tok->token.password.session_key_encryption_key,
619 s->auth_tok->token.password.session_key_encryption_key_bytes);
620 s->hash_desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
621 s->hash_desc.tfm = crypto_alloc_hash(ECRYPTFS_TAG_70_DIGEST, 0,
622 CRYPTO_ALG_ASYNC);
623 if (IS_ERR(s->hash_desc.tfm)) {
624 rc = PTR_ERR(s->hash_desc.tfm);
625 printk(KERN_ERR "%s: Error attempting to "
626 "allocate hash crypto context; rc = [%d]\n",
627 __func__, rc);
628 goto out_free_unlock;
629 }
630 rc = crypto_hash_init(&s->hash_desc);
631 if (rc) {
632 printk(KERN_ERR
633 "%s: Error initializing crypto hash; rc = [%d]\n",
634 __func__, rc);
635 goto out_release_free_unlock;
636 }
637 rc = crypto_hash_update(
638 &s->hash_desc, &s->hash_sg,
639 s->auth_tok->token.password.session_key_encryption_key_bytes);
640 if (rc) {
641 printk(KERN_ERR
642 "%s: Error updating crypto hash; rc = [%d]\n",
643 __func__, rc);
644 goto out_release_free_unlock;
645 }
646 rc = crypto_hash_final(&s->hash_desc, s->hash);
647 if (rc) {
648 printk(KERN_ERR
649 "%s: Error finalizing crypto hash; rc = [%d]\n",
650 __func__, rc);
651 goto out_release_free_unlock;
652 }
653 for (s->j = 0; s->j < (s->num_rand_bytes - 1); s->j++) {
654 s->block_aligned_filename[s->j] =
655 s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)];
656 if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)
657 == (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) {
658 sg_init_one(&s->hash_sg, (u8 *)s->hash,
659 ECRYPTFS_TAG_70_DIGEST_SIZE);
660 rc = crypto_hash_init(&s->hash_desc);
661 if (rc) {
662 printk(KERN_ERR
663 "%s: Error initializing crypto hash; "
664 "rc = [%d]\n", __func__, rc);
665 goto out_release_free_unlock;
666 }
667 rc = crypto_hash_update(&s->hash_desc, &s->hash_sg,
668 ECRYPTFS_TAG_70_DIGEST_SIZE);
669 if (rc) {
670 printk(KERN_ERR
671 "%s: Error updating crypto hash; "
672 "rc = [%d]\n", __func__, rc);
673 goto out_release_free_unlock;
674 }
675 rc = crypto_hash_final(&s->hash_desc, s->tmp_hash);
676 if (rc) {
677 printk(KERN_ERR
678 "%s: Error finalizing crypto hash; "
679 "rc = [%d]\n", __func__, rc);
680 goto out_release_free_unlock;
681 }
682 memcpy(s->hash, s->tmp_hash,
683 ECRYPTFS_TAG_70_DIGEST_SIZE);
684 }
685 if (s->block_aligned_filename[s->j] == '\0')
686 s->block_aligned_filename[s->j] = ECRYPTFS_NON_NULL;
687 }
688 memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename,
689 filename_size);
690 rc = virt_to_scatterlist(s->block_aligned_filename,
691 s->block_aligned_filename_size, &s->src_sg, 1);
692 if (rc != 1) {
693 printk(KERN_ERR "%s: Internal error whilst attempting to "
694 "convert filename memory to scatterlist; "
695 "expected rc = 1; got rc = [%d]. "
696 "block_aligned_filename_size = [%zd]\n", __func__, rc,
697 s->block_aligned_filename_size);
698 goto out_release_free_unlock;
699 }
700 rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size,
701 &s->dst_sg, 1);
702 if (rc != 1) {
703 printk(KERN_ERR "%s: Internal error whilst attempting to "
704 "convert encrypted filename memory to scatterlist; "
705 "expected rc = 1; got rc = [%d]. "
706 "block_aligned_filename_size = [%zd]\n", __func__, rc,
707 s->block_aligned_filename_size);
708 goto out_release_free_unlock;
709 }
710 /* The characters in the first block effectively do the job
711 * of the IV here, so we just use 0's for the IV. Note the
712 * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
713 * >= ECRYPTFS_MAX_IV_BYTES. */
714 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
715 s->desc.info = s->iv;
716 rc = crypto_blkcipher_setkey(
717 s->desc.tfm,
718 s->auth_tok->token.password.session_key_encryption_key,
719 mount_crypt_stat->global_default_fn_cipher_key_bytes);
720 if (rc < 0) {
721 printk(KERN_ERR "%s: Error setting key for crypto context; "
722 "rc = [%d]. s->auth_tok->token.password.session_key_"
723 "encryption_key = [0x%p]; mount_crypt_stat->"
724 "global_default_fn_cipher_key_bytes = [%zd]\n", __func__,
725 rc,
726 s->auth_tok->token.password.session_key_encryption_key,
727 mount_crypt_stat->global_default_fn_cipher_key_bytes);
728 goto out_release_free_unlock;
729 }
730 rc = crypto_blkcipher_encrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
731 s->block_aligned_filename_size);
732 if (rc) {
733 printk(KERN_ERR "%s: Error attempting to encrypt filename; "
734 "rc = [%d]\n", __func__, rc);
735 goto out_release_free_unlock;
736 }
737 s->i += s->block_aligned_filename_size;
738 (*packet_size) = s->i;
739 (*remaining_bytes) -= (*packet_size);
740out_release_free_unlock:
741 crypto_free_hash(s->hash_desc.tfm);
742out_free_unlock:
743 memset(s->block_aligned_filename, 0, s->block_aligned_filename_size);
744 kfree(s->block_aligned_filename);
745out_unlock:
746 mutex_unlock(s->tfm_mutex);
747out:
748 kfree(s);
749 return rc;
750}
751
752struct ecryptfs_parse_tag_70_packet_silly_stack {
753 u8 cipher_code;
754 size_t max_packet_size;
755 size_t packet_size_len;
756 size_t parsed_tag_70_packet_size;
757 size_t block_aligned_filename_size;
758 size_t block_size;
759 size_t i;
760 struct mutex *tfm_mutex;
761 char *decrypted_filename;
762 struct ecryptfs_auth_tok *auth_tok;
763 struct scatterlist src_sg;
764 struct scatterlist dst_sg;
765 struct blkcipher_desc desc;
766 char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
767 char iv[ECRYPTFS_MAX_IV_BYTES];
768 char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
769};
770
771/**
772 * parse_tag_70_packet - Parse and process FNEK-encrypted passphrase packet
773 * @filename: This function kmalloc's the memory for the filename
774 * @filename_size: This function sets this to the amount of memory
775 * kmalloc'd for the filename
776 * @packet_size: This function sets this to the the number of octets
777 * in the packet parsed
778 * @mount_crypt_stat: The mount-wide cryptographic context
779 * @data: The memory location containing the start of the tag 70
780 * packet
781 * @max_packet_size: The maximum legal size of the packet to be parsed
782 * from @data
783 *
784 * Returns zero on success; non-zero otherwise
785 */
786int
787ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
788 size_t *packet_size,
789 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
790 char *data, size_t max_packet_size)
791{
792 struct ecryptfs_parse_tag_70_packet_silly_stack *s;
793 int rc = 0;
794
795 (*packet_size) = 0;
796 (*filename_size) = 0;
797 (*filename) = NULL;
798 s = kmalloc(sizeof(*s), GFP_KERNEL);
799 if (!s) {
800 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
801 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
802 goto out;
803 }
804 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
805 if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) {
806 printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
807 "at least [%d]\n", __func__, max_packet_size,
808 (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1));
809 rc = -EINVAL;
810 goto out;
811 }
812 /* Octet 0: Tag 70 identifier
813 * Octets 1-N1: Tag 70 packet size (includes cipher identifier
814 * and block-aligned encrypted filename size)
815 * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE)
816 * Octet N2-N3: Cipher identifier (1 octet)
817 * Octets N3-N4: Block-aligned encrypted filename
818 * - Consists of a minimum number of random numbers, a \0
819 * separator, and then the filename */
820 if (data[(*packet_size)++] != ECRYPTFS_TAG_70_PACKET_TYPE) {
821 printk(KERN_WARNING "%s: Invalid packet tag [0x%.2x]; must be "
822 "tag [0x%.2x]\n", __func__,
823 data[((*packet_size) - 1)], ECRYPTFS_TAG_70_PACKET_TYPE);
824 rc = -EINVAL;
825 goto out;
826 }
827 rc = ecryptfs_parse_packet_length(&data[(*packet_size)],
828 &s->parsed_tag_70_packet_size,
829 &s->packet_size_len);
830 if (rc) {
831 printk(KERN_WARNING "%s: Error parsing packet length; "
832 "rc = [%d]\n", __func__, rc);
833 goto out;
834 }
835 s->block_aligned_filename_size = (s->parsed_tag_70_packet_size
836 - ECRYPTFS_SIG_SIZE - 1);
837 if ((1 + s->packet_size_len + s->parsed_tag_70_packet_size)
838 > max_packet_size) {
839 printk(KERN_WARNING "%s: max_packet_size is [%zd]; real packet "
840 "size is [%zd]\n", __func__, max_packet_size,
841 (1 + s->packet_size_len + 1
842 + s->block_aligned_filename_size));
843 rc = -EINVAL;
844 goto out;
845 }
846 (*packet_size) += s->packet_size_len;
847 ecryptfs_to_hex(s->fnek_sig_hex, &data[(*packet_size)],
848 ECRYPTFS_SIG_SIZE);
849 s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0';
850 (*packet_size) += ECRYPTFS_SIG_SIZE;
851 s->cipher_code = data[(*packet_size)++];
852 rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code);
853 if (rc) {
854 printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n",
855 __func__, s->cipher_code);
856 goto out;
857 }
858 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
859 &s->tfm_mutex,
860 s->cipher_string);
861 if (unlikely(rc)) {
862 printk(KERN_ERR "Internal error whilst attempting to get "
863 "tfm and mutex for cipher name [%s]; rc = [%d]\n",
864 s->cipher_string, rc);
865 goto out;
866 }
867 mutex_lock(s->tfm_mutex);
868 rc = virt_to_scatterlist(&data[(*packet_size)],
869 s->block_aligned_filename_size, &s->src_sg, 1);
870 if (rc != 1) {
871 printk(KERN_ERR "%s: Internal error whilst attempting to "
872 "convert encrypted filename memory to scatterlist; "
873 "expected rc = 1; got rc = [%d]. "
874 "block_aligned_filename_size = [%zd]\n", __func__, rc,
875 s->block_aligned_filename_size);
876 goto out_unlock;
877 }
878 (*packet_size) += s->block_aligned_filename_size;
879 s->decrypted_filename = kmalloc(s->block_aligned_filename_size,
880 GFP_KERNEL);
881 if (!s->decrypted_filename) {
882 printk(KERN_ERR "%s: Out of memory whilst attempting to "
883 "kmalloc [%zd] bytes\n", __func__,
884 s->block_aligned_filename_size);
885 rc = -ENOMEM;
886 goto out_unlock;
887 }
888 rc = virt_to_scatterlist(s->decrypted_filename,
889 s->block_aligned_filename_size, &s->dst_sg, 1);
890 if (rc != 1) {
891 printk(KERN_ERR "%s: Internal error whilst attempting to "
892 "convert decrypted filename memory to scatterlist; "
893 "expected rc = 1; got rc = [%d]. "
894 "block_aligned_filename_size = [%zd]\n", __func__, rc,
895 s->block_aligned_filename_size);
896 goto out_free_unlock;
897 }
898 /* The characters in the first block effectively do the job of
899 * the IV here, so we just use 0's for the IV. Note the
900 * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
901 * >= ECRYPTFS_MAX_IV_BYTES. */
902 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
903 s->desc.info = s->iv;
904 rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
905 s->fnek_sig_hex);
906 if (rc) {
907 printk(KERN_ERR "%s: Error attempting to find auth tok for "
908 "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
909 rc);
910 goto out_free_unlock;
911 }
912 /* TODO: Support other key modules than passphrase for
913 * filename encryption */
914 BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
915 rc = crypto_blkcipher_setkey(
916 s->desc.tfm,
917 s->auth_tok->token.password.session_key_encryption_key,
918 mount_crypt_stat->global_default_fn_cipher_key_bytes);
919 if (rc < 0) {
920 printk(KERN_ERR "%s: Error setting key for crypto context; "
921 "rc = [%d]. s->auth_tok->token.password.session_key_"
922 "encryption_key = [0x%p]; mount_crypt_stat->"
923 "global_default_fn_cipher_key_bytes = [%zd]\n", __func__,
924 rc,
925 s->auth_tok->token.password.session_key_encryption_key,
926 mount_crypt_stat->global_default_fn_cipher_key_bytes);
927 goto out_free_unlock;
928 }
929 rc = crypto_blkcipher_decrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
930 s->block_aligned_filename_size);
931 if (rc) {
932 printk(KERN_ERR "%s: Error attempting to decrypt filename; "
933 "rc = [%d]\n", __func__, rc);
934 goto out_free_unlock;
935 }
936 s->i = 0;
937 while (s->decrypted_filename[s->i] != '\0'
938 && s->i < s->block_aligned_filename_size)
939 s->i++;
940 if (s->i == s->block_aligned_filename_size) {
941 printk(KERN_WARNING "%s: Invalid tag 70 packet; could not "
942 "find valid separator between random characters and "
943 "the filename\n", __func__);
944 rc = -EINVAL;
945 goto out_free_unlock;
946 }
947 s->i++;
948 (*filename_size) = (s->block_aligned_filename_size - s->i);
949 if (!((*filename_size) > 0 && (*filename_size < PATH_MAX))) {
950 printk(KERN_WARNING "%s: Filename size is [%zd], which is "
951 "invalid\n", __func__, (*filename_size));
952 rc = -EINVAL;
953 goto out_free_unlock;
954 }
955 (*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL);
956 if (!(*filename)) {
957 printk(KERN_ERR "%s: Out of memory whilst attempting to "
958 "kmalloc [%zd] bytes\n", __func__,
959 ((*filename_size) + 1));
960 rc = -ENOMEM;
961 goto out_free_unlock;
962 }
963 memcpy((*filename), &s->decrypted_filename[s->i], (*filename_size));
964 (*filename)[(*filename_size)] = '\0';
965out_free_unlock:
966 kfree(s->decrypted_filename);
967out_unlock:
968 mutex_unlock(s->tfm_mutex);
969out:
970 if (rc) {
971 (*packet_size) = 0;
972 (*filename_size) = 0;
973 (*filename) = NULL;
974 }
975 kfree(s);
976 return rc;
977}
978
979static int
406ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok) 980ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok)
407{ 981{
408 int rc = 0; 982 int rc = 0;
@@ -897,30 +1471,6 @@ out:
897 return rc; 1471 return rc;
898} 1472}
899 1473
900static int
901ecryptfs_find_global_auth_tok_for_sig(
902 struct ecryptfs_global_auth_tok **global_auth_tok,
903 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
904{
905 struct ecryptfs_global_auth_tok *walker;
906 int rc = 0;
907
908 (*global_auth_tok) = NULL;
909 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
910 list_for_each_entry(walker,
911 &mount_crypt_stat->global_auth_tok_list,
912 mount_crypt_stat_list) {
913 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
914 (*global_auth_tok) = walker;
915 goto out;
916 }
917 }
918 rc = -EINVAL;
919out:
920 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
921 return rc;
922}
923
924/** 1474/**
925 * ecryptfs_verify_version 1475 * ecryptfs_verify_version
926 * @version: The version number to confirm 1476 * @version: The version number to confirm
@@ -990,43 +1540,6 @@ out:
990} 1540}
991 1541
992/** 1542/**
993 * ecryptfs_find_auth_tok_for_sig
994 * @auth_tok: Set to the matching auth_tok; NULL if not found
995 * @crypt_stat: inode crypt_stat crypto context
996 * @sig: Sig of auth_tok to find
997 *
998 * For now, this function simply looks at the registered auth_tok's
999 * linked off the mount_crypt_stat, so all the auth_toks that can be
1000 * used must be registered at mount time. This function could
1001 * potentially try a lot harder to find auth_tok's (e.g., by calling
1002 * out to ecryptfsd to dynamically retrieve an auth_tok object) so
1003 * that static registration of auth_tok's will no longer be necessary.
1004 *
1005 * Returns zero on no error; non-zero on error
1006 */
1007static int
1008ecryptfs_find_auth_tok_for_sig(
1009 struct ecryptfs_auth_tok **auth_tok,
1010 struct ecryptfs_crypt_stat *crypt_stat, char *sig)
1011{
1012 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1013 crypt_stat->mount_crypt_stat;
1014 struct ecryptfs_global_auth_tok *global_auth_tok;
1015 int rc = 0;
1016
1017 (*auth_tok) = NULL;
1018 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
1019 mount_crypt_stat, sig)) {
1020 struct key *auth_tok_key;
1021
1022 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
1023 sig);
1024 } else
1025 (*auth_tok) = global_auth_tok->global_auth_tok;
1026 return rc;
1027}
1028
1029/**
1030 * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok. 1543 * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok.
1031 * @auth_tok: The passphrase authentication token to use to encrypt the FEK 1544 * @auth_tok: The passphrase authentication token to use to encrypt the FEK
1032 * @crypt_stat: The cryptographic context 1545 * @crypt_stat: The cryptographic context
@@ -1256,7 +1769,8 @@ find_next_matching_auth_tok:
1256 rc = -EINVAL; 1769 rc = -EINVAL;
1257 goto out_wipe_list; 1770 goto out_wipe_list;
1258 } 1771 }
1259 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat, 1772 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
1773 crypt_stat->mount_crypt_stat,
1260 candidate_auth_tok_sig); 1774 candidate_auth_tok_sig);
1261 if (matching_auth_tok) { 1775 if (matching_auth_tok) {
1262 found_auth_tok = 1; 1776 found_auth_tok = 1;
@@ -1336,7 +1850,9 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
1336 int rc; 1850 int rc;
1337 1851
1338 rc = write_tag_66_packet(auth_tok->token.private_key.signature, 1852 rc = write_tag_66_packet(auth_tok->token.private_key.signature,
1339 ecryptfs_code_for_cipher_string(crypt_stat), 1853 ecryptfs_code_for_cipher_string(
1854 crypt_stat->cipher,
1855 crypt_stat->key_size),
1340 crypt_stat, &payload, &payload_len); 1856 crypt_stat, &payload, &payload_len);
1341 if (rc) { 1857 if (rc) {
1342 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); 1858 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
@@ -1696,7 +2212,8 @@ encrypted_session_key_set:
1696 dest[(*packet_size)++] = 0x04; /* version 4 */ 2212 dest[(*packet_size)++] = 0x04; /* version 4 */
1697 /* TODO: Break from RFC2440 so that arbitrary ciphers can be 2213 /* TODO: Break from RFC2440 so that arbitrary ciphers can be
1698 * specified with strings */ 2214 * specified with strings */
1699 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); 2215 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher,
2216 crypt_stat->key_size);
1700 if (cipher_code == 0) { 2217 if (cipher_code == 0) {
1701 ecryptfs_printk(KERN_WARNING, "Unable to generate code for " 2218 ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
1702 "cipher [%s]\n", crypt_stat->cipher); 2219 "cipher [%s]\n", crypt_stat->cipher);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index fd630713c5c7..789cf2e1be1e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -206,7 +206,9 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
206 ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, 206 ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher,
207 ecryptfs_opt_ecryptfs_key_bytes, 207 ecryptfs_opt_ecryptfs_key_bytes,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_err };
210 212
211static const match_table_t tokens = { 213static const match_table_t tokens = {
212 {ecryptfs_opt_sig, "sig=%s"}, 214 {ecryptfs_opt_sig, "sig=%s"},
@@ -217,6 +219,9 @@ static const match_table_t tokens = {
217 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"}, 219 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
218 {ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"}, 220 {ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"},
219 {ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"}, 221 {ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"},
222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
220 {ecryptfs_opt_err, NULL} 225 {ecryptfs_opt_err, NULL}
221}; 226};
222 227
@@ -281,8 +286,11 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
281 int rc = 0; 286 int rc = 0;
282 int sig_set = 0; 287 int sig_set = 0;
283 int cipher_name_set = 0; 288 int cipher_name_set = 0;
289 int fn_cipher_name_set = 0;
284 int cipher_key_bytes; 290 int cipher_key_bytes;
285 int cipher_key_bytes_set = 0; 291 int cipher_key_bytes_set = 0;
292 int fn_cipher_key_bytes;
293 int fn_cipher_key_bytes_set = 0;
286 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 294 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
287 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 295 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
288 substring_t args[MAX_OPT_ARGS]; 296 substring_t args[MAX_OPT_ARGS];
@@ -290,7 +298,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
290 char *sig_src; 298 char *sig_src;
291 char *cipher_name_dst; 299 char *cipher_name_dst;
292 char *cipher_name_src; 300 char *cipher_name_src;
301 char *fn_cipher_name_dst;
302 char *fn_cipher_name_src;
303 char *fnek_dst;
304 char *fnek_src;
293 char *cipher_key_bytes_src; 305 char *cipher_key_bytes_src;
306 char *fn_cipher_key_bytes_src;
294 307
295 if (!options) { 308 if (!options) {
296 rc = -EINVAL; 309 rc = -EINVAL;
@@ -322,10 +335,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
322 global_default_cipher_name; 335 global_default_cipher_name;
323 strncpy(cipher_name_dst, cipher_name_src, 336 strncpy(cipher_name_dst, cipher_name_src,
324 ECRYPTFS_MAX_CIPHER_NAME_SIZE); 337 ECRYPTFS_MAX_CIPHER_NAME_SIZE);
325 ecryptfs_printk(KERN_DEBUG, 338 cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
326 "The mount_crypt_stat "
327 "global_default_cipher_name set to: "
328 "[%s]\n", cipher_name_dst);
329 cipher_name_set = 1; 339 cipher_name_set = 1;
330 break; 340 break;
331 case ecryptfs_opt_ecryptfs_key_bytes: 341 case ecryptfs_opt_ecryptfs_key_bytes:
@@ -335,11 +345,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
335 &cipher_key_bytes_src, 0); 345 &cipher_key_bytes_src, 0);
336 mount_crypt_stat->global_default_cipher_key_size = 346 mount_crypt_stat->global_default_cipher_key_size =
337 cipher_key_bytes; 347 cipher_key_bytes;
338 ecryptfs_printk(KERN_DEBUG,
339 "The mount_crypt_stat "
340 "global_default_cipher_key_size "
341 "set to: [%d]\n", mount_crypt_stat->
342 global_default_cipher_key_size);
343 cipher_key_bytes_set = 1; 348 cipher_key_bytes_set = 1;
344 break; 349 break;
345 case ecryptfs_opt_passthrough: 350 case ecryptfs_opt_passthrough:
@@ -356,11 +361,51 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
356 mount_crypt_stat->flags |= 361 mount_crypt_stat->flags |=
357 ECRYPTFS_ENCRYPTED_VIEW_ENABLED; 362 ECRYPTFS_ENCRYPTED_VIEW_ENABLED;
358 break; 363 break;
364 case ecryptfs_opt_fnek_sig:
365 fnek_src = args[0].from;
366 fnek_dst =
367 mount_crypt_stat->global_default_fnek_sig;
368 strncpy(fnek_dst, fnek_src, ECRYPTFS_SIG_SIZE_HEX);
369 mount_crypt_stat->global_default_fnek_sig[
370 ECRYPTFS_SIG_SIZE_HEX] = '\0';
371 rc = ecryptfs_add_global_auth_tok(
372 mount_crypt_stat,
373 mount_crypt_stat->global_default_fnek_sig);
374 if (rc) {
375 printk(KERN_ERR "Error attempting to register "
376 "global fnek sig [%s]; rc = [%d]\n",
377 mount_crypt_stat->global_default_fnek_sig,
378 rc);
379 goto out;
380 }
381 mount_crypt_stat->flags |=
382 (ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES
383 | ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK);
384 break;
385 case ecryptfs_opt_fn_cipher:
386 fn_cipher_name_src = args[0].from;
387 fn_cipher_name_dst =
388 mount_crypt_stat->global_default_fn_cipher_name;
389 strncpy(fn_cipher_name_dst, fn_cipher_name_src,
390 ECRYPTFS_MAX_CIPHER_NAME_SIZE);
391 mount_crypt_stat->global_default_fn_cipher_name[
392 ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
393 fn_cipher_name_set = 1;
394 break;
395 case ecryptfs_opt_fn_cipher_key_bytes:
396 fn_cipher_key_bytes_src = args[0].from;
397 fn_cipher_key_bytes =
398 (int)simple_strtol(fn_cipher_key_bytes_src,
399 &fn_cipher_key_bytes_src, 0);
400 mount_crypt_stat->global_default_fn_cipher_key_bytes =
401 fn_cipher_key_bytes;
402 fn_cipher_key_bytes_set = 1;
403 break;
359 case ecryptfs_opt_err: 404 case ecryptfs_opt_err:
360 default: 405 default:
361 ecryptfs_printk(KERN_WARNING, 406 printk(KERN_WARNING
362 "eCryptfs: unrecognized option '%s'\n", 407 "%s: eCryptfs: unrecognized option [%s]\n",
363 p); 408 __func__, p);
364 } 409 }
365 } 410 }
366 if (!sig_set) { 411 if (!sig_set) {
@@ -374,33 +419,60 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
374 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 419 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
375 420
376 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); 421 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
377
378 strcpy(mount_crypt_stat->global_default_cipher_name, 422 strcpy(mount_crypt_stat->global_default_cipher_name,
379 ECRYPTFS_DEFAULT_CIPHER); 423 ECRYPTFS_DEFAULT_CIPHER);
380 } 424 }
381 if (!cipher_key_bytes_set) { 425 if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
426 && !fn_cipher_name_set)
427 strcpy(mount_crypt_stat->global_default_fn_cipher_name,
428 mount_crypt_stat->global_default_cipher_name);
429 if (!cipher_key_bytes_set)
382 mount_crypt_stat->global_default_cipher_key_size = 0; 430 mount_crypt_stat->global_default_cipher_key_size = 0;
383 } 431 if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
432 && !fn_cipher_key_bytes_set)
433 mount_crypt_stat->global_default_fn_cipher_key_bytes =
434 mount_crypt_stat->global_default_cipher_key_size;
384 mutex_lock(&key_tfm_list_mutex); 435 mutex_lock(&key_tfm_list_mutex);
385 if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, 436 if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
386 NULL)) 437 NULL)) {
387 rc = ecryptfs_add_new_key_tfm( 438 rc = ecryptfs_add_new_key_tfm(
388 NULL, mount_crypt_stat->global_default_cipher_name, 439 NULL, mount_crypt_stat->global_default_cipher_name,
389 mount_crypt_stat->global_default_cipher_key_size); 440 mount_crypt_stat->global_default_cipher_key_size);
390 mutex_unlock(&key_tfm_list_mutex); 441 if (rc) {
391 if (rc) { 442 printk(KERN_ERR "Error attempting to initialize "
392 printk(KERN_ERR "Error attempting to initialize cipher with " 443 "cipher with name = [%s] and key size = [%td]; "
393 "name = [%s] and key size = [%td]; rc = [%d]\n", 444 "rc = [%d]\n",
394 mount_crypt_stat->global_default_cipher_name, 445 mount_crypt_stat->global_default_cipher_name,
395 mount_crypt_stat->global_default_cipher_key_size, rc); 446 mount_crypt_stat->global_default_cipher_key_size,
396 rc = -EINVAL; 447 rc);
397 goto out; 448 rc = -EINVAL;
449 mutex_unlock(&key_tfm_list_mutex);
450 goto out;
451 }
398 } 452 }
453 if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
454 && !ecryptfs_tfm_exists(
455 mount_crypt_stat->global_default_fn_cipher_name, NULL)) {
456 rc = ecryptfs_add_new_key_tfm(
457 NULL, mount_crypt_stat->global_default_fn_cipher_name,
458 mount_crypt_stat->global_default_fn_cipher_key_bytes);
459 if (rc) {
460 printk(KERN_ERR "Error attempting to initialize "
461 "cipher with name = [%s] and key size = [%td]; "
462 "rc = [%d]\n",
463 mount_crypt_stat->global_default_fn_cipher_name,
464 mount_crypt_stat->global_default_fn_cipher_key_bytes,
465 rc);
466 rc = -EINVAL;
467 mutex_unlock(&key_tfm_list_mutex);
468 goto out;
469 }
470 }
471 mutex_unlock(&key_tfm_list_mutex);
399 rc = ecryptfs_init_global_auth_toks(mount_crypt_stat); 472 rc = ecryptfs_init_global_auth_toks(mount_crypt_stat);
400 if (rc) { 473 if (rc)
401 printk(KERN_WARNING "One or more global auth toks could not " 474 printk(KERN_WARNING "One or more global auth toks could not "
402 "properly register; rc = [%d]\n", rc); 475 "properly register; rc = [%d]\n", rc);
403 }
404out: 476out:
405 return rc; 477 return rc;
406} 478}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 6913f727624d..96ef51489e01 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -193,7 +193,7 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
193 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); 193 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
194 if (!(*daemon)) { 194 if (!(*daemon)) {
195 rc = -ENOMEM; 195 rc = -ENOMEM;
196 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " 196 printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
197 "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); 197 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
198 goto out; 198 goto out;
199 } 199 }
@@ -435,7 +435,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
435 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 435 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
436 if (!msg_ctx->msg) { 436 if (!msg_ctx->msg) {
437 rc = -ENOMEM; 437 rc = -ENOMEM;
438 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " 438 printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
439 "GFP_KERNEL memory\n", __func__, msg_size); 439 "GFP_KERNEL memory\n", __func__, msg_size);
440 goto unlock; 440 goto unlock;
441 } 441 }
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index efd95a0ed1ea..a67fea655f49 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -199,7 +199,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
199 if (!msg_ctx->msg) { 199 if (!msg_ctx->msg) {
200 rc = -ENOMEM; 200 rc = -ENOMEM;
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 201 printk(KERN_ERR "%s: Out of memory whilst attempting "
202 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__, 202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
203 (sizeof(*msg_ctx->msg) + data_size)); 203 (sizeof(*msg_ctx->msg) + data_size));
204 goto out_unlock; 204 goto out_unlock;
205 } 205 }
@@ -322,7 +322,7 @@ check_list:
322 if (count < total_length) { 322 if (count < total_length) {
323 rc = 0; 323 rc = 0;
324 printk(KERN_WARNING "%s: Only given user buffer of " 324 printk(KERN_WARNING "%s: Only given user buffer of "
325 "size [%Zd], but we need [%Zd] to read the " 325 "size [%zd], but we need [%zd] to read the "
326 "pending message\n", __func__, count, total_length); 326 "pending message\n", __func__, count, total_length);
327 goto out_unlock_msg_ctx; 327 goto out_unlock_msg_ctx;
328 } 328 }
@@ -376,7 +376,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size,
376 376
377 if ((sizeof(*msg) + msg->data_len) != data_size) { 377 if ((sizeof(*msg) + msg->data_len) != data_size) {
378 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = " 378 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
379 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__, 379 "[%zd]; data_size = [%zd]. Invalid packet.\n", __func__,
380 (sizeof(*msg) + msg->data_len), data_size); 380 (sizeof(*msg) + msg->data_len), data_size);
381 rc = -EINVAL; 381 rc = -EINVAL;
382 goto out; 382 goto out;
@@ -421,7 +421,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
421 data = kmalloc(count, GFP_KERNEL); 421 data = kmalloc(count, GFP_KERNEL);
422 if (!data) { 422 if (!data) {
423 printk(KERN_ERR "%s: Out of memory whilst attempting to " 423 printk(KERN_ERR "%s: Out of memory whilst attempting to "
424 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count); 424 "kmalloc([%zd], GFP_KERNEL)\n", __func__, count);
425 goto out; 425 goto out;
426 } 426 }
427 rc = copy_from_user(data, buf, count); 427 rc = copy_from_user(data, buf, count);
@@ -436,8 +436,8 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
436 case ECRYPTFS_MSG_RESPONSE: 436 case ECRYPTFS_MSG_RESPONSE:
437 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { 437 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
438 printk(KERN_WARNING "%s: Minimum acceptable packet " 438 printk(KERN_WARNING "%s: Minimum acceptable packet "
439 "size is [%Zd], but amount of data written is " 439 "size is [%zd], but amount of data written is "
440 "only [%Zd]. Discarding response packet.\n", 440 "only [%zd]. Discarding response packet.\n",
441 __func__, 441 __func__,
442 (1 + 4 + 1 + sizeof(struct ecryptfs_message)), 442 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
443 count); 443 count);
@@ -455,9 +455,9 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
455 } 455 }
456 i += packet_size_length; 456 i += packet_size_length;
457 if ((1 + 4 + packet_size_length + packet_size) != count) { 457 if ((1 + 4 + packet_size_length + packet_size) != count) {
458 printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])" 458 printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])"
459 " + packet_size([%Zd]))([%Zd]) != " 459 " + packet_size([%zd]))([%zd]) != "
460 "count([%Zd]). Invalid packet format.\n", 460 "count([%zd]). Invalid packet format.\n",
461 __func__, packet_size_length, packet_size, 461 __func__, packet_size_length, packet_size,
462 (1 + packet_size_length + packet_size), count); 462 (1 + packet_size_length + packet_size), count);
463 goto out_free; 463 goto out_free;
diff --git a/fs/exec.c b/fs/exec.c
index 9c33f542dc77..71a6efe5d8bd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -232,13 +232,13 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
232 232
233static int __bprm_mm_init(struct linux_binprm *bprm) 233static int __bprm_mm_init(struct linux_binprm *bprm)
234{ 234{
235 int err = -ENOMEM; 235 int err;
236 struct vm_area_struct *vma = NULL; 236 struct vm_area_struct *vma = NULL;
237 struct mm_struct *mm = bprm->mm; 237 struct mm_struct *mm = bprm->mm;
238 238
239 bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 239 bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
240 if (!vma) 240 if (!vma)
241 goto err; 241 return -ENOMEM;
242 242
243 down_write(&mm->mmap_sem); 243 down_write(&mm->mmap_sem);
244 vma->vm_mm = mm; 244 vma->vm_mm = mm;
@@ -251,28 +251,20 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
251 */ 251 */
252 vma->vm_end = STACK_TOP_MAX; 252 vma->vm_end = STACK_TOP_MAX;
253 vma->vm_start = vma->vm_end - PAGE_SIZE; 253 vma->vm_start = vma->vm_end - PAGE_SIZE;
254
255 vma->vm_flags = VM_STACK_FLAGS; 254 vma->vm_flags = VM_STACK_FLAGS;
256 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 255 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
257 err = insert_vm_struct(mm, vma); 256 err = insert_vm_struct(mm, vma);
258 if (err) { 257 if (err)
259 up_write(&mm->mmap_sem);
260 goto err; 258 goto err;
261 }
262 259
263 mm->stack_vm = mm->total_vm = 1; 260 mm->stack_vm = mm->total_vm = 1;
264 up_write(&mm->mmap_sem); 261 up_write(&mm->mmap_sem);
265
266 bprm->p = vma->vm_end - sizeof(void *); 262 bprm->p = vma->vm_end - sizeof(void *);
267
268 return 0; 263 return 0;
269
270err: 264err:
271 if (vma) { 265 up_write(&mm->mmap_sem);
272 bprm->vma = NULL; 266 bprm->vma = NULL;
273 kmem_cache_free(vm_area_cachep, vma); 267 kmem_cache_free(vm_area_cachep, vma);
274 }
275
276 return err; 268 return err;
277} 269}
278 270
@@ -1694,7 +1686,7 @@ int get_dumpable(struct mm_struct *mm)
1694 return (ret >= 2) ? 2 : ret; 1686 return (ret >= 2) ? 2 : ret;
1695} 1687}
1696 1688
1697int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1689void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1698{ 1690{
1699 struct core_state core_state; 1691 struct core_state core_state;
1700 char corename[CORENAME_MAX_SIZE + 1]; 1692 char corename[CORENAME_MAX_SIZE + 1];
@@ -1778,6 +1770,11 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1778 1770
1779 if (ispipe) { 1771 if (ispipe) {
1780 helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); 1772 helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
1773 if (!helper_argv) {
1774 printk(KERN_WARNING "%s failed to allocate memory\n",
1775 __func__);
1776 goto fail_unlock;
1777 }
1781 /* Terminate the string before the first option */ 1778 /* Terminate the string before the first option */
1782 delimit = strchr(corename, ' '); 1779 delimit = strchr(corename, ' ');
1783 if (delimit) 1780 if (delimit)
@@ -1845,5 +1842,5 @@ fail_unlock:
1845 put_cred(cred); 1842 put_cred(cred);
1846 coredump_finish(mm); 1843 coredump_finish(mm);
1847fail: 1844fail:
1848 return retval; 1845 return;
1849} 1846}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c827024..6c46c648430d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do { \
1225} while (0) 1225} while (0)
1226 1226
1227#ifdef CONFIG_SMP 1227#ifdef CONFIG_SMP
1228/* Each CPU can accumulate FBC_BATCH blocks in their local 1228/* Each CPU can accumulate percpu_counter_batch blocks in their local
1229 * counters. So we need to make sure we have free blocks more 1229 * counters. So we need to make sure we have free blocks more
1230 * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. 1230 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
1231 */ 1231 */
1232#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) 1232#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
1233#else 1233#else
1234#define EXT4_FREEBLOCKS_WATERMARK 0 1234#define EXT4_FREEBLOCKS_WATERMARK 0
1235#endif 1235#endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6702a49992a6..98d3fe7057ef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2498,7 +2498,7 @@ static int ext4_nonda_switch(struct super_block *sb)
2498 /* 2498 /*
2499 * switch to non delalloc mode if we are running low 2499 * switch to non delalloc mode if we are running low
2500 * on free block. The free block accounting via percpu 2500 * on free block. The free block accounting via percpu
2501 * counters can get slightly wrong with FBC_BATCH getting 2501 * counters can get slightly wrong with percpu_counter_batch getting
2502 * accumulated on each CPU without updating global counters 2502 * accumulated on each CPU without updating global counters
2503 * Delalloc need an accurate free block accounting. So switch 2503 * Delalloc need an accurate free block accounting. So switch
2504 * to non delalloc when we are near to error range. 2504 * to non delalloc when we are near to error range.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d0ff0b8cf309..e5eaa62fd17f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -421,9 +421,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
421 * If we're a pdlfush thread, then implement pdflush collision avoidance 421 * If we're a pdlfush thread, then implement pdflush collision avoidance
422 * against the entire list. 422 * against the entire list.
423 * 423 *
424 * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
425 * that it can be located for waiting on in __writeback_single_inode().
426 *
427 * If `bdi' is non-zero then we're being asked to writeback a specific queue. 424 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
428 * This function assumes that the blockdev superblock's inodes are backed by 425 * This function assumes that the blockdev superblock's inodes are backed by
429 * a variety of queues, so all inodes are searched. For other superblocks, 426 * a variety of queues, so all inodes are searched. For other superblocks,
@@ -443,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
443 struct writeback_control *wbc) 440 struct writeback_control *wbc)
444{ 441{
445 const unsigned long start = jiffies; /* livelock avoidance */ 442 const unsigned long start = jiffies; /* livelock avoidance */
443 int sync = wbc->sync_mode == WB_SYNC_ALL;
446 444
447 spin_lock(&inode_lock); 445 spin_lock(&inode_lock);
448 if (!wbc->for_kupdate || list_empty(&sb->s_io)) 446 if (!wbc->for_kupdate || list_empty(&sb->s_io))
@@ -499,10 +497,6 @@ void generic_sync_sb_inodes(struct super_block *sb,
499 __iget(inode); 497 __iget(inode);
500 pages_skipped = wbc->pages_skipped; 498 pages_skipped = wbc->pages_skipped;
501 __writeback_single_inode(inode, wbc); 499 __writeback_single_inode(inode, wbc);
502 if (wbc->sync_mode == WB_SYNC_HOLD) {
503 inode->dirtied_when = jiffies;
504 list_move(&inode->i_list, &sb->s_dirty);
505 }
506 if (current_is_pdflush()) 500 if (current_is_pdflush())
507 writeback_release(bdi); 501 writeback_release(bdi);
508 if (wbc->pages_skipped != pages_skipped) { 502 if (wbc->pages_skipped != pages_skipped) {
@@ -523,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb,
523 if (!list_empty(&sb->s_more_io)) 517 if (!list_empty(&sb->s_more_io))
524 wbc->more_io = 1; 518 wbc->more_io = 1;
525 } 519 }
526 spin_unlock(&inode_lock); 520
521 if (sync) {
522 struct inode *inode, *old_inode = NULL;
523
524 /*
525 * Data integrity sync. Must wait for all pages under writeback,
526 * because there may have been pages dirtied before our sync
527 * call, but which had writeout started before we write it out.
528 * In which case, the inode may not be on the dirty list, but
529 * we still have to wait for that writeout.
530 */
531 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
532 struct address_space *mapping;
533
534 if (inode->i_state & (I_FREEING|I_WILL_FREE))
535 continue;
536 mapping = inode->i_mapping;
537 if (mapping->nrpages == 0)
538 continue;
539 __iget(inode);
540 spin_unlock(&inode_lock);
541 /*
542 * We hold a reference to 'inode' so it couldn't have
543 * been removed from s_inodes list while we dropped the
544 * inode_lock. We cannot iput the inode now as we can
545 * be holding the last reference and we cannot iput it
546 * under inode_lock. So we keep the reference and iput
547 * it later.
548 */
549 iput(old_inode);
550 old_inode = inode;
551
552 filemap_fdatawait(mapping);
553
554 cond_resched();
555
556 spin_lock(&inode_lock);
557 }
558 spin_unlock(&inode_lock);
559 iput(old_inode);
560 } else
561 spin_unlock(&inode_lock);
562
527 return; /* Leave any unwritten inodes on s_io */ 563 return; /* Leave any unwritten inodes on s_io */
528} 564}
529EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); 565EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
@@ -588,8 +624,7 @@ restart:
588 624
589/* 625/*
590 * writeback and wait upon the filesystem's dirty inodes. The caller will 626 * writeback and wait upon the filesystem's dirty inodes. The caller will
591 * do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is 627 * do this in two passes - one to write, and one to wait.
592 * used to park the written inodes on sb->s_dirty for the wait pass.
593 * 628 *
594 * A finite limit is set on the number of pages which will be written. 629 * A finite limit is set on the number of pages which will be written.
595 * To prevent infinite livelock of sys_sync(). 630 * To prevent infinite livelock of sys_sync().
@@ -600,30 +635,21 @@ restart:
600void sync_inodes_sb(struct super_block *sb, int wait) 635void sync_inodes_sb(struct super_block *sb, int wait)
601{ 636{
602 struct writeback_control wbc = { 637 struct writeback_control wbc = {
603 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, 638 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
604 .range_start = 0, 639 .range_start = 0,
605 .range_end = LLONG_MAX, 640 .range_end = LLONG_MAX,
606 }; 641 };
607 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
608 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
609 642
610 wbc.nr_to_write = nr_dirty + nr_unstable + 643 if (!wait) {
611 (inodes_stat.nr_inodes - inodes_stat.nr_unused) + 644 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
612 nr_dirty + nr_unstable; 645 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
613 wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
614 sync_sb_inodes(sb, &wbc);
615}
616 646
617/* 647 wbc.nr_to_write = nr_dirty + nr_unstable +
618 * Rather lame livelock avoidance. 648 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
619 */ 649 } else
620static void set_sb_syncing(int val) 650 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
621{ 651
622 struct super_block *sb; 652 sync_sb_inodes(sb, &wbc);
623 spin_lock(&sb_lock);
624 list_for_each_entry_reverse(sb, &super_blocks, s_list)
625 sb->s_syncing = val;
626 spin_unlock(&sb_lock);
627} 653}
628 654
629/** 655/**
@@ -652,9 +678,6 @@ static void __sync_inodes(int wait)
652 spin_lock(&sb_lock); 678 spin_lock(&sb_lock);
653restart: 679restart:
654 list_for_each_entry(sb, &super_blocks, s_list) { 680 list_for_each_entry(sb, &super_blocks, s_list) {
655 if (sb->s_syncing)
656 continue;
657 sb->s_syncing = 1;
658 sb->s_count++; 681 sb->s_count++;
659 spin_unlock(&sb_lock); 682 spin_unlock(&sb_lock);
660 down_read(&sb->s_umount); 683 down_read(&sb->s_umount);
@@ -672,13 +695,10 @@ restart:
672 695
673void sync_inodes(int wait) 696void sync_inodes(int wait)
674{ 697{
675 set_sb_syncing(0);
676 __sync_inodes(0); 698 __sync_inodes(0);
677 699
678 if (wait) { 700 if (wait)
679 set_sb_syncing(0);
680 __sync_inodes(1); 701 __sync_inodes(1);
681 }
682} 702}
683 703
684/** 704/**
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4f3cab321415..99c99dfb0373 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -48,11 +48,13 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
48 size_t size; 48 size_t size;
49 49
50 if (!*ppos) { 50 if (!*ppos) {
51 long value;
51 struct fuse_conn *fc = fuse_ctl_file_conn_get(file); 52 struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
52 if (!fc) 53 if (!fc)
53 return 0; 54 return 0;
54 55
55 file->private_data=(void *)(long)atomic_read(&fc->num_waiting); 56 value = atomic_read(&fc->num_waiting);
57 file->private_data = (void *)value;
56 fuse_conn_put(fc); 58 fuse_conn_put(fc);
57 } 59 }
58 size = sprintf(tmp, "%ld\n", (long)file->private_data); 60 size = sprintf(tmp, "%ld\n", (long)file->private_data);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index fba571648a8e..e0c7ada08a1f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -269,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
269 * Called with fc->lock, unlocks it 269 * Called with fc->lock, unlocks it
270 */ 270 */
271static void request_end(struct fuse_conn *fc, struct fuse_req *req) 271static void request_end(struct fuse_conn *fc, struct fuse_req *req)
272 __releases(fc->lock) 272__releases(&fc->lock)
273{ 273{
274 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 274 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
275 req->end = NULL; 275 req->end = NULL;
@@ -293,13 +293,13 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
293 wake_up(&req->waitq); 293 wake_up(&req->waitq);
294 if (end) 294 if (end)
295 end(fc, req); 295 end(fc, req);
296 else 296 fuse_put_request(fc, req);
297 fuse_put_request(fc, req);
298} 297}
299 298
300static void wait_answer_interruptible(struct fuse_conn *fc, 299static void wait_answer_interruptible(struct fuse_conn *fc,
301 struct fuse_req *req) 300 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock) 301__releases(&fc->lock)
302__acquires(&fc->lock)
303{ 303{
304 if (signal_pending(current)) 304 if (signal_pending(current))
305 return; 305 return;
@@ -317,7 +317,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
317} 317}
318 318
319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock) 320__releases(&fc->lock)
321__acquires(&fc->lock)
321{ 322{
322 if (!fc->no_interrupt) { 323 if (!fc->no_interrupt) {
323 /* Any signal may interrupt this */ 324 /* Any signal may interrupt this */
@@ -380,7 +381,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
380 } 381 }
381} 382}
382 383
383void request_send(struct fuse_conn *fc, struct fuse_req *req) 384void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
384{ 385{
385 req->isreply = 1; 386 req->isreply = 1;
386 spin_lock(&fc->lock); 387 spin_lock(&fc->lock);
@@ -399,8 +400,8 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
399 spin_unlock(&fc->lock); 400 spin_unlock(&fc->lock);
400} 401}
401 402
402static void request_send_nowait_locked(struct fuse_conn *fc, 403static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
403 struct fuse_req *req) 404 struct fuse_req *req)
404{ 405{
405 req->background = 1; 406 req->background = 1;
406 fc->num_background++; 407 fc->num_background++;
@@ -414,11 +415,11 @@ static void request_send_nowait_locked(struct fuse_conn *fc,
414 flush_bg_queue(fc); 415 flush_bg_queue(fc);
415} 416}
416 417
417static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 418static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
418{ 419{
419 spin_lock(&fc->lock); 420 spin_lock(&fc->lock);
420 if (fc->connected) { 421 if (fc->connected) {
421 request_send_nowait_locked(fc, req); 422 fuse_request_send_nowait_locked(fc, req);
422 spin_unlock(&fc->lock); 423 spin_unlock(&fc->lock);
423 } else { 424 } else {
424 req->out.h.error = -ENOTCONN; 425 req->out.h.error = -ENOTCONN;
@@ -426,16 +427,16 @@ static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
426 } 427 }
427} 428}
428 429
429void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) 430void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
430{ 431{
431 req->isreply = 0; 432 req->isreply = 0;
432 request_send_nowait(fc, req); 433 fuse_request_send_nowait(fc, req);
433} 434}
434 435
435void request_send_background(struct fuse_conn *fc, struct fuse_req *req) 436void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
436{ 437{
437 req->isreply = 1; 438 req->isreply = 1;
438 request_send_nowait(fc, req); 439 fuse_request_send_nowait(fc, req);
439} 440}
440 441
441/* 442/*
@@ -443,10 +444,11 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
443 * 444 *
444 * fc->connected must have been checked previously 445 * fc->connected must have been checked previously
445 */ 446 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) 447void fuse_request_send_background_locked(struct fuse_conn *fc,
448 struct fuse_req *req)
447{ 449{
448 req->isreply = 1; 450 req->isreply = 1;
449 request_send_nowait_locked(fc, req); 451 fuse_request_send_nowait_locked(fc, req);
450} 452}
451 453
452/* 454/*
@@ -539,8 +541,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
539 BUG_ON(!cs->nr_segs); 541 BUG_ON(!cs->nr_segs);
540 cs->seglen = cs->iov[0].iov_len; 542 cs->seglen = cs->iov[0].iov_len;
541 cs->addr = (unsigned long) cs->iov[0].iov_base; 543 cs->addr = (unsigned long) cs->iov[0].iov_base;
542 cs->iov ++; 544 cs->iov++;
543 cs->nr_segs --; 545 cs->nr_segs--;
544 } 546 }
545 down_read(&current->mm->mmap_sem); 547 down_read(&current->mm->mmap_sem);
546 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, 548 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
@@ -589,9 +591,11 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
589 kunmap_atomic(mapaddr, KM_USER1); 591 kunmap_atomic(mapaddr, KM_USER1);
590 } 592 }
591 while (count) { 593 while (count) {
592 int err; 594 if (!cs->len) {
593 if (!cs->len && (err = fuse_copy_fill(cs))) 595 int err = fuse_copy_fill(cs);
594 return err; 596 if (err)
597 return err;
598 }
595 if (page) { 599 if (page) {
596 void *mapaddr = kmap_atomic(page, KM_USER1); 600 void *mapaddr = kmap_atomic(page, KM_USER1);
597 void *buf = mapaddr + offset; 601 void *buf = mapaddr + offset;
@@ -631,9 +635,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
631static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) 635static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
632{ 636{
633 while (size) { 637 while (size) {
634 int err; 638 if (!cs->len) {
635 if (!cs->len && (err = fuse_copy_fill(cs))) 639 int err = fuse_copy_fill(cs);
636 return err; 640 if (err)
641 return err;
642 }
637 fuse_copy_do(cs, &val, &size); 643 fuse_copy_do(cs, &val, &size);
638 } 644 }
639 return 0; 645 return 0;
@@ -664,6 +670,8 @@ static int request_pending(struct fuse_conn *fc)
664 670
665/* Wait until a request is available on the pending list */ 671/* Wait until a request is available on the pending list */
666static void request_wait(struct fuse_conn *fc) 672static void request_wait(struct fuse_conn *fc)
673__releases(&fc->lock)
674__acquires(&fc->lock)
667{ 675{
668 DECLARE_WAITQUEUE(wait, current); 676 DECLARE_WAITQUEUE(wait, current);
669 677
@@ -691,7 +699,7 @@ static void request_wait(struct fuse_conn *fc)
691 */ 699 */
692static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, 700static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
693 const struct iovec *iov, unsigned long nr_segs) 701 const struct iovec *iov, unsigned long nr_segs)
694 __releases(fc->lock) 702__releases(&fc->lock)
695{ 703{
696 struct fuse_copy_state cs; 704 struct fuse_copy_state cs;
697 struct fuse_in_header ih; 705 struct fuse_in_header ih;
@@ -813,6 +821,34 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
813 return err; 821 return err;
814} 822}
815 823
824static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
825 struct fuse_copy_state *cs)
826{
827 struct fuse_notify_poll_wakeup_out outarg;
828 int err;
829
830 if (size != sizeof(outarg))
831 return -EINVAL;
832
833 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
834 if (err)
835 return err;
836
837 return fuse_notify_poll_wakeup(fc, &outarg);
838}
839
840static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
841 unsigned int size, struct fuse_copy_state *cs)
842{
843 switch (code) {
844 case FUSE_NOTIFY_POLL:
845 return fuse_notify_poll(fc, size, cs);
846
847 default:
848 return -EINVAL;
849 }
850}
851
816/* Look up request on processing list by unique ID */ 852/* Look up request on processing list by unique ID */
817static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) 853static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
818{ 854{
@@ -876,9 +912,23 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
876 err = fuse_copy_one(&cs, &oh, sizeof(oh)); 912 err = fuse_copy_one(&cs, &oh, sizeof(oh));
877 if (err) 913 if (err)
878 goto err_finish; 914 goto err_finish;
915
916 err = -EINVAL;
917 if (oh.len != nbytes)
918 goto err_finish;
919
920 /*
921 * Zero oh.unique indicates unsolicited notification message
922 * and error contains notification code.
923 */
924 if (!oh.unique) {
925 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
926 fuse_copy_finish(&cs);
927 return err ? err : nbytes;
928 }
929
879 err = -EINVAL; 930 err = -EINVAL;
880 if (!oh.unique || oh.error <= -1000 || oh.error > 0 || 931 if (oh.error <= -1000 || oh.error > 0)
881 oh.len != nbytes)
882 goto err_finish; 932 goto err_finish;
883 933
884 spin_lock(&fc->lock); 934 spin_lock(&fc->lock);
@@ -966,6 +1016,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
966 * This function releases and reacquires fc->lock 1016 * This function releases and reacquires fc->lock
967 */ 1017 */
968static void end_requests(struct fuse_conn *fc, struct list_head *head) 1018static void end_requests(struct fuse_conn *fc, struct list_head *head)
1019__releases(&fc->lock)
1020__acquires(&fc->lock)
969{ 1021{
970 while (!list_empty(head)) { 1022 while (!list_empty(head)) {
971 struct fuse_req *req; 1023 struct fuse_req *req;
@@ -988,7 +1040,8 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
988 * locked). 1040 * locked).
989 */ 1041 */
990static void end_io_requests(struct fuse_conn *fc) 1042static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock) 1043__releases(&fc->lock)
1044__acquires(&fc->lock)
992{ 1045{
993 while (!list_empty(&fc->io)) { 1046 while (!list_empty(&fc->io)) {
994 struct fuse_req *req = 1047 struct fuse_req *req =
@@ -1002,11 +1055,11 @@ static void end_io_requests(struct fuse_conn *fc)
1002 wake_up(&req->waitq); 1055 wake_up(&req->waitq);
1003 if (end) { 1056 if (end) {
1004 req->end = NULL; 1057 req->end = NULL;
1005 /* The end function will consume this reference */
1006 __fuse_get_request(req); 1058 __fuse_get_request(req);
1007 spin_unlock(&fc->lock); 1059 spin_unlock(&fc->lock);
1008 wait_event(req->waitq, !req->locked); 1060 wait_event(req->waitq, !req->locked);
1009 end(fc, req); 1061 end(fc, req);
1062 fuse_put_request(fc, req);
1010 spin_lock(&fc->lock); 1063 spin_lock(&fc->lock);
1011 } 1064 }
1012 } 1065 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 95bc22bdd060..fdff346e96fd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -189,7 +189,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
189 parent = dget_parent(entry); 189 parent = dget_parent(entry);
190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode), 190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
191 &entry->d_name, &outarg); 191 &entry->d_name, &outarg);
192 request_send(fc, req); 192 fuse_request_send(fc, req);
193 dput(parent); 193 dput(parent);
194 err = req->out.h.error; 194 err = req->out.h.error;
195 fuse_put_request(fc, req); 195 fuse_put_request(fc, req);
@@ -204,7 +204,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
204 return 0; 204 return 0;
205 } 205 }
206 spin_lock(&fc->lock); 206 spin_lock(&fc->lock);
207 fi->nlookup ++; 207 fi->nlookup++;
208 spin_unlock(&fc->lock); 208 spin_unlock(&fc->lock);
209 } 209 }
210 fuse_put_request(fc, forget_req); 210 fuse_put_request(fc, forget_req);
@@ -283,7 +283,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
283 attr_version = fuse_get_attr_version(fc); 283 attr_version = fuse_get_attr_version(fc);
284 284
285 fuse_lookup_init(fc, req, nodeid, name, outarg); 285 fuse_lookup_init(fc, req, nodeid, name, outarg);
286 request_send(fc, req); 286 fuse_request_send(fc, req);
287 err = req->out.h.error; 287 err = req->out.h.error;
288 fuse_put_request(fc, req); 288 fuse_put_request(fc, req);
289 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 289 /* Zero nodeid is same as -ENOENT, but with valid timeout */
@@ -369,7 +369,7 @@ static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
369{ 369{
370 fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); 370 fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
371 ff->reserved_req->force = 1; 371 ff->reserved_req->force = 1;
372 request_send(fc, ff->reserved_req); 372 fuse_request_send(fc, ff->reserved_req);
373 fuse_put_request(fc, ff->reserved_req); 373 fuse_put_request(fc, ff->reserved_req);
374 kfree(ff); 374 kfree(ff);
375} 375}
@@ -408,7 +408,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
408 goto out_put_forget_req; 408 goto out_put_forget_req;
409 409
410 err = -ENOMEM; 410 err = -ENOMEM;
411 ff = fuse_file_alloc(); 411 ff = fuse_file_alloc(fc);
412 if (!ff) 412 if (!ff)
413 goto out_put_request; 413 goto out_put_request;
414 414
@@ -432,7 +432,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
432 req->out.args[0].value = &outentry; 432 req->out.args[0].value = &outentry;
433 req->out.args[1].size = sizeof(outopen); 433 req->out.args[1].size = sizeof(outopen);
434 req->out.args[1].value = &outopen; 434 req->out.args[1].value = &outopen;
435 request_send(fc, req); 435 fuse_request_send(fc, req);
436 err = req->out.h.error; 436 err = req->out.h.error;
437 if (err) { 437 if (err) {
438 if (err == -ENOSYS) 438 if (err == -ENOSYS)
@@ -502,7 +502,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
502 else 502 else
503 req->out.args[0].size = sizeof(outarg); 503 req->out.args[0].size = sizeof(outarg);
504 req->out.args[0].value = &outarg; 504 req->out.args[0].value = &outarg;
505 request_send(fc, req); 505 fuse_request_send(fc, req);
506 err = req->out.h.error; 506 err = req->out.h.error;
507 fuse_put_request(fc, req); 507 fuse_put_request(fc, req);
508 if (err) 508 if (err)
@@ -631,15 +631,17 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
631 req->in.numargs = 1; 631 req->in.numargs = 1;
632 req->in.args[0].size = entry->d_name.len + 1; 632 req->in.args[0].size = entry->d_name.len + 1;
633 req->in.args[0].value = entry->d_name.name; 633 req->in.args[0].value = entry->d_name.name;
634 request_send(fc, req); 634 fuse_request_send(fc, req);
635 err = req->out.h.error; 635 err = req->out.h.error;
636 fuse_put_request(fc, req); 636 fuse_put_request(fc, req);
637 if (!err) { 637 if (!err) {
638 struct inode *inode = entry->d_inode; 638 struct inode *inode = entry->d_inode;
639 639
640 /* Set nlink to zero so the inode can be cleared, if 640 /*
641 the inode does have more links this will be 641 * Set nlink to zero so the inode can be cleared, if the inode
642 discovered at the next lookup/getattr */ 642 * does have more links this will be discovered at the next
643 * lookup/getattr.
644 */
643 clear_nlink(inode); 645 clear_nlink(inode);
644 fuse_invalidate_attr(inode); 646 fuse_invalidate_attr(inode);
645 fuse_invalidate_attr(dir); 647 fuse_invalidate_attr(dir);
@@ -662,7 +664,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
662 req->in.numargs = 1; 664 req->in.numargs = 1;
663 req->in.args[0].size = entry->d_name.len + 1; 665 req->in.args[0].size = entry->d_name.len + 1;
664 req->in.args[0].value = entry->d_name.name; 666 req->in.args[0].value = entry->d_name.name;
665 request_send(fc, req); 667 fuse_request_send(fc, req);
666 err = req->out.h.error; 668 err = req->out.h.error;
667 fuse_put_request(fc, req); 669 fuse_put_request(fc, req);
668 if (!err) { 670 if (!err) {
@@ -695,7 +697,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
695 req->in.args[1].value = oldent->d_name.name; 697 req->in.args[1].value = oldent->d_name.name;
696 req->in.args[2].size = newent->d_name.len + 1; 698 req->in.args[2].size = newent->d_name.len + 1;
697 req->in.args[2].value = newent->d_name.name; 699 req->in.args[2].value = newent->d_name.name;
698 request_send(fc, req); 700 fuse_request_send(fc, req);
699 err = req->out.h.error; 701 err = req->out.h.error;
700 fuse_put_request(fc, req); 702 fuse_put_request(fc, req);
701 if (!err) { 703 if (!err) {
@@ -811,7 +813,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
811 else 813 else
812 req->out.args[0].size = sizeof(outarg); 814 req->out.args[0].size = sizeof(outarg);
813 req->out.args[0].value = &outarg; 815 req->out.args[0].value = &outarg;
814 request_send(fc, req); 816 fuse_request_send(fc, req);
815 err = req->out.h.error; 817 err = req->out.h.error;
816 fuse_put_request(fc, req); 818 fuse_put_request(fc, req);
817 if (!err) { 819 if (!err) {
@@ -911,7 +913,7 @@ static int fuse_access(struct inode *inode, int mask)
911 req->in.numargs = 1; 913 req->in.numargs = 1;
912 req->in.args[0].size = sizeof(inarg); 914 req->in.args[0].size = sizeof(inarg);
913 req->in.args[0].value = &inarg; 915 req->in.args[0].value = &inarg;
914 request_send(fc, req); 916 fuse_request_send(fc, req);
915 err = req->out.h.error; 917 err = req->out.h.error;
916 fuse_put_request(fc, req); 918 fuse_put_request(fc, req);
917 if (err == -ENOSYS) { 919 if (err == -ENOSYS) {
@@ -1033,7 +1035,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1033 req->num_pages = 1; 1035 req->num_pages = 1;
1034 req->pages[0] = page; 1036 req->pages[0] = page;
1035 fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1037 fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
1036 request_send(fc, req); 1038 fuse_request_send(fc, req);
1037 nbytes = req->out.args[0].size; 1039 nbytes = req->out.args[0].size;
1038 err = req->out.h.error; 1040 err = req->out.h.error;
1039 fuse_put_request(fc, req); 1041 fuse_put_request(fc, req);
@@ -1067,7 +1069,7 @@ static char *read_link(struct dentry *dentry)
1067 req->out.numargs = 1; 1069 req->out.numargs = 1;
1068 req->out.args[0].size = PAGE_SIZE - 1; 1070 req->out.args[0].size = PAGE_SIZE - 1;
1069 req->out.args[0].value = link; 1071 req->out.args[0].value = link;
1070 request_send(fc, req); 1072 fuse_request_send(fc, req);
1071 if (req->out.h.error) { 1073 if (req->out.h.error) {
1072 free_page((unsigned long) link); 1074 free_page((unsigned long) link);
1073 link = ERR_PTR(req->out.h.error); 1075 link = ERR_PTR(req->out.h.error);
@@ -1273,7 +1275,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1273 else 1275 else
1274 req->out.args[0].size = sizeof(outarg); 1276 req->out.args[0].size = sizeof(outarg);
1275 req->out.args[0].value = &outarg; 1277 req->out.args[0].value = &outarg;
1276 request_send(fc, req); 1278 fuse_request_send(fc, req);
1277 err = req->out.h.error; 1279 err = req->out.h.error;
1278 fuse_put_request(fc, req); 1280 fuse_put_request(fc, req);
1279 if (err) { 1281 if (err) {
@@ -1367,7 +1369,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1367 req->in.args[1].value = name; 1369 req->in.args[1].value = name;
1368 req->in.args[2].size = size; 1370 req->in.args[2].size = size;
1369 req->in.args[2].value = value; 1371 req->in.args[2].value = value;
1370 request_send(fc, req); 1372 fuse_request_send(fc, req);
1371 err = req->out.h.error; 1373 err = req->out.h.error;
1372 fuse_put_request(fc, req); 1374 fuse_put_request(fc, req);
1373 if (err == -ENOSYS) { 1375 if (err == -ENOSYS) {
@@ -1413,7 +1415,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1413 req->out.args[0].size = sizeof(outarg); 1415 req->out.args[0].size = sizeof(outarg);
1414 req->out.args[0].value = &outarg; 1416 req->out.args[0].value = &outarg;
1415 } 1417 }
1416 request_send(fc, req); 1418 fuse_request_send(fc, req);
1417 ret = req->out.h.error; 1419 ret = req->out.h.error;
1418 if (!ret) 1420 if (!ret)
1419 ret = size ? req->out.args[0].size : outarg.size; 1421 ret = size ? req->out.args[0].size : outarg.size;
@@ -1463,7 +1465,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1463 req->out.args[0].size = sizeof(outarg); 1465 req->out.args[0].size = sizeof(outarg);
1464 req->out.args[0].value = &outarg; 1466 req->out.args[0].value = &outarg;
1465 } 1467 }
1466 request_send(fc, req); 1468 fuse_request_send(fc, req);
1467 ret = req->out.h.error; 1469 ret = req->out.h.error;
1468 if (!ret) 1470 if (!ret)
1469 ret = size ? req->out.args[0].size : outarg.size; 1471 ret = size ? req->out.args[0].size : outarg.size;
@@ -1496,7 +1498,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1496 req->in.numargs = 1; 1498 req->in.numargs = 1;
1497 req->in.args[0].size = strlen(name) + 1; 1499 req->in.args[0].size = strlen(name) + 1;
1498 req->in.args[0].value = name; 1500 req->in.args[0].value = name;
1499 request_send(fc, req); 1501 fuse_request_send(fc, req);
1500 err = req->out.h.error; 1502 err = req->out.h.error;
1501 fuse_put_request(fc, req); 1503 fuse_put_request(fc, req);
1502 if (err == -ENOSYS) { 1504 if (err == -ENOSYS) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4c9ee7011265..e8162646a9b5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -39,14 +39,14 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
39 req->out.numargs = 1; 39 req->out.numargs = 1;
40 req->out.args[0].size = sizeof(*outargp); 40 req->out.args[0].size = sizeof(*outargp);
41 req->out.args[0].value = outargp; 41 req->out.args[0].value = outargp;
42 request_send(fc, req); 42 fuse_request_send(fc, req);
43 err = req->out.h.error; 43 err = req->out.h.error;
44 fuse_put_request(fc, req); 44 fuse_put_request(fc, req);
45 45
46 return err; 46 return err;
47} 47}
48 48
49struct fuse_file *fuse_file_alloc(void) 49struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
50{ 50{
51 struct fuse_file *ff; 51 struct fuse_file *ff;
52 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); 52 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
@@ -58,7 +58,12 @@ struct fuse_file *fuse_file_alloc(void)
58 } else { 58 } else {
59 INIT_LIST_HEAD(&ff->write_entry); 59 INIT_LIST_HEAD(&ff->write_entry);
60 atomic_set(&ff->count, 0); 60 atomic_set(&ff->count, 0);
61 spin_lock(&fc->lock);
62 ff->kh = ++fc->khctr;
63 spin_unlock(&fc->lock);
61 } 64 }
65 RB_CLEAR_NODE(&ff->polled_node);
66 init_waitqueue_head(&ff->poll_wait);
62 } 67 }
63 return ff; 68 return ff;
64} 69}
@@ -79,7 +84,6 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
79{ 84{
80 dput(req->misc.release.dentry); 85 dput(req->misc.release.dentry);
81 mntput(req->misc.release.vfsmount); 86 mntput(req->misc.release.vfsmount);
82 fuse_put_request(fc, req);
83} 87}
84 88
85static void fuse_file_put(struct fuse_file *ff) 89static void fuse_file_put(struct fuse_file *ff)
@@ -89,7 +93,7 @@ static void fuse_file_put(struct fuse_file *ff)
89 struct inode *inode = req->misc.release.dentry->d_inode; 93 struct inode *inode = req->misc.release.dentry->d_inode;
90 struct fuse_conn *fc = get_fuse_conn(inode); 94 struct fuse_conn *fc = get_fuse_conn(inode);
91 req->end = fuse_release_end; 95 req->end = fuse_release_end;
92 request_send_background(fc, req); 96 fuse_request_send_background(fc, req);
93 kfree(ff); 97 kfree(ff);
94 } 98 }
95} 99}
@@ -109,6 +113,7 @@ void fuse_finish_open(struct inode *inode, struct file *file,
109 113
110int fuse_open_common(struct inode *inode, struct file *file, int isdir) 114int fuse_open_common(struct inode *inode, struct file *file, int isdir)
111{ 115{
116 struct fuse_conn *fc = get_fuse_conn(inode);
112 struct fuse_open_out outarg; 117 struct fuse_open_out outarg;
113 struct fuse_file *ff; 118 struct fuse_file *ff;
114 int err; 119 int err;
@@ -121,7 +126,7 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
121 if (err) 126 if (err)
122 return err; 127 return err;
123 128
124 ff = fuse_file_alloc(); 129 ff = fuse_file_alloc(fc);
125 if (!ff) 130 if (!ff)
126 return -ENOMEM; 131 return -ENOMEM;
127 132
@@ -167,7 +172,11 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
167 172
168 spin_lock(&fc->lock); 173 spin_lock(&fc->lock);
169 list_del(&ff->write_entry); 174 list_del(&ff->write_entry);
175 if (!RB_EMPTY_NODE(&ff->polled_node))
176 rb_erase(&ff->polled_node, &fc->polled_files);
170 spin_unlock(&fc->lock); 177 spin_unlock(&fc->lock);
178
179 wake_up_interruptible_sync(&ff->poll_wait);
171 /* 180 /*
172 * Normally this will send the RELEASE request, 181 * Normally this will send the RELEASE request,
173 * however if some asynchronous READ or WRITE requests 182 * however if some asynchronous READ or WRITE requests
@@ -280,7 +289,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
280 req->in.args[0].size = sizeof(inarg); 289 req->in.args[0].size = sizeof(inarg);
281 req->in.args[0].value = &inarg; 290 req->in.args[0].value = &inarg;
282 req->force = 1; 291 req->force = 1;
283 request_send(fc, req); 292 fuse_request_send(fc, req);
284 err = req->out.h.error; 293 err = req->out.h.error;
285 fuse_put_request(fc, req); 294 fuse_put_request(fc, req);
286 if (err == -ENOSYS) { 295 if (err == -ENOSYS) {
@@ -344,7 +353,7 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
344 req->in.numargs = 1; 353 req->in.numargs = 1;
345 req->in.args[0].size = sizeof(inarg); 354 req->in.args[0].size = sizeof(inarg);
346 req->in.args[0].value = &inarg; 355 req->in.args[0].value = &inarg;
347 request_send(fc, req); 356 fuse_request_send(fc, req);
348 err = req->out.h.error; 357 err = req->out.h.error;
349 fuse_put_request(fc, req); 358 fuse_put_request(fc, req);
350 if (err == -ENOSYS) { 359 if (err == -ENOSYS) {
@@ -396,7 +405,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
396 inarg->read_flags |= FUSE_READ_LOCKOWNER; 405 inarg->read_flags |= FUSE_READ_LOCKOWNER;
397 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 406 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
398 } 407 }
399 request_send(fc, req); 408 fuse_request_send(fc, req);
400 return req->out.args[0].size; 409 return req->out.args[0].size;
401} 410}
402 411
@@ -493,7 +502,6 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
493 } 502 }
494 if (req->ff) 503 if (req->ff)
495 fuse_file_put(req->ff); 504 fuse_file_put(req->ff);
496 fuse_put_request(fc, req);
497} 505}
498 506
499static void fuse_send_readpages(struct fuse_req *req, struct file *file, 507static void fuse_send_readpages(struct fuse_req *req, struct file *file,
@@ -509,10 +517,11 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
509 struct fuse_file *ff = file->private_data; 517 struct fuse_file *ff = file->private_data;
510 req->ff = fuse_file_get(ff); 518 req->ff = fuse_file_get(ff);
511 req->end = fuse_readpages_end; 519 req->end = fuse_readpages_end;
512 request_send_background(fc, req); 520 fuse_request_send_background(fc, req);
513 } else { 521 } else {
514 request_send(fc, req); 522 fuse_request_send(fc, req);
515 fuse_readpages_end(fc, req); 523 fuse_readpages_end(fc, req);
524 fuse_put_request(fc, req);
516 } 525 }
517} 526}
518 527
@@ -543,7 +552,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
543 } 552 }
544 } 553 }
545 req->pages[req->num_pages] = page; 554 req->pages[req->num_pages] = page;
546 req->num_pages ++; 555 req->num_pages++;
547 return 0; 556 return 0;
548} 557}
549 558
@@ -636,7 +645,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
636 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 645 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
637 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 646 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
638 } 647 }
639 request_send(fc, req); 648 fuse_request_send(fc, req);
640 return req->misc.write.out.size; 649 return req->misc.write.out.size;
641} 650}
642 651
@@ -1042,7 +1051,6 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1042{ 1051{
1043 __free_page(req->pages[0]); 1052 __free_page(req->pages[0]);
1044 fuse_file_put(req->ff); 1053 fuse_file_put(req->ff);
1045 fuse_put_request(fc, req);
1046} 1054}
1047 1055
1048static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1056static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1060,6 +1068,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1060 1068
1061/* Called under fc->lock, may release and reacquire it */ 1069/* Called under fc->lock, may release and reacquire it */
1062static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) 1070static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1071__releases(&fc->lock)
1072__acquires(&fc->lock)
1063{ 1073{
1064 struct fuse_inode *fi = get_fuse_inode(req->inode); 1074 struct fuse_inode *fi = get_fuse_inode(req->inode);
1065 loff_t size = i_size_read(req->inode); 1075 loff_t size = i_size_read(req->inode);
@@ -1079,13 +1089,14 @@ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1079 1089
1080 req->in.args[1].size = inarg->size; 1090 req->in.args[1].size = inarg->size;
1081 fi->writectr++; 1091 fi->writectr++;
1082 request_send_background_locked(fc, req); 1092 fuse_request_send_background_locked(fc, req);
1083 return; 1093 return;
1084 1094
1085 out_free: 1095 out_free:
1086 fuse_writepage_finish(fc, req); 1096 fuse_writepage_finish(fc, req);
1087 spin_unlock(&fc->lock); 1097 spin_unlock(&fc->lock);
1088 fuse_writepage_free(fc, req); 1098 fuse_writepage_free(fc, req);
1099 fuse_put_request(fc, req);
1089 spin_lock(&fc->lock); 1100 spin_lock(&fc->lock);
1090} 1101}
1091 1102
@@ -1096,6 +1107,8 @@ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1096 * Called with fc->lock 1107 * Called with fc->lock
1097 */ 1108 */
1098void fuse_flush_writepages(struct inode *inode) 1109void fuse_flush_writepages(struct inode *inode)
1110__releases(&fc->lock)
1111__acquires(&fc->lock)
1099{ 1112{
1100 struct fuse_conn *fc = get_fuse_conn(inode); 1113 struct fuse_conn *fc = get_fuse_conn(inode);
1101 struct fuse_inode *fi = get_fuse_inode(inode); 1114 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -1325,7 +1338,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1325 req->out.numargs = 1; 1338 req->out.numargs = 1;
1326 req->out.args[0].size = sizeof(outarg); 1339 req->out.args[0].size = sizeof(outarg);
1327 req->out.args[0].value = &outarg; 1340 req->out.args[0].value = &outarg;
1328 request_send(fc, req); 1341 fuse_request_send(fc, req);
1329 err = req->out.h.error; 1342 err = req->out.h.error;
1330 fuse_put_request(fc, req); 1343 fuse_put_request(fc, req);
1331 if (!err) 1344 if (!err)
@@ -1357,7 +1370,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1357 return PTR_ERR(req); 1370 return PTR_ERR(req);
1358 1371
1359 fuse_lk_fill(req, file, fl, opcode, pid, flock); 1372 fuse_lk_fill(req, file, fl, opcode, pid, flock);
1360 request_send(fc, req); 1373 fuse_request_send(fc, req);
1361 err = req->out.h.error; 1374 err = req->out.h.error;
1362 /* locking is restartable */ 1375 /* locking is restartable */
1363 if (err == -EINTR) 1376 if (err == -EINTR)
@@ -1433,7 +1446,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1433 req->out.numargs = 1; 1446 req->out.numargs = 1;
1434 req->out.args[0].size = sizeof(outarg); 1447 req->out.args[0].size = sizeof(outarg);
1435 req->out.args[0].value = &outarg; 1448 req->out.args[0].value = &outarg;
1436 request_send(fc, req); 1449 fuse_request_send(fc, req);
1437 err = req->out.h.error; 1450 err = req->out.h.error;
1438 fuse_put_request(fc, req); 1451 fuse_put_request(fc, req);
1439 if (err == -ENOSYS) 1452 if (err == -ENOSYS)
@@ -1470,6 +1483,406 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1470 return retval; 1483 return retval;
1471} 1484}
1472 1485
1486static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1487 unsigned int nr_segs, size_t bytes, bool to_user)
1488{
1489 struct iov_iter ii;
1490 int page_idx = 0;
1491
1492 if (!bytes)
1493 return 0;
1494
1495 iov_iter_init(&ii, iov, nr_segs, bytes, 0);
1496
1497 while (iov_iter_count(&ii)) {
1498 struct page *page = pages[page_idx++];
1499 size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
1500 void *kaddr, *map;
1501
1502 kaddr = map = kmap(page);
1503
1504 while (todo) {
1505 char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
1506 size_t iov_len = ii.iov->iov_len - ii.iov_offset;
1507 size_t copy = min(todo, iov_len);
1508 size_t left;
1509
1510 if (!to_user)
1511 left = copy_from_user(kaddr, uaddr, copy);
1512 else
1513 left = copy_to_user(uaddr, kaddr, copy);
1514
1515 if (unlikely(left))
1516 return -EFAULT;
1517
1518 iov_iter_advance(&ii, copy);
1519 todo -= copy;
1520 kaddr += copy;
1521 }
1522
1523 kunmap(map);
1524 }
1525
1526 return 0;
1527}
1528
1529/*
1530 * For ioctls, there is no generic way to determine how much memory
1531 * needs to be read and/or written. Furthermore, ioctls are allowed
1532 * to dereference the passed pointer, so the parameter requires deep
1533 * copying but FUSE has no idea whatsoever about what to copy in or
1534 * out.
1535 *
1536 * This is solved by allowing FUSE server to retry ioctl with
1537 * necessary in/out iovecs. Let's assume the ioctl implementation
1538 * needs to read in the following structure.
1539 *
1540 * struct a {
1541 * char *buf;
1542 * size_t buflen;
1543 * }
1544 *
1545 * On the first callout to FUSE server, inarg->in_size and
1546 * inarg->out_size will be NULL; then, the server completes the ioctl
1547 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
1548 * the actual iov array to
1549 *
1550 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
1551 *
1552 * which tells FUSE to copy in the requested area and retry the ioctl.
1553 * On the second round, the server has access to the structure and
1554 * from that it can tell what to look for next, so on the invocation,
1555 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
1556 *
1557 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
1558 * { .iov_base = a.buf, .iov_len = a.buflen } }
1559 *
1560 * FUSE will copy both struct a and the pointed buffer from the
1561 * process doing the ioctl and retry ioctl with both struct a and the
1562 * buffer.
1563 *
1564 * This time, FUSE server has everything it needs and completes ioctl
1565 * without FUSE_IOCTL_RETRY which finishes the ioctl call.
1566 *
1567 * Copying data out works the same way.
1568 *
1569 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
1570 * automatically initializes in and out iovs by decoding @cmd with
1571 * _IOC_* macros and the server is not allowed to request RETRY. This
1572 * limits ioctl data transfers to well-formed ioctls and is the forced
1573 * behavior for all FUSE servers.
1574 */
1575static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
1576 unsigned long arg, unsigned int flags)
1577{
1578 struct inode *inode = file->f_dentry->d_inode;
1579 struct fuse_file *ff = file->private_data;
1580 struct fuse_conn *fc = get_fuse_conn(inode);
1581 struct fuse_ioctl_in inarg = {
1582 .fh = ff->fh,
1583 .cmd = cmd,
1584 .arg = arg,
1585 .flags = flags
1586 };
1587 struct fuse_ioctl_out outarg;
1588 struct fuse_req *req = NULL;
1589 struct page **pages = NULL;
1590 struct page *iov_page = NULL;
1591 struct iovec *in_iov = NULL, *out_iov = NULL;
1592 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
1593 size_t in_size, out_size, transferred;
1594 int err;
1595
1596 /* assume all the iovs returned by client always fits in a page */
1597 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1598
1599 if (!fuse_allow_task(fc, current))
1600 return -EACCES;
1601
1602 err = -EIO;
1603 if (is_bad_inode(inode))
1604 goto out;
1605
1606 err = -ENOMEM;
1607 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
1608 iov_page = alloc_page(GFP_KERNEL);
1609 if (!pages || !iov_page)
1610 goto out;
1611
1612 /*
1613 * If restricted, initialize IO parameters as encoded in @cmd.
1614 * RETRY from server is not allowed.
1615 */
1616 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
1617 struct iovec *iov = page_address(iov_page);
1618
1619 iov->iov_base = (void __user *)arg;
1620 iov->iov_len = _IOC_SIZE(cmd);
1621
1622 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1623 in_iov = iov;
1624 in_iovs = 1;
1625 }
1626
1627 if (_IOC_DIR(cmd) & _IOC_READ) {
1628 out_iov = iov;
1629 out_iovs = 1;
1630 }
1631 }
1632
1633 retry:
1634 inarg.in_size = in_size = iov_length(in_iov, in_iovs);
1635 inarg.out_size = out_size = iov_length(out_iov, out_iovs);
1636
1637 /*
1638 * Out data can be used either for actual out data or iovs,
1639 * make sure there always is at least one page.
1640 */
1641 out_size = max_t(size_t, out_size, PAGE_SIZE);
1642 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
1643
1644 /* make sure there are enough buffer pages and init request with them */
1645 err = -ENOMEM;
1646 if (max_pages > FUSE_MAX_PAGES_PER_REQ)
1647 goto out;
1648 while (num_pages < max_pages) {
1649 pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
1650 if (!pages[num_pages])
1651 goto out;
1652 num_pages++;
1653 }
1654
1655 req = fuse_get_req(fc);
1656 if (IS_ERR(req)) {
1657 err = PTR_ERR(req);
1658 req = NULL;
1659 goto out;
1660 }
1661 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1662 req->num_pages = num_pages;
1663
1664 /* okay, let's send it to the client */
1665 req->in.h.opcode = FUSE_IOCTL;
1666 req->in.h.nodeid = get_node_id(inode);
1667 req->in.numargs = 1;
1668 req->in.args[0].size = sizeof(inarg);
1669 req->in.args[0].value = &inarg;
1670 if (in_size) {
1671 req->in.numargs++;
1672 req->in.args[1].size = in_size;
1673 req->in.argpages = 1;
1674
1675 err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
1676 false);
1677 if (err)
1678 goto out;
1679 }
1680
1681 req->out.numargs = 2;
1682 req->out.args[0].size = sizeof(outarg);
1683 req->out.args[0].value = &outarg;
1684 req->out.args[1].size = out_size;
1685 req->out.argpages = 1;
1686 req->out.argvar = 1;
1687
1688 fuse_request_send(fc, req);
1689 err = req->out.h.error;
1690 transferred = req->out.args[1].size;
1691 fuse_put_request(fc, req);
1692 req = NULL;
1693 if (err)
1694 goto out;
1695
1696 /* did it ask for retry? */
1697 if (outarg.flags & FUSE_IOCTL_RETRY) {
1698 char *vaddr;
1699
1700 /* no retry if in restricted mode */
1701 err = -EIO;
1702 if (!(flags & FUSE_IOCTL_UNRESTRICTED))
1703 goto out;
1704
1705 in_iovs = outarg.in_iovs;
1706 out_iovs = outarg.out_iovs;
1707
1708 /*
1709 * Make sure things are in boundary, separate checks
1710 * are to protect against overflow.
1711 */
1712 err = -ENOMEM;
1713 if (in_iovs > FUSE_IOCTL_MAX_IOV ||
1714 out_iovs > FUSE_IOCTL_MAX_IOV ||
1715 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1716 goto out;
1717
1718 err = -EIO;
1719 if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
1720 goto out;
1721
1722 /* okay, copy in iovs and retry */
1723 vaddr = kmap_atomic(pages[0], KM_USER0);
1724 memcpy(page_address(iov_page), vaddr, transferred);
1725 kunmap_atomic(vaddr, KM_USER0);
1726
1727 in_iov = page_address(iov_page);
1728 out_iov = in_iov + in_iovs;
1729
1730 goto retry;
1731 }
1732
1733 err = -EIO;
1734 if (transferred > inarg.out_size)
1735 goto out;
1736
1737 err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
1738 out:
1739 if (req)
1740 fuse_put_request(fc, req);
1741 if (iov_page)
1742 __free_page(iov_page);
1743 while (num_pages)
1744 __free_page(pages[--num_pages]);
1745 kfree(pages);
1746
1747 return err ? err : outarg.result;
1748}
1749
1750static long fuse_file_ioctl(struct file *file, unsigned int cmd,
1751 unsigned long arg)
1752{
1753 return fuse_file_do_ioctl(file, cmd, arg, 0);
1754}
1755
1756static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1757 unsigned long arg)
1758{
1759 return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
1760}
1761
1762/*
1763 * All files which have been polled are linked to RB tree
1764 * fuse_conn->polled_files which is indexed by kh. Walk the tree and
1765 * find the matching one.
1766 */
1767static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
1768 struct rb_node **parent_out)
1769{
1770 struct rb_node **link = &fc->polled_files.rb_node;
1771 struct rb_node *last = NULL;
1772
1773 while (*link) {
1774 struct fuse_file *ff;
1775
1776 last = *link;
1777 ff = rb_entry(last, struct fuse_file, polled_node);
1778
1779 if (kh < ff->kh)
1780 link = &last->rb_left;
1781 else if (kh > ff->kh)
1782 link = &last->rb_right;
1783 else
1784 return link;
1785 }
1786
1787 if (parent_out)
1788 *parent_out = last;
1789 return link;
1790}
1791
1792/*
1793 * The file is about to be polled. Make sure it's on the polled_files
1794 * RB tree. Note that files once added to the polled_files tree are
1795 * not removed before the file is released. This is because a file
1796 * polled once is likely to be polled again.
1797 */
1798static void fuse_register_polled_file(struct fuse_conn *fc,
1799 struct fuse_file *ff)
1800{
1801 spin_lock(&fc->lock);
1802 if (RB_EMPTY_NODE(&ff->polled_node)) {
1803 struct rb_node **link, *parent;
1804
1805 link = fuse_find_polled_node(fc, ff->kh, &parent);
1806 BUG_ON(*link);
1807 rb_link_node(&ff->polled_node, parent, link);
1808 rb_insert_color(&ff->polled_node, &fc->polled_files);
1809 }
1810 spin_unlock(&fc->lock);
1811}
1812
1813static unsigned fuse_file_poll(struct file *file, poll_table *wait)
1814{
1815 struct inode *inode = file->f_dentry->d_inode;
1816 struct fuse_file *ff = file->private_data;
1817 struct fuse_conn *fc = get_fuse_conn(inode);
1818 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
1819 struct fuse_poll_out outarg;
1820 struct fuse_req *req;
1821 int err;
1822
1823 if (fc->no_poll)
1824 return DEFAULT_POLLMASK;
1825
1826 poll_wait(file, &ff->poll_wait, wait);
1827
1828 /*
1829 * Ask for notification iff there's someone waiting for it.
1830 * The client may ignore the flag and always notify.
1831 */
1832 if (waitqueue_active(&ff->poll_wait)) {
1833 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
1834 fuse_register_polled_file(fc, ff);
1835 }
1836
1837 req = fuse_get_req(fc);
1838 if (IS_ERR(req))
1839 return PTR_ERR(req);
1840
1841 req->in.h.opcode = FUSE_POLL;
1842 req->in.h.nodeid = get_node_id(inode);
1843 req->in.numargs = 1;
1844 req->in.args[0].size = sizeof(inarg);
1845 req->in.args[0].value = &inarg;
1846 req->out.numargs = 1;
1847 req->out.args[0].size = sizeof(outarg);
1848 req->out.args[0].value = &outarg;
1849 fuse_request_send(fc, req);
1850 err = req->out.h.error;
1851 fuse_put_request(fc, req);
1852
1853 if (!err)
1854 return outarg.revents;
1855 if (err == -ENOSYS) {
1856 fc->no_poll = 1;
1857 return DEFAULT_POLLMASK;
1858 }
1859 return POLLERR;
1860}
1861
1862/*
1863 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
1864 * wakes up the poll waiters.
1865 */
1866int fuse_notify_poll_wakeup(struct fuse_conn *fc,
1867 struct fuse_notify_poll_wakeup_out *outarg)
1868{
1869 u64 kh = outarg->kh;
1870 struct rb_node **link;
1871
1872 spin_lock(&fc->lock);
1873
1874 link = fuse_find_polled_node(fc, kh, NULL);
1875 if (*link) {
1876 struct fuse_file *ff;
1877
1878 ff = rb_entry(*link, struct fuse_file, polled_node);
1879 wake_up_interruptible_sync(&ff->poll_wait);
1880 }
1881
1882 spin_unlock(&fc->lock);
1883 return 0;
1884}
1885
1473static const struct file_operations fuse_file_operations = { 1886static const struct file_operations fuse_file_operations = {
1474 .llseek = fuse_file_llseek, 1887 .llseek = fuse_file_llseek,
1475 .read = do_sync_read, 1888 .read = do_sync_read,
@@ -1484,6 +1897,9 @@ static const struct file_operations fuse_file_operations = {
1484 .lock = fuse_file_lock, 1897 .lock = fuse_file_lock,
1485 .flock = fuse_file_flock, 1898 .flock = fuse_file_flock,
1486 .splice_read = generic_file_splice_read, 1899 .splice_read = generic_file_splice_read,
1900 .unlocked_ioctl = fuse_file_ioctl,
1901 .compat_ioctl = fuse_file_compat_ioctl,
1902 .poll = fuse_file_poll,
1487}; 1903};
1488 1904
1489static const struct file_operations fuse_direct_io_file_operations = { 1905static const struct file_operations fuse_direct_io_file_operations = {
@@ -1496,6 +1912,9 @@ static const struct file_operations fuse_direct_io_file_operations = {
1496 .fsync = fuse_fsync, 1912 .fsync = fuse_fsync,
1497 .lock = fuse_file_lock, 1913 .lock = fuse_file_lock,
1498 .flock = fuse_file_flock, 1914 .flock = fuse_file_flock,
1915 .unlocked_ioctl = fuse_file_ioctl,
1916 .compat_ioctl = fuse_file_compat_ioctl,
1917 .poll = fuse_file_poll,
1499 /* no mmap and splice_read */ 1918 /* no mmap and splice_read */
1500}; 1919};
1501 1920
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 35accfdd747f..5e64b815a5a1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -19,6 +19,8 @@
19#include <linux/backing-dev.h> 19#include <linux/backing-dev.h>
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h>
23#include <linux/poll.h>
22 24
23/** Max number of pages that can be used in a single read request */ 25/** Max number of pages that can be used in a single read request */
24#define FUSE_MAX_PAGES_PER_REQ 32 26#define FUSE_MAX_PAGES_PER_REQ 32
@@ -100,6 +102,9 @@ struct fuse_file {
100 /** Request reserved for flush and release */ 102 /** Request reserved for flush and release */
101 struct fuse_req *reserved_req; 103 struct fuse_req *reserved_req;
102 104
105 /** Kernel file handle guaranteed to be unique */
106 u64 kh;
107
103 /** File handle used by userspace */ 108 /** File handle used by userspace */
104 u64 fh; 109 u64 fh;
105 110
@@ -108,6 +113,12 @@ struct fuse_file {
108 113
109 /** Entry on inode's write_files list */ 114 /** Entry on inode's write_files list */
110 struct list_head write_entry; 115 struct list_head write_entry;
116
117 /** RB node to be linked on fuse_conn->polled_files */
118 struct rb_node polled_node;
119
120 /** Wait queue head for poll */
121 wait_queue_head_t poll_wait;
111}; 122};
112 123
113/** One input argument of a request */ 124/** One input argument of a request */
@@ -322,6 +333,12 @@ struct fuse_conn {
322 /** The list of requests under I/O */ 333 /** The list of requests under I/O */
323 struct list_head io; 334 struct list_head io;
324 335
336 /** The next unique kernel file handle */
337 u64 khctr;
338
339 /** rbtree of fuse_files waiting for poll events indexed by ph */
340 struct rb_root polled_files;
341
325 /** Number of requests currently in the background */ 342 /** Number of requests currently in the background */
326 unsigned num_background; 343 unsigned num_background;
327 344
@@ -355,19 +372,19 @@ struct fuse_conn {
355 /** Connection failed (version mismatch). Cannot race with 372 /** Connection failed (version mismatch). Cannot race with
356 setting other bitfields since it is only set once in INIT 373 setting other bitfields since it is only set once in INIT
357 reply, before any other request, and never cleared */ 374 reply, before any other request, and never cleared */
358 unsigned conn_error : 1; 375 unsigned conn_error:1;
359 376
360 /** Connection successful. Only set in INIT */ 377 /** Connection successful. Only set in INIT */
361 unsigned conn_init : 1; 378 unsigned conn_init:1;
362 379
363 /** Do readpages asynchronously? Only set in INIT */ 380 /** Do readpages asynchronously? Only set in INIT */
364 unsigned async_read : 1; 381 unsigned async_read:1;
365 382
366 /** Do not send separate SETATTR request before open(O_TRUNC) */ 383 /** Do not send separate SETATTR request before open(O_TRUNC) */
367 unsigned atomic_o_trunc : 1; 384 unsigned atomic_o_trunc:1;
368 385
369 /** Filesystem supports NFS exporting. Only set in INIT */ 386 /** Filesystem supports NFS exporting. Only set in INIT */
370 unsigned export_support : 1; 387 unsigned export_support:1;
371 388
372 /* 389 /*
373 * The following bitfields are only for optimization purposes 390 * The following bitfields are only for optimization purposes
@@ -375,43 +392,46 @@ struct fuse_conn {
375 */ 392 */
376 393
377 /** Is fsync not implemented by fs? */ 394 /** Is fsync not implemented by fs? */
378 unsigned no_fsync : 1; 395 unsigned no_fsync:1;
379 396
380 /** Is fsyncdir not implemented by fs? */ 397 /** Is fsyncdir not implemented by fs? */
381 unsigned no_fsyncdir : 1; 398 unsigned no_fsyncdir:1;
382 399
383 /** Is flush not implemented by fs? */ 400 /** Is flush not implemented by fs? */
384 unsigned no_flush : 1; 401 unsigned no_flush:1;
385 402
386 /** Is setxattr not implemented by fs? */ 403 /** Is setxattr not implemented by fs? */
387 unsigned no_setxattr : 1; 404 unsigned no_setxattr:1;
388 405
389 /** Is getxattr not implemented by fs? */ 406 /** Is getxattr not implemented by fs? */
390 unsigned no_getxattr : 1; 407 unsigned no_getxattr:1;
391 408
392 /** Is listxattr not implemented by fs? */ 409 /** Is listxattr not implemented by fs? */
393 unsigned no_listxattr : 1; 410 unsigned no_listxattr:1;
394 411
395 /** Is removexattr not implemented by fs? */ 412 /** Is removexattr not implemented by fs? */
396 unsigned no_removexattr : 1; 413 unsigned no_removexattr:1;
397 414
398 /** Are file locking primitives not implemented by fs? */ 415 /** Are file locking primitives not implemented by fs? */
399 unsigned no_lock : 1; 416 unsigned no_lock:1;
400 417
401 /** Is access not implemented by fs? */ 418 /** Is access not implemented by fs? */
402 unsigned no_access : 1; 419 unsigned no_access:1;
403 420
404 /** Is create not implemented by fs? */ 421 /** Is create not implemented by fs? */
405 unsigned no_create : 1; 422 unsigned no_create:1;
406 423
407 /** Is interrupt not implemented by fs? */ 424 /** Is interrupt not implemented by fs? */
408 unsigned no_interrupt : 1; 425 unsigned no_interrupt:1;
409 426
410 /** Is bmap not implemented by fs? */ 427 /** Is bmap not implemented by fs? */
411 unsigned no_bmap : 1; 428 unsigned no_bmap:1;
429
430 /** Is poll not implemented by fs? */
431 unsigned no_poll:1;
412 432
413 /** Do multi-page cached writes */ 433 /** Do multi-page cached writes */
414 unsigned big_writes : 1; 434 unsigned big_writes:1;
415 435
416 /** The number of requests waiting for completion */ 436 /** The number of requests waiting for completion */
417 atomic_t num_waiting; 437 atomic_t num_waiting;
@@ -445,6 +465,9 @@ struct fuse_conn {
445 465
446 /** Version counter for attribute changes */ 466 /** Version counter for attribute changes */
447 u64 attr_version; 467 u64 attr_version;
468
469 /** Called on final put */
470 void (*release)(struct fuse_conn *);
448}; 471};
449 472
450static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 473static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -499,7 +522,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
499 */ 522 */
500int fuse_open_common(struct inode *inode, struct file *file, int isdir); 523int fuse_open_common(struct inode *inode, struct file *file, int isdir);
501 524
502struct fuse_file *fuse_file_alloc(void); 525struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
503void fuse_file_free(struct fuse_file *ff); 526void fuse_file_free(struct fuse_file *ff);
504void fuse_finish_open(struct inode *inode, struct file *file, 527void fuse_finish_open(struct inode *inode, struct file *file,
505 struct fuse_file *ff, struct fuse_open_out *outarg); 528 struct fuse_file *ff, struct fuse_open_out *outarg);
@@ -519,6 +542,12 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
519 int isdir); 542 int isdir);
520 543
521/** 544/**
545 * Notify poll wakeup
546 */
547int fuse_notify_poll_wakeup(struct fuse_conn *fc,
548 struct fuse_notify_poll_wakeup_out *outarg);
549
550/**
522 * Initialize file operations on a regular file 551 * Initialize file operations on a regular file
523 */ 552 */
524void fuse_init_file_inode(struct inode *inode); 553void fuse_init_file_inode(struct inode *inode);
@@ -593,19 +622,20 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
593/** 622/**
594 * Send a request (synchronous) 623 * Send a request (synchronous)
595 */ 624 */
596void request_send(struct fuse_conn *fc, struct fuse_req *req); 625void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
597 626
598/** 627/**
599 * Send a request with no reply 628 * Send a request with no reply
600 */ 629 */
601void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); 630void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
602 631
603/** 632/**
604 * Send a request in the background 633 * Send a request in the background
605 */ 634 */
606void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 635void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
607 636
608void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req); 637void fuse_request_send_background_locked(struct fuse_conn *fc,
638 struct fuse_req *req);
609 639
610/* Abort all requests */ 640/* Abort all requests */
611void fuse_abort_conn(struct fuse_conn *fc); 641void fuse_abort_conn(struct fuse_conn *fc);
@@ -623,6 +653,11 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
623struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); 653struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
624 654
625/** 655/**
656 * Initialize fuse_conn
657 */
658int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
659
660/**
626 * Release reference to fuse_conn 661 * Release reference to fuse_conn
627 */ 662 */
628void fuse_conn_put(struct fuse_conn *fc); 663void fuse_conn_put(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2e99f34b4435..47c96fdca1ac 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -37,10 +37,10 @@ struct fuse_mount_data {
37 unsigned rootmode; 37 unsigned rootmode;
38 unsigned user_id; 38 unsigned user_id;
39 unsigned group_id; 39 unsigned group_id;
40 unsigned fd_present : 1; 40 unsigned fd_present:1;
41 unsigned rootmode_present : 1; 41 unsigned rootmode_present:1;
42 unsigned user_id_present : 1; 42 unsigned user_id_present:1;
43 unsigned group_id_present : 1; 43 unsigned group_id_present:1;
44 unsigned flags; 44 unsigned flags;
45 unsigned max_read; 45 unsigned max_read;
46 unsigned blksize; 46 unsigned blksize;
@@ -94,7 +94,7 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
94 req->in.numargs = 1; 94 req->in.numargs = 1;
95 req->in.args[0].size = sizeof(struct fuse_forget_in); 95 req->in.args[0].size = sizeof(struct fuse_forget_in);
96 req->in.args[0].value = inarg; 96 req->in.args[0].value = inarg;
97 request_send_noreply(fc, req); 97 fuse_request_send_noreply(fc, req);
98} 98}
99 99
100static void fuse_clear_inode(struct inode *inode) 100static void fuse_clear_inode(struct inode *inode)
@@ -250,7 +250,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
250 250
251 fi = get_fuse_inode(inode); 251 fi = get_fuse_inode(inode);
252 spin_lock(&fc->lock); 252 spin_lock(&fc->lock);
253 fi->nlookup ++; 253 fi->nlookup++;
254 spin_unlock(&fc->lock); 254 spin_unlock(&fc->lock);
255 fuse_change_attributes(inode, attr, attr_valid, attr_version); 255 fuse_change_attributes(inode, attr, attr_valid, attr_version);
256 256
@@ -269,7 +269,7 @@ static void fuse_send_destroy(struct fuse_conn *fc)
269 fc->destroy_req = NULL; 269 fc->destroy_req = NULL;
270 req->in.h.opcode = FUSE_DESTROY; 270 req->in.h.opcode = FUSE_DESTROY;
271 req->force = 1; 271 req->force = 1;
272 request_send(fc, req); 272 fuse_request_send(fc, req);
273 fuse_put_request(fc, req); 273 fuse_put_request(fc, req);
274 } 274 }
275} 275}
@@ -334,7 +334,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
334 req->out.args[0].size = 334 req->out.args[0].size =
335 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); 335 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
336 req->out.args[0].value = &outarg; 336 req->out.args[0].value = &outarg;
337 request_send(fc, req); 337 fuse_request_send(fc, req);
338 err = req->out.h.error; 338 err = req->out.h.error;
339 if (!err) 339 if (!err)
340 convert_fuse_statfs(buf, &outarg.st); 340 convert_fuse_statfs(buf, &outarg.st);
@@ -462,68 +462,69 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
462 return 0; 462 return 0;
463} 463}
464 464
465static struct fuse_conn *new_conn(struct super_block *sb) 465int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
466{ 466{
467 struct fuse_conn *fc;
468 int err; 467 int err;
469 468
470 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 469 memset(fc, 0, sizeof(*fc));
471 if (fc) { 470 spin_lock_init(&fc->lock);
472 spin_lock_init(&fc->lock); 471 mutex_init(&fc->inst_mutex);
473 mutex_init(&fc->inst_mutex); 472 atomic_set(&fc->count, 1);
474 atomic_set(&fc->count, 1); 473 init_waitqueue_head(&fc->waitq);
475 init_waitqueue_head(&fc->waitq); 474 init_waitqueue_head(&fc->blocked_waitq);
476 init_waitqueue_head(&fc->blocked_waitq); 475 init_waitqueue_head(&fc->reserved_req_waitq);
477 init_waitqueue_head(&fc->reserved_req_waitq); 476 INIT_LIST_HEAD(&fc->pending);
478 INIT_LIST_HEAD(&fc->pending); 477 INIT_LIST_HEAD(&fc->processing);
479 INIT_LIST_HEAD(&fc->processing); 478 INIT_LIST_HEAD(&fc->io);
480 INIT_LIST_HEAD(&fc->io); 479 INIT_LIST_HEAD(&fc->interrupts);
481 INIT_LIST_HEAD(&fc->interrupts); 480 INIT_LIST_HEAD(&fc->bg_queue);
482 INIT_LIST_HEAD(&fc->bg_queue); 481 INIT_LIST_HEAD(&fc->entry);
483 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
484 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
485 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
486 /* fuse does it's own writeback accounting */ 485 /* fuse does it's own writeback accounting */
487 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; 486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
488 fc->dev = sb->s_dev; 487 fc->khctr = 0;
489 err = bdi_init(&fc->bdi); 488 fc->polled_files = RB_ROOT;
490 if (err) 489 fc->dev = sb->s_dev;
491 goto error_kfree; 490 err = bdi_init(&fc->bdi);
492 if (sb->s_bdev) { 491 if (err)
493 err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", 492 goto error_mutex_destroy;
494 MAJOR(fc->dev), MINOR(fc->dev)); 493 if (sb->s_bdev) {
495 } else { 494 err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
496 err = bdi_register_dev(&fc->bdi, fc->dev); 495 MAJOR(fc->dev), MINOR(fc->dev));
497 } 496 } else {
498 if (err) 497 err = bdi_register_dev(&fc->bdi, fc->dev);
499 goto error_bdi_destroy;
500 /*
501 * For a single fuse filesystem use max 1% of dirty +
502 * writeback threshold.
503 *
504 * This gives about 1M of write buffer for memory maps on a
505 * machine with 1G and 10% dirty_ratio, which should be more
506 * than enough.
507 *
508 * Privileged users can raise it by writing to
509 *
510 * /sys/class/bdi/<bdi>/max_ratio
511 */
512 bdi_set_max_ratio(&fc->bdi, 1);
513 fc->reqctr = 0;
514 fc->blocked = 1;
515 fc->attr_version = 1;
516 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
517 } 498 }
518 return fc; 499 if (err)
500 goto error_bdi_destroy;
501 /*
502 * For a single fuse filesystem use max 1% of dirty +
503 * writeback threshold.
504 *
505 * This gives about 1M of write buffer for memory maps on a
506 * machine with 1G and 10% dirty_ratio, which should be more
507 * than enough.
508 *
509 * Privileged users can raise it by writing to
510 *
511 * /sys/class/bdi/<bdi>/max_ratio
512 */
513 bdi_set_max_ratio(&fc->bdi, 1);
514 fc->reqctr = 0;
515 fc->blocked = 1;
516 fc->attr_version = 1;
517 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
519 518
520error_bdi_destroy: 519 return 0;
520
521 error_bdi_destroy:
521 bdi_destroy(&fc->bdi); 522 bdi_destroy(&fc->bdi);
522error_kfree: 523 error_mutex_destroy:
523 mutex_destroy(&fc->inst_mutex); 524 mutex_destroy(&fc->inst_mutex);
524 kfree(fc); 525 return err;
525 return NULL;
526} 526}
527EXPORT_SYMBOL_GPL(fuse_conn_init);
527 528
528void fuse_conn_put(struct fuse_conn *fc) 529void fuse_conn_put(struct fuse_conn *fc)
529{ 530{
@@ -532,7 +533,7 @@ void fuse_conn_put(struct fuse_conn *fc)
532 fuse_request_free(fc->destroy_req); 533 fuse_request_free(fc->destroy_req);
533 mutex_destroy(&fc->inst_mutex); 534 mutex_destroy(&fc->inst_mutex);
534 bdi_destroy(&fc->bdi); 535 bdi_destroy(&fc->bdi);
535 kfree(fc); 536 fc->release(fc);
536 } 537 }
537} 538}
538 539
@@ -542,7 +543,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
542 return fc; 543 return fc;
543} 544}
544 545
545static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 546static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
546{ 547{
547 struct fuse_attr attr; 548 struct fuse_attr attr;
548 memset(&attr, 0, sizeof(attr)); 549 memset(&attr, 0, sizeof(attr));
@@ -553,8 +554,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
553 return fuse_iget(sb, 1, 0, &attr, 0, 0); 554 return fuse_iget(sb, 1, 0, &attr, 0, 0);
554} 555}
555 556
556struct fuse_inode_handle 557struct fuse_inode_handle {
557{
558 u64 nodeid; 558 u64 nodeid;
559 u32 generation; 559 u32 generation;
560}; 560};
@@ -761,7 +761,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
761 fc->max_write = max_t(unsigned, 4096, fc->max_write); 761 fc->max_write = max_t(unsigned, 4096, fc->max_write);
762 fc->conn_init = 1; 762 fc->conn_init = 1;
763 } 763 }
764 fuse_put_request(fc, req);
765 fc->blocked = 0; 764 fc->blocked = 0;
766 wake_up_all(&fc->blocked_waitq); 765 wake_up_all(&fc->blocked_waitq);
767} 766}
@@ -787,7 +786,12 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
787 req->out.args[0].size = sizeof(struct fuse_init_out); 786 req->out.args[0].size = sizeof(struct fuse_init_out);
788 req->out.args[0].value = &req->misc.init_out; 787 req->out.args[0].value = &req->misc.init_out;
789 req->end = process_init_reply; 788 req->end = process_init_reply;
790 request_send_background(fc, req); 789 fuse_request_send_background(fc, req);
790}
791
792static void fuse_free_conn(struct fuse_conn *fc)
793{
794 kfree(fc);
791} 795}
792 796
793static int fuse_fill_super(struct super_block *sb, void *data, int silent) 797static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -828,10 +832,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
828 if (file->f_op != &fuse_dev_operations) 832 if (file->f_op != &fuse_dev_operations)
829 return -EINVAL; 833 return -EINVAL;
830 834
831 fc = new_conn(sb); 835 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
832 if (!fc) 836 if (!fc)
833 return -ENOMEM; 837 return -ENOMEM;
834 838
839 err = fuse_conn_init(fc, sb);
840 if (err) {
841 kfree(fc);
842 return err;
843 }
844
845 fc->release = fuse_free_conn;
835 fc->flags = d.flags; 846 fc->flags = d.flags;
836 fc->user_id = d.user_id; 847 fc->user_id = d.user_id;
837 fc->group_id = d.group_id; 848 fc->group_id = d.group_id;
@@ -841,7 +852,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
841 sb->s_fs_info = fc; 852 sb->s_fs_info = fc;
842 853
843 err = -ENOMEM; 854 err = -ENOMEM;
844 root = get_root_inode(sb, d.rootmode); 855 root = fuse_get_root_inode(sb, d.rootmode);
845 if (!root) 856 if (!root)
846 goto err; 857 goto err;
847 858
@@ -952,7 +963,7 @@ static inline void unregister_fuseblk(void)
952 963
953static void fuse_inode_init_once(void *foo) 964static void fuse_inode_init_once(void *foo)
954{ 965{
955 struct inode * inode = foo; 966 struct inode *inode = foo;
956 967
957 inode_init_once(inode); 968 inode_init_once(inode);
958} 969}
@@ -1031,7 +1042,7 @@ static int __init fuse_init(void)
1031{ 1042{
1032 int res; 1043 int res;
1033 1044
1034 printk("fuse init (API version %i.%i)\n", 1045 printk(KERN_INFO "fuse init (API version %i.%i)\n",
1035 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 1046 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
1036 1047
1037 INIT_LIST_HEAD(&fuse_conn_list); 1048 INIT_LIST_HEAD(&fuse_conn_list);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0ab0c6f5f438..6903d37af037 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -252,6 +252,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
252 for (;;) { 252 for (;;) {
253 struct page *page; 253 struct page *page;
254 unsigned long nr, ret; 254 unsigned long nr, ret;
255 int ra;
255 256
256 /* nr is the maximum number of bytes to copy from this page */ 257 /* nr is the maximum number of bytes to copy from this page */
257 nr = huge_page_size(h); 258 nr = huge_page_size(h);
@@ -274,16 +275,19 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
274 */ 275 */
275 ret = len < nr ? len : nr; 276 ret = len < nr ? len : nr;
276 if (clear_user(buf, ret)) 277 if (clear_user(buf, ret))
277 ret = -EFAULT; 278 ra = -EFAULT;
279 else
280 ra = 0;
278 } else { 281 } else {
279 /* 282 /*
280 * We have the page, copy it to user space buffer. 283 * We have the page, copy it to user space buffer.
281 */ 284 */
282 ret = hugetlbfs_read_actor(page, offset, buf, len, nr); 285 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
286 ret = ra;
283 } 287 }
284 if (ret < 0) { 288 if (ra < 0) {
285 if (retval == 0) 289 if (retval == 0)
286 retval = ret; 290 retval = ra;
287 if (page) 291 if (page)
288 page_cache_release(page); 292 page_cache_release(page);
289 goto out; 293 goto out;
diff --git a/fs/inode.c b/fs/inode.c
index bd48e5e6d3e8..7a6e8c2ff7b1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -110,8 +110,8 @@ static void wake_up_inode(struct inode *inode)
110 110
111/** 111/**
112 * inode_init_always - perform inode structure intialisation 112 * inode_init_always - perform inode structure intialisation
113 * @sb - superblock inode belongs to. 113 * @sb: superblock inode belongs to
114 * @inode - inode to initialise 114 * @inode: inode to initialise
115 * 115 *
116 * These are initializations that need to be done on every inode 116 * These are initializations that need to be done on every inode
117 * allocation as the fields are not initialised by slab allocation. 117 * allocation as the fields are not initialised by slab allocation.
@@ -166,7 +166,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
166 mapping->a_ops = &empty_aops; 166 mapping->a_ops = &empty_aops;
167 mapping->host = inode; 167 mapping->host = inode;
168 mapping->flags = 0; 168 mapping->flags = 0;
169 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); 169 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
170 mapping->assoc_mapping = NULL; 170 mapping->assoc_mapping = NULL;
171 mapping->backing_dev_info = &default_backing_dev_info; 171 mapping->backing_dev_info = &default_backing_dev_info;
172 mapping->writeback_index = 0; 172 mapping->writeback_index = 0;
@@ -576,8 +576,8 @@ __inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
576 576
577/** 577/**
578 * inode_add_to_lists - add a new inode to relevant lists 578 * inode_add_to_lists - add a new inode to relevant lists
579 * @sb - superblock inode belongs to. 579 * @sb: superblock inode belongs to
580 * @inode - inode to mark in use 580 * @inode: inode to mark in use
581 * 581 *
582 * When an inode is allocated it needs to be accounted for, added to the in use 582 * When an inode is allocated it needs to be accounted for, added to the in use
583 * list, the owning superblock and the inode hash. This needs to be done under 583 * list, the owning superblock and the inode hash. This needs to be done under
@@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
601 * @sb: superblock 601 * @sb: superblock
602 * 602 *
603 * Allocates a new inode for given superblock. The default gfp_mask 603 * Allocates a new inode for given superblock. The default gfp_mask
604 * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. 604 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
605 * If HIGHMEM pages are unsuitable or it is known that pages allocated 605 * If HIGHMEM pages are unsuitable or it is known that pages allocated
606 * for the page cache are not reclaimable or migratable, 606 * for the page cache are not reclaimable or migratable,
607 * mapping_set_gfp_mask() must be called with suitable flags on the 607 * mapping_set_gfp_mask() must be called with suitable flags on the
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index f70433816a38..d4946c4c90e2 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -280,7 +280,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
280 return -EINVAL; 280 return -EINVAL;
281 281
282got_it: 282got_it:
283 pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page); 283 pos = page_offset(page) + p - (char *)page_address(page);
284 err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, 284 err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
285 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 285 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
286 if (err) 286 if (err)
diff --git a/fs/mpage.c b/fs/mpage.c
index 552b80b3facc..16c3ef37eae3 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -241,7 +241,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
241 first_hole = page_block; 241 first_hole = page_block;
242 page_block++; 242 page_block++;
243 block_in_file++; 243 block_in_file++;
244 clear_buffer_mapped(map_bh);
245 continue; 244 continue;
246 } 245 }
247 246
@@ -308,7 +307,10 @@ alloc_new:
308 goto alloc_new; 307 goto alloc_new;
309 } 308 }
310 309
311 if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) 310 relative_block = block_in_file - *first_logical_block;
311 nblocks = map_bh->b_size >> blkbits;
312 if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
313 (first_hole != blocks_per_page))
312 bio = mpage_bio_submit(READ, bio); 314 bio = mpage_bio_submit(READ, bio);
313 else 315 else
314 *last_block_in_bio = blocks[blocks_per_page - 1]; 316 *last_block_in_bio = blocks[blocks_per_page - 1];
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
index 335b003dddf9..0af3349de851 100644
--- a/fs/ncpfs/getopt.c
+++ b/fs/ncpfs/getopt.c
@@ -16,7 +16,6 @@
16 * @opts: an array of &struct option entries controlling parser operations 16 * @opts: an array of &struct option entries controlling parser operations
17 * @optopt: output; will contain the current option 17 * @optopt: output; will contain the current option
18 * @optarg: output; will contain the value (if one exists) 18 * @optarg: output; will contain the value (if one exists)
19 * @flag: output; may be NULL; should point to a long for or'ing flags
20 * @value: output; may be NULL; will be overwritten with the integer value 19 * @value: output; may be NULL; will be overwritten with the integer value
21 * of the current argument. 20 * of the current argument.
22 * 21 *
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3a8bdd7f5756..94063840832a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -396,7 +396,9 @@ static int show_smap(struct seq_file *m, void *v)
396 "Private_Clean: %8lu kB\n" 396 "Private_Clean: %8lu kB\n"
397 "Private_Dirty: %8lu kB\n" 397 "Private_Dirty: %8lu kB\n"
398 "Referenced: %8lu kB\n" 398 "Referenced: %8lu kB\n"
399 "Swap: %8lu kB\n", 399 "Swap: %8lu kB\n"
400 "KernelPageSize: %8lu kB\n"
401 "MMUPageSize: %8lu kB\n",
400 (vma->vm_end - vma->vm_start) >> 10, 402 (vma->vm_end - vma->vm_start) >> 10,
401 mss.resident >> 10, 403 mss.resident >> 10,
402 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 404 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -405,7 +407,9 @@ static int show_smap(struct seq_file *m, void *v)
405 mss.private_clean >> 10, 407 mss.private_clean >> 10,
406 mss.private_dirty >> 10, 408 mss.private_dirty >> 10,
407 mss.referenced >> 10, 409 mss.referenced >> 10,
408 mss.swap >> 10); 410 mss.swap >> 10,
411 vma_kernel_pagesize(vma) >> 10,
412 vma_mmu_pagesize(vma) >> 10);
409 413
410 if (m->count < m->size) /* vma is copied successfully */ 414 if (m->count < m->size) /* vma is copied successfully */
411 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 415 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
diff --git a/fs/select.c b/fs/select.c
index 87df51eadcf2..08b91beed806 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -109,11 +109,11 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
109void poll_initwait(struct poll_wqueues *pwq) 109void poll_initwait(struct poll_wqueues *pwq)
110{ 110{
111 init_poll_funcptr(&pwq->pt, __pollwait); 111 init_poll_funcptr(&pwq->pt, __pollwait);
112 pwq->polling_task = current;
112 pwq->error = 0; 113 pwq->error = 0;
113 pwq->table = NULL; 114 pwq->table = NULL;
114 pwq->inline_index = 0; 115 pwq->inline_index = 0;
115} 116}
116
117EXPORT_SYMBOL(poll_initwait); 117EXPORT_SYMBOL(poll_initwait);
118 118
119static void free_poll_entry(struct poll_table_entry *entry) 119static void free_poll_entry(struct poll_table_entry *entry)
@@ -142,12 +142,10 @@ void poll_freewait(struct poll_wqueues *pwq)
142 free_page((unsigned long) old); 142 free_page((unsigned long) old);
143 } 143 }
144} 144}
145
146EXPORT_SYMBOL(poll_freewait); 145EXPORT_SYMBOL(poll_freewait);
147 146
148static struct poll_table_entry *poll_get_entry(poll_table *_p) 147static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
149{ 148{
150 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
151 struct poll_table_page *table = p->table; 149 struct poll_table_page *table = p->table;
152 150
153 if (p->inline_index < N_INLINE_POLL_ENTRIES) 151 if (p->inline_index < N_INLINE_POLL_ENTRIES)
@@ -159,7 +157,6 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p)
159 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); 157 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
160 if (!new_table) { 158 if (!new_table) {
161 p->error = -ENOMEM; 159 p->error = -ENOMEM;
162 __set_current_state(TASK_RUNNING);
163 return NULL; 160 return NULL;
164 } 161 }
165 new_table->entry = new_table->entries; 162 new_table->entry = new_table->entries;
@@ -171,20 +168,75 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p)
171 return table->entry++; 168 return table->entry++;
172} 169}
173 170
171static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
172{
173 struct poll_wqueues *pwq = wait->private;
174 DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
175
176 /*
177 * Although this function is called under waitqueue lock, LOCK
178 * doesn't imply write barrier and the users expect write
179 * barrier semantics on wakeup functions. The following
180 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
181 * and is paired with set_mb() in poll_schedule_timeout.
182 */
183 smp_wmb();
184 pwq->triggered = 1;
185
186 /*
187 * Perform the default wake up operation using a dummy
188 * waitqueue.
189 *
190 * TODO: This is hacky but there currently is no interface to
191 * pass in @sync. @sync is scheduled to be removed and once
192 * that happens, wake_up_process() can be used directly.
193 */
194 return default_wake_function(&dummy_wait, mode, sync, key);
195}
196
174/* Add a new entry */ 197/* Add a new entry */
175static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 198static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
176 poll_table *p) 199 poll_table *p)
177{ 200{
178 struct poll_table_entry *entry = poll_get_entry(p); 201 struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
202 struct poll_table_entry *entry = poll_get_entry(pwq);
179 if (!entry) 203 if (!entry)
180 return; 204 return;
181 get_file(filp); 205 get_file(filp);
182 entry->filp = filp; 206 entry->filp = filp;
183 entry->wait_address = wait_address; 207 entry->wait_address = wait_address;
184 init_waitqueue_entry(&entry->wait, current); 208 init_waitqueue_func_entry(&entry->wait, pollwake);
209 entry->wait.private = pwq;
185 add_wait_queue(wait_address, &entry->wait); 210 add_wait_queue(wait_address, &entry->wait);
186} 211}
187 212
213int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
214 ktime_t *expires, unsigned long slack)
215{
216 int rc = -EINTR;
217
218 set_current_state(state);
219 if (!pwq->triggered)
220 rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
221 __set_current_state(TASK_RUNNING);
222
223 /*
224 * Prepare for the next iteration.
225 *
226 * The following set_mb() serves two purposes. First, it's
227 * the counterpart rmb of the wmb in pollwake() such that data
228 * written before wake up is always visible after wake up.
229 * Second, the full barrier guarantees that triggered clearing
230 * doesn't pass event check of the next iteration. Note that
231 * this problem doesn't exist for the first iteration as
232 * add_wait_queue() has full barrier semantics.
233 */
234 set_mb(pwq->triggered, 0);
235
236 return rc;
237}
238EXPORT_SYMBOL(poll_schedule_timeout);
239
188/** 240/**
189 * poll_select_set_timeout - helper function to setup the timeout value 241 * poll_select_set_timeout - helper function to setup the timeout value
190 * @to: pointer to timespec variable for the final timeout 242 * @to: pointer to timespec variable for the final timeout
@@ -340,8 +392,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
340 for (;;) { 392 for (;;) {
341 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 393 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
342 394
343 set_current_state(TASK_INTERRUPTIBLE);
344
345 inp = fds->in; outp = fds->out; exp = fds->ex; 395 inp = fds->in; outp = fds->out; exp = fds->ex;
346 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; 396 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
347 397
@@ -411,10 +461,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
411 to = &expire; 461 to = &expire;
412 } 462 }
413 463
414 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) 464 if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
465 to, slack))
415 timed_out = 1; 466 timed_out = 1;
416 } 467 }
417 __set_current_state(TASK_RUNNING);
418 468
419 poll_freewait(&table); 469 poll_freewait(&table);
420 470
@@ -666,7 +716,6 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
666 for (;;) { 716 for (;;) {
667 struct poll_list *walk; 717 struct poll_list *walk;
668 718
669 set_current_state(TASK_INTERRUPTIBLE);
670 for (walk = list; walk != NULL; walk = walk->next) { 719 for (walk = list; walk != NULL; walk = walk->next) {
671 struct pollfd * pfd, * pfd_end; 720 struct pollfd * pfd, * pfd_end;
672 721
@@ -709,10 +758,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
709 to = &expire; 758 to = &expire;
710 } 759 }
711 760
712 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) 761 if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
713 timed_out = 1; 762 timed_out = 1;
714 } 763 }
715 __set_current_state(TASK_RUNNING);
716 return count; 764 return count;
717} 765}
718 766
diff --git a/fs/sync.c b/fs/sync.c
index 0921d6d4b5e6..ac02b56548bc 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,7 +295,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
295 295
296 if (flags & SYNC_FILE_RANGE_WRITE) { 296 if (flags & SYNC_FILE_RANGE_WRITE) {
297 ret = __filemap_fdatawrite_range(mapping, offset, endbyte, 297 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
298 WB_SYNC_NONE); 298 WB_SYNC_ALL);
299 if (ret < 0) 299 if (ret < 0)
300 goto out; 300 goto out;
301 } 301 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0d7564b95f8e..89556ee72518 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -432,12 +432,19 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
432 int i, err; 432 int i, err;
433 struct ubifs_info *c = sb->s_fs_info; 433 struct ubifs_info *c = sb->s_fs_info;
434 struct writeback_control wbc = { 434 struct writeback_control wbc = {
435 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, 435 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
436 .range_start = 0, 436 .range_start = 0,
437 .range_end = LLONG_MAX, 437 .range_end = LLONG_MAX,
438 .nr_to_write = LONG_MAX, 438 .nr_to_write = LONG_MAX,
439 }; 439 };
440 440
441 /*
442 * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an
443 * advisory thing to help the file system shove lots of data into the
444 * queues. If some gets missed then it'll be picked up on the second
445 * '->sync_fs()' call, with non-zero @wait.
446 */
447
441 if (sb->s_flags & MS_RDONLY) 448 if (sb->s_flags & MS_RDONLY)
442 return 0; 449 return 0;
443 450
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 46d696b331e7..296c35cfb207 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -35,10 +35,6 @@
35#define smp_mb__before_atomic_inc() barrier() 35#define smp_mb__before_atomic_inc() barrier()
36#define smp_mb__after_atomic_inc() barrier() 36#define smp_mb__after_atomic_inc() barrier()
37 37
38typedef struct {
39 int counter;
40} atomic_t;
41
42#define ATOMIC_INIT(i) { (i) } 38#define ATOMIC_INIT(i) { (i) }
43#define atomic_read(v) ((v)->counter) 39#define atomic_read(v) ((v)->counter)
44#define atomic_set(v, i) (((v)->counter) = (i)) 40#define atomic_set(v, i) (((v)->counter) = (i))
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 8af276361bf2..37b82cb96c89 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -28,6 +28,17 @@ struct bug_entry {
28#define BUGFLAG_WARNING (1<<0) 28#define BUGFLAG_WARNING (1<<0)
29#endif /* CONFIG_GENERIC_BUG */ 29#endif /* CONFIG_GENERIC_BUG */
30 30
31/*
32 * Don't use BUG() or BUG_ON() unless there's really no way out; one
33 * example might be detecting data structure corruption in the middle
34 * of an operation that can't be backed out of. If the (sub)system
35 * can somehow continue operating, perhaps with reduced functionality,
36 * it's probably not BUG-worthy.
37 *
38 * If you're tempted to BUG(), think again: is completely giving up
39 * really the *only* solution? There are usually better options, where
40 * users don't need to reboot ASAP and can mostly shut down cleanly.
41 */
31#ifndef HAVE_ARCH_BUG 42#ifndef HAVE_ARCH_BUG
32#define BUG() do { \ 43#define BUG() do { \
33 printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ 44 printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
@@ -39,6 +50,12 @@ struct bug_entry {
39#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0) 50#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0)
40#endif 51#endif
41 52
53/*
54 * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
55 * significant issues that need prompt attention if they should ever
56 * appear at runtime. Use the versions with printk format strings
57 * to provide better diagnostics.
58 */
42#ifndef __WARN 59#ifndef __WARN
43#ifndef __ASSEMBLY__ 60#ifndef __ASSEMBLY__
44extern void warn_slowpath(const char *file, const int line, 61extern void warn_slowpath(const char *file, const int line,
diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h
index 33d7d04e4119..dbd6150763e9 100644
--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -2,7 +2,6 @@
2#define _ASM_GENERIC_LOCAL_H 2#define _ASM_GENERIC_LOCAL_H
3 3
4#include <linux/percpu.h> 4#include <linux/percpu.h>
5#include <linux/hardirq.h>
6#include <asm/atomic.h> 5#include <asm/atomic.h>
7#include <asm/types.h> 6#include <asm/types.h>
8 7
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 36fa286adad5..4c8d0afae711 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -69,15 +69,8 @@
69}) 69})
70#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ 70#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
71 71
72#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
73struct page;
74/* this is useful when inlined pfn_to_page is too big */
75extern struct page *pfn_to_page(unsigned long pfn);
76extern unsigned long page_to_pfn(struct page *page);
77#else
78#define page_to_pfn __page_to_pfn 72#define page_to_pfn __page_to_pfn
79#define pfn_to_page __pfn_to_page 73#define pfn_to_page __pfn_to_page
80#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
81 74
82#endif /* __ASSEMBLY__ */ 75#endif /* __ASSEMBLY__ */
83 76
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index 3a38ffe4a4f4..2eed30f84080 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -9,6 +9,7 @@
9 * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org> 9 * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org>
10 */ 10 */
11 11
12#include <linux/types.h>
12#include <asm/assembler.h> 13#include <asm/assembler.h>
13#include <asm/system.h> 14#include <asm/system.h>
14 15
@@ -17,13 +18,6 @@
17 * resource counting etc.. 18 * resource counting etc..
18 */ 19 */
19 20
20/*
21 * Make sure gcc doesn't try to be clever and move things around
22 * on us. We need to use _exactly_ the address the user gave us,
23 * not some alias that contains the same information.
24 */
25typedef struct { volatile int counter; } atomic_t;
26
27#define ATOMIC_INIT(i) { (i) } 21#define ATOMIC_INIT(i) { (i) }
28 22
29/** 23/**
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index 4915294fea63..eb0ab9d4ee77 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -1,7 +1,7 @@
1#ifndef __ARCH_M68K_ATOMIC__ 1#ifndef __ARCH_M68K_ATOMIC__
2#define __ARCH_M68K_ATOMIC__ 2#define __ARCH_M68K_ATOMIC__
3 3
4 4#include <linux/types.h>
5#include <asm/system.h> 5#include <asm/system.h>
6 6
7/* 7/*
@@ -13,7 +13,6 @@
13 * We do not have SMP m68k systems, so we don't have to deal with that. 13 * We do not have SMP m68k systems, so we don't have to deal with that.
14 */ 14 */
15 15
16typedef struct { int counter; } atomic_t;
17#define ATOMIC_INIT(i) { (i) } 16#define ATOMIC_INIT(i) { (i) }
18 17
19#define atomic_read(v) ((v)->counter) 18#define atomic_read(v) ((v)->counter)
diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h
index 27c9690b9574..bc064825f9b1 100644
--- a/include/asm-mn10300/atomic.h
+++ b/include/asm-mn10300/atomic.h
@@ -20,15 +20,6 @@
20 * resource counting etc.. 20 * resource counting etc..
21 */ 21 */
22 22
23/*
24 * Make sure gcc doesn't try to be clever and move things around
25 * on us. We need to use _exactly_ the address the user gave us,
26 * not some alias that contains the same information.
27 */
28typedef struct {
29 int counter;
30} atomic_t;
31
32#define ATOMIC_INIT(i) { (i) } 23#define ATOMIC_INIT(i) { (i) }
33 24
34#ifdef __KERNEL__ 25#ifdef __KERNEL__
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index b3b23540f14d..67ad67bed8c1 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -14,8 +14,7 @@
14#define _XTENSA_ATOMIC_H 14#define _XTENSA_ATOMIC_H
15 15
16#include <linux/stringify.h> 16#include <linux/stringify.h>
17 17#include <linux/types.h>
18typedef struct { volatile int counter; } atomic_t;
19 18
20#ifdef __KERNEL__ 19#ifdef __KERNEL__
21#include <asm/processor.h> 20#include <asm/processor.h>
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
index f4d05ccd731f..91a773993a5c 100644
--- a/include/linux/auto_dev-ioctl.h
+++ b/include/linux/auto_dev-ioctl.h
@@ -10,6 +10,7 @@
10#ifndef _LINUX_AUTO_DEV_IOCTL_H 10#ifndef _LINUX_AUTO_DEV_IOCTL_H
11#define _LINUX_AUTO_DEV_IOCTL_H 11#define _LINUX_AUTO_DEV_IOCTL_H
12 12
13#include <linux/string.h>
13#include <linux/types.h> 14#include <linux/types.h>
14 15
15#define AUTOFS_DEVICE_NAME "autofs" 16#define AUTOFS_DEVICE_NAME "autofs"
@@ -25,6 +26,60 @@
25 * An ioctl interface for autofs mount point control. 26 * An ioctl interface for autofs mount point control.
26 */ 27 */
27 28
29struct args_protover {
30 __u32 version;
31};
32
33struct args_protosubver {
34 __u32 sub_version;
35};
36
37struct args_openmount {
38 __u32 devid;
39};
40
41struct args_ready {
42 __u32 token;
43};
44
45struct args_fail {
46 __u32 token;
47 __s32 status;
48};
49
50struct args_setpipefd {
51 __s32 pipefd;
52};
53
54struct args_timeout {
55 __u64 timeout;
56};
57
58struct args_requester {
59 __u32 uid;
60 __u32 gid;
61};
62
63struct args_expire {
64 __u32 how;
65};
66
67struct args_askumount {
68 __u32 may_umount;
69};
70
71struct args_ismountpoint {
72 union {
73 struct args_in {
74 __u32 type;
75 } in;
76 struct args_out {
77 __u32 devid;
78 __u32 magic;
79 } out;
80 };
81};
82
28/* 83/*
29 * All the ioctls use this structure. 84 * All the ioctls use this structure.
30 * When sending a path size must account for the total length 85 * When sending a path size must account for the total length
@@ -39,20 +94,32 @@ struct autofs_dev_ioctl {
39 * including this struct */ 94 * including this struct */
40 __s32 ioctlfd; /* automount command fd */ 95 __s32 ioctlfd; /* automount command fd */
41 96
42 __u32 arg1; /* Command parameters */ 97 /* Command parameters */
43 __u32 arg2; 98
99 union {
100 struct args_protover protover;
101 struct args_protosubver protosubver;
102 struct args_openmount openmount;
103 struct args_ready ready;
104 struct args_fail fail;
105 struct args_setpipefd setpipefd;
106 struct args_timeout timeout;
107 struct args_requester requester;
108 struct args_expire expire;
109 struct args_askumount askumount;
110 struct args_ismountpoint ismountpoint;
111 };
44 112
45 char path[0]; 113 char path[0];
46}; 114};
47 115
48static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) 116static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
49{ 117{
118 memset(in, 0, sizeof(struct autofs_dev_ioctl));
50 in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; 119 in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
51 in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; 120 in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
52 in->size = sizeof(struct autofs_dev_ioctl); 121 in->size = sizeof(struct autofs_dev_ioctl);
53 in->ioctlfd = -1; 122 in->ioctlfd = -1;
54 in->arg1 = 0;
55 in->arg2 = 0;
56 return; 123 return;
57} 124}
58 125
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 2253716d4b92..55fa478bd639 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -29,10 +29,64 @@
29#define AUTOFS_EXP_IMMEDIATE 1 29#define AUTOFS_EXP_IMMEDIATE 1
30#define AUTOFS_EXP_LEAVES 2 30#define AUTOFS_EXP_LEAVES 2
31 31
32#define AUTOFS_TYPE_ANY 0x0000 32#define AUTOFS_TYPE_ANY 0U
33#define AUTOFS_TYPE_INDIRECT 0x0001 33#define AUTOFS_TYPE_INDIRECT 1U
34#define AUTOFS_TYPE_DIRECT 0x0002 34#define AUTOFS_TYPE_DIRECT 2U
35#define AUTOFS_TYPE_OFFSET 0x0004 35#define AUTOFS_TYPE_OFFSET 4U
36
37static inline void set_autofs_type_indirect(unsigned int *type)
38{
39 *type = AUTOFS_TYPE_INDIRECT;
40 return;
41}
42
43static inline unsigned int autofs_type_indirect(unsigned int type)
44{
45 return (type == AUTOFS_TYPE_INDIRECT);
46}
47
48static inline void set_autofs_type_direct(unsigned int *type)
49{
50 *type = AUTOFS_TYPE_DIRECT;
51 return;
52}
53
54static inline unsigned int autofs_type_direct(unsigned int type)
55{
56 return (type == AUTOFS_TYPE_DIRECT);
57}
58
59static inline void set_autofs_type_offset(unsigned int *type)
60{
61 *type = AUTOFS_TYPE_OFFSET;
62 return;
63}
64
65static inline unsigned int autofs_type_offset(unsigned int type)
66{
67 return (type == AUTOFS_TYPE_OFFSET);
68}
69
70static inline unsigned int autofs_type_trigger(unsigned int type)
71{
72 return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET);
73}
74
75/*
76 * This isn't really a type as we use it to say "no type set" to
77 * indicate we want to search for "any" mount in the
78 * autofs_dev_ioctl_ismountpoint() device ioctl function.
79 */
80static inline void set_autofs_type_any(unsigned int *type)
81{
82 *type = AUTOFS_TYPE_ANY;
83 return;
84}
85
86static inline unsigned int autofs_type_any(unsigned int type)
87{
88 return (type == AUTOFS_TYPE_ANY);
89}
36 90
37/* Daemon notification packet types */ 91/* Daemon notification packet types */
38enum autofs_notify { 92enum autofs_notify {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 6cbfbe297180..77b4a9e46004 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -18,6 +18,7 @@ struct pt_regs;
18#define BINPRM_BUF_SIZE 128 18#define BINPRM_BUF_SIZE 128
19 19
20#ifdef __KERNEL__ 20#ifdef __KERNEL__
21#include <linux/list.h>
21 22
22#define CORENAME_MAX_SIZE 128 23#define CORENAME_MAX_SIZE 128
23 24
@@ -106,7 +107,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
106extern int bprm_mm_init(struct linux_binprm *bprm); 107extern int bprm_mm_init(struct linux_binprm *bprm);
107extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); 108extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
108extern void install_exec_creds(struct linux_binprm *bprm); 109extern void install_exec_creds(struct linux_binprm *bprm);
109extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); 110extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
110extern int set_binfmt(struct linux_binfmt *new); 111extern int set_binfmt(struct linux_binfmt *new);
111extern void free_bprm(struct linux_binprm *); 112extern void free_bprm(struct linux_binprm *);
112 113
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1164963c3a85..08b78c09b09a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -329,13 +329,7 @@ struct cgroup_subsys {
329 struct cgroup *cgrp); 329 struct cgroup *cgrp);
330 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); 330 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
331 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); 331 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
332 /* 332
333 * This routine is called with the task_lock of mm->owner held
334 */
335 void (*mm_owner_changed)(struct cgroup_subsys *ss,
336 struct cgroup *old,
337 struct cgroup *new,
338 struct task_struct *p);
339 int subsys_id; 333 int subsys_id;
340 int active; 334 int active;
341 int disabled; 335 int disabled;
@@ -400,9 +394,6 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
400int cgroup_scan_tasks(struct cgroup_scanner *scan); 394int cgroup_scan_tasks(struct cgroup_scanner *scan);
401int cgroup_attach_task(struct cgroup *, struct task_struct *); 395int cgroup_attach_task(struct cgroup *, struct task_struct *);
402 396
403void cgroup_mm_owner_callbacks(struct task_struct *old,
404 struct task_struct *new);
405
406#else /* !CONFIG_CGROUPS */ 397#else /* !CONFIG_CGROUPS */
407 398
408static inline int cgroup_init_early(void) { return 0; } 399static inline int cgroup_init_early(void) { return 0; }
@@ -420,9 +411,6 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
420 return -EINVAL; 411 return -EINVAL;
421} 412}
422 413
423static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
424 struct task_struct *new) {}
425
426#endif /* !CONFIG_CGROUPS */ 414#endif /* !CONFIG_CGROUPS */
427 415
428#endif /* _LINUX_CGROUP_H */ 416#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8e540d32c9fe..51ea2bdea0f9 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern int current_cpuset_is_being_rebound(void);
78 78
79extern void rebuild_sched_domains(void); 79extern void rebuild_sched_domains(void);
80 80
81extern void cpuset_print_task_mems_allowed(struct task_struct *p);
82
81#else /* !CONFIG_CPUSETS */ 83#else /* !CONFIG_CPUSETS */
82 84
83static inline int cpuset_init_early(void) { return 0; } 85static inline int cpuset_init_early(void) { return 0; }
@@ -159,6 +161,10 @@ static inline void rebuild_sched_domains(void)
159 partition_sched_domains(1, NULL, NULL); 161 partition_sched_domains(1, NULL, NULL);
160} 162}
161 163
164static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
165{
166}
167
162#endif /* !CONFIG_CPUSETS */ 168#endif /* !CONFIG_CPUSETS */
163 169
164#endif /* _LINUX_CPUSET_H */ 170#endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb59673c60b1..d7eba77f666e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1133,7 +1133,6 @@ struct super_block {
1133 struct rw_semaphore s_umount; 1133 struct rw_semaphore s_umount;
1134 struct mutex s_lock; 1134 struct mutex s_lock;
1135 int s_count; 1135 int s_count;
1136 int s_syncing;
1137 int s_need_sync_fs; 1136 int s_need_sync_fs;
1138 atomic_t s_active; 1137 atomic_t s_active;
1139#ifdef CONFIG_SECURITY 1138#ifdef CONFIG_SECURITY
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 350fe9767bbc..162e5defe683 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -20,29 +20,27 @@
20 * 20 *
21 * 7.10 21 * 7.10
22 * - add nonseekable open flag 22 * - add nonseekable open flag
23 *
24 * 7.11
25 * - add IOCTL message
26 * - add unsolicited notification support
27 * - add POLL message and NOTIFY_POLL notification
23 */ 28 */
24 29
25#ifndef _LINUX_FUSE_H 30#ifndef _LINUX_FUSE_H
26#define _LINUX_FUSE_H 31#define _LINUX_FUSE_H
27 32
28#include <asm/types.h> 33#include <linux/types.h>
29#include <linux/major.h>
30 34
31/** Version number of this interface */ 35/** Version number of this interface */
32#define FUSE_KERNEL_VERSION 7 36#define FUSE_KERNEL_VERSION 7
33 37
34/** Minor version number of this interface */ 38/** Minor version number of this interface */
35#define FUSE_KERNEL_MINOR_VERSION 10 39#define FUSE_KERNEL_MINOR_VERSION 11
36 40
37/** The node ID of the root inode */ 41/** The node ID of the root inode */
38#define FUSE_ROOT_ID 1 42#define FUSE_ROOT_ID 1
39 43
40/** The major number of the fuse character device */
41#define FUSE_MAJOR MISC_MAJOR
42
43/** The minor number of the fuse character device */
44#define FUSE_MINOR 229
45
46/* Make sure all structures are padded to 64bit boundary, so 32bit 44/* Make sure all structures are padded to 64bit boundary, so 32bit
47 userspace works under 64bit kernels */ 45 userspace works under 64bit kernels */
48 46
@@ -151,6 +149,28 @@ struct fuse_file_lock {
151 */ 149 */
152#define FUSE_READ_LOCKOWNER (1 << 1) 150#define FUSE_READ_LOCKOWNER (1 << 1)
153 151
152/**
153 * Ioctl flags
154 *
155 * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
156 * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
157 * FUSE_IOCTL_RETRY: retry with new iovecs
158 *
159 * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
160 */
161#define FUSE_IOCTL_COMPAT (1 << 0)
162#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
163#define FUSE_IOCTL_RETRY (1 << 2)
164
165#define FUSE_IOCTL_MAX_IOV 256
166
167/**
168 * Poll flags
169 *
170 * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
171 */
172#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
173
154enum fuse_opcode { 174enum fuse_opcode {
155 FUSE_LOOKUP = 1, 175 FUSE_LOOKUP = 1,
156 FUSE_FORGET = 2, /* no reply */ 176 FUSE_FORGET = 2, /* no reply */
@@ -188,6 +208,13 @@ enum fuse_opcode {
188 FUSE_INTERRUPT = 36, 208 FUSE_INTERRUPT = 36,
189 FUSE_BMAP = 37, 209 FUSE_BMAP = 37,
190 FUSE_DESTROY = 38, 210 FUSE_DESTROY = 38,
211 FUSE_IOCTL = 39,
212 FUSE_POLL = 40,
213};
214
215enum fuse_notify_code {
216 FUSE_NOTIFY_POLL = 1,
217 FUSE_NOTIFY_CODE_MAX,
191}; 218};
192 219
193/* The read buffer is required to be at least 8k, but may be much larger */ 220/* The read buffer is required to be at least 8k, but may be much larger */
@@ -388,6 +415,38 @@ struct fuse_bmap_out {
388 __u64 block; 415 __u64 block;
389}; 416};
390 417
418struct fuse_ioctl_in {
419 __u64 fh;
420 __u32 flags;
421 __u32 cmd;
422 __u64 arg;
423 __u32 in_size;
424 __u32 out_size;
425};
426
427struct fuse_ioctl_out {
428 __s32 result;
429 __u32 flags;
430 __u32 in_iovs;
431 __u32 out_iovs;
432};
433
434struct fuse_poll_in {
435 __u64 fh;
436 __u64 kh;
437 __u32 flags;
438 __u32 padding;
439};
440
441struct fuse_poll_out {
442 __u32 revents;
443 __u32 padding;
444};
445
446struct fuse_notify_poll_wakeup_out {
447 __u64 kh;
448};
449
391struct fuse_in_header { 450struct fuse_in_header {
392 __u32 len; 451 __u32 len;
393 __u32 opcode; 452 __u32 opcode;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e8003afeffba..dd20cd78faa8 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -69,12 +69,6 @@ struct vm_area_struct;
69#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ 69#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
70 __GFP_HARDWALL | __GFP_HIGHMEM | \ 70 __GFP_HARDWALL | __GFP_HIGHMEM | \
71 __GFP_MOVABLE) 71 __GFP_MOVABLE)
72#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
73#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
74 __GFP_HARDWALL | __GFP_MOVABLE)
75#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
76 __GFP_HARDWALL | __GFP_HIGHMEM | \
77 __GFP_MOVABLE)
78 72
79#ifdef CONFIG_NUMA 73#ifdef CONFIG_NUMA
80#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) 74#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e1c8afc002c0..f1d2fba19ea0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -233,6 +233,10 @@ static inline unsigned long huge_page_size(struct hstate *h)
233 return (unsigned long)PAGE_SIZE << h->order; 233 return (unsigned long)PAGE_SIZE << h->order;
234} 234}
235 235
236extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
237
238extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
239
236static inline unsigned long huge_page_mask(struct hstate *h) 240static inline unsigned long huge_page_mask(struct hstate *h)
237{ 241{
238 return h->mask; 242 return h->mask;
@@ -273,6 +277,8 @@ struct hstate {};
273#define hstate_inode(i) NULL 277#define hstate_inode(i) NULL
274#define huge_page_size(h) PAGE_SIZE 278#define huge_page_size(h) PAGE_SIZE
275#define huge_page_mask(h) PAGE_MASK 279#define huge_page_mask(h) PAGE_MASK
280#define vma_kernel_pagesize(v) PAGE_SIZE
281#define vma_mmu_pagesize(v) PAGE_SIZE
276#define huge_page_order(h) 0 282#define huge_page_order(h) 0
277#define huge_page_shift(h) PAGE_SHIFT 283#define huge_page_shift(h) PAGE_SHIFT
278static inline unsigned int pages_per_huge_page(struct hstate *h) 284static inline unsigned int pages_per_huge_page(struct hstate *h)
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index a8f84c01f82e..8137f660a5cc 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -234,6 +234,9 @@ struct twl4030_gpio_platform_data {
234 /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ 234 /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
235 u8 mmc_cd; 235 u8 mmc_cd;
236 236
237 /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */
238 u32 debounce;
239
237 /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup 240 /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
238 * should be enabled. Else, if that bit is set in "pulldowns", 241 * should be enabled. Else, if that bit is set in "pulldowns",
239 * that pulldown is enabled. Don't waste power by letting any 242 * that pulldown is enabled. Don't waste power by letting any
@@ -307,12 +310,6 @@ int twl4030_sih_setup(int module);
307#define TWL4030_VAUX3_DEV_GRP 0x1F 310#define TWL4030_VAUX3_DEV_GRP 0x1F
308#define TWL4030_VAUX3_DEDICATED 0x22 311#define TWL4030_VAUX3_DEDICATED 0x22
309 312
310/*
311 * Exported TWL4030 GPIO APIs
312 *
313 * WARNING -- use standard GPIO and IRQ calls instead; these will vanish.
314 */
315int twl4030_set_gpio_debounce(int gpio, int enable);
316 313
317#if defined(CONFIG_TWL4030_BCI_BATTERY) || \ 314#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
318 defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) 315 defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index db5ef8ae1ab9..3644f6323384 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,18 +32,14 @@
32# define SUPPORT_VLB_SYNC 1 32# define SUPPORT_VLB_SYNC 1
33#endif 33#endif
34 34
35typedef unsigned char byte; /* used everywhere */
36
37/* 35/*
38 * Probably not wise to fiddle with these 36 * Probably not wise to fiddle with these
39 */ 37 */
38#define IDE_DEFAULT_MAX_FAILURES 1
40#define ERROR_MAX 8 /* Max read/write errors per sector */ 39#define ERROR_MAX 8 /* Max read/write errors per sector */
41#define ERROR_RESET 3 /* Reset controller every 4th retry */ 40#define ERROR_RESET 3 /* Reset controller every 4th retry */
42#define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ 41#define ERROR_RECAL 1 /* Recalibrate every 2nd retry */
43 42
44#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif))
45#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
46
47/* 43/*
48 * Definitions for accessing IDE controller registers 44 * Definitions for accessing IDE controller registers
49 */ 45 */
@@ -185,9 +181,6 @@ typedef struct hw_regs_s {
185 unsigned long config; 181 unsigned long config;
186} hw_regs_t; 182} hw_regs_t;
187 183
188void ide_init_port_data(struct hwif_s *, unsigned int);
189void ide_init_port_hw(struct hwif_s *, hw_regs_t *);
190
191static inline void ide_std_init_ports(hw_regs_t *hw, 184static inline void ide_std_init_ports(hw_regs_t *hw,
192 unsigned long io_addr, 185 unsigned long io_addr,
193 unsigned long ctl_addr) 186 unsigned long ctl_addr)
@@ -433,18 +426,14 @@ struct ide_atapi_pc {
433 struct idetape_bh *bh; 426 struct idetape_bh *bh;
434 char *b_data; 427 char *b_data;
435 428
436 /* idescsi only for now */
437 struct scatterlist *sg; 429 struct scatterlist *sg;
438 unsigned int sg_cnt; 430 unsigned int sg_cnt;
439 431
440 struct scsi_cmnd *scsi_cmd;
441 void (*done) (struct scsi_cmnd *);
442
443 unsigned long timeout; 432 unsigned long timeout;
444}; 433};
445 434
446struct ide_devset; 435struct ide_devset;
447struct ide_driver_s; 436struct ide_driver;
448 437
449#ifdef CONFIG_BLK_DEV_IDEACPI 438#ifdef CONFIG_BLK_DEV_IDEACPI
450struct ide_acpi_drive_link; 439struct ide_acpi_drive_link;
@@ -588,7 +577,6 @@ struct ide_drive_s {
588 struct request_queue *queue; /* request queue */ 577 struct request_queue *queue; /* request queue */
589 578
590 struct request *rq; /* current request */ 579 struct request *rq; /* current request */
591 struct ide_drive_s *next; /* circular list of hwgroup drives */
592 void *driver_data; /* extra driver data */ 580 void *driver_data; /* extra driver data */
593 u16 *id; /* identification info */ 581 u16 *id; /* identification info */
594#ifdef CONFIG_IDE_PROC_FS 582#ifdef CONFIG_IDE_PROC_FS
@@ -662,6 +650,8 @@ struct ide_drive_s {
662 int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, 650 int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
663 unsigned int, int); 651 unsigned int, int);
664 652
653 ide_startstop_t (*irq_handler)(struct ide_drive_s *);
654
665 unsigned long atapi_flags; 655 unsigned long atapi_flags;
666 656
667 struct ide_atapi_pc request_sense_pc; 657 struct ide_atapi_pc request_sense_pc;
@@ -684,7 +674,6 @@ struct ide_tp_ops {
684 void (*exec_command)(struct hwif_s *, u8); 674 void (*exec_command)(struct hwif_s *, u8);
685 u8 (*read_status)(struct hwif_s *); 675 u8 (*read_status)(struct hwif_s *);
686 u8 (*read_altstatus)(struct hwif_s *); 676 u8 (*read_altstatus)(struct hwif_s *);
687 u8 (*read_sff_dma_status)(struct hwif_s *);
688 677
689 void (*set_irq)(struct hwif_s *, int); 678 void (*set_irq)(struct hwif_s *, int);
690 679
@@ -745,14 +734,17 @@ struct ide_dma_ops {
745 int (*dma_test_irq)(struct ide_drive_s *); 734 int (*dma_test_irq)(struct ide_drive_s *);
746 void (*dma_lost_irq)(struct ide_drive_s *); 735 void (*dma_lost_irq)(struct ide_drive_s *);
747 void (*dma_timeout)(struct ide_drive_s *); 736 void (*dma_timeout)(struct ide_drive_s *);
737 /*
738 * The following method is optional and only required to be
739 * implemented for the SFF-8038i compatible controllers.
740 */
741 u8 (*dma_sff_read_status)(struct hwif_s *);
748}; 742};
749 743
750struct ide_host; 744struct ide_host;
751 745
752typedef struct hwif_s { 746typedef struct hwif_s {
753 struct hwif_s *next; /* for linked-list in ide_hwgroup_t */
754 struct hwif_s *mate; /* other hwif from same PCI chip */ 747 struct hwif_s *mate; /* other hwif from same PCI chip */
755 struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */
756 struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ 748 struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
757 749
758 struct ide_host *host; 750 struct ide_host *host;
@@ -763,7 +755,7 @@ typedef struct hwif_s {
763 755
764 unsigned long sata_scr[SATA_NR_PORTS]; 756 unsigned long sata_scr[SATA_NR_PORTS];
765 757
766 ide_drive_t drives[MAX_DRIVES]; /* drive info */ 758 ide_drive_t *devices[MAX_DRIVES + 1];
767 759
768 u8 major; /* our major number */ 760 u8 major; /* our major number */
769 u8 index; /* 0 for ide0; 1 for ide1; ... */ 761 u8 index; /* 0 for ide0; 1 for ide1; ... */
@@ -829,7 +821,7 @@ typedef struct hwif_s {
829 unsigned extra_ports; /* number of extra dma ports */ 821 unsigned extra_ports; /* number of extra dma ports */
830 822
831 unsigned present : 1; /* this interface exists */ 823 unsigned present : 1; /* this interface exists */
832 unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ 824 unsigned busy : 1; /* serializes devices on a port */
833 825
834 struct device gendev; 826 struct device gendev;
835 struct device *portdev; 827 struct device *portdev;
@@ -841,19 +833,49 @@ typedef struct hwif_s {
841#ifdef CONFIG_BLK_DEV_IDEACPI 833#ifdef CONFIG_BLK_DEV_IDEACPI
842 struct ide_acpi_hwif_link *acpidata; 834 struct ide_acpi_hwif_link *acpidata;
843#endif 835#endif
836
837 /* IRQ handler, if active */
838 ide_startstop_t (*handler)(ide_drive_t *);
839
840 /* BOOL: polling active & poll_timeout field valid */
841 unsigned int polling : 1;
842
843 /* current drive */
844 ide_drive_t *cur_dev;
845
846 /* current request */
847 struct request *rq;
848
849 /* failsafe timer */
850 struct timer_list timer;
851 /* timeout value during long polls */
852 unsigned long poll_timeout;
853 /* queried upon timeouts */
854 int (*expiry)(ide_drive_t *);
855
856 int req_gen;
857 int req_gen_timer;
858
859 spinlock_t lock;
844} ____cacheline_internodealigned_in_smp ide_hwif_t; 860} ____cacheline_internodealigned_in_smp ide_hwif_t;
845 861
846#define MAX_HOST_PORTS 4 862#define MAX_HOST_PORTS 4
847 863
848struct ide_host { 864struct ide_host {
849 ide_hwif_t *ports[MAX_HOST_PORTS]; 865 ide_hwif_t *ports[MAX_HOST_PORTS + 1];
850 unsigned int n_ports; 866 unsigned int n_ports;
851 struct device *dev[2]; 867 struct device *dev[2];
852 unsigned int (*init_chipset)(struct pci_dev *); 868 unsigned int (*init_chipset)(struct pci_dev *);
853 unsigned long host_flags; 869 unsigned long host_flags;
854 void *host_priv; 870 void *host_priv;
871 ide_hwif_t *cur_port; /* for hosts requiring serialization */
872
873 /* used for hosts requiring serialization */
874 volatile long host_busy;
855}; 875};
856 876
877#define IDE_HOST_BUSY 0
878
857/* 879/*
858 * internal ide interrupt handler type 880 * internal ide interrupt handler type
859 */ 881 */
@@ -863,38 +885,6 @@ typedef int (ide_expiry_t)(ide_drive_t *);
863/* used by ide-cd, ide-floppy, etc. */ 885/* used by ide-cd, ide-floppy, etc. */
864typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); 886typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned);
865 887
866typedef struct hwgroup_s {
867 /* irq handler, if active */
868 ide_startstop_t (*handler)(ide_drive_t *);
869
870 /* BOOL: protects all fields below */
871 volatile int busy;
872 /* BOOL: polling active & poll_timeout field valid */
873 unsigned int polling : 1;
874
875 /* current drive */
876 ide_drive_t *drive;
877 /* ptr to current hwif in linked-list */
878 ide_hwif_t *hwif;
879
880 /* current request */
881 struct request *rq;
882
883 /* failsafe timer */
884 struct timer_list timer;
885 /* timeout value during long polls */
886 unsigned long poll_timeout;
887 /* queried upon timeouts */
888 int (*expiry)(ide_drive_t *);
889
890 int req_gen;
891 int req_gen_timer;
892
893 spinlock_t lock;
894} ide_hwgroup_t;
895
896typedef struct ide_driver_s ide_driver_t;
897
898extern struct mutex ide_setting_mtx; 888extern struct mutex ide_setting_mtx;
899 889
900/* 890/*
@@ -1020,8 +1010,8 @@ void ide_proc_register_port(ide_hwif_t *);
1020void ide_proc_port_register_devices(ide_hwif_t *); 1010void ide_proc_port_register_devices(ide_hwif_t *);
1021void ide_proc_unregister_device(ide_drive_t *); 1011void ide_proc_unregister_device(ide_drive_t *);
1022void ide_proc_unregister_port(ide_hwif_t *); 1012void ide_proc_unregister_port(ide_hwif_t *);
1023void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); 1013void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
1024void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); 1014void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
1025 1015
1026read_proc_t proc_ide_read_capacity; 1016read_proc_t proc_ide_read_capacity;
1027read_proc_t proc_ide_read_geometry; 1017read_proc_t proc_ide_read_geometry;
@@ -1048,8 +1038,10 @@ static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; }
1048static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; } 1038static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; }
1049static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } 1039static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; }
1050static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } 1040static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; }
1051static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } 1041static inline void ide_proc_register_driver(ide_drive_t *drive,
1052static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } 1042 struct ide_driver *driver) { ; }
1043static inline void ide_proc_unregister_driver(ide_drive_t *drive,
1044 struct ide_driver *driver) { ; }
1053#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; 1045#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
1054#endif 1046#endif
1055 1047
@@ -1118,11 +1110,10 @@ void ide_check_pm_state(ide_drive_t *, struct request *);
1118 * The gendriver.owner field should be set to the module owner of this driver. 1110 * The gendriver.owner field should be set to the module owner of this driver.
1119 * The gendriver.name field should be set to the name of this driver 1111 * The gendriver.name field should be set to the name of this driver
1120 */ 1112 */
1121struct ide_driver_s { 1113struct ide_driver {
1122 const char *version; 1114 const char *version;
1123 ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); 1115 ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t);
1124 int (*end_request)(ide_drive_t *, int, int); 1116 int (*end_request)(ide_drive_t *, int, int);
1125 ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8);
1126 struct device_driver gen_driver; 1117 struct device_driver gen_driver;
1127 int (*probe)(ide_drive_t *); 1118 int (*probe)(ide_drive_t *);
1128 void (*remove)(ide_drive_t *); 1119 void (*remove)(ide_drive_t *);
@@ -1134,7 +1125,7 @@ struct ide_driver_s {
1134#endif 1125#endif
1135}; 1126};
1136 1127
1137#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver) 1128#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver)
1138 1129
1139int ide_device_get(ide_drive_t *); 1130int ide_device_get(ide_drive_t *);
1140void ide_device_put(ide_drive_t *); 1131void ide_device_put(ide_drive_t *);
@@ -1166,9 +1157,7 @@ void ide_execute_pkt_cmd(ide_drive_t *);
1166 1157
1167void ide_pad_transfer(ide_drive_t *, int, int); 1158void ide_pad_transfer(ide_drive_t *, int, int);
1168 1159
1169ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); 1160ide_startstop_t ide_error(ide_drive_t *, const char *, u8);
1170
1171ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
1172 1161
1173void ide_fix_driveid(u16 *); 1162void ide_fix_driveid(u16 *);
1174 1163
@@ -1192,7 +1181,6 @@ void ide_tf_dump(const char *, struct ide_taskfile *);
1192void ide_exec_command(ide_hwif_t *, u8); 1181void ide_exec_command(ide_hwif_t *, u8);
1193u8 ide_read_status(ide_hwif_t *); 1182u8 ide_read_status(ide_hwif_t *);
1194u8 ide_read_altstatus(ide_hwif_t *); 1183u8 ide_read_altstatus(ide_hwif_t *);
1195u8 ide_read_sff_dma_status(ide_hwif_t *);
1196 1184
1197void ide_set_irq(ide_hwif_t *, int); 1185void ide_set_irq(ide_hwif_t *, int);
1198 1186
@@ -1272,26 +1260,6 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
1272 1260
1273extern void ide_timer_expiry(unsigned long); 1261extern void ide_timer_expiry(unsigned long);
1274extern irqreturn_t ide_intr(int irq, void *dev_id); 1262extern irqreturn_t ide_intr(int irq, void *dev_id);
1275
1276static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
1277{
1278 if (hwgroup->busy)
1279 return 1;
1280
1281 hwgroup->busy = 1;
1282 /* for atari only */
1283 ide_get_lock(ide_intr, hwgroup);
1284
1285 return 0;
1286}
1287
1288static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
1289{
1290 /* for atari only */
1291 ide_release_lock();
1292 hwgroup->busy = 0;
1293}
1294
1295extern void do_ide_request(struct request_queue *); 1263extern void do_ide_request(struct request_queue *);
1296 1264
1297void ide_init_disk(struct gendisk *, ide_drive_t *); 1265void ide_init_disk(struct gendisk *, ide_drive_t *);
@@ -1327,11 +1295,11 @@ static inline int ide_hwif_setup_dma(ide_hwif_t *hwif,
1327} 1295}
1328#endif 1296#endif
1329 1297
1330typedef struct ide_pci_enablebit_s { 1298struct ide_pci_enablebit {
1331 u8 reg; /* byte pci reg holding the enable-bit */ 1299 u8 reg; /* byte pci reg holding the enable-bit */
1332 u8 mask; /* mask to isolate the enable-bit */ 1300 u8 mask; /* mask to isolate the enable-bit */
1333 u8 val; /* value of masked reg when "enabled" */ 1301 u8 val; /* value of masked reg when "enabled" */
1334} ide_pci_enablebit_t; 1302};
1335 1303
1336enum { 1304enum {
1337 /* Uses ISA control ports not PCI ones. */ 1305 /* Uses ISA control ports not PCI ones. */
@@ -1420,7 +1388,8 @@ struct ide_port_info {
1420 const struct ide_port_ops *port_ops; 1388 const struct ide_port_ops *port_ops;
1421 const struct ide_dma_ops *dma_ops; 1389 const struct ide_dma_ops *dma_ops;
1422 1390
1423 ide_pci_enablebit_t enablebits[2]; 1391 struct ide_pci_enablebit enablebits[2];
1392
1424 hwif_chipset_t chipset; 1393 hwif_chipset_t chipset;
1425 1394
1426 u16 max_sectors; /* if < than the default one */ 1395 u16 max_sectors; /* if < than the default one */
@@ -1492,6 +1461,7 @@ void ide_dma_exec_cmd(ide_drive_t *, u8);
1492extern void ide_dma_start(ide_drive_t *); 1461extern void ide_dma_start(ide_drive_t *);
1493int ide_dma_end(ide_drive_t *); 1462int ide_dma_end(ide_drive_t *);
1494int ide_dma_test_irq(ide_drive_t *); 1463int ide_dma_test_irq(ide_drive_t *);
1464u8 ide_dma_sff_read_status(ide_hwif_t *);
1495extern const struct ide_dma_ops sff_dma_ops; 1465extern const struct ide_dma_ops sff_dma_ops;
1496#else 1466#else
1497static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } 1467static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
@@ -1529,9 +1499,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; }
1529static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} 1499static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
1530#endif 1500#endif
1531 1501
1532void ide_remove_port_from_hwgroup(ide_hwif_t *);
1533void ide_unregister(ide_hwif_t *);
1534
1535void ide_register_region(struct gendisk *); 1502void ide_register_region(struct gendisk *);
1536void ide_unregister_region(struct gendisk *); 1503void ide_unregister_region(struct gendisk *);
1537 1504
@@ -1616,23 +1583,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive)
1616 ide_set_pio(drive, 255); 1583 ide_set_pio(drive, 255);
1617} 1584}
1618 1585
1619extern spinlock_t ide_lock;
1620extern struct mutex ide_cfg_mtx;
1621/*
1622 * Structure locking:
1623 *
1624 * ide_cfg_mtx and hwgroup->lock together protect changes to
1625 * ide_hwif_t->next
1626 * ide_drive_t->next
1627 *
1628 * ide_hwgroup_t->busy: hwgroup->lock
1629 * ide_hwgroup_t->hwif: hwgroup->lock
1630 * ide_hwif_t->{hwgroup,mate}: constant, no locking
1631 * ide_drive_t->hwif: constant, no locking
1632 */
1633
1634#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
1635
1636char *ide_media_string(ide_drive_t *); 1586char *ide_media_string(ide_drive_t *);
1637 1587
1638extern struct device_attribute ide_dev_attrs[]; 1588extern struct device_attribute ide_dev_attrs[];
@@ -1651,8 +1601,15 @@ static inline int hwif_to_node(ide_hwif_t *hwif)
1651 1601
1652static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) 1602static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
1653{ 1603{
1654 ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; 1604 ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1];
1655 1605
1656 return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; 1606 return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
1657} 1607}
1608
1609#define ide_port_for_each_dev(i, dev, port) \
1610 for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++)
1611
1612#define ide_host_for_each_port(i, port, host) \
1613 for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
1614
1658#endif /* _IDE_H */ 1615#endif /* _IDE_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 0702c4d7bdf0..af886b26c9d1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,7 +14,6 @@
14#include <linux/irqflags.h> 14#include <linux/irqflags.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/irqnr.h>
18 17
19#include <asm/atomic.h> 18#include <asm/atomic.h>
20#include <asm/ptrace.h> 19#include <asm/ptrace.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d242fe1381fd..6b8e2027165e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -48,6 +48,12 @@ extern const char linux_proc_banner[];
48#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) 48#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
49#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 49#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
50#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) 50#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
51#define DIV_ROUND_CLOSEST(x, divisor)( \
52{ \
53 typeof(divisor) __divisor = divisor; \
54 (((x) + ((__divisor) / 2)) / (__divisor)); \
55} \
56)
51 57
52#define _RET_IP_ (unsigned long)__builtin_return_address(0) 58#define _RET_IP_ (unsigned long)__builtin_return_address(0)
53#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) 59#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 497b1d1f7a05..d6ea19e314bb 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -69,9 +69,6 @@ struct kprobe {
69 /* list of kprobes for multi-handler support */ 69 /* list of kprobes for multi-handler support */
70 struct list_head list; 70 struct list_head list;
71 71
72 /* Indicates that the corresponding module has been ref counted */
73 unsigned int mod_refcounted;
74
75 /*count the number of times this probe was temporarily disarmed */ 72 /*count the number of times this probe was temporarily disarmed */
76 unsigned long nmissed; 73 unsigned long nmissed;
77 74
@@ -103,8 +100,19 @@ struct kprobe {
103 100
104 /* copy of the original instruction */ 101 /* copy of the original instruction */
105 struct arch_specific_insn ainsn; 102 struct arch_specific_insn ainsn;
103
104 /* Indicates various status flags. Protected by kprobe_mutex. */
105 u32 flags;
106}; 106};
107 107
108/* Kprobe status flags */
109#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */
110
111static inline int kprobe_gone(struct kprobe *p)
112{
113 return p->flags & KPROBE_FLAG_GONE;
114}
115
108/* 116/*
109 * Special probe type that uses setjmp-longjmp type tricks to resume 117 * Special probe type that uses setjmp-longjmp type tricks to resume
110 * execution at a specified entry with a matching prototype corresponding 118 * execution at a specified entry with a matching prototype corresponding
@@ -201,7 +209,6 @@ static inline int init_test_probes(void)
201} 209}
202#endif /* CONFIG_KPROBES_SANITY_TEST */ 210#endif /* CONFIG_KPROBES_SANITY_TEST */
203 211
204extern struct mutex kprobe_mutex;
205extern int arch_prepare_kprobe(struct kprobe *p); 212extern int arch_prepare_kprobe(struct kprobe *p);
206extern void arch_arm_kprobe(struct kprobe *p); 213extern void arch_arm_kprobe(struct kprobe *p);
207extern void arch_disarm_kprobe(struct kprobe *p); 214extern void arch_disarm_kprobe(struct kprobe *p);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 36c82c9e6ea7..3fdc10806d31 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v)
79#else 79#else
80extern int register_memory_notifier(struct notifier_block *nb); 80extern int register_memory_notifier(struct notifier_block *nb);
81extern void unregister_memory_notifier(struct notifier_block *nb); 81extern void unregister_memory_notifier(struct notifier_block *nb);
82extern int register_new_memory(struct mem_section *); 82extern int register_new_memory(int, struct mem_section *);
83extern int unregister_memory_section(struct mem_section *); 83extern int unregister_memory_section(struct mem_section *);
84extern int memory_dev_init(void); 84extern int memory_dev_init(void);
85extern int remove_memory_block(unsigned long, struct mem_section *, int); 85extern int remove_memory_block(unsigned long, struct mem_section *, int);
86extern int memory_notify(unsigned long val, void *v); 86extern int memory_notify(unsigned long val, void *v);
87extern struct memory_block *find_memory_block(struct mem_section *);
87#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) 88#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
88 89enum mem_add_context { BOOT, HOTPLUG };
89
90#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 90#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
91 91
92#ifdef CONFIG_MEMORY_HOTPLUG 92#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 763ba81fc0f0..d95f72e79b82 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -72,7 +72,7 @@ extern void __offline_isolated_pages(unsigned long, unsigned long);
72extern int offline_pages(unsigned long, unsigned long, unsigned long); 72extern int offline_pages(unsigned long, unsigned long, unsigned long);
73 73
74/* reasonably generic interface to expand the physical pages in a zone */ 74/* reasonably generic interface to expand the physical pages in a zone */
75extern int __add_pages(struct zone *zone, unsigned long start_pfn, 75extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
76 unsigned long nr_pages); 76 unsigned long nr_pages);
77extern int __remove_pages(struct zone *zone, unsigned long start_pfn, 77extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
78 unsigned long nr_pages); 78 unsigned long nr_pages);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3f34005068d4..527602cdea1c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,8 @@
7typedef struct page *new_page_t(struct page *, unsigned long private, int **); 7typedef struct page *new_page_t(struct page *, unsigned long private, int **);
8 8
9#ifdef CONFIG_MIGRATION 9#ifdef CONFIG_MIGRATION
10#define PAGE_MIGRATION 1
11
10extern int putback_lru_pages(struct list_head *l); 12extern int putback_lru_pages(struct list_head *l);
11extern int migrate_page(struct address_space *, 13extern int migrate_page(struct address_space *,
12 struct page *, struct page *); 14 struct page *, struct page *);
@@ -20,6 +22,8 @@ extern int migrate_vmas(struct mm_struct *mm,
20 const nodemask_t *from, const nodemask_t *to, 22 const nodemask_t *from, const nodemask_t *to,
21 unsigned long flags); 23 unsigned long flags);
22#else 24#else
25#define PAGE_MIGRATION 0
26
23static inline int putback_lru_pages(struct list_head *l) { return 0; } 27static inline int putback_lru_pages(struct list_head *l) { return 0; }
24static inline int migrate_pages(struct list_head *l, new_page_t x, 28static inline int migrate_pages(struct list_head *l, new_page_t x,
25 unsigned long private) { return -ENOSYS; } 29 unsigned long private) { return -ENOSYS; }
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 26433ec520b3..a820f816a49e 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -3,33 +3,33 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/major.h> 4#include <linux/major.h>
5 5
6#define PSMOUSE_MINOR 1 6#define PSMOUSE_MINOR 1
7#define MS_BUSMOUSE_MINOR 2 7#define MS_BUSMOUSE_MINOR 2
8#define ATIXL_BUSMOUSE_MINOR 3 8#define ATIXL_BUSMOUSE_MINOR 3
9/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ 9/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */
10#define ATARIMOUSE_MINOR 5 10#define ATARIMOUSE_MINOR 5
11#define SUN_MOUSE_MINOR 6 11#define SUN_MOUSE_MINOR 6
12#define APOLLO_MOUSE_MINOR 7 12#define APOLLO_MOUSE_MINOR 7
13#define PC110PAD_MINOR 9 13#define PC110PAD_MINOR 9
14/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ 14/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */
15#define WATCHDOG_MINOR 130 /* Watchdog timer */ 15#define WATCHDOG_MINOR 130 /* Watchdog timer */
16#define TEMP_MINOR 131 /* Temperature Sensor */ 16#define TEMP_MINOR 131 /* Temperature Sensor */
17#define RTC_MINOR 135 17#define RTC_MINOR 135
18#define EFI_RTC_MINOR 136 /* EFI Time services */ 18#define EFI_RTC_MINOR 136 /* EFI Time services */
19#define SUN_OPENPROM_MINOR 139 19#define SUN_OPENPROM_MINOR 139
20#define DMAPI_MINOR 140 /* DMAPI */ 20#define DMAPI_MINOR 140 /* DMAPI */
21#define NVRAM_MINOR 144 21#define NVRAM_MINOR 144
22#define SGI_MMTIMER 153 22#define SGI_MMTIMER 153
23#define STORE_QUEUE_MINOR 155 23#define STORE_QUEUE_MINOR 155
24#define I2O_MINOR 166 24#define I2O_MINOR 166
25#define MICROCODE_MINOR 184 25#define MICROCODE_MINOR 184
26#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ 26#define TUN_MINOR 200
27#define MPT_MINOR 220 27#define MWAVE_MINOR 219 /* ACP/Mwave Modem */
28#define MISC_DYNAMIC_MINOR 255 28#define MPT_MINOR 220
29 29#define HPET_MINOR 228
30#define TUN_MINOR 200 30#define FUSE_MINOR 229
31#define HPET_MINOR 228 31#define KVM_MINOR 232
32#define KVM_MINOR 232 32#define MISC_DYNAMIC_MINOR 255
33 33
34struct device; 34struct device;
35 35
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aaa8b843be28..4a3d28c86443 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page)
717 717
718#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) 718#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS)
719 719
720/*
721 * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
722 */
723extern void pagefault_out_of_memory(void);
724
720#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) 725#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
721 726
722extern void show_free_areas(void); 727extern void show_free_areas(void);
diff --git a/include/linux/module.h b/include/linux/module.h
index 3bfed013350b..4f7ea12463d3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -294,9 +294,6 @@ struct module
294 /* The size of the executable code in each section. */ 294 /* The size of the executable code in each section. */
295 unsigned int init_text_size, core_text_size; 295 unsigned int init_text_size, core_text_size;
296 296
297 /* The handle returned from unwind_add_table. */
298 void *unwind_info;
299
300 /* Arch-specific module values */ 297 /* Arch-specific module values */
301 struct mod_arch_specific arch; 298 struct mod_arch_specific arch;
302 299
@@ -368,6 +365,18 @@ struct module *module_text_address(unsigned long addr);
368struct module *__module_text_address(unsigned long addr); 365struct module *__module_text_address(unsigned long addr);
369int is_module_address(unsigned long addr); 366int is_module_address(unsigned long addr);
370 367
368static inline int within_module_core(unsigned long addr, struct module *mod)
369{
370 return (unsigned long)mod->module_core <= addr &&
371 addr < (unsigned long)mod->module_core + mod->core_size;
372}
373
374static inline int within_module_init(unsigned long addr, struct module *mod)
375{
376 return (unsigned long)mod->module_init <= addr &&
377 addr < (unsigned long)mod->module_init + mod->init_size;
378}
379
371/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if 380/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
372 symnum out of range. */ 381 symnum out of range. */
373int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, 382int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
diff --git a/include/linux/node.h b/include/linux/node.h
index bc001bc225c3..681a697b9a86 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -26,6 +26,7 @@ struct node {
26 struct sys_device sysdev; 26 struct sys_device sysdev;
27}; 27};
28 28
29struct memory_block;
29extern struct node node_devices[]; 30extern struct node node_devices[];
30 31
31extern int register_node(struct node *, int, struct node *); 32extern int register_node(struct node *, int, struct node *);
@@ -35,6 +36,9 @@ extern int register_one_node(int nid);
35extern void unregister_one_node(int nid); 36extern void unregister_one_node(int nid);
36extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); 37extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
37extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); 38extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
39extern int register_mem_sect_under_node(struct memory_block *mem_blk,
40 int nid);
41extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
38#else 42#else
39static inline int register_one_node(int nid) 43static inline int register_one_node(int nid)
40{ 44{
@@ -52,6 +56,15 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
52{ 56{
53 return 0; 57 return 0;
54} 58}
59static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
60 int nid)
61{
62 return 0;
63}
64static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
65{
66 return 0;
67}
55#endif 68#endif
56 69
57#define to_node(sys_device) container_of(sys_device, struct node, sysdev) 70#define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b12f93a3c345..219a523ecdb0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,6 +228,7 @@ PAGEFLAG_FALSE(HighMem)
228PAGEFLAG(SwapCache, swapcache) 228PAGEFLAG(SwapCache, swapcache)
229#else 229#else
230PAGEFLAG_FALSE(SwapCache) 230PAGEFLAG_FALSE(SwapCache)
231 SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
231#endif 232#endif
232 233
233#ifdef CONFIG_UNEVICTABLE_LRU 234#ifdef CONFIG_UNEVICTABLE_LRU
@@ -372,31 +373,22 @@ static inline void __ClearPageTail(struct page *page)
372#define __PG_MLOCKED 0 373#define __PG_MLOCKED 0
373#endif 374#endif
374 375
375#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
376 1 << PG_buddy | 1 << PG_writeback | \
377 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
378 __PG_UNEVICTABLE | __PG_MLOCKED)
379
380/*
381 * Flags checked in bad_page(). Pages on the free list should not have
382 * these flags set. It they are, there is a problem.
383 */
384#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
385 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
386
387/* 376/*
388 * Flags checked when a page is freed. Pages being freed should not have 377 * Flags checked when a page is freed. Pages being freed should not have
389 * these flags set. It they are, there is a problem. 378 * these flags set. It they are, there is a problem.
390 */ 379 */
391#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved) 380#define PAGE_FLAGS_CHECK_AT_FREE \
381 (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
382 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
383 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
384 __PG_UNEVICTABLE | __PG_MLOCKED)
392 385
393/* 386/*
394 * Flags checked when a page is prepped for return by the page allocator. 387 * Flags checked when a page is prepped for return by the page allocator.
395 * Pages being prepped should not have these flags set. It they are, there 388 * Pages being prepped should not have any flags set. It they are set,
396 * is a problem. 389 * there has been a kernel bug or struct page corruption.
397 */ 390 */
398#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \ 391#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
399 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
400 392
401#endif /* !__GENERATING_BOUNDS_H */ 393#endif /* !__GENERATING_BOUNDS_H */
402#endif /* PAGE_FLAGS_H */ 394#endif /* PAGE_FLAGS_H */
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index e90a2cb02915..7b2886fa7fdc 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,6 @@ struct pagevec {
21}; 21};
22 22
23void __pagevec_release(struct pagevec *pvec); 23void __pagevec_release(struct pagevec *pvec);
24void __pagevec_release_nonlru(struct pagevec *pvec);
25void __pagevec_free(struct pagevec *pvec); 24void __pagevec_free(struct pagevec *pvec);
26void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); 25void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
27void pagevec_strip(struct pagevec *pvec); 26void pagevec_strip(struct pagevec *pvec);
@@ -69,12 +68,6 @@ static inline void pagevec_release(struct pagevec *pvec)
69 __pagevec_release(pvec); 68 __pagevec_release(pvec);
70} 69}
71 70
72static inline void pagevec_release_nonlru(struct pagevec *pvec)
73{
74 if (pagevec_count(pvec))
75 __pagevec_release_nonlru(pvec);
76}
77
78static inline void pagevec_free(struct pagevec *pvec) 71static inline void pagevec_free(struct pagevec *pvec)
79{ 72{
80 if (pagevec_count(pvec)) 73 if (pagevec_count(pvec))
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 218c73b1e6d4..d543365518ab 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1658,6 +1658,7 @@
1658#define PCI_VENDOR_ID_ROCKWELL 0x127A 1658#define PCI_VENDOR_ID_ROCKWELL 0x127A
1659 1659
1660#define PCI_VENDOR_ID_ITE 0x1283 1660#define PCI_VENDOR_ID_ITE 0x1283
1661#define PCI_DEVICE_ID_ITE_8172 0x8172
1661#define PCI_DEVICE_ID_ITE_8211 0x8211 1662#define PCI_DEVICE_ID_ITE_8211 0x8211
1662#define PCI_DEVICE_ID_ITE_8212 0x8212 1663#define PCI_DEVICE_ID_ITE_8212 0x8212
1663#define PCI_DEVICE_ID_ITE_8213 0x8213 1664#define PCI_DEVICE_ID_ITE_8213 0x8213
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccdfc112..99de7a31bab8 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
24 s32 *counters; 24 s32 *counters;
25}; 25};
26 26
27#if NR_CPUS >= 16 27extern int percpu_counter_batch;
28#define FBC_BATCH (NR_CPUS*2)
29#else
30#define FBC_BATCH (NR_CPUS*4)
31#endif
32 28
33int percpu_counter_init(struct percpu_counter *fbc, s64 amount); 29int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
34int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); 30int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);
39 35
40static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) 36static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
41{ 37{
42 __percpu_counter_add(fbc, amount, FBC_BATCH); 38 __percpu_counter_add(fbc, amount, percpu_counter_batch);
43} 39}
44 40
45static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) 41static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/include/linux/poll.h b/include/linux/poll.h
index badd98ab06f6..8c24ef8d9976 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
46} 46}
47 47
48struct poll_table_entry { 48struct poll_table_entry {
49 struct file * filp; 49 struct file *filp;
50 wait_queue_t wait; 50 wait_queue_t wait;
51 wait_queue_head_t * wait_address; 51 wait_queue_head_t *wait_address;
52}; 52};
53 53
54/* 54/*
@@ -56,7 +56,9 @@ struct poll_table_entry {
56 */ 56 */
57struct poll_wqueues { 57struct poll_wqueues {
58 poll_table pt; 58 poll_table pt;
59 struct poll_table_page * table; 59 struct poll_table_page *table;
60 struct task_struct *polling_task;
61 int triggered;
60 int error; 62 int error;
61 int inline_index; 63 int inline_index;
62 struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; 64 struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
@@ -64,6 +66,13 @@ struct poll_wqueues {
64 66
65extern void poll_initwait(struct poll_wqueues *pwq); 67extern void poll_initwait(struct poll_wqueues *pwq);
66extern void poll_freewait(struct poll_wqueues *pwq); 68extern void poll_freewait(struct poll_wqueues *pwq);
69extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
70 ktime_t *expires, unsigned long slack);
71
72static inline int poll_schedule(struct poll_wqueues *pwq, int state)
73{
74 return poll_schedule_timeout(pwq, state, NULL, 0);
75}
67 76
68/* 77/*
69 * Scaleable version of the fd_set. 78 * Scaleable version of the fd_set.
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 32c0547ffafc..c93a58a40033 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -391,7 +391,6 @@ static inline int rio_add_inb_buffer(struct rio_mport *mport, int mbox,
391 * rio_get_inb_message - Get A RIO message from an inbound mailbox queue 391 * rio_get_inb_message - Get A RIO message from an inbound mailbox queue
392 * @mport: Master port containing the inbound mailbox 392 * @mport: Master port containing the inbound mailbox
393 * @mbox: The inbound mailbox number 393 * @mbox: The inbound mailbox number
394 * @buffer: Pointer to the message buffer
395 * 394 *
396 * Get a RIO message from an inbound mailbox queue. Returns 0 on success. 395 * Get a RIO message from an inbound mailbox queue. Returns 0 on success.
397 */ 396 */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 89f0564b10c8..b35bc0e19cd9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -63,16 +63,13 @@ void anon_vma_unlink(struct vm_area_struct *);
63void anon_vma_link(struct vm_area_struct *); 63void anon_vma_link(struct vm_area_struct *);
64void __anon_vma_link(struct vm_area_struct *); 64void __anon_vma_link(struct vm_area_struct *);
65 65
66extern struct anon_vma *page_lock_anon_vma(struct page *page);
67extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
68
69/* 66/*
70 * rmap interfaces called when adding or removing pte of page 67 * rmap interfaces called when adding or removing pte of page
71 */ 68 */
72void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); 69void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
73void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); 70void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
74void page_add_file_rmap(struct page *); 71void page_add_file_rmap(struct page *);
75void page_remove_rmap(struct page *, struct vm_area_struct *); 72void page_remove_rmap(struct page *);
76 73
77#ifdef CONFIG_DEBUG_VM 74#ifdef CONFIG_DEBUG_VM
78void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address); 75void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 38a3f4b15394..ea415136ac9e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
386 (mm)->hiwater_vm = (mm)->total_vm; \ 386 (mm)->hiwater_vm = (mm)->total_vm; \
387} while (0) 387} while (0)
388 388
389#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm))
390#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm)
391
389extern void set_dumpable(struct mm_struct *mm, int value); 392extern void set_dumpable(struct mm_struct *mm, int value);
390extern int get_dumpable(struct mm_struct *mm); 393extern int get_dumpable(struct mm_struct *mm);
391 394
diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
new file mode 100644
index 000000000000..0f01a0f1f40c
--- /dev/null
+++ b/include/linux/spi/spi_gpio.h
@@ -0,0 +1,60 @@
1#ifndef __LINUX_SPI_GPIO_H
2#define __LINUX_SPI_GPIO_H
3
4/*
5 * For each bitbanged SPI bus, set up a platform_device node with:
6 * - name "spi_gpio"
7 * - id the same as the SPI bus number it implements
8 * - dev.platform data pointing to a struct spi_gpio_platform_data
9 *
10 * Or, see the driver code for information about speedups that are
11 * possible on platforms that support inlined access for GPIOs (no
12 * spi_gpio_platform_data is used).
13 *
14 * Use spi_board_info with these busses in the usual way, being sure
15 * that the controller_data being the GPIO used for each device's
16 * chipselect:
17 *
18 * static struct spi_board_info ... [] = {
19 * ...
20 * // this slave uses GPIO 42 for its chipselect
21 * .controller_data = (void *) 42,
22 * ...
23 * // this one uses GPIO 86 for its chipselect
24 * .controller_data = (void *) 86,
25 * ...
26 * };
27 *
28 * If the bitbanged bus is later switched to a "native" controller,
29 * that platform_device and controller_data should be removed.
30 */
31
32/**
33 * struct spi_gpio_platform_data - parameter for bitbanged SPI master
34 * @sck: number of the GPIO used for clock output
35 * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data
36 * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data
37 * @num_chipselect: how many slaves to allow
38 *
39 * All GPIO signals used with the SPI bus managed through this driver
40 * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead
41 * of some alternate function.
42 *
43 * It can be convenient to use this driver with pins that have alternate
44 * functions associated with a "native" SPI controller if a driver for that
45 * controller is not available, or is missing important functionality.
46 *
47 * On platforms which can do so, configure MISO with a weak pullup unless
48 * there's an external pullup on that signal. That saves power by avoiding
49 * floating signals. (A weak pulldown would save power too, but many
50 * drivers expect to see all-ones data as the no slave "response".)
51 */
52struct spi_gpio_platform_data {
53 unsigned sck;
54 unsigned mosi;
55 unsigned miso;
56
57 u16 num_chipselect;
58};
59
60#endif /* __LINUX_SPI_GPIO_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a3af95b2cb6d..91dee50fe260 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -120,7 +120,9 @@ struct swap_extent {
120enum { 120enum {
121 SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ 121 SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
122 SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ 122 SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
123 SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), 123 SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */
124 SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
125 SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
124 /* add others here before... */ 126 /* add others here before... */
125 SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ 127 SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
126}; 128};
@@ -134,22 +136,24 @@ enum {
134 * The in-memory structure used to track swap areas. 136 * The in-memory structure used to track swap areas.
135 */ 137 */
136struct swap_info_struct { 138struct swap_info_struct {
137 unsigned int flags; 139 unsigned long flags;
138 int prio; /* swap priority */ 140 int prio; /* swap priority */
141 int next; /* next entry on swap list */
139 struct file *swap_file; 142 struct file *swap_file;
140 struct block_device *bdev; 143 struct block_device *bdev;
141 struct list_head extent_list; 144 struct list_head extent_list;
142 struct swap_extent *curr_swap_extent; 145 struct swap_extent *curr_swap_extent;
143 unsigned old_block_size; 146 unsigned short *swap_map;
144 unsigned short * swap_map;
145 unsigned int lowest_bit; 147 unsigned int lowest_bit;
146 unsigned int highest_bit; 148 unsigned int highest_bit;
149 unsigned int lowest_alloc; /* while preparing discard cluster */
150 unsigned int highest_alloc; /* while preparing discard cluster */
147 unsigned int cluster_next; 151 unsigned int cluster_next;
148 unsigned int cluster_nr; 152 unsigned int cluster_nr;
149 unsigned int pages; 153 unsigned int pages;
150 unsigned int max; 154 unsigned int max;
151 unsigned int inuse_pages; 155 unsigned int inuse_pages;
152 int next; /* next entry on swap list */ 156 unsigned int old_block_size;
153}; 157};
154 158
155struct swap_list_t { 159struct swap_list_t {
@@ -163,7 +167,6 @@ struct swap_list_t {
163/* linux/mm/page_alloc.c */ 167/* linux/mm/page_alloc.c */
164extern unsigned long totalram_pages; 168extern unsigned long totalram_pages;
165extern unsigned long totalreserve_pages; 169extern unsigned long totalreserve_pages;
166extern long nr_swap_pages;
167extern unsigned int nr_free_buffer_pages(void); 170extern unsigned int nr_free_buffer_pages(void);
168extern unsigned int nr_free_pagecache_pages(void); 171extern unsigned int nr_free_pagecache_pages(void);
169 172
@@ -174,8 +177,6 @@ extern unsigned int nr_free_pagecache_pages(void);
174/* linux/mm/swap.c */ 177/* linux/mm/swap.c */
175extern void __lru_cache_add(struct page *, enum lru_list lru); 178extern void __lru_cache_add(struct page *, enum lru_list lru);
176extern void lru_cache_add_lru(struct page *, enum lru_list lru); 179extern void lru_cache_add_lru(struct page *, enum lru_list lru);
177extern void lru_cache_add_active_or_unevictable(struct page *,
178 struct vm_area_struct *);
179extern void activate_page(struct page *); 180extern void activate_page(struct page *);
180extern void mark_page_accessed(struct page *); 181extern void mark_page_accessed(struct page *);
181extern void lru_add_drain(void); 182extern void lru_add_drain(void);
@@ -280,7 +281,7 @@ extern void end_swap_bio_read(struct bio *bio, int err);
280extern struct address_space swapper_space; 281extern struct address_space swapper_space;
281#define total_swapcache_pages swapper_space.nrpages 282#define total_swapcache_pages swapper_space.nrpages
282extern void show_swap_cache_info(void); 283extern void show_swap_cache_info(void);
283extern int add_to_swap(struct page *, gfp_t); 284extern int add_to_swap(struct page *);
284extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); 285extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
285extern void __delete_from_swap_cache(struct page *); 286extern void __delete_from_swap_cache(struct page *);
286extern void delete_from_swap_cache(struct page *); 287extern void delete_from_swap_cache(struct page *);
@@ -293,6 +294,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
293 struct vm_area_struct *vma, unsigned long addr); 294 struct vm_area_struct *vma, unsigned long addr);
294 295
295/* linux/mm/swapfile.c */ 296/* linux/mm/swapfile.c */
297extern long nr_swap_pages;
296extern long total_swap_pages; 298extern long total_swap_pages;
297extern void si_swapinfo(struct sysinfo *); 299extern void si_swapinfo(struct sysinfo *);
298extern swp_entry_t get_swap_page(void); 300extern swp_entry_t get_swap_page(void);
@@ -300,15 +302,14 @@ extern swp_entry_t get_swap_page_of_type(int);
300extern int swap_duplicate(swp_entry_t); 302extern int swap_duplicate(swp_entry_t);
301extern int valid_swaphandles(swp_entry_t, unsigned long *); 303extern int valid_swaphandles(swp_entry_t, unsigned long *);
302extern void swap_free(swp_entry_t); 304extern void swap_free(swp_entry_t);
303extern void free_swap_and_cache(swp_entry_t); 305extern int free_swap_and_cache(swp_entry_t);
304extern int swap_type_of(dev_t, sector_t, struct block_device **); 306extern int swap_type_of(dev_t, sector_t, struct block_device **);
305extern unsigned int count_swap_pages(int, int); 307extern unsigned int count_swap_pages(int, int);
306extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); 308extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
307extern sector_t swapdev_block(int, pgoff_t); 309extern sector_t swapdev_block(int, pgoff_t);
308extern struct swap_info_struct *get_swap_info_struct(unsigned); 310extern struct swap_info_struct *get_swap_info_struct(unsigned);
309extern int can_share_swap_page(struct page *); 311extern int reuse_swap_page(struct page *);
310extern int remove_exclusive_swap_page(struct page *); 312extern int try_to_free_swap(struct page *);
311extern int remove_exclusive_swap_page_ref(struct page *);
312struct backing_dev_info; 313struct backing_dev_info;
313 314
314/* linux/mm/thrash.c */ 315/* linux/mm/thrash.c */
@@ -334,7 +335,8 @@ static inline void disable_swap_token(void)
334 335
335#else /* CONFIG_SWAP */ 336#else /* CONFIG_SWAP */
336 337
337#define total_swap_pages 0 338#define nr_swap_pages 0L
339#define total_swap_pages 0L
338#define total_swapcache_pages 0UL 340#define total_swapcache_pages 0UL
339 341
340#define si_swapinfo(val) \ 342#define si_swapinfo(val) \
@@ -350,14 +352,8 @@ static inline void show_swap_cache_info(void)
350{ 352{
351} 353}
352 354
353static inline void free_swap_and_cache(swp_entry_t swp) 355#define free_swap_and_cache(swp) is_migration_entry(swp)
354{ 356#define swap_duplicate(swp) is_migration_entry(swp)
355}
356
357static inline int swap_duplicate(swp_entry_t swp)
358{
359 return 0;
360}
361 357
362static inline void swap_free(swp_entry_t swp) 358static inline void swap_free(swp_entry_t swp)
363{ 359{
@@ -374,7 +370,10 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
374 return NULL; 370 return NULL;
375} 371}
376 372
377#define can_share_swap_page(p) (page_mapcount(p) == 1) 373static inline int add_to_swap(struct page *page)
374{
375 return 0;
376}
378 377
379static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, 378static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
380 gfp_t gfp_mask) 379 gfp_t gfp_mask)
@@ -390,14 +389,9 @@ static inline void delete_from_swap_cache(struct page *page)
390{ 389{
391} 390}
392 391
393#define swap_token_default_timeout 0 392#define reuse_swap_page(page) (page_mapcount(page) == 1)
394
395static inline int remove_exclusive_swap_page(struct page *p)
396{
397 return 0;
398}
399 393
400static inline int remove_exclusive_swap_page_ref(struct page *page) 394static inline int try_to_free_swap(struct page *page)
401{ 395{
402 return 0; 396 return 0;
403} 397}
diff --git a/include/linux/types.h b/include/linux/types.h
index 121f349cb7ec..3b864f2d9560 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -195,6 +195,16 @@ typedef u32 phys_addr_t;
195 195
196typedef phys_addr_t resource_size_t; 196typedef phys_addr_t resource_size_t;
197 197
198typedef struct {
199 volatile int counter;
200} atomic_t;
201
202#ifdef CONFIG_64BIT
203typedef struct {
204 volatile long counter;
205} atomic64_t;
206#endif
207
198struct ustat { 208struct ustat {
199 __kernel_daddr_t f_tfree; 209 __kernel_daddr_t f_tfree;
200 __kernel_ino_t f_tinode; 210 __kernel_ino_t f_tinode;
diff --git a/include/linux/unwind.h b/include/linux/unwind.h
deleted file mode 100644
index 7760860fa170..000000000000
--- a/include/linux/unwind.h
+++ /dev/null
@@ -1,68 +0,0 @@
1#ifndef _LINUX_UNWIND_H
2#define _LINUX_UNWIND_H
3
4/*
5 * Copyright (C) 2002-2006 Novell, Inc.
6 * Jan Beulich <jbeulich@novell.com>
7 * This code is released under version 2 of the GNU GPL.
8 *
9 * A simple API for unwinding kernel stacks. This is used for
10 * debugging and error reporting purposes. The kernel doesn't need
11 * full-blown stack unwinding with all the bells and whistles, so there
12 * is not much point in implementing the full Dwarf2 unwind API.
13 */
14
15struct module;
16
17struct unwind_frame_info {};
18
19static inline void unwind_init(void) {}
20static inline void unwind_setup(void) {}
21
22#ifdef CONFIG_MODULES
23
24static inline void *unwind_add_table(struct module *mod,
25 const void *table_start,
26 unsigned long table_size)
27{
28 return NULL;
29}
30
31static inline void unwind_remove_table(void *handle, int init_only)
32{
33}
34
35#endif
36
37static inline int unwind_init_frame_info(struct unwind_frame_info *info,
38 struct task_struct *tsk,
39 const struct pt_regs *regs)
40{
41 return -ENOSYS;
42}
43
44static inline int unwind_init_blocked(struct unwind_frame_info *info,
45 struct task_struct *tsk)
46{
47 return -ENOSYS;
48}
49
50static inline int unwind_init_running(struct unwind_frame_info *info,
51 asmlinkage int (*cb)(struct unwind_frame_info *,
52 void *arg),
53 void *arg)
54{
55 return -ENOSYS;
56}
57
58static inline int unwind(struct unwind_frame_info *info)
59{
60 return -ENOSYS;
61}
62
63static inline int unwind_to_user(struct unwind_frame_info *info)
64{
65 return -ENOSYS;
66}
67
68#endif /* _LINUX_UNWIND_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 307b88577eaa..506e7620a986 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -97,6 +97,10 @@ extern void unmap_kernel_range(unsigned long addr, unsigned long size);
97extern struct vm_struct *alloc_vm_area(size_t size); 97extern struct vm_struct *alloc_vm_area(size_t size);
98extern void free_vm_area(struct vm_struct *area); 98extern void free_vm_area(struct vm_struct *area);
99 99
100/* for /dev/kmem */
101extern long vread(char *buf, char *addr, unsigned long count);
102extern long vwrite(char *buf, char *addr, unsigned long count);
103
100/* 104/*
101 * Internals. Dont't use.. 105 * Internals. Dont't use..
102 */ 106 */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e585657e9831..7300ecdc480c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -30,7 +30,6 @@ static inline int task_is_pdflush(struct task_struct *task)
30enum writeback_sync_modes { 30enum writeback_sync_modes {
31 WB_SYNC_NONE, /* Don't wait on anything */ 31 WB_SYNC_NONE, /* Don't wait on anything */
32 WB_SYNC_ALL, /* Wait on every mapping */ 32 WB_SYNC_ALL, /* Wait on every mapping */
33 WB_SYNC_HOLD, /* Hold the inode on sb_dirty for sys_sync() */
34}; 33};
35 34
36/* 35/*
@@ -107,7 +106,9 @@ void throttle_vm_writeout(gfp_t gfp_mask);
107 106
108/* These are exported to sysctl. */ 107/* These are exported to sysctl. */
109extern int dirty_background_ratio; 108extern int dirty_background_ratio;
109extern unsigned long dirty_background_bytes;
110extern int vm_dirty_ratio; 110extern int vm_dirty_ratio;
111extern unsigned long vm_dirty_bytes;
111extern int dirty_writeback_interval; 112extern int dirty_writeback_interval;
112extern int dirty_expire_interval; 113extern int dirty_expire_interval;
113extern int vm_highmem_is_dirtyable; 114extern int vm_highmem_is_dirtyable;
@@ -116,17 +117,26 @@ extern int laptop_mode;
116 117
117extern unsigned long determine_dirtyable_memory(void); 118extern unsigned long determine_dirtyable_memory(void);
118 119
120extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
121 struct file *filp, void __user *buffer, size_t *lenp,
122 loff_t *ppos);
123extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
124 struct file *filp, void __user *buffer, size_t *lenp,
125 loff_t *ppos);
119extern int dirty_ratio_handler(struct ctl_table *table, int write, 126extern int dirty_ratio_handler(struct ctl_table *table, int write,
120 struct file *filp, void __user *buffer, size_t *lenp, 127 struct file *filp, void __user *buffer, size_t *lenp,
121 loff_t *ppos); 128 loff_t *ppos);
129extern int dirty_bytes_handler(struct ctl_table *table, int write,
130 struct file *filp, void __user *buffer, size_t *lenp,
131 loff_t *ppos);
122 132
123struct ctl_table; 133struct ctl_table;
124struct file; 134struct file;
125int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, 135int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
126 void __user *, size_t *, loff_t *); 136 void __user *, size_t *, loff_t *);
127 137
128void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, 138void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
129 struct backing_dev_info *bdi); 139 unsigned long *pbdi_dirty, struct backing_dev_info *bdi);
130 140
131void page_writeback_init(void); 141void page_writeback_init(void);
132void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, 142void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
diff --git a/init/Kconfig b/init/Kconfig
index d9d3dbabdb18..e7893b1d3e42 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -848,10 +848,6 @@ config RT_MUTEXES
848 boolean 848 boolean
849 select PLIST 849 select PLIST
850 850
851config TINY_SHMEM
852 default !SHMEM
853 bool
854
855config BASE_SMALL 851config BASE_SMALL
856 int 852 int
857 default 0 if BASE_FULL 853 default 0 if BASE_FULL
diff --git a/init/do_mounts.c b/init/do_mounts.c
index d055b1914c3d..5efca73b39f9 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -220,10 +220,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
220 220
221 sys_chdir("/root"); 221 sys_chdir("/root");
222 ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev; 222 ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
223 printk("VFS: Mounted root (%s filesystem)%s.\n", 223 printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
224 current->fs->pwd.mnt->mnt_sb->s_type->name, 224 current->fs->pwd.mnt->mnt_sb->s_type->name,
225 current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ? 225 current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
226 " readonly" : ""); 226 " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
227 return 0; 227 return 0;
228} 228}
229 229
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index d6da5cdd3c38..ff95e3192884 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -271,7 +271,7 @@ static int __init raid_setup(char *str)
271__setup("raid=", raid_setup); 271__setup("raid=", raid_setup);
272__setup("md=", md_setup); 272__setup("md=", md_setup);
273 273
274static void autodetect_raid(void) 274static void __init autodetect_raid(void)
275{ 275{
276 int fd; 276 int fd;
277 277
diff --git a/init/main.c b/init/main.c
index cd168ebc5924..b5a892c68375 100644
--- a/init/main.c
+++ b/init/main.c
@@ -50,7 +50,6 @@
50#include <linux/rmap.h> 50#include <linux/rmap.h>
51#include <linux/mempolicy.h> 51#include <linux/mempolicy.h>
52#include <linux/key.h> 52#include <linux/key.h>
53#include <linux/unwind.h>
54#include <linux/buffer_head.h> 53#include <linux/buffer_head.h>
55#include <linux/page_cgroup.h> 54#include <linux/page_cgroup.h>
56#include <linux/debug_locks.h> 55#include <linux/debug_locks.h>
@@ -108,7 +107,7 @@ EXPORT_SYMBOL(system_state);
108 107
109extern void time_init(void); 108extern void time_init(void);
110/* Default late time init is NULL. archs can override this later. */ 109/* Default late time init is NULL. archs can override this later. */
111void (*late_time_init)(void); 110void (*__initdata late_time_init)(void);
112extern void softirq_init(void); 111extern void softirq_init(void);
113 112
114/* Untouched command line saved by arch-specific code. */ 113/* Untouched command line saved by arch-specific code. */
@@ -447,7 +446,7 @@ static void __init setup_command_line(char *command_line)
447 * gcc-3.4 accidentally inlines this function, so use noinline. 446 * gcc-3.4 accidentally inlines this function, so use noinline.
448 */ 447 */
449 448
450static void noinline __init_refok rest_init(void) 449static noinline void __init_refok rest_init(void)
451 __releases(kernel_lock) 450 __releases(kernel_lock)
452{ 451{
453 int pid; 452 int pid;
@@ -537,7 +536,6 @@ asmlinkage void __init start_kernel(void)
537 * Need to run as early as possible, to initialize the 536 * Need to run as early as possible, to initialize the
538 * lockdep hash: 537 * lockdep hash:
539 */ 538 */
540 unwind_init();
541 lockdep_init(); 539 lockdep_init();
542 debug_objects_early_init(); 540 debug_objects_early_init();
543 cgroup_init_early(); 541 cgroup_init_early();
@@ -559,7 +557,6 @@ asmlinkage void __init start_kernel(void)
559 setup_arch(&command_line); 557 setup_arch(&command_line);
560 mm_init_owner(&init_mm, &init_task); 558 mm_init_owner(&init_mm, &init_task);
561 setup_command_line(command_line); 559 setup_command_line(command_line);
562 unwind_setup();
563 setup_per_cpu_areas(); 560 setup_per_cpu_areas();
564 setup_nr_cpu_ids(); 561 setup_nr_cpu_ids();
565 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ 562 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
@@ -786,7 +783,7 @@ static void run_init_process(char *init_filename)
786/* This is a non __init function. Force it to be noinline otherwise gcc 783/* This is a non __init function. Force it to be noinline otherwise gcc
787 * makes it inline to init() and it becomes part of init.text section 784 * makes it inline to init() and it becomes part of init.text section
788 */ 785 */
789static int noinline init_post(void) 786static noinline int init_post(void)
790{ 787{
791 free_initmem(); 788 free_initmem();
792 unlock_kernel(); 789 unlock_kernel();
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 0dfebc509426..4a7a12c95abe 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -26,29 +26,6 @@ static void *get_ipc(ctl_table *table)
26 return which; 26 return which;
27} 27}
28 28
29/*
30 * Routine that is called when the file "auto_msgmni" has successfully been
31 * written.
32 * Two values are allowed:
33 * 0: unregister msgmni's callback routine from the ipc namespace notifier
34 * chain. This means that msgmni won't be recomputed anymore upon memory
35 * add/remove or ipc namespace creation/removal.
36 * 1: register back the callback routine.
37 */
38static void ipc_auto_callback(int val)
39{
40 if (!val)
41 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
42 else {
43 /*
44 * Re-enable automatic recomputing only if not already
45 * enabled.
46 */
47 recompute_msgmni(current->nsproxy->ipc_ns);
48 cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
49 }
50}
51
52#ifdef CONFIG_PROC_FS 29#ifdef CONFIG_PROC_FS
53static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, 30static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
54 void __user *buffer, size_t *lenp, loff_t *ppos) 31 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -94,6 +71,29 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
94 lenp, ppos); 71 lenp, ppos);
95} 72}
96 73
74/*
75 * Routine that is called when the file "auto_msgmni" has successfully been
76 * written.
77 * Two values are allowed:
78 * 0: unregister msgmni's callback routine from the ipc namespace notifier
79 * chain. This means that msgmni won't be recomputed anymore upon memory
80 * add/remove or ipc namespace creation/removal.
81 * 1: register back the callback routine.
82 */
83static void ipc_auto_callback(int val)
84{
85 if (!val)
86 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
87 else {
88 /*
89 * Re-enable automatic recomputing only if not already
90 * enabled.
91 */
92 recompute_msgmni(current->nsproxy->ipc_ns);
93 cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
94 }
95}
96
97static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, 97static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
98 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 98 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
99{ 99{
diff --git a/ipc/sem.c b/ipc/sem.c
index fea0ad3aed7b..c68cd3f8f0c9 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1216,7 +1216,6 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
1216 if (timeout && jiffies_left == 0) 1216 if (timeout && jiffies_left == 0)
1217 error = -EAGAIN; 1217 error = -EAGAIN;
1218 list_del(&queue.list); 1218 list_del(&queue.list);
1219 goto out_unlock_free;
1220 1219
1221out_unlock_free: 1220out_unlock_free:
1222 sem_unlock(sma); 1221 sem_unlock(sma);
diff --git a/ipc/shm.c b/ipc/shm.c
index 57dd50046cef..b125b560240e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -75,7 +75,7 @@ void shm_init_ns(struct ipc_namespace *ns)
75 ns->shm_ctlall = SHMALL; 75 ns->shm_ctlall = SHMALL;
76 ns->shm_ctlmni = SHMMNI; 76 ns->shm_ctlmni = SHMMNI;
77 ns->shm_tot = 0; 77 ns->shm_tot = 0;
78 ipc_init_ids(&ns->ids[IPC_SHM_IDS]); 78 ipc_init_ids(&shm_ids(ns));
79} 79}
80 80
81/* 81/*
@@ -644,7 +644,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
644 if (err) 644 if (err)
645 return err; 645 return err;
646 646
647 memset(&shminfo,0,sizeof(shminfo)); 647 memset(&shminfo, 0, sizeof(shminfo));
648 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; 648 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
649 shminfo.shmmax = ns->shm_ctlmax; 649 shminfo.shmmax = ns->shm_ctlmax;
650 shminfo.shmall = ns->shm_ctlall; 650 shminfo.shmall = ns->shm_ctlall;
@@ -669,7 +669,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
669 if (err) 669 if (err)
670 return err; 670 return err;
671 671
672 memset(&shm_info,0,sizeof(shm_info)); 672 memset(&shm_info, 0, sizeof(shm_info));
673 down_read(&shm_ids(ns).rw_mutex); 673 down_read(&shm_ids(ns).rw_mutex);
674 shm_info.used_ids = shm_ids(ns).in_use; 674 shm_info.used_ids = shm_ids(ns).in_use;
675 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); 675 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
@@ -678,7 +678,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
678 shm_info.swap_successes = 0; 678 shm_info.swap_successes = 0;
679 err = ipc_get_maxid(&shm_ids(ns)); 679 err = ipc_get_maxid(&shm_ids(ns));
680 up_read(&shm_ids(ns).rw_mutex); 680 up_read(&shm_ids(ns).rw_mutex);
681 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { 681 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
682 err = -EFAULT; 682 err = -EFAULT;
683 goto out; 683 goto out;
684 } 684 }
@@ -692,11 +692,6 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
692 struct shmid64_ds tbuf; 692 struct shmid64_ds tbuf;
693 int result; 693 int result;
694 694
695 if (!buf) {
696 err = -EFAULT;
697 goto out;
698 }
699
700 if (cmd == SHM_STAT) { 695 if (cmd == SHM_STAT) {
701 shp = shm_lock(ns, shmid); 696 shp = shm_lock(ns, shmid);
702 if (IS_ERR(shp)) { 697 if (IS_ERR(shp)) {
@@ -712,7 +707,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
712 } 707 }
713 result = 0; 708 result = 0;
714 } 709 }
715 err=-EACCES; 710 err = -EACCES;
716 if (ipcperms (&shp->shm_perm, S_IRUGO)) 711 if (ipcperms (&shp->shm_perm, S_IRUGO))
717 goto out_unlock; 712 goto out_unlock;
718 err = security_shm_shmctl(shp, cmd); 713 err = security_shm_shmctl(shp, cmd);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87bb0258fd27..f221446aa02d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -116,7 +116,6 @@ static int root_count;
116 * be called. 116 * be called.
117 */ 117 */
118static int need_forkexit_callback __read_mostly; 118static int need_forkexit_callback __read_mostly;
119static int need_mm_owner_callback __read_mostly;
120 119
121/* convenient tests for these bits */ 120/* convenient tests for these bits */
122inline int cgroup_is_removed(const struct cgroup *cgrp) 121inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -2539,7 +2538,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2539 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; 2538 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
2540 2539
2541 need_forkexit_callback |= ss->fork || ss->exit; 2540 need_forkexit_callback |= ss->fork || ss->exit;
2542 need_mm_owner_callback |= !!ss->mm_owner_changed;
2543 2541
2544 /* At system boot, before all subsystems have been 2542 /* At system boot, before all subsystems have been
2545 * registered, no tasks have been forked, so we don't 2543 * registered, no tasks have been forked, so we don't
@@ -2789,37 +2787,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
2789 } 2787 }
2790} 2788}
2791 2789
2792#ifdef CONFIG_MM_OWNER
2793/**
2794 * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
2795 * @p: the new owner
2796 *
2797 * Called on every change to mm->owner. mm_init_owner() does not
2798 * invoke this routine, since it assigns the mm->owner the first time
2799 * and does not change it.
2800 *
2801 * The callbacks are invoked with mmap_sem held in read mode.
2802 */
2803void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
2804{
2805 struct cgroup *oldcgrp, *newcgrp = NULL;
2806
2807 if (need_mm_owner_callback) {
2808 int i;
2809 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2810 struct cgroup_subsys *ss = subsys[i];
2811 oldcgrp = task_cgroup(old, ss->subsys_id);
2812 if (new)
2813 newcgrp = task_cgroup(new, ss->subsys_id);
2814 if (oldcgrp == newcgrp)
2815 continue;
2816 if (ss->mm_owner_changed)
2817 ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
2818 }
2819 }
2820}
2821#endif /* CONFIG_MM_OWNER */
2822
2823/** 2790/**
2824 * cgroup_post_fork - called on a new task after adding it to the task list 2791 * cgroup_post_fork - called on a new task after adding it to the task list
2825 * @child: the task in question 2792 * @child: the task in question
diff --git a/kernel/compat.c b/kernel/compat.c
index d52e2ec1deb5..42d56544460f 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -24,6 +24,7 @@
24#include <linux/migrate.h> 24#include <linux/migrate.h>
25#include <linux/posix-timers.h> 25#include <linux/posix-timers.h>
26#include <linux/times.h> 26#include <linux/times.h>
27#include <linux/ptrace.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
@@ -229,6 +230,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
229 if (copy_to_user(tbuf, &tmp, sizeof(tmp))) 230 if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
230 return -EFAULT; 231 return -EFAULT;
231 } 232 }
233 force_successful_syscall_return();
232 return compat_jiffies_to_clock_t(jiffies); 234 return compat_jiffies_to_clock_t(jiffies);
233} 235}
234 236
@@ -894,8 +896,9 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc)
894 896
895 if (tloc) { 897 if (tloc) {
896 if (put_user(i,tloc)) 898 if (put_user(i,tloc))
897 i = -EFAULT; 899 return -EFAULT;
898 } 900 }
901 force_successful_syscall_return();
899 return i; 902 return i;
900} 903}
901 904
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 39c1a4c1c5a9..345ace5117de 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,6 +240,17 @@ static struct cpuset top_cpuset = {
240static DEFINE_MUTEX(callback_mutex); 240static DEFINE_MUTEX(callback_mutex);
241 241
242/* 242/*
243 * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
244 * buffers. They are statically allocated to prevent using excess stack
245 * when calling cpuset_print_task_mems_allowed().
246 */
247#define CPUSET_NAME_LEN (128)
248#define CPUSET_NODELIST_LEN (256)
249static char cpuset_name[CPUSET_NAME_LEN];
250static char cpuset_nodelist[CPUSET_NODELIST_LEN];
251static DEFINE_SPINLOCK(cpuset_buffer_lock);
252
253/*
243 * This is ugly, but preserves the userspace API for existing cpuset 254 * This is ugly, but preserves the userspace API for existing cpuset
244 * users. If someone tries to mount the "cpuset" filesystem, we 255 * users. If someone tries to mount the "cpuset" filesystem, we
245 * silently switch it to mount "cgroup" instead 256 * silently switch it to mount "cgroup" instead
@@ -2356,6 +2367,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2356 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); 2367 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2357} 2368}
2358 2369
2370/**
2371 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
2372 * @task: pointer to task_struct of some task.
2373 *
2374 * Description: Prints @task's name, cpuset name, and cached copy of its
2375 * mems_allowed to the kernel log. Must hold task_lock(task) to allow
2376 * dereferencing task_cs(task).
2377 */
2378void cpuset_print_task_mems_allowed(struct task_struct *tsk)
2379{
2380 struct dentry *dentry;
2381
2382 dentry = task_cs(tsk)->css.cgroup->dentry;
2383 spin_lock(&cpuset_buffer_lock);
2384 snprintf(cpuset_name, CPUSET_NAME_LEN,
2385 dentry ? (const char *)dentry->d_name.name : "/");
2386 nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
2387 tsk->mems_allowed);
2388 printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
2389 tsk->comm, cpuset_name, cpuset_nodelist);
2390 spin_unlock(&cpuset_buffer_lock);
2391}
2392
2359/* 2393/*
2360 * Collection of memory_pressure is suppressed unless 2394 * Collection of memory_pressure is suppressed unless
2361 * this flag is enabled by writing "1" to the special 2395 * this flag is enabled by writing "1" to the special
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index f013a0c2e111..038707404b76 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -109,20 +109,40 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
109int dma_alloc_from_coherent(struct device *dev, ssize_t size, 109int dma_alloc_from_coherent(struct device *dev, ssize_t size,
110 dma_addr_t *dma_handle, void **ret) 110 dma_addr_t *dma_handle, void **ret)
111{ 111{
112 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; 112 struct dma_coherent_mem *mem;
113 int order = get_order(size); 113 int order = get_order(size);
114 int pageno;
114 115
115 if (mem) { 116 if (!dev)
116 int page = bitmap_find_free_region(mem->bitmap, mem->size, 117 return 0;
117 order); 118 mem = dev->dma_mem;
118 if (page >= 0) { 119 if (!mem)
119 *dma_handle = mem->device_base + (page << PAGE_SHIFT); 120 return 0;
120 *ret = mem->virt_base + (page << PAGE_SHIFT); 121 if (unlikely(size > mem->size))
121 memset(*ret, 0, size); 122 return 0;
122 } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) 123
123 *ret = NULL; 124 pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
125 if (pageno >= 0) {
126 /*
127 * Memory was found in the per-device arena.
128 */
129 *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
130 *ret = mem->virt_base + (pageno << PAGE_SHIFT);
131 memset(*ret, 0, size);
132 } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
133 /*
134 * The per-device arena is exhausted and we are not
135 * permitted to fall back to generic memory.
136 */
137 *ret = NULL;
138 } else {
139 /*
140 * The per-device arena is exhausted and we are
141 * permitted to fall back to generic memory.
142 */
143 return 0;
124 } 144 }
125 return (mem != NULL); 145 return 1;
126} 146}
127EXPORT_SYMBOL(dma_alloc_from_coherent); 147EXPORT_SYMBOL(dma_alloc_from_coherent);
128 148
diff --git a/kernel/exit.c b/kernel/exit.c
index c9e5a1c14e08..c7740fa3252c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -642,35 +642,31 @@ retry:
642 /* 642 /*
643 * We found no owner yet mm_users > 1: this implies that we are 643 * We found no owner yet mm_users > 1: this implies that we are
644 * most likely racing with swapoff (try_to_unuse()) or /proc or 644 * most likely racing with swapoff (try_to_unuse()) or /proc or
645 * ptrace or page migration (get_task_mm()). Mark owner as NULL, 645 * ptrace or page migration (get_task_mm()). Mark owner as NULL.
646 * so that subsystems can understand the callback and take action.
647 */ 646 */
648 down_write(&mm->mmap_sem);
649 cgroup_mm_owner_callbacks(mm->owner, NULL);
650 mm->owner = NULL; 647 mm->owner = NULL;
651 up_write(&mm->mmap_sem);
652 return; 648 return;
653 649
654assign_new_owner: 650assign_new_owner:
655 BUG_ON(c == p); 651 BUG_ON(c == p);
656 get_task_struct(c); 652 get_task_struct(c);
657 read_unlock(&tasklist_lock);
658 down_write(&mm->mmap_sem);
659 /* 653 /*
660 * The task_lock protects c->mm from changing. 654 * The task_lock protects c->mm from changing.
661 * We always want mm->owner->mm == mm 655 * We always want mm->owner->mm == mm
662 */ 656 */
663 task_lock(c); 657 task_lock(c);
658 /*
659 * Delay read_unlock() till we have the task_lock()
660 * to ensure that c does not slip away underneath us
661 */
662 read_unlock(&tasklist_lock);
664 if (c->mm != mm) { 663 if (c->mm != mm) {
665 task_unlock(c); 664 task_unlock(c);
666 up_write(&mm->mmap_sem);
667 put_task_struct(c); 665 put_task_struct(c);
668 goto retry; 666 goto retry;
669 } 667 }
670 cgroup_mm_owner_callbacks(mm->owner, c);
671 mm->owner = c; 668 mm->owner = c;
672 task_unlock(c); 669 task_unlock(c);
673 up_write(&mm->mmap_sem);
674 put_task_struct(c); 670 put_task_struct(c);
675} 671}
676#endif /* CONFIG_MM_OWNER */ 672#endif /* CONFIG_MM_OWNER */
@@ -1055,10 +1051,7 @@ NORET_TYPE void do_exit(long code)
1055 preempt_count()); 1051 preempt_count());
1056 1052
1057 acct_update_integrals(tsk); 1053 acct_update_integrals(tsk);
1058 if (tsk->mm) { 1054
1059 update_hiwater_rss(tsk->mm);
1060 update_hiwater_vm(tsk->mm);
1061 }
1062 group_dead = atomic_dec_and_test(&tsk->signal->live); 1055 group_dead = atomic_dec_and_test(&tsk->signal->live);
1063 if (group_dead) { 1056 if (group_dead) {
1064 hrtimer_cancel(&tsk->signal->real_timer); 1057 hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index 43cbf30669e6..7b8f2a78be3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
400#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) 400#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
401#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 401#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
402 402
403static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
404
405static int __init coredump_filter_setup(char *s)
406{
407 default_dump_filter =
408 (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
409 MMF_DUMP_FILTER_MASK;
410 return 1;
411}
412
413__setup("coredump_filter=", coredump_filter_setup);
414
403#include <linux/init_task.h> 415#include <linux/init_task.h>
404 416
405static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 417static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
408 atomic_set(&mm->mm_count, 1); 420 atomic_set(&mm->mm_count, 1);
409 init_rwsem(&mm->mmap_sem); 421 init_rwsem(&mm->mmap_sem);
410 INIT_LIST_HEAD(&mm->mmlist); 422 INIT_LIST_HEAD(&mm->mmlist);
411 mm->flags = (current->mm) ? current->mm->flags 423 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
412 : MMF_DUMP_FILTER_DEFAULT;
413 mm->core_state = NULL; 424 mm->core_state = NULL;
414 mm->nr_ptes = 0; 425 mm->nr_ptes = 0;
415 set_mm_counter(mm, file_rss, 0); 426 set_mm_counter(mm, file_rss, 0);
@@ -758,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
758{ 769{
759 struct sighand_struct *sig; 770 struct sighand_struct *sig;
760 771
761 if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { 772 if (clone_flags & CLONE_SIGHAND) {
762 atomic_inc(&current->sighand->count); 773 atomic_inc(&current->sighand->count);
763 return 0; 774 return 0;
764 } 775 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b46dbb908669..a27a5f64443d 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -51,8 +51,8 @@ char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
51 51
52/** 52/**
53 * request_module - try to load a kernel module 53 * request_module - try to load a kernel module
54 * @fmt: printf style format string for the name of the module 54 * @fmt: printf style format string for the name of the module
55 * @varargs: arguements as specified in the format string 55 * @...: arguments as specified in the format string
56 * 56 *
57 * Load a module using the user mode module loader. The function returns 57 * Load a module using the user mode module loader. The function returns
58 * zero on success or a negative errno code on failure. Note that a 58 * zero on success or a negative errno code on failure. Note that a
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f8a3f25259a..1b9cbdc0127a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
69/* NOTE: change this value only with kprobe_mutex held */ 69/* NOTE: change this value only with kprobe_mutex held */
70static bool kprobe_enabled; 70static bool kprobe_enabled;
71 71
72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 72static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct { 74static struct {
75 spinlock_t lock ____cacheline_aligned_in_smp; 75 spinlock_t lock ____cacheline_aligned_in_smp;
@@ -115,6 +115,7 @@ enum kprobe_slot_state {
115 SLOT_USED = 2, 115 SLOT_USED = 2,
116}; 116};
117 117
118static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */
118static struct hlist_head kprobe_insn_pages; 119static struct hlist_head kprobe_insn_pages;
119static int kprobe_garbage_slots; 120static int kprobe_garbage_slots;
120static int collect_garbage_slots(void); 121static int collect_garbage_slots(void);
@@ -144,10 +145,10 @@ loop_end:
144} 145}
145 146
146/** 147/**
147 * get_insn_slot() - Find a slot on an executable page for an instruction. 148 * __get_insn_slot() - Find a slot on an executable page for an instruction.
148 * We allocate an executable page if there's no room on existing ones. 149 * We allocate an executable page if there's no room on existing ones.
149 */ 150 */
150kprobe_opcode_t __kprobes *get_insn_slot(void) 151static kprobe_opcode_t __kprobes *__get_insn_slot(void)
151{ 152{
152 struct kprobe_insn_page *kip; 153 struct kprobe_insn_page *kip;
153 struct hlist_node *pos; 154 struct hlist_node *pos;
@@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
196 return kip->insns; 197 return kip->insns;
197} 198}
198 199
200kprobe_opcode_t __kprobes *get_insn_slot(void)
201{
202 kprobe_opcode_t *ret;
203 mutex_lock(&kprobe_insn_mutex);
204 ret = __get_insn_slot();
205 mutex_unlock(&kprobe_insn_mutex);
206 return ret;
207}
208
199/* Return 1 if all garbages are collected, otherwise 0. */ 209/* Return 1 if all garbages are collected, otherwise 0. */
200static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) 210static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
201{ 211{
@@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void)
226{ 236{
227 struct kprobe_insn_page *kip; 237 struct kprobe_insn_page *kip;
228 struct hlist_node *pos, *next; 238 struct hlist_node *pos, *next;
239 int safety;
229 240
230 /* Ensure no-one is preepmted on the garbages */ 241 /* Ensure no-one is preepmted on the garbages */
231 if (check_safety() != 0) 242 mutex_unlock(&kprobe_insn_mutex);
243 safety = check_safety();
244 mutex_lock(&kprobe_insn_mutex);
245 if (safety != 0)
232 return -EAGAIN; 246 return -EAGAIN;
233 247
234 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 248 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
251 struct kprobe_insn_page *kip; 265 struct kprobe_insn_page *kip;
252 struct hlist_node *pos; 266 struct hlist_node *pos;
253 267
268 mutex_lock(&kprobe_insn_mutex);
254 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { 269 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
255 if (kip->insns <= slot && 270 if (kip->insns <= slot &&
256 slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { 271 slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
@@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
267 282
268 if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) 283 if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
269 collect_garbage_slots(); 284 collect_garbage_slots();
285
286 mutex_unlock(&kprobe_insn_mutex);
270} 287}
271#endif 288#endif
272 289
@@ -310,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
310 struct kprobe *kp; 327 struct kprobe *kp;
311 328
312 list_for_each_entry_rcu(kp, &p->list, list) { 329 list_for_each_entry_rcu(kp, &p->list, list) {
313 if (kp->pre_handler) { 330 if (kp->pre_handler && !kprobe_gone(kp)) {
314 set_kprobe_instance(kp); 331 set_kprobe_instance(kp);
315 if (kp->pre_handler(kp, regs)) 332 if (kp->pre_handler(kp, regs))
316 return 1; 333 return 1;
@@ -326,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
326 struct kprobe *kp; 343 struct kprobe *kp;
327 344
328 list_for_each_entry_rcu(kp, &p->list, list) { 345 list_for_each_entry_rcu(kp, &p->list, list) {
329 if (kp->post_handler) { 346 if (kp->post_handler && !kprobe_gone(kp)) {
330 set_kprobe_instance(kp); 347 set_kprobe_instance(kp);
331 kp->post_handler(kp, regs, flags); 348 kp->post_handler(kp, regs, flags);
332 reset_kprobe_instance(); 349 reset_kprobe_instance();
@@ -393,7 +410,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
393 hlist_add_head(&ri->hlist, head); 410 hlist_add_head(&ri->hlist, head);
394} 411}
395 412
396void kretprobe_hash_lock(struct task_struct *tsk, 413void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
397 struct hlist_head **head, unsigned long *flags) 414 struct hlist_head **head, unsigned long *flags)
398{ 415{
399 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 416 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -404,13 +421,15 @@ void kretprobe_hash_lock(struct task_struct *tsk,
404 spin_lock_irqsave(hlist_lock, *flags); 421 spin_lock_irqsave(hlist_lock, *flags);
405} 422}
406 423
407static void kretprobe_table_lock(unsigned long hash, unsigned long *flags) 424static void __kprobes kretprobe_table_lock(unsigned long hash,
425 unsigned long *flags)
408{ 426{
409 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 427 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
410 spin_lock_irqsave(hlist_lock, *flags); 428 spin_lock_irqsave(hlist_lock, *flags);
411} 429}
412 430
413void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) 431void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
432 unsigned long *flags)
414{ 433{
415 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 434 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
416 spinlock_t *hlist_lock; 435 spinlock_t *hlist_lock;
@@ -419,7 +438,7 @@ void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
419 spin_unlock_irqrestore(hlist_lock, *flags); 438 spin_unlock_irqrestore(hlist_lock, *flags);
420} 439}
421 440
422void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) 441void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
423{ 442{
424 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 443 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
425 spin_unlock_irqrestore(hlist_lock, *flags); 444 spin_unlock_irqrestore(hlist_lock, *flags);
@@ -526,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
526 ap->addr = p->addr; 545 ap->addr = p->addr;
527 ap->pre_handler = aggr_pre_handler; 546 ap->pre_handler = aggr_pre_handler;
528 ap->fault_handler = aggr_fault_handler; 547 ap->fault_handler = aggr_fault_handler;
529 if (p->post_handler) 548 /* We don't care the kprobe which has gone. */
549 if (p->post_handler && !kprobe_gone(p))
530 ap->post_handler = aggr_post_handler; 550 ap->post_handler = aggr_post_handler;
531 if (p->break_handler) 551 if (p->break_handler && !kprobe_gone(p))
532 ap->break_handler = aggr_break_handler; 552 ap->break_handler = aggr_break_handler;
533 553
534 INIT_LIST_HEAD(&ap->list); 554 INIT_LIST_HEAD(&ap->list);
@@ -547,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
547 int ret = 0; 567 int ret = 0;
548 struct kprobe *ap; 568 struct kprobe *ap;
549 569
570 if (kprobe_gone(old_p)) {
571 /*
572 * Attempting to insert new probe at the same location that
573 * had a probe in the module vaddr area which already
574 * freed. So, the instruction slot has already been
575 * released. We need a new slot for the new probe.
576 */
577 ret = arch_prepare_kprobe(old_p);
578 if (ret)
579 return ret;
580 }
550 if (old_p->pre_handler == aggr_pre_handler) { 581 if (old_p->pre_handler == aggr_pre_handler) {
551 copy_kprobe(old_p, p); 582 copy_kprobe(old_p, p);
552 ret = add_new_kprobe(old_p, p); 583 ret = add_new_kprobe(old_p, p);
584 ap = old_p;
553 } else { 585 } else {
554 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); 586 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
555 if (!ap) 587 if (!ap) {
588 if (kprobe_gone(old_p))
589 arch_remove_kprobe(old_p);
556 return -ENOMEM; 590 return -ENOMEM;
591 }
557 add_aggr_kprobe(ap, old_p); 592 add_aggr_kprobe(ap, old_p);
558 copy_kprobe(ap, p); 593 copy_kprobe(ap, p);
559 ret = add_new_kprobe(ap, p); 594 ret = add_new_kprobe(ap, p);
560 } 595 }
596 if (kprobe_gone(old_p)) {
597 /*
598 * If the old_p has gone, its breakpoint has been disarmed.
599 * We have to arm it again after preparing real kprobes.
600 */
601 ap->flags &= ~KPROBE_FLAG_GONE;
602 if (kprobe_enabled)
603 arch_arm_kprobe(ap);
604 }
561 return ret; 605 return ret;
562} 606}
563 607
@@ -600,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
600 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 644 return (kprobe_opcode_t *)(((char *)addr) + p->offset);
601} 645}
602 646
603static int __kprobes __register_kprobe(struct kprobe *p, 647int __kprobes register_kprobe(struct kprobe *p)
604 unsigned long called_from)
605{ 648{
606 int ret = 0; 649 int ret = 0;
607 struct kprobe *old_p; 650 struct kprobe *old_p;
@@ -620,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p,
620 return -EINVAL; 663 return -EINVAL;
621 } 664 }
622 665
623 p->mod_refcounted = 0; 666 p->flags = 0;
624
625 /* 667 /*
626 * Check if are we probing a module. 668 * Check if are we probing a module.
627 */ 669 */
628 probed_mod = __module_text_address((unsigned long) p->addr); 670 probed_mod = __module_text_address((unsigned long) p->addr);
629 if (probed_mod) { 671 if (probed_mod) {
630 struct module *calling_mod;
631 calling_mod = __module_text_address(called_from);
632 /* 672 /*
633 * We must allow modules to probe themself and in this case 673 * We must hold a refcount of the probed module while updating
634 * avoid incrementing the module refcount, so as to allow 674 * its code to prohibit unexpected unloading.
635 * unloading of self probing modules.
636 */ 675 */
637 if (calling_mod && calling_mod != probed_mod) { 676 if (unlikely(!try_module_get(probed_mod))) {
638 if (unlikely(!try_module_get(probed_mod))) { 677 preempt_enable();
639 preempt_enable(); 678 return -EINVAL;
640 return -EINVAL; 679 }
641 } 680 /*
642 p->mod_refcounted = 1; 681 * If the module freed .init.text, we couldn't insert
643 } else 682 * kprobes in there.
644 probed_mod = NULL; 683 */
684 if (within_module_init((unsigned long)p->addr, probed_mod) &&
685 probed_mod->state != MODULE_STATE_COMING) {
686 module_put(probed_mod);
687 preempt_enable();
688 return -EINVAL;
689 }
645 } 690 }
646 preempt_enable(); 691 preempt_enable();
647 692
@@ -668,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
668out: 713out:
669 mutex_unlock(&kprobe_mutex); 714 mutex_unlock(&kprobe_mutex);
670 715
671 if (ret && probed_mod) 716 if (probed_mod)
672 module_put(probed_mod); 717 module_put(probed_mod);
718
673 return ret; 719 return ret;
674} 720}
675 721
@@ -697,16 +743,16 @@ valid_p:
697 list_is_singular(&old_p->list))) { 743 list_is_singular(&old_p->list))) {
698 /* 744 /*
699 * Only probe on the hash list. Disarm only if kprobes are 745 * Only probe on the hash list. Disarm only if kprobes are
700 * enabled - otherwise, the breakpoint would already have 746 * enabled and not gone - otherwise, the breakpoint would
701 * been removed. We save on flushing icache. 747 * already have been removed. We save on flushing icache.
702 */ 748 */
703 if (kprobe_enabled) 749 if (kprobe_enabled && !kprobe_gone(old_p))
704 arch_disarm_kprobe(p); 750 arch_disarm_kprobe(p);
705 hlist_del_rcu(&old_p->hlist); 751 hlist_del_rcu(&old_p->hlist);
706 } else { 752 } else {
707 if (p->break_handler) 753 if (p->break_handler && !kprobe_gone(p))
708 old_p->break_handler = NULL; 754 old_p->break_handler = NULL;
709 if (p->post_handler) { 755 if (p->post_handler && !kprobe_gone(p)) {
710 list_for_each_entry_rcu(list_p, &old_p->list, list) { 756 list_for_each_entry_rcu(list_p, &old_p->list, list) {
711 if ((list_p != p) && (list_p->post_handler)) 757 if ((list_p != p) && (list_p->post_handler))
712 goto noclean; 758 goto noclean;
@@ -721,39 +767,27 @@ noclean:
721 767
722static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 768static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
723{ 769{
724 struct module *mod;
725 struct kprobe *old_p; 770 struct kprobe *old_p;
726 771
727 if (p->mod_refcounted) { 772 if (list_empty(&p->list))
728 /*
729 * Since we've already incremented refcount,
730 * we don't need to disable preemption.
731 */
732 mod = module_text_address((unsigned long)p->addr);
733 if (mod)
734 module_put(mod);
735 }
736
737 if (list_empty(&p->list) || list_is_singular(&p->list)) {
738 if (!list_empty(&p->list)) {
739 /* "p" is the last child of an aggr_kprobe */
740 old_p = list_entry(p->list.next, struct kprobe, list);
741 list_del(&p->list);
742 kfree(old_p);
743 }
744 arch_remove_kprobe(p); 773 arch_remove_kprobe(p);
774 else if (list_is_singular(&p->list)) {
775 /* "p" is the last child of an aggr_kprobe */
776 old_p = list_entry(p->list.next, struct kprobe, list);
777 list_del(&p->list);
778 arch_remove_kprobe(old_p);
779 kfree(old_p);
745 } 780 }
746} 781}
747 782
748static int __register_kprobes(struct kprobe **kps, int num, 783int __kprobes register_kprobes(struct kprobe **kps, int num)
749 unsigned long called_from)
750{ 784{
751 int i, ret = 0; 785 int i, ret = 0;
752 786
753 if (num <= 0) 787 if (num <= 0)
754 return -EINVAL; 788 return -EINVAL;
755 for (i = 0; i < num; i++) { 789 for (i = 0; i < num; i++) {
756 ret = __register_kprobe(kps[i], called_from); 790 ret = register_kprobe(kps[i]);
757 if (ret < 0) { 791 if (ret < 0) {
758 if (i > 0) 792 if (i > 0)
759 unregister_kprobes(kps, i); 793 unregister_kprobes(kps, i);
@@ -763,26 +797,11 @@ static int __register_kprobes(struct kprobe **kps, int num,
763 return ret; 797 return ret;
764} 798}
765 799
766/*
767 * Registration and unregistration functions for kprobe.
768 */
769int __kprobes register_kprobe(struct kprobe *p)
770{
771 return __register_kprobes(&p, 1,
772 (unsigned long)__builtin_return_address(0));
773}
774
775void __kprobes unregister_kprobe(struct kprobe *p) 800void __kprobes unregister_kprobe(struct kprobe *p)
776{ 801{
777 unregister_kprobes(&p, 1); 802 unregister_kprobes(&p, 1);
778} 803}
779 804
780int __kprobes register_kprobes(struct kprobe **kps, int num)
781{
782 return __register_kprobes(kps, num,
783 (unsigned long)__builtin_return_address(0));
784}
785
786void __kprobes unregister_kprobes(struct kprobe **kps, int num) 805void __kprobes unregister_kprobes(struct kprobe **kps, int num)
787{ 806{
788 int i; 807 int i;
@@ -811,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
811 return (unsigned long)entry; 830 return (unsigned long)entry;
812} 831}
813 832
814static int __register_jprobes(struct jprobe **jps, int num, 833int __kprobes register_jprobes(struct jprobe **jps, int num)
815 unsigned long called_from)
816{ 834{
817 struct jprobe *jp; 835 struct jprobe *jp;
818 int ret = 0, i; 836 int ret = 0, i;
@@ -830,7 +848,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
830 /* Todo: Verify probepoint is a function entry point */ 848 /* Todo: Verify probepoint is a function entry point */
831 jp->kp.pre_handler = setjmp_pre_handler; 849 jp->kp.pre_handler = setjmp_pre_handler;
832 jp->kp.break_handler = longjmp_break_handler; 850 jp->kp.break_handler = longjmp_break_handler;
833 ret = __register_kprobe(&jp->kp, called_from); 851 ret = register_kprobe(&jp->kp);
834 } 852 }
835 if (ret < 0) { 853 if (ret < 0) {
836 if (i > 0) 854 if (i > 0)
@@ -843,8 +861,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
843 861
844int __kprobes register_jprobe(struct jprobe *jp) 862int __kprobes register_jprobe(struct jprobe *jp)
845{ 863{
846 return __register_jprobes(&jp, 1, 864 return register_jprobes(&jp, 1);
847 (unsigned long)__builtin_return_address(0));
848} 865}
849 866
850void __kprobes unregister_jprobe(struct jprobe *jp) 867void __kprobes unregister_jprobe(struct jprobe *jp)
@@ -852,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
852 unregister_jprobes(&jp, 1); 869 unregister_jprobes(&jp, 1);
853} 870}
854 871
855int __kprobes register_jprobes(struct jprobe **jps, int num)
856{
857 return __register_jprobes(jps, num,
858 (unsigned long)__builtin_return_address(0));
859}
860
861void __kprobes unregister_jprobes(struct jprobe **jps, int num) 872void __kprobes unregister_jprobes(struct jprobe **jps, int num)
862{ 873{
863 int i; 874 int i;
@@ -920,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
920 return 0; 931 return 0;
921} 932}
922 933
923static int __kprobes __register_kretprobe(struct kretprobe *rp, 934int __kprobes register_kretprobe(struct kretprobe *rp)
924 unsigned long called_from)
925{ 935{
926 int ret = 0; 936 int ret = 0;
927 struct kretprobe_instance *inst; 937 struct kretprobe_instance *inst;
@@ -967,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
967 977
968 rp->nmissed = 0; 978 rp->nmissed = 0;
969 /* Establish function entry probe point */ 979 /* Establish function entry probe point */
970 ret = __register_kprobe(&rp->kp, called_from); 980 ret = register_kprobe(&rp->kp);
971 if (ret != 0) 981 if (ret != 0)
972 free_rp_inst(rp); 982 free_rp_inst(rp);
973 return ret; 983 return ret;
974} 984}
975 985
976static int __register_kretprobes(struct kretprobe **rps, int num, 986int __kprobes register_kretprobes(struct kretprobe **rps, int num)
977 unsigned long called_from)
978{ 987{
979 int ret = 0, i; 988 int ret = 0, i;
980 989
981 if (num <= 0) 990 if (num <= 0)
982 return -EINVAL; 991 return -EINVAL;
983 for (i = 0; i < num; i++) { 992 for (i = 0; i < num; i++) {
984 ret = __register_kretprobe(rps[i], called_from); 993 ret = register_kretprobe(rps[i]);
985 if (ret < 0) { 994 if (ret < 0) {
986 if (i > 0) 995 if (i > 0)
987 unregister_kretprobes(rps, i); 996 unregister_kretprobes(rps, i);
@@ -991,23 +1000,11 @@ static int __register_kretprobes(struct kretprobe **rps, int num,
991 return ret; 1000 return ret;
992} 1001}
993 1002
994int __kprobes register_kretprobe(struct kretprobe *rp)
995{
996 return __register_kretprobes(&rp, 1,
997 (unsigned long)__builtin_return_address(0));
998}
999
1000void __kprobes unregister_kretprobe(struct kretprobe *rp) 1003void __kprobes unregister_kretprobe(struct kretprobe *rp)
1001{ 1004{
1002 unregister_kretprobes(&rp, 1); 1005 unregister_kretprobes(&rp, 1);
1003} 1006}
1004 1007
1005int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1006{
1007 return __register_kretprobes(rps, num,
1008 (unsigned long)__builtin_return_address(0));
1009}
1010
1011void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1008void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1012{ 1009{
1013 int i; 1010 int i;
@@ -1055,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1055 1052
1056#endif /* CONFIG_KRETPROBES */ 1053#endif /* CONFIG_KRETPROBES */
1057 1054
1055/* Set the kprobe gone and remove its instruction buffer. */
1056static void __kprobes kill_kprobe(struct kprobe *p)
1057{
1058 struct kprobe *kp;
1059 p->flags |= KPROBE_FLAG_GONE;
1060 if (p->pre_handler == aggr_pre_handler) {
1061 /*
1062 * If this is an aggr_kprobe, we have to list all the
1063 * chained probes and mark them GONE.
1064 */
1065 list_for_each_entry_rcu(kp, &p->list, list)
1066 kp->flags |= KPROBE_FLAG_GONE;
1067 p->post_handler = NULL;
1068 p->break_handler = NULL;
1069 }
1070 /*
1071 * Here, we can remove insn_slot safely, because no thread calls
1072 * the original probed function (which will be freed soon) any more.
1073 */
1074 arch_remove_kprobe(p);
1075}
1076
1077/* Module notifier call back, checking kprobes on the module */
1078static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1079 unsigned long val, void *data)
1080{
1081 struct module *mod = data;
1082 struct hlist_head *head;
1083 struct hlist_node *node;
1084 struct kprobe *p;
1085 unsigned int i;
1086 int checkcore = (val == MODULE_STATE_GOING);
1087
1088 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
1089 return NOTIFY_DONE;
1090
1091 /*
1092 * When MODULE_STATE_GOING was notified, both of module .text and
1093 * .init.text sections would be freed. When MODULE_STATE_LIVE was
1094 * notified, only .init.text section would be freed. We need to
1095 * disable kprobes which have been inserted in the sections.
1096 */
1097 mutex_lock(&kprobe_mutex);
1098 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1099 head = &kprobe_table[i];
1100 hlist_for_each_entry_rcu(p, node, head, hlist)
1101 if (within_module_init((unsigned long)p->addr, mod) ||
1102 (checkcore &&
1103 within_module_core((unsigned long)p->addr, mod))) {
1104 /*
1105 * The vaddr this probe is installed will soon
1106 * be vfreed buy not synced to disk. Hence,
1107 * disarming the breakpoint isn't needed.
1108 */
1109 kill_kprobe(p);
1110 }
1111 }
1112 mutex_unlock(&kprobe_mutex);
1113 return NOTIFY_DONE;
1114}
1115
1116static struct notifier_block kprobe_module_nb = {
1117 .notifier_call = kprobes_module_callback,
1118 .priority = 0
1119};
1120
1058static int __init init_kprobes(void) 1121static int __init init_kprobes(void)
1059{ 1122{
1060 int i, err = 0; 1123 int i, err = 0;
@@ -1111,6 +1174,9 @@ static int __init init_kprobes(void)
1111 err = arch_init_kprobes(); 1174 err = arch_init_kprobes();
1112 if (!err) 1175 if (!err)
1113 err = register_die_notifier(&kprobe_exceptions_nb); 1176 err = register_die_notifier(&kprobe_exceptions_nb);
1177 if (!err)
1178 err = register_module_notifier(&kprobe_module_nb);
1179
1114 kprobes_initialized = (err == 0); 1180 kprobes_initialized = (err == 0);
1115 1181
1116 if (!err) 1182 if (!err)
@@ -1131,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1131 else 1197 else
1132 kprobe_type = "k"; 1198 kprobe_type = "k";
1133 if (sym) 1199 if (sym)
1134 seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type, 1200 seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
1135 sym, offset, (modname ? modname : " ")); 1201 sym, offset, (modname ? modname : " "),
1202 (kprobe_gone(p) ? "[GONE]" : ""));
1136 else 1203 else
1137 seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr); 1204 seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
1205 (kprobe_gone(p) ? "[GONE]" : ""));
1138} 1206}
1139 1207
1140static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 1208static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1215,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void)
1215 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1283 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1216 head = &kprobe_table[i]; 1284 head = &kprobe_table[i];
1217 hlist_for_each_entry_rcu(p, node, head, hlist) 1285 hlist_for_each_entry_rcu(p, node, head, hlist)
1218 arch_arm_kprobe(p); 1286 if (!kprobe_gone(p))
1287 arch_arm_kprobe(p);
1219 } 1288 }
1220 1289
1221 kprobe_enabled = true; 1290 kprobe_enabled = true;
@@ -1244,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void)
1244 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1313 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1245 head = &kprobe_table[i]; 1314 head = &kprobe_table[i];
1246 hlist_for_each_entry_rcu(p, node, head, hlist) { 1315 hlist_for_each_entry_rcu(p, node, head, hlist) {
1247 if (!arch_trampoline_kprobe(p)) 1316 if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
1248 arch_disarm_kprobe(p); 1317 arch_disarm_kprobe(p);
1249 } 1318 }
1250 } 1319 }
diff --git a/kernel/module.c b/kernel/module.c
index f47cce910f25..496dcb57b608 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -43,7 +43,6 @@
43#include <linux/device.h> 43#include <linux/device.h>
44#include <linux/string.h> 44#include <linux/string.h>
45#include <linux/mutex.h> 45#include <linux/mutex.h>
46#include <linux/unwind.h>
47#include <linux/rculist.h> 46#include <linux/rculist.h>
48#include <asm/uaccess.h> 47#include <asm/uaccess.h>
49#include <asm/cacheflush.h> 48#include <asm/cacheflush.h>
@@ -1449,8 +1448,6 @@ static void free_module(struct module *mod)
1449 remove_sect_attrs(mod); 1448 remove_sect_attrs(mod);
1450 mod_kobject_remove(mod); 1449 mod_kobject_remove(mod);
1451 1450
1452 unwind_remove_table(mod->unwind_info, 0);
1453
1454 /* Arch-specific cleanup. */ 1451 /* Arch-specific cleanup. */
1455 module_arch_cleanup(mod); 1452 module_arch_cleanup(mod);
1456 1453
@@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod,
1867 unsigned int symindex = 0; 1864 unsigned int symindex = 0;
1868 unsigned int strindex = 0; 1865 unsigned int strindex = 0;
1869 unsigned int modindex, versindex, infoindex, pcpuindex; 1866 unsigned int modindex, versindex, infoindex, pcpuindex;
1870 unsigned int unwindex = 0;
1871 unsigned int num_kp, num_mcount; 1867 unsigned int num_kp, num_mcount;
1872 struct kernel_param *kp; 1868 struct kernel_param *kp;
1873 struct module *mod; 1869 struct module *mod;
@@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod,
1957 versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); 1953 versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
1958 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); 1954 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
1959 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); 1955 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
1960#ifdef ARCH_UNWIND_SECTION_NAME
1961 unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
1962#endif
1963 1956
1964 /* Don't keep modinfo and version sections. */ 1957 /* Don't keep modinfo and version sections. */
1965 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 1958 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod,
1969 sechdrs[symindex].sh_flags |= SHF_ALLOC; 1962 sechdrs[symindex].sh_flags |= SHF_ALLOC;
1970 sechdrs[strindex].sh_flags |= SHF_ALLOC; 1963 sechdrs[strindex].sh_flags |= SHF_ALLOC;
1971#endif 1964#endif
1972 if (unwindex)
1973 sechdrs[unwindex].sh_flags |= SHF_ALLOC;
1974 1965
1975 /* Check module struct version now, before we try to use module. */ 1966 /* Check module struct version now, before we try to use module. */
1976 if (!check_modstruct_version(sechdrs, versindex, mod)) { 1967 if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod,
2267 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2258 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2268 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2259 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2269 2260
2270 /* Size of section 0 is 0, so this works well if no unwind info. */
2271 mod->unwind_info = unwind_add_table(mod,
2272 (void *)sechdrs[unwindex].sh_addr,
2273 sechdrs[unwindex].sh_size);
2274
2275 /* Get rid of temporary copy */ 2261 /* Get rid of temporary copy */
2276 vfree(hdr); 2262 vfree(hdr);
2277 2263
@@ -2366,11 +2352,12 @@ sys_init_module(void __user *umod,
2366 /* Now it's a first class citizen! Wake up anyone waiting for it. */ 2352 /* Now it's a first class citizen! Wake up anyone waiting for it. */
2367 mod->state = MODULE_STATE_LIVE; 2353 mod->state = MODULE_STATE_LIVE;
2368 wake_up(&module_wq); 2354 wake_up(&module_wq);
2355 blocking_notifier_call_chain(&module_notify_list,
2356 MODULE_STATE_LIVE, mod);
2369 2357
2370 mutex_lock(&module_mutex); 2358 mutex_lock(&module_mutex);
2371 /* Drop initial reference. */ 2359 /* Drop initial reference. */
2372 module_put(mod); 2360 module_put(mod);
2373 unwind_remove_table(mod->unwind_info, 1);
2374 module_free(mod, mod->module_init); 2361 module_free(mod, mod->module_init);
2375 mod->module_init = NULL; 2362 mod->module_init = NULL;
2376 mod->init_size = 0; 2363 mod->init_size = 0;
@@ -2405,7 +2392,7 @@ static const char *get_ksymbol(struct module *mod,
2405 unsigned long nextval; 2392 unsigned long nextval;
2406 2393
2407 /* At worse, next value is at end of module */ 2394 /* At worse, next value is at end of module */
2408 if (within(addr, mod->module_init, mod->init_size)) 2395 if (within_module_init(addr, mod))
2409 nextval = (unsigned long)mod->module_init+mod->init_text_size; 2396 nextval = (unsigned long)mod->module_init+mod->init_text_size;
2410 else 2397 else
2411 nextval = (unsigned long)mod->module_core+mod->core_text_size; 2398 nextval = (unsigned long)mod->module_core+mod->core_text_size;
@@ -2453,8 +2440,8 @@ const char *module_address_lookup(unsigned long addr,
2453 2440
2454 preempt_disable(); 2441 preempt_disable();
2455 list_for_each_entry_rcu(mod, &modules, list) { 2442 list_for_each_entry_rcu(mod, &modules, list) {
2456 if (within(addr, mod->module_init, mod->init_size) 2443 if (within_module_init(addr, mod) ||
2457 || within(addr, mod->module_core, mod->core_size)) { 2444 within_module_core(addr, mod)) {
2458 if (modname) 2445 if (modname)
2459 *modname = mod->name; 2446 *modname = mod->name;
2460 ret = get_ksymbol(mod, addr, size, offset); 2447 ret = get_ksymbol(mod, addr, size, offset);
@@ -2476,8 +2463,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
2476 2463
2477 preempt_disable(); 2464 preempt_disable();
2478 list_for_each_entry_rcu(mod, &modules, list) { 2465 list_for_each_entry_rcu(mod, &modules, list) {
2479 if (within(addr, mod->module_init, mod->init_size) || 2466 if (within_module_init(addr, mod) ||
2480 within(addr, mod->module_core, mod->core_size)) { 2467 within_module_core(addr, mod)) {
2481 const char *sym; 2468 const char *sym;
2482 2469
2483 sym = get_ksymbol(mod, addr, NULL, NULL); 2470 sym = get_ksymbol(mod, addr, NULL, NULL);
@@ -2500,8 +2487,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
2500 2487
2501 preempt_disable(); 2488 preempt_disable();
2502 list_for_each_entry_rcu(mod, &modules, list) { 2489 list_for_each_entry_rcu(mod, &modules, list) {
2503 if (within(addr, mod->module_init, mod->init_size) || 2490 if (within_module_init(addr, mod) ||
2504 within(addr, mod->module_core, mod->core_size)) { 2491 within_module_core(addr, mod)) {
2505 const char *sym; 2492 const char *sym;
2506 2493
2507 sym = get_ksymbol(mod, addr, size, offset); 2494 sym = get_ksymbol(mod, addr, size, offset);
@@ -2720,7 +2707,7 @@ int is_module_address(unsigned long addr)
2720 preempt_disable(); 2707 preempt_disable();
2721 2708
2722 list_for_each_entry_rcu(mod, &modules, list) { 2709 list_for_each_entry_rcu(mod, &modules, list) {
2723 if (within(addr, mod->module_core, mod->core_size)) { 2710 if (within_module_core(addr, mod)) {
2724 preempt_enable(); 2711 preempt_enable();
2725 return 1; 2712 return 1;
2726 } 2713 }
diff --git a/kernel/panic.c b/kernel/panic.c
index 13f06349a786..2a2ff36ff44d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -299,6 +299,8 @@ static int init_oops_id(void)
299{ 299{
300 if (!oops_id) 300 if (!oops_id)
301 get_random_bytes(&oops_id, sizeof(oops_id)); 301 get_random_bytes(&oops_id, sizeof(oops_id));
302 else
303 oops_id++;
302 304
303 return 0; 305 return 0;
304} 306}
diff --git a/kernel/profile.c b/kernel/profile.c
index d18e2d2654f2..784933acf5b8 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -445,7 +445,6 @@ void profile_tick(int type)
445#ifdef CONFIG_PROC_FS 445#ifdef CONFIG_PROC_FS
446#include <linux/proc_fs.h> 446#include <linux/proc_fs.h>
447#include <asm/uaccess.h> 447#include <asm/uaccess.h>
448#include <asm/ptrace.h>
449 448
450static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, 449static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
451 int count, int *eof, void *data) 450 int count, int *eof, void *data)
diff --git a/kernel/signal.c b/kernel/signal.c
index 8e95855ff3cf..3152ac3b62e2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -858,7 +858,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
858 q->info.si_signo = sig; 858 q->info.si_signo = sig;
859 q->info.si_errno = 0; 859 q->info.si_errno = 0;
860 q->info.si_code = SI_USER; 860 q->info.si_code = SI_USER;
861 q->info.si_pid = task_pid_vnr(current); 861 q->info.si_pid = task_tgid_nr_ns(current,
862 task_active_pid_ns(t));
862 q->info.si_uid = current_uid(); 863 q->info.si_uid = current_uid();
863 break; 864 break;
864 case (unsigned long) SEND_SIG_PRIV: 865 case (unsigned long) SEND_SIG_PRIV:
diff --git a/kernel/sys.c b/kernel/sys.c
index d356d79e84ac..4a43617cd565 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -33,6 +33,7 @@
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h> 34#include <linux/seccomp.h>
35#include <linux/cpu.h> 35#include <linux/cpu.h>
36#include <linux/ptrace.h>
36 37
37#include <linux/compat.h> 38#include <linux/compat.h>
38#include <linux/syscalls.h> 39#include <linux/syscalls.h>
@@ -927,6 +928,7 @@ asmlinkage long sys_times(struct tms __user * tbuf)
927 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 928 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
928 return -EFAULT; 929 return -EFAULT;
929 } 930 }
931 force_successful_syscall_return();
930 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 932 return (long) jiffies_64_to_clock_t(get_jiffies_64());
931} 933}
932 934
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ff6d45c7626f..92f6e5bc3c24 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -87,10 +87,6 @@ extern int rcutorture_runnable;
87#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 87#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
88 88
89/* Constants used for minimum and maximum */ 89/* Constants used for minimum and maximum */
90#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
91static int one = 1;
92#endif
93
94#ifdef CONFIG_DETECT_SOFTLOCKUP 90#ifdef CONFIG_DETECT_SOFTLOCKUP
95static int sixty = 60; 91static int sixty = 60;
96static int neg_one = -1; 92static int neg_one = -1;
@@ -101,6 +97,7 @@ static int two = 2;
101#endif 97#endif
102 98
103static int zero; 99static int zero;
100static int one = 1;
104static int one_hundred = 100; 101static int one_hundred = 100;
105 102
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 103/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -952,12 +949,22 @@ static struct ctl_table vm_table[] = {
952 .data = &dirty_background_ratio, 949 .data = &dirty_background_ratio,
953 .maxlen = sizeof(dirty_background_ratio), 950 .maxlen = sizeof(dirty_background_ratio),
954 .mode = 0644, 951 .mode = 0644,
955 .proc_handler = &proc_dointvec_minmax, 952 .proc_handler = &dirty_background_ratio_handler,
956 .strategy = &sysctl_intvec, 953 .strategy = &sysctl_intvec,
957 .extra1 = &zero, 954 .extra1 = &zero,
958 .extra2 = &one_hundred, 955 .extra2 = &one_hundred,
959 }, 956 },
960 { 957 {
958 .ctl_name = CTL_UNNUMBERED,
959 .procname = "dirty_background_bytes",
960 .data = &dirty_background_bytes,
961 .maxlen = sizeof(dirty_background_bytes),
962 .mode = 0644,
963 .proc_handler = &dirty_background_bytes_handler,
964 .strategy = &sysctl_intvec,
965 .extra1 = &one,
966 },
967 {
961 .ctl_name = VM_DIRTY_RATIO, 968 .ctl_name = VM_DIRTY_RATIO,
962 .procname = "dirty_ratio", 969 .procname = "dirty_ratio",
963 .data = &vm_dirty_ratio, 970 .data = &vm_dirty_ratio,
@@ -969,6 +976,16 @@ static struct ctl_table vm_table[] = {
969 .extra2 = &one_hundred, 976 .extra2 = &one_hundred,
970 }, 977 },
971 { 978 {
979 .ctl_name = CTL_UNNUMBERED,
980 .procname = "dirty_bytes",
981 .data = &vm_dirty_bytes,
982 .maxlen = sizeof(vm_dirty_bytes),
983 .mode = 0644,
984 .proc_handler = &dirty_bytes_handler,
985 .strategy = &sysctl_intvec,
986 .extra1 = &one,
987 },
988 {
972 .procname = "dirty_writeback_centisecs", 989 .procname = "dirty_writeback_centisecs",
973 .data = &dirty_writeback_interval, 990 .data = &dirty_writeback_interval,
974 .maxlen = sizeof(dirty_writeback_interval), 991 .maxlen = sizeof(dirty_writeback_interval),
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 06b6395b45b2..4f104515a19b 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -22,21 +22,11 @@
22 22
23static u32 rand1, preh_val, posth_val, jph_val; 23static u32 rand1, preh_val, posth_val, jph_val;
24static int errors, handler_errors, num_tests; 24static int errors, handler_errors, num_tests;
25static u32 (*target)(u32 value);
26static u32 (*target2)(u32 value);
25 27
26static noinline u32 kprobe_target(u32 value) 28static noinline u32 kprobe_target(u32 value)
27{ 29{
28 /*
29 * gcc ignores noinline on some architectures unless we stuff
30 * sufficient lard into the function. The get_kprobe() here is
31 * just for that.
32 *
33 * NOTE: We aren't concerned about the correctness of get_kprobe()
34 * here; hence, this call is neither under !preempt nor with the
35 * kprobe_mutex held. This is fine(tm)
36 */
37 if (get_kprobe((void *)0xdeadbeef))
38 printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
39
40 return (value / div_factor); 30 return (value / div_factor);
41} 31}
42 32
@@ -74,7 +64,7 @@ static int test_kprobe(void)
74 return ret; 64 return ret;
75 } 65 }
76 66
77 ret = kprobe_target(rand1); 67 ret = target(rand1);
78 unregister_kprobe(&kp); 68 unregister_kprobe(&kp);
79 69
80 if (preh_val == 0) { 70 if (preh_val == 0) {
@@ -92,6 +82,84 @@ static int test_kprobe(void)
92 return 0; 82 return 0;
93} 83}
94 84
85static noinline u32 kprobe_target2(u32 value)
86{
87 return (value / div_factor) + 1;
88}
89
90static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs)
91{
92 preh_val = (rand1 / div_factor) + 1;
93 return 0;
94}
95
96static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
97 unsigned long flags)
98{
99 if (preh_val != (rand1 / div_factor) + 1) {
100 handler_errors++;
101 printk(KERN_ERR "Kprobe smoke test failed: "
102 "incorrect value in post_handler2\n");
103 }
104 posth_val = preh_val + div_factor;
105}
106
107static struct kprobe kp2 = {
108 .symbol_name = "kprobe_target2",
109 .pre_handler = kp_pre_handler2,
110 .post_handler = kp_post_handler2
111};
112
113static int test_kprobes(void)
114{
115 int ret;
116 struct kprobe *kps[2] = {&kp, &kp2};
117
118 kp.addr = 0; /* addr should be cleard for reusing kprobe. */
119 ret = register_kprobes(kps, 2);
120 if (ret < 0) {
121 printk(KERN_ERR "Kprobe smoke test failed: "
122 "register_kprobes returned %d\n", ret);
123 return ret;
124 }
125
126 preh_val = 0;
127 posth_val = 0;
128 ret = target(rand1);
129
130 if (preh_val == 0) {
131 printk(KERN_ERR "Kprobe smoke test failed: "
132 "kprobe pre_handler not called\n");
133 handler_errors++;
134 }
135
136 if (posth_val == 0) {
137 printk(KERN_ERR "Kprobe smoke test failed: "
138 "kprobe post_handler not called\n");
139 handler_errors++;
140 }
141
142 preh_val = 0;
143 posth_val = 0;
144 ret = target2(rand1);
145
146 if (preh_val == 0) {
147 printk(KERN_ERR "Kprobe smoke test failed: "
148 "kprobe pre_handler2 not called\n");
149 handler_errors++;
150 }
151
152 if (posth_val == 0) {
153 printk(KERN_ERR "Kprobe smoke test failed: "
154 "kprobe post_handler2 not called\n");
155 handler_errors++;
156 }
157
158 unregister_kprobes(kps, 2);
159 return 0;
160
161}
162
95static u32 j_kprobe_target(u32 value) 163static u32 j_kprobe_target(u32 value)
96{ 164{
97 if (value != rand1) { 165 if (value != rand1) {
@@ -121,7 +189,7 @@ static int test_jprobe(void)
121 return ret; 189 return ret;
122 } 190 }
123 191
124 ret = kprobe_target(rand1); 192 ret = target(rand1);
125 unregister_jprobe(&jp); 193 unregister_jprobe(&jp);
126 if (jph_val == 0) { 194 if (jph_val == 0) {
127 printk(KERN_ERR "Kprobe smoke test failed: " 195 printk(KERN_ERR "Kprobe smoke test failed: "
@@ -132,6 +200,43 @@ static int test_jprobe(void)
132 return 0; 200 return 0;
133} 201}
134 202
203static struct jprobe jp2 = {
204 .entry = j_kprobe_target,
205 .kp.symbol_name = "kprobe_target2"
206};
207
208static int test_jprobes(void)
209{
210 int ret;
211 struct jprobe *jps[2] = {&jp, &jp2};
212
213 jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */
214 ret = register_jprobes(jps, 2);
215 if (ret < 0) {
216 printk(KERN_ERR "Kprobe smoke test failed: "
217 "register_jprobes returned %d\n", ret);
218 return ret;
219 }
220
221 jph_val = 0;
222 ret = target(rand1);
223 if (jph_val == 0) {
224 printk(KERN_ERR "Kprobe smoke test failed: "
225 "jprobe handler not called\n");
226 handler_errors++;
227 }
228
229 jph_val = 0;
230 ret = target2(rand1);
231 if (jph_val == 0) {
232 printk(KERN_ERR "Kprobe smoke test failed: "
233 "jprobe handler2 not called\n");
234 handler_errors++;
235 }
236 unregister_jprobes(jps, 2);
237
238 return 0;
239}
135#ifdef CONFIG_KRETPROBES 240#ifdef CONFIG_KRETPROBES
136static u32 krph_val; 241static u32 krph_val;
137 242
@@ -177,7 +282,7 @@ static int test_kretprobe(void)
177 return ret; 282 return ret;
178 } 283 }
179 284
180 ret = kprobe_target(rand1); 285 ret = target(rand1);
181 unregister_kretprobe(&rp); 286 unregister_kretprobe(&rp);
182 if (krph_val != rand1) { 287 if (krph_val != rand1) {
183 printk(KERN_ERR "Kprobe smoke test failed: " 288 printk(KERN_ERR "Kprobe smoke test failed: "
@@ -187,12 +292,72 @@ static int test_kretprobe(void)
187 292
188 return 0; 293 return 0;
189} 294}
295
296static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
297{
298 unsigned long ret = regs_return_value(regs);
299
300 if (ret != (rand1 / div_factor) + 1) {
301 handler_errors++;
302 printk(KERN_ERR "Kprobe smoke test failed: "
303 "incorrect value in kretprobe handler2\n");
304 }
305 if (krph_val == 0) {
306 handler_errors++;
307 printk(KERN_ERR "Kprobe smoke test failed: "
308 "call to kretprobe entry handler failed\n");
309 }
310
311 krph_val = rand1;
312 return 0;
313}
314
315static struct kretprobe rp2 = {
316 .handler = return_handler2,
317 .entry_handler = entry_handler,
318 .kp.symbol_name = "kprobe_target2"
319};
320
321static int test_kretprobes(void)
322{
323 int ret;
324 struct kretprobe *rps[2] = {&rp, &rp2};
325
326 rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */
327 ret = register_kretprobes(rps, 2);
328 if (ret < 0) {
329 printk(KERN_ERR "Kprobe smoke test failed: "
330 "register_kretprobe returned %d\n", ret);
331 return ret;
332 }
333
334 krph_val = 0;
335 ret = target(rand1);
336 if (krph_val != rand1) {
337 printk(KERN_ERR "Kprobe smoke test failed: "
338 "kretprobe handler not called\n");
339 handler_errors++;
340 }
341
342 krph_val = 0;
343 ret = target2(rand1);
344 if (krph_val != rand1) {
345 printk(KERN_ERR "Kprobe smoke test failed: "
346 "kretprobe handler2 not called\n");
347 handler_errors++;
348 }
349 unregister_kretprobes(rps, 2);
350 return 0;
351}
190#endif /* CONFIG_KRETPROBES */ 352#endif /* CONFIG_KRETPROBES */
191 353
192int init_test_probes(void) 354int init_test_probes(void)
193{ 355{
194 int ret; 356 int ret;
195 357
358 target = kprobe_target;
359 target2 = kprobe_target2;
360
196 do { 361 do {
197 rand1 = random32(); 362 rand1 = random32();
198 } while (rand1 <= div_factor); 363 } while (rand1 <= div_factor);
@@ -204,15 +369,30 @@ int init_test_probes(void)
204 errors++; 369 errors++;
205 370
206 num_tests++; 371 num_tests++;
372 ret = test_kprobes();
373 if (ret < 0)
374 errors++;
375
376 num_tests++;
207 ret = test_jprobe(); 377 ret = test_jprobe();
208 if (ret < 0) 378 if (ret < 0)
209 errors++; 379 errors++;
210 380
381 num_tests++;
382 ret = test_jprobes();
383 if (ret < 0)
384 errors++;
385
211#ifdef CONFIG_KRETPROBES 386#ifdef CONFIG_KRETPROBES
212 num_tests++; 387 num_tests++;
213 ret = test_kretprobe(); 388 ret = test_kretprobe();
214 if (ret < 0) 389 if (ret < 0)
215 errors++; 390 errors++;
391
392 num_tests++;
393 ret = test_kretprobes();
394 if (ret < 0)
395 errors++;
216#endif /* CONFIG_KRETPROBES */ 396#endif /* CONFIG_KRETPROBES */
217 397
218 if (errors) 398 if (errors)
diff --git a/kernel/time.c b/kernel/time.c
index d63a4336fad6..4886e3ce83a4 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -37,6 +37,7 @@
37#include <linux/fs.h> 37#include <linux/fs.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/math64.h> 39#include <linux/math64.h>
40#include <linux/ptrace.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/unistd.h> 43#include <asm/unistd.h>
@@ -65,8 +66,9 @@ asmlinkage long sys_time(time_t __user * tloc)
65 66
66 if (tloc) { 67 if (tloc) {
67 if (put_user(i,tloc)) 68 if (put_user(i,tloc))
68 i = -EFAULT; 69 return -EFAULT;
69 } 70 }
71 force_successful_syscall_return();
70 return i; 72 return i;
71} 73}
72 74
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 2dc06ab35716..43f891b05a4b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
92 mm = get_task_mm(p); 92 mm = get_task_mm(p);
93 if (mm) { 93 if (mm) {
94 /* adjust to KB unit */ 94 /* adjust to KB unit */
95 stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB; 95 stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB;
96 stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; 96 stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
97 mmput(mm); 97 mmput(mm);
98 } 98 }
99 stats->read_char = p->ioac.rchar; 99 stats->read_char = p->ioac.rchar;
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 486da62b2b07..9681d54b95d1 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -12,6 +12,7 @@
12#include <linux/tty.h> 12#include <linux/tty.h>
13#include <linux/wait.h> 13#include <linux/wait.h>
14#include <linux/vt_kern.h> 14#include <linux/vt_kern.h>
15#include <linux/console.h>
15 16
16 17
17void __attribute__((weak)) bust_spinlocks(int yes) 18void __attribute__((weak)) bust_spinlocks(int yes)
@@ -22,6 +23,7 @@ void __attribute__((weak)) bust_spinlocks(int yes)
22#ifdef CONFIG_VT 23#ifdef CONFIG_VT
23 unblank_screen(); 24 unblank_screen();
24#endif 25#endif
26 console_unblank();
25 if (--oops_in_progress == 0) 27 if (--oops_in_progress == 0)
26 wake_up_klogd(); 28 wake_up_klogd();
27 } 29 }
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index a50a311554cc..f97af55bdd96 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -6,7 +6,6 @@
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/interrupt.h> 8#include <linux/interrupt.h>
9#include <linux/unwind.h>
10#include <linux/stacktrace.h> 9#include <linux/stacktrace.h>
11#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
12#include <linux/fault-inject.h> 11#include <linux/fault-inject.h>
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index b255b939bc1b..a60bd8046095 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -9,10 +9,8 @@
9#include <linux/cpu.h> 9#include <linux/cpu.h>
10#include <linux/module.h> 10#include <linux/module.h>
11 11
12#ifdef CONFIG_HOTPLUG_CPU
13static LIST_HEAD(percpu_counters); 12static LIST_HEAD(percpu_counters);
14static DEFINE_MUTEX(percpu_counters_lock); 13static DEFINE_MUTEX(percpu_counters_lock);
15#endif
16 14
17void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 15void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
18{ 16{
@@ -111,13 +109,24 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
111} 109}
112EXPORT_SYMBOL(percpu_counter_destroy); 110EXPORT_SYMBOL(percpu_counter_destroy);
113 111
114#ifdef CONFIG_HOTPLUG_CPU 112int percpu_counter_batch __read_mostly = 32;
113EXPORT_SYMBOL(percpu_counter_batch);
114
115static void compute_batch_value(void)
116{
117 int nr = num_online_cpus();
118
119 percpu_counter_batch = max(32, nr*2);
120}
121
115static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, 122static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
116 unsigned long action, void *hcpu) 123 unsigned long action, void *hcpu)
117{ 124{
125#ifdef CONFIG_HOTPLUG_CPU
118 unsigned int cpu; 126 unsigned int cpu;
119 struct percpu_counter *fbc; 127 struct percpu_counter *fbc;
120 128
129 compute_batch_value();
121 if (action != CPU_DEAD) 130 if (action != CPU_DEAD)
122 return NOTIFY_OK; 131 return NOTIFY_OK;
123 132
@@ -134,13 +143,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
134 spin_unlock_irqrestore(&fbc->lock, flags); 143 spin_unlock_irqrestore(&fbc->lock, flags);
135 } 144 }
136 mutex_unlock(&percpu_counters_lock); 145 mutex_unlock(&percpu_counters_lock);
146#endif
137 return NOTIFY_OK; 147 return NOTIFY_OK;
138} 148}
139 149
140static int __init percpu_counter_startup(void) 150static int __init percpu_counter_startup(void)
141{ 151{
152 compute_batch_value();
142 hotcpu_notifier(percpu_counter_hotcpu_callback, 0); 153 hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
143 return 0; 154 return 0;
144} 155}
145module_init(percpu_counter_startup); 156module_init(percpu_counter_startup);
146#endif
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
index 471944a54e23..a7af6f85eca8 100644
--- a/lib/prio_heap.c
+++ b/lib/prio_heap.c
@@ -31,7 +31,7 @@ void *heap_insert(struct ptr_heap *heap, void *p)
31 31
32 if (heap->size < heap->max) { 32 if (heap->size < heap->max) {
33 /* Heap insertion */ 33 /* Heap insertion */
34 int pos = heap->size++; 34 pos = heap->size++;
35 while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) { 35 while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
36 ptrs[pos] = ptrs[(pos-1)/2]; 36 ptrs[pos] = ptrs[(pos-1)/2];
37 pos = (pos-1)/2; 37 pos = (pos-1)/2;
diff --git a/lib/proportions.c b/lib/proportions.c
index 4f387a643d72..3fda810faf0d 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -147,6 +147,7 @@ out:
147 * this is used to track the active references. 147 * this is used to track the active references.
148 */ 148 */
149static struct prop_global *prop_get_global(struct prop_descriptor *pd) 149static struct prop_global *prop_get_global(struct prop_descriptor *pd)
150__acquires(RCU)
150{ 151{
151 int index; 152 int index;
152 153
@@ -160,6 +161,7 @@ static struct prop_global *prop_get_global(struct prop_descriptor *pd)
160} 161}
161 162
162static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) 163static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
164__releases(RCU)
163{ 165{
164 rcu_read_unlock(); 166 rcu_read_unlock();
165} 167}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index be86b32bc874..8d3fb0bd1288 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -81,7 +81,7 @@ struct radix_tree_preload {
81 int nr; 81 int nr;
82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; 82 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
83}; 83};
84DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 84static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
85 85
86static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 86static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
87{ 87{
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 98d632277ca8..0fbd0121d91d 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -170,6 +170,8 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
170 return -EINVAL; 170 return -EINVAL;
171 171
172 val = simple_strtoul(cp, &tail, base); 172 val = simple_strtoul(cp, &tail, base);
173 if (tail == cp)
174 return -EINVAL;
173 if ((*tail == '\0') || 175 if ((*tail == '\0') ||
174 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { 176 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
175 *res = val; 177 *res = val;
@@ -241,6 +243,8 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
241 return -EINVAL; 243 return -EINVAL;
242 244
243 val = simple_strtoull(cp, &tail, base); 245 val = simple_strtoull(cp, &tail, base);
246 if (tail == cp)
247 return -EINVAL;
244 if ((*tail == '\0') || 248 if ((*tail == '\0') ||
245 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { 249 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
246 *res = val; 250 *res = val;
diff --git a/mm/Kconfig b/mm/Kconfig
index 5b5790f8a816..a5b77811fdf2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -181,12 +181,6 @@ config MIGRATION
181 example on NUMA systems to put pages nearer to the processors accessing 181 example on NUMA systems to put pages nearer to the processors accessing
182 the page. 182 the page.
183 183
184config RESOURCES_64BIT
185 bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
186 default 64BIT
187 help
188 This option allows memory and IO resources to be 64 bit.
189
190config PHYS_ADDR_T_64BIT 184config PHYS_ADDR_T_64BIT
191 def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT 185 def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
192 186
diff --git a/mm/Makefile b/mm/Makefile
index 51c27709cc7c..72255be57f89 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -9,7 +9,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o pdflush.o \ 11 maccess.o page_alloc.o page-writeback.o pdflush.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
14 page_isolation.o mm_init.o $(mmu-y) 14 page_isolation.o mm_init.o $(mmu-y)
15 15
@@ -21,9 +21,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o
21obj-$(CONFIG_NUMA) += mempolicy.o 21obj-$(CONFIG_NUMA) += mempolicy.o
22obj-$(CONFIG_SPARSEMEM) += sparse.o 22obj-$(CONFIG_SPARSEMEM) += sparse.o
23obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o 23obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
24obj-$(CONFIG_SHMEM) += shmem.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o 24obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
27obj-$(CONFIG_SLOB) += slob.o 25obj-$(CONFIG_SLOB) += slob.o
28obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 26obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
29obj-$(CONFIG_SLAB) += slab.o 27obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 801c08b046e6..6f80beddd8a4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -24,9 +24,9 @@ static void bdi_debug_init(void)
24static int bdi_debug_stats_show(struct seq_file *m, void *v) 24static int bdi_debug_stats_show(struct seq_file *m, void *v)
25{ 25{
26 struct backing_dev_info *bdi = m->private; 26 struct backing_dev_info *bdi = m->private;
27 long background_thresh; 27 unsigned long background_thresh;
28 long dirty_thresh; 28 unsigned long dirty_thresh;
29 long bdi_thresh; 29 unsigned long bdi_thresh;
30 30
31 get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); 31 get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
32 32
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ac5a891f142a..51a0ccf61e0e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -435,6 +435,10 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
435 unsigned long fallback = 0; 435 unsigned long fallback = 0;
436 unsigned long min, max, start, sidx, midx, step; 436 unsigned long min, max, start, sidx, midx, step;
437 437
438 bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
439 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
440 align, goal, limit);
441
438 BUG_ON(!size); 442 BUG_ON(!size);
439 BUG_ON(align & (align - 1)); 443 BUG_ON(align & (align - 1));
440 BUG_ON(limit && goal + size > limit); 444 BUG_ON(limit && goal + size > limit);
@@ -442,10 +446,6 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
442 if (!bdata->node_bootmem_map) 446 if (!bdata->node_bootmem_map)
443 return NULL; 447 return NULL;
444 448
445 bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
446 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
447 align, goal, limit);
448
449 min = bdata->node_min_pfn; 449 min = bdata->node_min_pfn;
450 max = bdata->node_low_pfn; 450 max = bdata->node_low_pfn;
451 451
diff --git a/mm/filemap.c b/mm/filemap.c
index f5769b4dc075..2f55a1e2baf7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -210,7 +210,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
210 int ret; 210 int ret;
211 struct writeback_control wbc = { 211 struct writeback_control wbc = {
212 .sync_mode = sync_mode, 212 .sync_mode = sync_mode,
213 .nr_to_write = mapping->nrpages * 2, 213 .nr_to_write = LONG_MAX,
214 .range_start = start, 214 .range_start = start,
215 .range_end = end, 215 .range_end = end,
216 }; 216 };
@@ -741,7 +741,14 @@ repeat:
741 page = __page_cache_alloc(gfp_mask); 741 page = __page_cache_alloc(gfp_mask);
742 if (!page) 742 if (!page)
743 return NULL; 743 return NULL;
744 err = add_to_page_cache_lru(page, mapping, index, gfp_mask); 744 /*
745 * We want a regular kernel memory (not highmem or DMA etc)
746 * allocation for the radix tree nodes, but we need to honour
747 * the context-specific requirements the caller has asked for.
748 * GFP_RECLAIM_MASK collects those requirements.
749 */
750 err = add_to_page_cache_lru(page, mapping, index,
751 (gfp_mask & GFP_RECLAIM_MASK));
745 if (unlikely(err)) { 752 if (unlikely(err)) {
746 page_cache_release(page); 753 page_cache_release(page);
747 page = NULL; 754 page = NULL;
@@ -950,7 +957,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
950 return NULL; 957 return NULL;
951 } 958 }
952 page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); 959 page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
953 if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) { 960 if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
954 page_cache_release(page); 961 page_cache_release(page);
955 page = NULL; 962 page = NULL;
956 } 963 }
@@ -1317,7 +1324,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1317 goto out; /* skip atime */ 1324 goto out; /* skip atime */
1318 size = i_size_read(inode); 1325 size = i_size_read(inode);
1319 if (pos < size) { 1326 if (pos < size) {
1320 retval = filemap_write_and_wait(mapping); 1327 retval = filemap_write_and_wait_range(mapping, pos,
1328 pos + iov_length(iov, nr_segs) - 1);
1321 if (!retval) { 1329 if (!retval) {
1322 retval = mapping->a_ops->direct_IO(READ, iocb, 1330 retval = mapping->a_ops->direct_IO(READ, iocb,
1323 iov, pos, nr_segs); 1331 iov, pos, nr_segs);
@@ -1530,7 +1538,6 @@ retry_find:
1530 /* 1538 /*
1531 * Found the page and have a reference on it. 1539 * Found the page and have a reference on it.
1532 */ 1540 */
1533 mark_page_accessed(page);
1534 ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT; 1541 ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
1535 vmf->page = page; 1542 vmf->page = page;
1536 return ret | VM_FAULT_LOCKED; 1543 return ret | VM_FAULT_LOCKED;
@@ -2060,18 +2067,10 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2060 if (count != ocount) 2067 if (count != ocount)
2061 *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); 2068 *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
2062 2069
2063 /*
2064 * Unmap all mmappings of the file up-front.
2065 *
2066 * This will cause any pte dirty bits to be propagated into the
2067 * pageframes for the subsequent filemap_write_and_wait().
2068 */
2069 write_len = iov_length(iov, *nr_segs); 2070 write_len = iov_length(iov, *nr_segs);
2070 end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; 2071 end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
2071 if (mapping_mapped(mapping))
2072 unmap_mapping_range(mapping, pos, write_len, 0);
2073 2072
2074 written = filemap_write_and_wait(mapping); 2073 written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
2075 if (written) 2074 if (written)
2076 goto out; 2075 goto out;
2077 2076
@@ -2291,7 +2290,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2291 * the file data here, to try to honour O_DIRECT expectations. 2290 * the file data here, to try to honour O_DIRECT expectations.
2292 */ 2291 */
2293 if (unlikely(file->f_flags & O_DIRECT) && written) 2292 if (unlikely(file->f_flags & O_DIRECT) && written)
2294 status = filemap_write_and_wait(mapping); 2293 status = filemap_write_and_wait_range(mapping,
2294 pos, pos + written - 1);
2295 2295
2296 return written ? written : status; 2296 return written ? written : status;
2297} 2297}
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b5167dfb2f2d..0c04615651b7 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -193,7 +193,7 @@ retry:
193 /* Nuke the page table entry. */ 193 /* Nuke the page table entry. */
194 flush_cache_page(vma, address, pte_pfn(*pte)); 194 flush_cache_page(vma, address, pte_pfn(*pte));
195 pteval = ptep_clear_flush_notify(vma, address, pte); 195 pteval = ptep_clear_flush_notify(vma, address, pte);
196 page_remove_rmap(page, vma); 196 page_remove_rmap(page);
197 dec_mm_counter(mm, file_rss); 197 dec_mm_counter(mm, file_rss);
198 BUG_ON(pte_dirty(pteval)); 198 BUG_ON(pte_dirty(pteval));
199 pte_unmap_unlock(pte, ptl); 199 pte_unmap_unlock(pte, ptl);
diff --git a/mm/fremap.c b/mm/fremap.c
index 7d12ca70ef7b..62d5bbda921a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
37 if (page) { 37 if (page) {
38 if (pte_dirty(pte)) 38 if (pte_dirty(pte))
39 set_page_dirty(page); 39 set_page_dirty(page);
40 page_remove_rmap(page, vma); 40 page_remove_rmap(page);
41 page_cache_release(page); 41 page_cache_release(page);
42 update_hiwater_rss(mm); 42 update_hiwater_rss(mm);
43 dec_mm_counter(mm, file_rss); 43 dec_mm_counter(mm, file_rss);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6058b53dcb89..618e98304080 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -220,6 +220,35 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
220} 220}
221 221
222/* 222/*
223 * Return the size of the pages allocated when backing a VMA. In the majority
224 * cases this will be same size as used by the page table entries.
225 */
226unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
227{
228 struct hstate *hstate;
229
230 if (!is_vm_hugetlb_page(vma))
231 return PAGE_SIZE;
232
233 hstate = hstate_vma(vma);
234
235 return 1UL << (hstate->order + PAGE_SHIFT);
236}
237
238/*
239 * Return the page size being used by the MMU to back a VMA. In the majority
240 * of cases, the page size used by the kernel matches the MMU size. On
241 * architectures where it differs, an architecture-specific version of this
242 * function is required.
243 */
244#ifndef vma_mmu_pagesize
245unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
246{
247 return vma_kernel_pagesize(vma);
248}
249#endif
250
251/*
223 * Flags for MAP_PRIVATE reservations. These are stored in the bottom 252 * Flags for MAP_PRIVATE reservations. These are stored in the bottom
224 * bits of the reservation map pointer, which are always clear due to 253 * bits of the reservation map pointer, which are always clear due to
225 * alignment. 254 * alignment.
@@ -371,8 +400,10 @@ static void clear_huge_page(struct page *page,
371{ 400{
372 int i; 401 int i;
373 402
374 if (unlikely(sz > MAX_ORDER_NR_PAGES)) 403 if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
375 return clear_gigantic_page(page, addr, sz); 404 clear_gigantic_page(page, addr, sz);
405 return;
406 }
376 407
377 might_sleep(); 408 might_sleep();
378 for (i = 0; i < sz/PAGE_SIZE; i++) { 409 for (i = 0; i < sz/PAGE_SIZE; i++) {
@@ -404,8 +435,10 @@ static void copy_huge_page(struct page *dst, struct page *src,
404 int i; 435 int i;
405 struct hstate *h = hstate_vma(vma); 436 struct hstate *h = hstate_vma(vma);
406 437
407 if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) 438 if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
408 return copy_gigantic_page(dst, src, addr, vma); 439 copy_gigantic_page(dst, src, addr, vma);
440 return;
441 }
409 442
410 might_sleep(); 443 might_sleep();
411 for (i = 0; i < pages_per_huge_page(h); i++) { 444 for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -972,7 +1005,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
972 return page; 1005 return page;
973} 1006}
974 1007
975__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h) 1008int __weak alloc_bootmem_huge_page(struct hstate *h)
976{ 1009{
977 struct huge_bootmem_page *m; 1010 struct huge_bootmem_page *m;
978 int nr_nodes = nodes_weight(node_online_map); 1011 int nr_nodes = nodes_weight(node_online_map);
@@ -991,8 +1024,7 @@ __attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
991 * puts them into the mem_map). 1024 * puts them into the mem_map).
992 */ 1025 */
993 m = addr; 1026 m = addr;
994 if (m) 1027 goto found;
995 goto found;
996 } 1028 }
997 hstate_next_node(h); 1029 hstate_next_node(h);
998 nr_nodes--; 1030 nr_nodes--;
diff --git a/mm/internal.h b/mm/internal.h
index 13333bc2eb68..478223b73a2a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,6 +49,7 @@ extern void putback_lru_page(struct page *page);
49/* 49/*
50 * in mm/page_alloc.c 50 * in mm/page_alloc.c
51 */ 51 */
52extern unsigned long highest_memmap_pfn;
52extern void __free_pages_bootmem(struct page *page, unsigned int order); 53extern void __free_pages_bootmem(struct page *page, unsigned int order);
53 54
54/* 55/*
@@ -275,6 +276,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
275#define GUP_FLAGS_WRITE 0x1 276#define GUP_FLAGS_WRITE 0x1
276#define GUP_FLAGS_FORCE 0x2 277#define GUP_FLAGS_FORCE 0x2
277#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4 278#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
279#define GUP_FLAGS_IGNORE_SIGKILL 0x8
278 280
279int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 281int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
280 unsigned long start, int len, int flags, 282 unsigned long start, int len, int flags,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 866dcc7eeb0c..51ee96545579 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -779,7 +779,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
779 return 0; 779 return 0;
780} 780}
781 781
782int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) 782static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
783 unsigned long long val)
783{ 784{
784 785
785 int retry_count = MEM_CGROUP_RECLAIM_RETRIES; 786 int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
diff --git a/mm/memory.c b/mm/memory.c
index 7b9db658aca2..3f8fa06b963b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,9 @@
52#include <linux/writeback.h> 52#include <linux/writeback.h>
53#include <linux/memcontrol.h> 53#include <linux/memcontrol.h>
54#include <linux/mmu_notifier.h> 54#include <linux/mmu_notifier.h>
55#include <linux/kallsyms.h>
56#include <linux/swapops.h>
57#include <linux/elf.h>
55 58
56#include <asm/pgalloc.h> 59#include <asm/pgalloc.h>
57#include <asm/uaccess.h> 60#include <asm/uaccess.h>
@@ -59,9 +62,6 @@
59#include <asm/tlbflush.h> 62#include <asm/tlbflush.h>
60#include <asm/pgtable.h> 63#include <asm/pgtable.h>
61 64
62#include <linux/swapops.h>
63#include <linux/elf.h>
64
65#include "internal.h" 65#include "internal.h"
66 66
67#ifndef CONFIG_NEED_MULTIPLE_NODES 67#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -375,15 +375,65 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
375 * 375 *
376 * The calling function must still handle the error. 376 * The calling function must still handle the error.
377 */ 377 */
378static void print_bad_pte(struct vm_area_struct *vma, pte_t pte, 378static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
379 unsigned long vaddr) 379 pte_t pte, struct page *page)
380{ 380{
381 printk(KERN_ERR "Bad pte = %08llx, process = %s, " 381 pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
382 "vm_flags = %lx, vaddr = %lx\n", 382 pud_t *pud = pud_offset(pgd, addr);
383 (long long)pte_val(pte), 383 pmd_t *pmd = pmd_offset(pud, addr);
384 (vma->vm_mm == current->mm ? current->comm : "???"), 384 struct address_space *mapping;
385 vma->vm_flags, vaddr); 385 pgoff_t index;
386 static unsigned long resume;
387 static unsigned long nr_shown;
388 static unsigned long nr_unshown;
389
390 /*
391 * Allow a burst of 60 reports, then keep quiet for that minute;
392 * or allow a steady drip of one report per second.
393 */
394 if (nr_shown == 60) {
395 if (time_before(jiffies, resume)) {
396 nr_unshown++;
397 return;
398 }
399 if (nr_unshown) {
400 printk(KERN_ALERT
401 "BUG: Bad page map: %lu messages suppressed\n",
402 nr_unshown);
403 nr_unshown = 0;
404 }
405 nr_shown = 0;
406 }
407 if (nr_shown++ == 0)
408 resume = jiffies + 60 * HZ;
409
410 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
411 index = linear_page_index(vma, addr);
412
413 printk(KERN_ALERT
414 "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
415 current->comm,
416 (long long)pte_val(pte), (long long)pmd_val(*pmd));
417 if (page) {
418 printk(KERN_ALERT
419 "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
420 page, (void *)page->flags, page_count(page),
421 page_mapcount(page), page->mapping, page->index);
422 }
423 printk(KERN_ALERT
424 "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
425 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
426 /*
427 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
428 */
429 if (vma->vm_ops)
430 print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n",
431 (unsigned long)vma->vm_ops->fault);
432 if (vma->vm_file && vma->vm_file->f_op)
433 print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",
434 (unsigned long)vma->vm_file->f_op->mmap);
386 dump_stack(); 435 dump_stack();
436 add_taint(TAINT_BAD_PAGE);
387} 437}
388 438
389static inline int is_cow_mapping(unsigned int flags) 439static inline int is_cow_mapping(unsigned int flags)
@@ -441,21 +491,18 @@ static inline int is_cow_mapping(unsigned int flags)
441struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 491struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
442 pte_t pte) 492 pte_t pte)
443{ 493{
444 unsigned long pfn; 494 unsigned long pfn = pte_pfn(pte);
445 495
446 if (HAVE_PTE_SPECIAL) { 496 if (HAVE_PTE_SPECIAL) {
447 if (likely(!pte_special(pte))) { 497 if (likely(!pte_special(pte)))
448 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 498 goto check_pfn;
449 return pte_page(pte); 499 if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
450 } 500 print_bad_pte(vma, addr, pte, NULL);
451 VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
452 return NULL; 501 return NULL;
453 } 502 }
454 503
455 /* !HAVE_PTE_SPECIAL case follows: */ 504 /* !HAVE_PTE_SPECIAL case follows: */
456 505
457 pfn = pte_pfn(pte);
458
459 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { 506 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
460 if (vma->vm_flags & VM_MIXEDMAP) { 507 if (vma->vm_flags & VM_MIXEDMAP) {
461 if (!pfn_valid(pfn)) 508 if (!pfn_valid(pfn))
@@ -471,11 +518,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
471 } 518 }
472 } 519 }
473 520
474 VM_BUG_ON(!pfn_valid(pfn)); 521check_pfn:
522 if (unlikely(pfn > highest_memmap_pfn)) {
523 print_bad_pte(vma, addr, pte, NULL);
524 return NULL;
525 }
475 526
476 /* 527 /*
477 * NOTE! We still have PageReserved() pages in the page tables. 528 * NOTE! We still have PageReserved() pages in the page tables.
478 *
479 * eg. VDSO mappings can cause them to exist. 529 * eg. VDSO mappings can cause them to exist.
480 */ 530 */
481out: 531out:
@@ -767,11 +817,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
767 else { 817 else {
768 if (pte_dirty(ptent)) 818 if (pte_dirty(ptent))
769 set_page_dirty(page); 819 set_page_dirty(page);
770 if (pte_young(ptent)) 820 if (pte_young(ptent) &&
771 SetPageReferenced(page); 821 likely(!VM_SequentialReadHint(vma)))
822 mark_page_accessed(page);
772 file_rss--; 823 file_rss--;
773 } 824 }
774 page_remove_rmap(page, vma); 825 page_remove_rmap(page);
826 if (unlikely(page_mapcount(page) < 0))
827 print_bad_pte(vma, addr, ptent, page);
775 tlb_remove_page(tlb, page); 828 tlb_remove_page(tlb, page);
776 continue; 829 continue;
777 } 830 }
@@ -781,8 +834,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
781 */ 834 */
782 if (unlikely(details)) 835 if (unlikely(details))
783 continue; 836 continue;
784 if (!pte_file(ptent)) 837 if (pte_file(ptent)) {
785 free_swap_and_cache(pte_to_swp_entry(ptent)); 838 if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
839 print_bad_pte(vma, addr, ptent, NULL);
840 } else if
841 (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
842 print_bad_pte(vma, addr, ptent, NULL);
786 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); 843 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
787 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); 844 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
788 845
@@ -1153,6 +1210,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1153 int write = !!(flags & GUP_FLAGS_WRITE); 1210 int write = !!(flags & GUP_FLAGS_WRITE);
1154 int force = !!(flags & GUP_FLAGS_FORCE); 1211 int force = !!(flags & GUP_FLAGS_FORCE);
1155 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); 1212 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1213 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
1156 1214
1157 if (len <= 0) 1215 if (len <= 0)
1158 return 0; 1216 return 0;
@@ -1231,12 +1289,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1231 struct page *page; 1289 struct page *page;
1232 1290
1233 /* 1291 /*
1234 * If tsk is ooming, cut off its access to large memory 1292 * If we have a pending SIGKILL, don't keep faulting
1235 * allocations. It has a pending SIGKILL, but it can't 1293 * pages and potentially allocating memory, unless
1236 * be processed until returning to user space. 1294 * current is handling munlock--e.g., on exit. In
1295 * that case, we are not allocating memory. Rather,
1296 * we're only unlocking already resident/mapped pages.
1237 */ 1297 */
1238 if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) 1298 if (unlikely(!ignore_sigkill &&
1239 return i ? i : -ENOMEM; 1299 fatal_signal_pending(current)))
1300 return i ? i : -ERESTARTSYS;
1240 1301
1241 if (write) 1302 if (write)
1242 foll_flags |= FOLL_WRITE; 1303 foll_flags |= FOLL_WRITE;
@@ -1263,9 +1324,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1263 * do_wp_page has broken COW when necessary, 1324 * do_wp_page has broken COW when necessary,
1264 * even if maybe_mkwrite decided not to set 1325 * even if maybe_mkwrite decided not to set
1265 * pte_write. We can thus safely do subsequent 1326 * pte_write. We can thus safely do subsequent
1266 * page lookups as if they were reads. 1327 * page lookups as if they were reads. But only
1328 * do so when looping for pte_write is futile:
1329 * in some cases userspace may also be wanting
1330 * to write to the gotten user page, which a
1331 * read fault here might prevent (a readonly
1332 * page might get reCOWed by userspace write).
1267 */ 1333 */
1268 if (ret & VM_FAULT_WRITE) 1334 if ((ret & VM_FAULT_WRITE) &&
1335 !(vma->vm_flags & VM_WRITE))
1269 foll_flags &= ~FOLL_WRITE; 1336 foll_flags &= ~FOLL_WRITE;
1270 1337
1271 cond_resched(); 1338 cond_resched();
@@ -1644,6 +1711,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1644 1711
1645 BUG_ON(pmd_huge(*pmd)); 1712 BUG_ON(pmd_huge(*pmd));
1646 1713
1714 arch_enter_lazy_mmu_mode();
1715
1647 token = pmd_pgtable(*pmd); 1716 token = pmd_pgtable(*pmd);
1648 1717
1649 do { 1718 do {
@@ -1652,6 +1721,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
1652 break; 1721 break;
1653 } while (pte++, addr += PAGE_SIZE, addr != end); 1722 } while (pte++, addr += PAGE_SIZE, addr != end);
1654 1723
1724 arch_leave_lazy_mmu_mode();
1725
1655 if (mm != &init_mm) 1726 if (mm != &init_mm)
1656 pte_unmap_unlock(pte-1, ptl); 1727 pte_unmap_unlock(pte-1, ptl);
1657 return err; 1728 return err;
@@ -1837,10 +1908,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1837 * not dirty accountable. 1908 * not dirty accountable.
1838 */ 1909 */
1839 if (PageAnon(old_page)) { 1910 if (PageAnon(old_page)) {
1840 if (trylock_page(old_page)) { 1911 if (!trylock_page(old_page)) {
1841 reuse = can_share_swap_page(old_page); 1912 page_cache_get(old_page);
1842 unlock_page(old_page); 1913 pte_unmap_unlock(page_table, ptl);
1914 lock_page(old_page);
1915 page_table = pte_offset_map_lock(mm, pmd, address,
1916 &ptl);
1917 if (!pte_same(*page_table, orig_pte)) {
1918 unlock_page(old_page);
1919 page_cache_release(old_page);
1920 goto unlock;
1921 }
1922 page_cache_release(old_page);
1843 } 1923 }
1924 reuse = reuse_swap_page(old_page);
1925 unlock_page(old_page);
1844 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == 1926 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
1845 (VM_WRITE|VM_SHARED))) { 1927 (VM_WRITE|VM_SHARED))) {
1846 /* 1928 /*
@@ -1943,11 +2025,7 @@ gotten:
1943 * thread doing COW. 2025 * thread doing COW.
1944 */ 2026 */
1945 ptep_clear_flush_notify(vma, address, page_table); 2027 ptep_clear_flush_notify(vma, address, page_table);
1946 SetPageSwapBacked(new_page);
1947 lru_cache_add_active_or_unevictable(new_page, vma);
1948 page_add_new_anon_rmap(new_page, vma, address); 2028 page_add_new_anon_rmap(new_page, vma, address);
1949
1950//TODO: is this safe? do_anonymous_page() does it this way.
1951 set_pte_at(mm, address, page_table, entry); 2029 set_pte_at(mm, address, page_table, entry);
1952 update_mmu_cache(vma, address, entry); 2030 update_mmu_cache(vma, address, entry);
1953 if (old_page) { 2031 if (old_page) {
@@ -1973,7 +2051,7 @@ gotten:
1973 * mapcount is visible. So transitively, TLBs to 2051 * mapcount is visible. So transitively, TLBs to
1974 * old page will be flushed before it can be reused. 2052 * old page will be flushed before it can be reused.
1975 */ 2053 */
1976 page_remove_rmap(old_page, vma); 2054 page_remove_rmap(old_page);
1977 } 2055 }
1978 2056
1979 /* Free the old page.. */ 2057 /* Free the old page.. */
@@ -2374,7 +2452,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2374 2452
2375 inc_mm_counter(mm, anon_rss); 2453 inc_mm_counter(mm, anon_rss);
2376 pte = mk_pte(page, vma->vm_page_prot); 2454 pte = mk_pte(page, vma->vm_page_prot);
2377 if (write_access && can_share_swap_page(page)) { 2455 if (write_access && reuse_swap_page(page)) {
2378 pte = maybe_mkwrite(pte_mkdirty(pte), vma); 2456 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
2379 write_access = 0; 2457 write_access = 0;
2380 } 2458 }
@@ -2385,7 +2463,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2385 2463
2386 swap_free(entry); 2464 swap_free(entry);
2387 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) 2465 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
2388 remove_exclusive_swap_page(page); 2466 try_to_free_swap(page);
2389 unlock_page(page); 2467 unlock_page(page);
2390 2468
2391 if (write_access) { 2469 if (write_access) {
@@ -2442,8 +2520,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2442 if (!pte_none(*page_table)) 2520 if (!pte_none(*page_table))
2443 goto release; 2521 goto release;
2444 inc_mm_counter(mm, anon_rss); 2522 inc_mm_counter(mm, anon_rss);
2445 SetPageSwapBacked(page);
2446 lru_cache_add_active_or_unevictable(page, vma);
2447 page_add_new_anon_rmap(page, vma, address); 2523 page_add_new_anon_rmap(page, vma, address);
2448 set_pte_at(mm, address, page_table, entry); 2524 set_pte_at(mm, address, page_table, entry);
2449 2525
@@ -2591,8 +2667,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2591 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2667 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2592 if (anon) { 2668 if (anon) {
2593 inc_mm_counter(mm, anon_rss); 2669 inc_mm_counter(mm, anon_rss);
2594 SetPageSwapBacked(page);
2595 lru_cache_add_active_or_unevictable(page, vma);
2596 page_add_new_anon_rmap(page, vma, address); 2670 page_add_new_anon_rmap(page, vma, address);
2597 } else { 2671 } else {
2598 inc_mm_counter(mm, file_rss); 2672 inc_mm_counter(mm, file_rss);
@@ -2602,7 +2676,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2602 get_page(dirty_page); 2676 get_page(dirty_page);
2603 } 2677 }
2604 } 2678 }
2605//TODO: is this safe? do_anonymous_page() does it this way.
2606 set_pte_at(mm, address, page_table, entry); 2679 set_pte_at(mm, address, page_table, entry);
2607 2680
2608 /* no need to invalidate: a not-present page won't be cached */ 2681 /* no need to invalidate: a not-present page won't be cached */
@@ -2666,12 +2739,11 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2666 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2739 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
2667 return 0; 2740 return 0;
2668 2741
2669 if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || 2742 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
2670 !(vma->vm_flags & VM_CAN_NONLINEAR))) {
2671 /* 2743 /*
2672 * Page table corrupted: show pte and kill process. 2744 * Page table corrupted: show pte and kill process.
2673 */ 2745 */
2674 print_bad_pte(vma, orig_pte, address); 2746 print_bad_pte(vma, address, orig_pte, NULL);
2675 return VM_FAULT_OOM; 2747 return VM_FAULT_OOM;
2676 } 2748 }
2677 2749
@@ -2953,7 +3025,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
2953{ 3025{
2954 resource_size_t phys_addr; 3026 resource_size_t phys_addr;
2955 unsigned long prot = 0; 3027 unsigned long prot = 0;
2956 void *maddr; 3028 void __iomem *maddr;
2957 int offset = addr & (PAGE_SIZE-1); 3029 int offset = addr & (PAGE_SIZE-1);
2958 3030
2959 if (follow_phys(vma, addr, write, &prot, &phys_addr)) 3031 if (follow_phys(vma, addr, write, &prot, &phys_addr))
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b17371185468..c083cf5fd6df 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -216,7 +216,8 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
216 return 0; 216 return 0;
217} 217}
218 218
219static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn) 219static int __meminit __add_section(int nid, struct zone *zone,
220 unsigned long phys_start_pfn)
220{ 221{
221 int nr_pages = PAGES_PER_SECTION; 222 int nr_pages = PAGES_PER_SECTION;
222 int ret; 223 int ret;
@@ -234,7 +235,7 @@ static int __meminit __add_section(struct zone *zone, unsigned long phys_start_p
234 if (ret < 0) 235 if (ret < 0)
235 return ret; 236 return ret;
236 237
237 return register_new_memory(__pfn_to_section(phys_start_pfn)); 238 return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
238} 239}
239 240
240#ifdef CONFIG_SPARSEMEM_VMEMMAP 241#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -273,8 +274,8 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
273 * call this function after deciding the zone to which to 274 * call this function after deciding the zone to which to
274 * add the new pages. 275 * add the new pages.
275 */ 276 */
276int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, 277int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
277 unsigned long nr_pages) 278 unsigned long nr_pages)
278{ 279{
279 unsigned long i; 280 unsigned long i;
280 int err = 0; 281 int err = 0;
@@ -284,7 +285,7 @@ int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
284 end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); 285 end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
285 286
286 for (i = start_sec; i <= end_sec; i++) { 287 for (i = start_sec; i <= end_sec; i++) {
287 err = __add_section(zone, i << PFN_SECTION_SHIFT); 288 err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
288 289
289 /* 290 /*
290 * EEXIST is finally dealt with by ioresource collision 291 * EEXIST is finally dealt with by ioresource collision
@@ -626,15 +627,12 @@ int scan_lru_pages(unsigned long start, unsigned long end)
626} 627}
627 628
628static struct page * 629static struct page *
629hotremove_migrate_alloc(struct page *page, 630hotremove_migrate_alloc(struct page *page, unsigned long private, int **x)
630 unsigned long private,
631 int **x)
632{ 631{
633 /* This should be improoooooved!! */ 632 /* This should be improooooved!! */
634 return alloc_page(GFP_HIGHUSER_PAGECACHE); 633 return alloc_page(GFP_HIGHUSER_MOVABLE);
635} 634}
636 635
637
638#define NR_OFFLINE_AT_ONCE_PAGES (256) 636#define NR_OFFLINE_AT_ONCE_PAGES (256)
639static int 637static int
640do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) 638do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
diff --git a/mm/migrate.c b/mm/migrate.c
index 21631ab8c08b..55373983c9c6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -300,12 +300,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
300 * Now we know that no one else is looking at the page. 300 * Now we know that no one else is looking at the page.
301 */ 301 */
302 get_page(newpage); /* add cache reference */ 302 get_page(newpage); /* add cache reference */
303#ifdef CONFIG_SWAP
304 if (PageSwapCache(page)) { 303 if (PageSwapCache(page)) {
305 SetPageSwapCache(newpage); 304 SetPageSwapCache(newpage);
306 set_page_private(newpage, page_private(page)); 305 set_page_private(newpage, page_private(page));
307 } 306 }
308#endif
309 307
310 radix_tree_replace_slot(pslot, newpage); 308 radix_tree_replace_slot(pslot, newpage);
311 309
@@ -373,9 +371,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
373 371
374 mlock_migrate_page(newpage, page); 372 mlock_migrate_page(newpage, page);
375 373
376#ifdef CONFIG_SWAP
377 ClearPageSwapCache(page); 374 ClearPageSwapCache(page);
378#endif
379 ClearPagePrivate(page); 375 ClearPagePrivate(page);
380 set_page_private(page, 0); 376 set_page_private(page, 0);
381 /* page->mapping contains a flag for PageAnon() */ 377 /* page->mapping contains a flag for PageAnon() */
@@ -848,12 +844,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
848 struct vm_area_struct *vma; 844 struct vm_area_struct *vma;
849 struct page *page; 845 struct page *page;
850 846
851 /*
852 * A valid page pointer that will not match any of the
853 * pages that will be moved.
854 */
855 pp->page = ZERO_PAGE(0);
856
857 err = -EFAULT; 847 err = -EFAULT;
858 vma = find_vma(mm, pp->addr); 848 vma = find_vma(mm, pp->addr);
859 if (!vma || !vma_migratable(vma)) 849 if (!vma || !vma_migratable(vma))
@@ -919,41 +909,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
919 const int __user *nodes, 909 const int __user *nodes,
920 int __user *status, int flags) 910 int __user *status, int flags)
921{ 911{
922 struct page_to_node *pm = NULL; 912 struct page_to_node *pm;
923 nodemask_t task_nodes; 913 nodemask_t task_nodes;
924 int err = 0; 914 unsigned long chunk_nr_pages;
925 int i; 915 unsigned long chunk_start;
916 int err;
926 917
927 task_nodes = cpuset_mems_allowed(task); 918 task_nodes = cpuset_mems_allowed(task);
928 919
929 /* Limit nr_pages so that the multiplication may not overflow */ 920 err = -ENOMEM;
930 if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { 921 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
931 err = -E2BIG; 922 if (!pm)
932 goto out;
933 }
934
935 pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
936 if (!pm) {
937 err = -ENOMEM;
938 goto out; 923 goto out;
939 }
940
941 /* 924 /*
942 * Get parameters from user space and initialize the pm 925 * Store a chunk of page_to_node array in a page,
943 * array. Return various errors if the user did something wrong. 926 * but keep the last one as a marker
944 */ 927 */
945 for (i = 0; i < nr_pages; i++) { 928 chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
946 const void __user *p;
947 929
948 err = -EFAULT; 930 for (chunk_start = 0;
949 if (get_user(p, pages + i)) 931 chunk_start < nr_pages;
950 goto out_pm; 932 chunk_start += chunk_nr_pages) {
933 int j;
951 934
952 pm[i].addr = (unsigned long)p; 935 if (chunk_start + chunk_nr_pages > nr_pages)
953 if (nodes) { 936 chunk_nr_pages = nr_pages - chunk_start;
937
938 /* fill the chunk pm with addrs and nodes from user-space */
939 for (j = 0; j < chunk_nr_pages; j++) {
940 const void __user *p;
954 int node; 941 int node;
955 942
956 if (get_user(node, nodes + i)) 943 err = -EFAULT;
944 if (get_user(p, pages + j + chunk_start))
945 goto out_pm;
946 pm[j].addr = (unsigned long) p;
947
948 if (get_user(node, nodes + j + chunk_start))
957 goto out_pm; 949 goto out_pm;
958 950
959 err = -ENODEV; 951 err = -ENODEV;
@@ -964,22 +956,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
964 if (!node_isset(node, task_nodes)) 956 if (!node_isset(node, task_nodes))
965 goto out_pm; 957 goto out_pm;
966 958
967 pm[i].node = node; 959 pm[j].node = node;
968 } else 960 }
969 pm[i].node = 0; /* anything to not match MAX_NUMNODES */ 961
970 } 962 /* End marker for this chunk */
971 /* End marker */ 963 pm[chunk_nr_pages].node = MAX_NUMNODES;
972 pm[nr_pages].node = MAX_NUMNODES; 964
965 /* Migrate this chunk */
966 err = do_move_page_to_node_array(mm, pm,
967 flags & MPOL_MF_MOVE_ALL);
968 if (err < 0)
969 goto out_pm;
973 970
974 err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
975 if (err >= 0)
976 /* Return status information */ 971 /* Return status information */
977 for (i = 0; i < nr_pages; i++) 972 for (j = 0; j < chunk_nr_pages; j++)
978 if (put_user(pm[i].status, status + i)) 973 if (put_user(pm[j].status, status + j + chunk_start)) {
979 err = -EFAULT; 974 err = -EFAULT;
975 goto out_pm;
976 }
977 }
978 err = 0;
980 979
981out_pm: 980out_pm:
982 vfree(pm); 981 free_page((unsigned long)pm);
983out: 982out:
984 return err; 983 return err;
985} 984}
diff --git a/mm/mlock.c b/mm/mlock.c
index 3035a56e7616..e125156c664e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -173,12 +173,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
173 (atomic_read(&mm->mm_users) != 0)); 173 (atomic_read(&mm->mm_users) != 0));
174 174
175 /* 175 /*
176 * mlock: don't page populate if page has PROT_NONE permission. 176 * mlock: don't page populate if vma has PROT_NONE permission.
177 * munlock: the pages always do munlock althrough 177 * munlock: always do munlock although the vma has PROT_NONE
178 * its has PROT_NONE permission. 178 * permission, or SIGKILL is pending.
179 */ 179 */
180 if (!mlock) 180 if (!mlock)
181 gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS; 181 gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
182 GUP_FLAGS_IGNORE_SIGKILL;
182 183
183 if (vma->vm_flags & VM_WRITE) 184 if (vma->vm_flags & VM_WRITE)
184 gup_flags |= GUP_FLAGS_WRITE; 185 gup_flags |= GUP_FLAGS_WRITE;
diff --git a/mm/mmap.c b/mm/mmap.c
index 2c778fcfd9bd..a910c045cfd4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -413,7 +413,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
413 413
414static void __vma_link_file(struct vm_area_struct *vma) 414static void __vma_link_file(struct vm_area_struct *vma)
415{ 415{
416 struct file * file; 416 struct file *file;
417 417
418 file = vma->vm_file; 418 file = vma->vm_file;
419 if (file) { 419 if (file) {
@@ -474,11 +474,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
474 * insert vm structure into list and rbtree and anon_vma, 474 * insert vm structure into list and rbtree and anon_vma,
475 * but it has already been inserted into prio_tree earlier. 475 * but it has already been inserted into prio_tree earlier.
476 */ 476 */
477static void 477static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
478__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
479{ 478{
480 struct vm_area_struct * __vma, * prev; 479 struct vm_area_struct *__vma, *prev;
481 struct rb_node ** rb_link, * rb_parent; 480 struct rb_node **rb_link, *rb_parent;
482 481
483 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent); 482 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
484 BUG_ON(__vma && __vma->vm_start < vma->vm_end); 483 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
@@ -908,7 +907,7 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
908 * The caller must hold down_write(current->mm->mmap_sem). 907 * The caller must hold down_write(current->mm->mmap_sem).
909 */ 908 */
910 909
911unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, 910unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
912 unsigned long len, unsigned long prot, 911 unsigned long len, unsigned long prot,
913 unsigned long flags, unsigned long pgoff) 912 unsigned long flags, unsigned long pgoff)
914{ 913{
@@ -1464,7 +1463,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1464EXPORT_SYMBOL(get_unmapped_area); 1463EXPORT_SYMBOL(get_unmapped_area);
1465 1464
1466/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 1465/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
1467struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr) 1466struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1468{ 1467{
1469 struct vm_area_struct *vma = NULL; 1468 struct vm_area_struct *vma = NULL;
1470 1469
@@ -1507,7 +1506,7 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
1507 struct vm_area_struct **pprev) 1506 struct vm_area_struct **pprev)
1508{ 1507{
1509 struct vm_area_struct *vma = NULL, *prev = NULL; 1508 struct vm_area_struct *vma = NULL, *prev = NULL;
1510 struct rb_node * rb_node; 1509 struct rb_node *rb_node;
1511 if (!mm) 1510 if (!mm)
1512 goto out; 1511 goto out;
1513 1512
@@ -1541,7 +1540,7 @@ out:
1541 * update accounting. This is shared with both the 1540 * update accounting. This is shared with both the
1542 * grow-up and grow-down cases. 1541 * grow-up and grow-down cases.
1543 */ 1542 */
1544static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow) 1543static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1545{ 1544{
1546 struct mm_struct *mm = vma->vm_mm; 1545 struct mm_struct *mm = vma->vm_mm;
1547 struct rlimit *rlim = current->signal->rlim; 1546 struct rlimit *rlim = current->signal->rlim;
@@ -2091,6 +2090,9 @@ void exit_mmap(struct mm_struct *mm)
2091 arch_exit_mmap(mm); 2090 arch_exit_mmap(mm);
2092 mmu_notifier_release(mm); 2091 mmu_notifier_release(mm);
2093 2092
2093 if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */
2094 return;
2095
2094 if (mm->locked_vm) { 2096 if (mm->locked_vm) {
2095 vma = mm->mmap; 2097 vma = mm->mmap;
2096 while (vma) { 2098 while (vma) {
@@ -2103,7 +2105,7 @@ void exit_mmap(struct mm_struct *mm)
2103 lru_add_drain(); 2105 lru_add_drain();
2104 flush_cache_mm(mm); 2106 flush_cache_mm(mm);
2105 tlb = tlb_gather_mmu(mm, 1); 2107 tlb = tlb_gather_mmu(mm, 1);
2106 /* Don't update_hiwater_rss(mm) here, do_exit already did */ 2108 /* update_hiwater_rss(mm) here? but nobody should be looking */
2107 /* Use -1 here to ensure all VMAs in the mm are unmapped */ 2109 /* Use -1 here to ensure all VMAs in the mm are unmapped */
2108 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); 2110 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2109 vm_unacct_memory(nr_accounted); 2111 vm_unacct_memory(nr_accounted);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cfb4c4852062..d0f6e7ce09f1 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -22,6 +22,7 @@
22#include <linux/swap.h> 22#include <linux/swap.h>
23#include <linux/swapops.h> 23#include <linux/swapops.h>
24#include <linux/mmu_notifier.h> 24#include <linux/mmu_notifier.h>
25#include <linux/migrate.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
27#include <asm/cacheflush.h> 28#include <asm/cacheflush.h>
@@ -59,8 +60,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
59 ptent = pte_mkwrite(ptent); 60 ptent = pte_mkwrite(ptent);
60 61
61 ptep_modify_prot_commit(mm, addr, pte, ptent); 62 ptep_modify_prot_commit(mm, addr, pte, ptent);
62#ifdef CONFIG_MIGRATION 63 } else if (PAGE_MIGRATION && !pte_file(oldpte)) {
63 } else if (!pte_file(oldpte)) {
64 swp_entry_t entry = pte_to_swp_entry(oldpte); 64 swp_entry_t entry = pte_to_swp_entry(oldpte);
65 65
66 if (is_write_migration_entry(entry)) { 66 if (is_write_migration_entry(entry)) {
@@ -72,9 +72,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
72 set_pte_at(mm, addr, pte, 72 set_pte_at(mm, addr, pte,
73 swp_entry_to_pte(entry)); 73 swp_entry_to_pte(entry));
74 } 74 }
75#endif
76 } 75 }
77
78 } while (pte++, addr += PAGE_SIZE, addr != end); 76 } while (pte++, addr += PAGE_SIZE, addr != end);
79 arch_leave_lazy_mmu_mode(); 77 arch_leave_lazy_mmu_mode();
80 pte_unmap_unlock(pte - 1, ptl); 78 pte_unmap_unlock(pte - 1, ptl);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 558f9afe6e4e..6b9e758c98a5 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -31,7 +31,7 @@
31int sysctl_panic_on_oom; 31int sysctl_panic_on_oom;
32int sysctl_oom_kill_allocating_task; 32int sysctl_oom_kill_allocating_task;
33int sysctl_oom_dump_tasks; 33int sysctl_oom_dump_tasks;
34static DEFINE_SPINLOCK(zone_scan_mutex); 34static DEFINE_SPINLOCK(zone_scan_lock);
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37/** 37/**
@@ -392,6 +392,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
392 printk(KERN_WARNING "%s invoked oom-killer: " 392 printk(KERN_WARNING "%s invoked oom-killer: "
393 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", 393 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
394 current->comm, gfp_mask, order, current->oomkilladj); 394 current->comm, gfp_mask, order, current->oomkilladj);
395 task_lock(current);
396 cpuset_print_task_mems_allowed(current);
397 task_unlock(current);
395 dump_stack(); 398 dump_stack();
396 show_mem(); 399 show_mem();
397 if (sysctl_oom_dump_tasks) 400 if (sysctl_oom_dump_tasks)
@@ -470,7 +473,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
470 struct zone *zone; 473 struct zone *zone;
471 int ret = 1; 474 int ret = 1;
472 475
473 spin_lock(&zone_scan_mutex); 476 spin_lock(&zone_scan_lock);
474 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 477 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
475 if (zone_is_oom_locked(zone)) { 478 if (zone_is_oom_locked(zone)) {
476 ret = 0; 479 ret = 0;
@@ -480,7 +483,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
480 483
481 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 484 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
482 /* 485 /*
483 * Lock each zone in the zonelist under zone_scan_mutex so a 486 * Lock each zone in the zonelist under zone_scan_lock so a
484 * parallel invocation of try_set_zone_oom() doesn't succeed 487 * parallel invocation of try_set_zone_oom() doesn't succeed
485 * when it shouldn't. 488 * when it shouldn't.
486 */ 489 */
@@ -488,7 +491,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
488 } 491 }
489 492
490out: 493out:
491 spin_unlock(&zone_scan_mutex); 494 spin_unlock(&zone_scan_lock);
492 return ret; 495 return ret;
493} 496}
494 497
@@ -502,11 +505,74 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
502 struct zoneref *z; 505 struct zoneref *z;
503 struct zone *zone; 506 struct zone *zone;
504 507
505 spin_lock(&zone_scan_mutex); 508 spin_lock(&zone_scan_lock);
506 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 509 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
507 zone_clear_flag(zone, ZONE_OOM_LOCKED); 510 zone_clear_flag(zone, ZONE_OOM_LOCKED);
508 } 511 }
509 spin_unlock(&zone_scan_mutex); 512 spin_unlock(&zone_scan_lock);
513}
514
515/*
516 * Must be called with tasklist_lock held for read.
517 */
518static void __out_of_memory(gfp_t gfp_mask, int order)
519{
520 if (sysctl_oom_kill_allocating_task) {
521 oom_kill_process(current, gfp_mask, order, 0, NULL,
522 "Out of memory (oom_kill_allocating_task)");
523
524 } else {
525 unsigned long points;
526 struct task_struct *p;
527
528retry:
529 /*
530 * Rambo mode: Shoot down a process and hope it solves whatever
531 * issues we may have.
532 */
533 p = select_bad_process(&points, NULL);
534
535 if (PTR_ERR(p) == -1UL)
536 return;
537
538 /* Found nothing?!?! Either we hang forever, or we panic. */
539 if (!p) {
540 read_unlock(&tasklist_lock);
541 panic("Out of memory and no killable processes...\n");
542 }
543
544 if (oom_kill_process(p, gfp_mask, order, points, NULL,
545 "Out of memory"))
546 goto retry;
547 }
548}
549
550/*
551 * pagefault handler calls into here because it is out of memory but
552 * doesn't know exactly how or why.
553 */
554void pagefault_out_of_memory(void)
555{
556 unsigned long freed = 0;
557
558 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
559 if (freed > 0)
560 /* Got some memory back in the last second. */
561 return;
562
563 if (sysctl_panic_on_oom)
564 panic("out of memory from page fault. panic_on_oom is selected.\n");
565
566 read_lock(&tasklist_lock);
567 __out_of_memory(0, 0); /* unknown gfp_mask and order */
568 read_unlock(&tasklist_lock);
569
570 /*
571 * Give "p" a good chance of killing itself before we
572 * retry to allocate memory.
573 */
574 if (!test_thread_flag(TIF_MEMDIE))
575 schedule_timeout_uninterruptible(1);
510} 576}
511 577
512/** 578/**
@@ -522,8 +588,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
522 */ 588 */
523void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) 589void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
524{ 590{
525 struct task_struct *p;
526 unsigned long points = 0;
527 unsigned long freed = 0; 591 unsigned long freed = 0;
528 enum oom_constraint constraint; 592 enum oom_constraint constraint;
529 593
@@ -544,7 +608,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
544 608
545 switch (constraint) { 609 switch (constraint) {
546 case CONSTRAINT_MEMORY_POLICY: 610 case CONSTRAINT_MEMORY_POLICY:
547 oom_kill_process(current, gfp_mask, order, points, NULL, 611 oom_kill_process(current, gfp_mask, order, 0, NULL,
548 "No available memory (MPOL_BIND)"); 612 "No available memory (MPOL_BIND)");
549 break; 613 break;
550 614
@@ -553,35 +617,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
553 panic("out of memory. panic_on_oom is selected\n"); 617 panic("out of memory. panic_on_oom is selected\n");
554 /* Fall-through */ 618 /* Fall-through */
555 case CONSTRAINT_CPUSET: 619 case CONSTRAINT_CPUSET:
556 if (sysctl_oom_kill_allocating_task) { 620 __out_of_memory(gfp_mask, order);
557 oom_kill_process(current, gfp_mask, order, points, NULL,
558 "Out of memory (oom_kill_allocating_task)");
559 break;
560 }
561retry:
562 /*
563 * Rambo mode: Shoot down a process and hope it solves whatever
564 * issues we may have.
565 */
566 p = select_bad_process(&points, NULL);
567
568 if (PTR_ERR(p) == -1UL)
569 goto out;
570
571 /* Found nothing?!?! Either we hang forever, or we panic. */
572 if (!p) {
573 read_unlock(&tasklist_lock);
574 panic("Out of memory and no killable processes...\n");
575 }
576
577 if (oom_kill_process(p, gfp_mask, order, points, NULL,
578 "Out of memory"))
579 goto retry;
580
581 break; 621 break;
582 } 622 }
583 623
584out:
585 read_unlock(&tasklist_lock); 624 read_unlock(&tasklist_lock);
586 625
587 /* 626 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2970e35fd03f..b493db7841dc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void)
69int dirty_background_ratio = 5; 69int dirty_background_ratio = 5;
70 70
71/* 71/*
72 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
73 * dirty_background_ratio * the amount of dirtyable memory
74 */
75unsigned long dirty_background_bytes;
76
77/*
72 * free highmem will not be subtracted from the total free memory 78 * free highmem will not be subtracted from the total free memory
73 * for calculating free ratios if vm_highmem_is_dirtyable is true 79 * for calculating free ratios if vm_highmem_is_dirtyable is true
74 */ 80 */
@@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable;
80int vm_dirty_ratio = 10; 86int vm_dirty_ratio = 10;
81 87
82/* 88/*
89 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
90 * vm_dirty_ratio * the amount of dirtyable memory
91 */
92unsigned long vm_dirty_bytes;
93
94/*
83 * The interval between `kupdate'-style writebacks, in jiffies 95 * The interval between `kupdate'-style writebacks, in jiffies
84 */ 96 */
85int dirty_writeback_interval = 5 * HZ; 97int dirty_writeback_interval = 5 * HZ;
@@ -135,23 +147,75 @@ static int calc_period_shift(void)
135{ 147{
136 unsigned long dirty_total; 148 unsigned long dirty_total;
137 149
138 dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; 150 if (vm_dirty_bytes)
151 dirty_total = vm_dirty_bytes / PAGE_SIZE;
152 else
153 dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
154 100;
139 return 2 + ilog2(dirty_total - 1); 155 return 2 + ilog2(dirty_total - 1);
140} 156}
141 157
142/* 158/*
143 * update the period when the dirty ratio changes. 159 * update the period when the dirty threshold changes.
144 */ 160 */
161static void update_completion_period(void)
162{
163 int shift = calc_period_shift();
164 prop_change_shift(&vm_completions, shift);
165 prop_change_shift(&vm_dirties, shift);
166}
167
168int dirty_background_ratio_handler(struct ctl_table *table, int write,
169 struct file *filp, void __user *buffer, size_t *lenp,
170 loff_t *ppos)
171{
172 int ret;
173
174 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
175 if (ret == 0 && write)
176 dirty_background_bytes = 0;
177 return ret;
178}
179
180int dirty_background_bytes_handler(struct ctl_table *table, int write,
181 struct file *filp, void __user *buffer, size_t *lenp,
182 loff_t *ppos)
183{
184 int ret;
185
186 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
187 if (ret == 0 && write)
188 dirty_background_ratio = 0;
189 return ret;
190}
191
145int dirty_ratio_handler(struct ctl_table *table, int write, 192int dirty_ratio_handler(struct ctl_table *table, int write,
146 struct file *filp, void __user *buffer, size_t *lenp, 193 struct file *filp, void __user *buffer, size_t *lenp,
147 loff_t *ppos) 194 loff_t *ppos)
148{ 195{
149 int old_ratio = vm_dirty_ratio; 196 int old_ratio = vm_dirty_ratio;
150 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); 197 int ret;
198
199 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
151 if (ret == 0 && write && vm_dirty_ratio != old_ratio) { 200 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
152 int shift = calc_period_shift(); 201 update_completion_period();
153 prop_change_shift(&vm_completions, shift); 202 vm_dirty_bytes = 0;
154 prop_change_shift(&vm_dirties, shift); 203 }
204 return ret;
205}
206
207
208int dirty_bytes_handler(struct ctl_table *table, int write,
209 struct file *filp, void __user *buffer, size_t *lenp,
210 loff_t *ppos)
211{
212 int old_bytes = vm_dirty_bytes;
213 int ret;
214
215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
216 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
217 update_completion_period();
218 vm_dirty_ratio = 0;
155 } 219 }
156 return ret; 220 return ret;
157} 221}
@@ -362,26 +426,32 @@ unsigned long determine_dirtyable_memory(void)
362} 426}
363 427
364void 428void
365get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, 429get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
366 struct backing_dev_info *bdi) 430 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
367{ 431{
368 int background_ratio; /* Percentages */ 432 unsigned long background;
369 int dirty_ratio; 433 unsigned long dirty;
370 long background;
371 long dirty;
372 unsigned long available_memory = determine_dirtyable_memory(); 434 unsigned long available_memory = determine_dirtyable_memory();
373 struct task_struct *tsk; 435 struct task_struct *tsk;
374 436
375 dirty_ratio = vm_dirty_ratio; 437 if (vm_dirty_bytes)
376 if (dirty_ratio < 5) 438 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
377 dirty_ratio = 5; 439 else {
440 int dirty_ratio;
378 441
379 background_ratio = dirty_background_ratio; 442 dirty_ratio = vm_dirty_ratio;
380 if (background_ratio >= dirty_ratio) 443 if (dirty_ratio < 5)
381 background_ratio = dirty_ratio / 2; 444 dirty_ratio = 5;
445 dirty = (dirty_ratio * available_memory) / 100;
446 }
447
448 if (dirty_background_bytes)
449 background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
450 else
451 background = (dirty_background_ratio * available_memory) / 100;
382 452
383 background = (background_ratio * available_memory) / 100; 453 if (background >= dirty)
384 dirty = (dirty_ratio * available_memory) / 100; 454 background = dirty / 2;
385 tsk = current; 455 tsk = current;
386 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { 456 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
387 background += background / 4; 457 background += background / 4;
@@ -423,9 +493,9 @@ static void balance_dirty_pages(struct address_space *mapping)
423{ 493{
424 long nr_reclaimable, bdi_nr_reclaimable; 494 long nr_reclaimable, bdi_nr_reclaimable;
425 long nr_writeback, bdi_nr_writeback; 495 long nr_writeback, bdi_nr_writeback;
426 long background_thresh; 496 unsigned long background_thresh;
427 long dirty_thresh; 497 unsigned long dirty_thresh;
428 long bdi_thresh; 498 unsigned long bdi_thresh;
429 unsigned long pages_written = 0; 499 unsigned long pages_written = 0;
430 unsigned long write_chunk = sync_writeback_pages(); 500 unsigned long write_chunk = sync_writeback_pages();
431 501
@@ -580,8 +650,8 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
580 650
581void throttle_vm_writeout(gfp_t gfp_mask) 651void throttle_vm_writeout(gfp_t gfp_mask)
582{ 652{
583 long background_thresh; 653 unsigned long background_thresh;
584 long dirty_thresh; 654 unsigned long dirty_thresh;
585 655
586 for ( ; ; ) { 656 for ( ; ; ) {
587 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); 657 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
@@ -624,8 +694,8 @@ static void background_writeout(unsigned long _min_pages)
624 }; 694 };
625 695
626 for ( ; ; ) { 696 for ( ; ; ) {
627 long background_thresh; 697 unsigned long background_thresh;
628 long dirty_thresh; 698 unsigned long dirty_thresh;
629 699
630 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); 700 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
631 if (global_page_state(NR_FILE_DIRTY) + 701 if (global_page_state(NR_FILE_DIRTY) +
@@ -868,9 +938,11 @@ int write_cache_pages(struct address_space *mapping,
868 int done = 0; 938 int done = 0;
869 struct pagevec pvec; 939 struct pagevec pvec;
870 int nr_pages; 940 int nr_pages;
941 pgoff_t uninitialized_var(writeback_index);
871 pgoff_t index; 942 pgoff_t index;
872 pgoff_t end; /* Inclusive */ 943 pgoff_t end; /* Inclusive */
873 int scanned = 0; 944 pgoff_t done_index;
945 int cycled;
874 int range_whole = 0; 946 int range_whole = 0;
875 long nr_to_write = wbc->nr_to_write; 947 long nr_to_write = wbc->nr_to_write;
876 948
@@ -881,83 +953,134 @@ int write_cache_pages(struct address_space *mapping,
881 953
882 pagevec_init(&pvec, 0); 954 pagevec_init(&pvec, 0);
883 if (wbc->range_cyclic) { 955 if (wbc->range_cyclic) {
884 index = mapping->writeback_index; /* Start from prev offset */ 956 writeback_index = mapping->writeback_index; /* prev offset */
957 index = writeback_index;
958 if (index == 0)
959 cycled = 1;
960 else
961 cycled = 0;
885 end = -1; 962 end = -1;
886 } else { 963 } else {
887 index = wbc->range_start >> PAGE_CACHE_SHIFT; 964 index = wbc->range_start >> PAGE_CACHE_SHIFT;
888 end = wbc->range_end >> PAGE_CACHE_SHIFT; 965 end = wbc->range_end >> PAGE_CACHE_SHIFT;
889 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 966 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
890 range_whole = 1; 967 range_whole = 1;
891 scanned = 1; 968 cycled = 1; /* ignore range_cyclic tests */
892 } 969 }
893retry: 970retry:
894 while (!done && (index <= end) && 971 done_index = index;
895 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 972 while (!done && (index <= end)) {
896 PAGECACHE_TAG_DIRTY, 973 int i;
897 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 974
898 unsigned i; 975 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
976 PAGECACHE_TAG_DIRTY,
977 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
978 if (nr_pages == 0)
979 break;
899 980
900 scanned = 1;
901 for (i = 0; i < nr_pages; i++) { 981 for (i = 0; i < nr_pages; i++) {
902 struct page *page = pvec.pages[i]; 982 struct page *page = pvec.pages[i];
903 983
904 /* 984 /*
905 * At this point we hold neither mapping->tree_lock nor 985 * At this point, the page may be truncated or
906 * lock on the page itself: the page may be truncated or 986 * invalidated (changing page->mapping to NULL), or
907 * invalidated (changing page->mapping to NULL), or even 987 * even swizzled back from swapper_space to tmpfs file
908 * swizzled back from swapper_space to tmpfs file 988 * mapping. However, page->index will not change
909 * mapping 989 * because we have a reference on the page.
910 */ 990 */
991 if (page->index > end) {
992 /*
993 * can't be range_cyclic (1st pass) because
994 * end == -1 in that case.
995 */
996 done = 1;
997 break;
998 }
999
1000 done_index = page->index + 1;
1001
911 lock_page(page); 1002 lock_page(page);
912 1003
1004 /*
1005 * Page truncated or invalidated. We can freely skip it
1006 * then, even for data integrity operations: the page
1007 * has disappeared concurrently, so there could be no
1008 * real expectation of this data interity operation
1009 * even if there is now a new, dirty page at the same
1010 * pagecache address.
1011 */
913 if (unlikely(page->mapping != mapping)) { 1012 if (unlikely(page->mapping != mapping)) {
1013continue_unlock:
914 unlock_page(page); 1014 unlock_page(page);
915 continue; 1015 continue;
916 } 1016 }
917 1017
918 if (!wbc->range_cyclic && page->index > end) { 1018 if (!PageDirty(page)) {
919 done = 1; 1019 /* someone wrote it for us */
920 unlock_page(page); 1020 goto continue_unlock;
921 continue;
922 } 1021 }
923 1022
924 if (wbc->sync_mode != WB_SYNC_NONE) 1023 if (PageWriteback(page)) {
925 wait_on_page_writeback(page); 1024 if (wbc->sync_mode != WB_SYNC_NONE)
926 1025 wait_on_page_writeback(page);
927 if (PageWriteback(page) || 1026 else
928 !clear_page_dirty_for_io(page)) { 1027 goto continue_unlock;
929 unlock_page(page);
930 continue;
931 } 1028 }
932 1029
933 ret = (*writepage)(page, wbc, data); 1030 BUG_ON(PageWriteback(page));
1031 if (!clear_page_dirty_for_io(page))
1032 goto continue_unlock;
934 1033
935 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { 1034 ret = (*writepage)(page, wbc, data);
936 unlock_page(page); 1035 if (unlikely(ret)) {
937 ret = 0; 1036 if (ret == AOP_WRITEPAGE_ACTIVATE) {
1037 unlock_page(page);
1038 ret = 0;
1039 } else {
1040 /*
1041 * done_index is set past this page,
1042 * so media errors will not choke
1043 * background writeout for the entire
1044 * file. This has consequences for
1045 * range_cyclic semantics (ie. it may
1046 * not be suitable for data integrity
1047 * writeout).
1048 */
1049 done = 1;
1050 break;
1051 }
1052 }
1053
1054 if (wbc->sync_mode == WB_SYNC_NONE) {
1055 wbc->nr_to_write--;
1056 if (wbc->nr_to_write <= 0) {
1057 done = 1;
1058 break;
1059 }
938 } 1060 }
939 if (ret || (--nr_to_write <= 0))
940 done = 1;
941 if (wbc->nonblocking && bdi_write_congested(bdi)) { 1061 if (wbc->nonblocking && bdi_write_congested(bdi)) {
942 wbc->encountered_congestion = 1; 1062 wbc->encountered_congestion = 1;
943 done = 1; 1063 done = 1;
1064 break;
944 } 1065 }
945 } 1066 }
946 pagevec_release(&pvec); 1067 pagevec_release(&pvec);
947 cond_resched(); 1068 cond_resched();
948 } 1069 }
949 if (!scanned && !done) { 1070 if (!cycled) {
950 /* 1071 /*
1072 * range_cyclic:
951 * We hit the last page and there is more work to be done: wrap 1073 * We hit the last page and there is more work to be done: wrap
952 * back to the start of the file 1074 * back to the start of the file
953 */ 1075 */
954 scanned = 1; 1076 cycled = 1;
955 index = 0; 1077 index = 0;
1078 end = writeback_index - 1;
956 goto retry; 1079 goto retry;
957 } 1080 }
958 if (!wbc->no_nrwrite_index_update) { 1081 if (!wbc->no_nrwrite_index_update) {
959 if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) 1082 if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
960 mapping->writeback_index = index; 1083 mapping->writeback_index = done_index;
961 wbc->nr_to_write = nr_to_write; 1084 wbc->nr_to_write = nr_to_write;
962 } 1085 }
963 1086
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d8ac01474563..7bf22e045318 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,7 +69,7 @@ EXPORT_SYMBOL(node_states);
69 69
70unsigned long totalram_pages __read_mostly; 70unsigned long totalram_pages __read_mostly;
71unsigned long totalreserve_pages __read_mostly; 71unsigned long totalreserve_pages __read_mostly;
72long nr_swap_pages; 72unsigned long highest_memmap_pfn __read_mostly;
73int percpu_pagelist_fraction; 73int percpu_pagelist_fraction;
74 74
75#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 75#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -223,19 +223,41 @@ static inline int bad_range(struct zone *zone, struct page *page)
223 223
224static void bad_page(struct page *page) 224static void bad_page(struct page *page)
225{ 225{
226 printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG 226 static unsigned long resume;
227 "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", 227 static unsigned long nr_shown;
228 current->comm, page, (int)(2*sizeof(unsigned long)), 228 static unsigned long nr_unshown;
229 (unsigned long)page->flags, page->mapping, 229
230 page_mapcount(page), page_count(page)); 230 /*
231 * Allow a burst of 60 reports, then keep quiet for that minute;
232 * or allow a steady drip of one report per second.
233 */
234 if (nr_shown == 60) {
235 if (time_before(jiffies, resume)) {
236 nr_unshown++;
237 goto out;
238 }
239 if (nr_unshown) {
240 printk(KERN_ALERT
241 "BUG: Bad page state: %lu messages suppressed\n",
242 nr_unshown);
243 nr_unshown = 0;
244 }
245 nr_shown = 0;
246 }
247 if (nr_shown++ == 0)
248 resume = jiffies + 60 * HZ;
249
250 printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
251 current->comm, page_to_pfn(page));
252 printk(KERN_ALERT
253 "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
254 page, (void *)page->flags, page_count(page),
255 page_mapcount(page), page->mapping, page->index);
231 256
232 printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
233 KERN_EMERG "Backtrace:\n");
234 dump_stack(); 257 dump_stack();
235 page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD; 258out:
236 set_page_count(page, 0); 259 /* Leave bad fields for debug, except PageBuddy could make trouble */
237 reset_page_mapcount(page); 260 __ClearPageBuddy(page);
238 page->mapping = NULL;
239 add_taint(TAINT_BAD_PAGE); 261 add_taint(TAINT_BAD_PAGE);
240} 262}
241 263
@@ -292,25 +314,31 @@ void prep_compound_gigantic_page(struct page *page, unsigned long order)
292} 314}
293#endif 315#endif
294 316
295static void destroy_compound_page(struct page *page, unsigned long order) 317static int destroy_compound_page(struct page *page, unsigned long order)
296{ 318{
297 int i; 319 int i;
298 int nr_pages = 1 << order; 320 int nr_pages = 1 << order;
321 int bad = 0;
299 322
300 if (unlikely(compound_order(page) != order)) 323 if (unlikely(compound_order(page) != order) ||
324 unlikely(!PageHead(page))) {
301 bad_page(page); 325 bad_page(page);
326 bad++;
327 }
302 328
303 if (unlikely(!PageHead(page)))
304 bad_page(page);
305 __ClearPageHead(page); 329 __ClearPageHead(page);
330
306 for (i = 1; i < nr_pages; i++) { 331 for (i = 1; i < nr_pages; i++) {
307 struct page *p = page + i; 332 struct page *p = page + i;
308 333
309 if (unlikely(!PageTail(p) | 334 if (unlikely(!PageTail(p) | (p->first_page != page))) {
310 (p->first_page != page)))
311 bad_page(page); 335 bad_page(page);
336 bad++;
337 }
312 __ClearPageTail(p); 338 __ClearPageTail(p);
313 } 339 }
340
341 return bad;
314} 342}
315 343
316static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) 344static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
@@ -430,7 +458,8 @@ static inline void __free_one_page(struct page *page,
430 int migratetype = get_pageblock_migratetype(page); 458 int migratetype = get_pageblock_migratetype(page);
431 459
432 if (unlikely(PageCompound(page))) 460 if (unlikely(PageCompound(page)))
433 destroy_compound_page(page, order); 461 if (unlikely(destroy_compound_page(page, order)))
462 return;
434 463
435 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); 464 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
436 465
@@ -467,18 +496,13 @@ static inline int free_pages_check(struct page *page)
467 if (unlikely(page_mapcount(page) | 496 if (unlikely(page_mapcount(page) |
468 (page->mapping != NULL) | 497 (page->mapping != NULL) |
469 (page_count(page) != 0) | 498 (page_count(page) != 0) |
470 (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) 499 (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
471 bad_page(page); 500 bad_page(page);
472 if (PageDirty(page)) 501 return 1;
473 __ClearPageDirty(page); 502 }
474 if (PageSwapBacked(page)) 503 if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
475 __ClearPageSwapBacked(page); 504 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
476 /* 505 return 0;
477 * For now, we report if PG_reserved was found set, but do not
478 * clear it, and do not free the page. But we shall soon need
479 * to do more, for when the ZERO_PAGE count wraps negative.
480 */
481 return PageReserved(page);
482} 506}
483 507
484/* 508/*
@@ -523,11 +547,11 @@ static void __free_pages_ok(struct page *page, unsigned int order)
523{ 547{
524 unsigned long flags; 548 unsigned long flags;
525 int i; 549 int i;
526 int reserved = 0; 550 int bad = 0;
527 551
528 for (i = 0 ; i < (1 << order) ; ++i) 552 for (i = 0 ; i < (1 << order) ; ++i)
529 reserved += free_pages_check(page + i); 553 bad += free_pages_check(page + i);
530 if (reserved) 554 if (bad)
531 return; 555 return;
532 556
533 if (!PageHighMem(page)) { 557 if (!PageHighMem(page)) {
@@ -612,23 +636,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
612 if (unlikely(page_mapcount(page) | 636 if (unlikely(page_mapcount(page) |
613 (page->mapping != NULL) | 637 (page->mapping != NULL) |
614 (page_count(page) != 0) | 638 (page_count(page) != 0) |
615 (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) 639 (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
616 bad_page(page); 640 bad_page(page);
617
618 /*
619 * For now, we report if PG_reserved was found set, but do not
620 * clear it, and do not allocate the page: as a safety net.
621 */
622 if (PageReserved(page))
623 return 1; 641 return 1;
642 }
624 643
625 page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
626 1 << PG_referenced | 1 << PG_arch_1 |
627 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
628#ifdef CONFIG_UNEVICTABLE_LRU
629 | 1 << PG_mlocked
630#endif
631 );
632 set_page_private(page, 0); 644 set_page_private(page, 0);
633 set_page_refcounted(page); 645 set_page_refcounted(page);
634 646
@@ -2609,6 +2621,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
2609 unsigned long pfn; 2621 unsigned long pfn;
2610 struct zone *z; 2622 struct zone *z;
2611 2623
2624 if (highest_memmap_pfn < end_pfn - 1)
2625 highest_memmap_pfn = end_pfn - 1;
2626
2612 z = &NODE_DATA(nid)->node_zones[zone]; 2627 z = &NODE_DATA(nid)->node_zones[zone];
2613 for (pfn = start_pfn; pfn < end_pfn; pfn++) { 2628 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
2614 /* 2629 /*
@@ -3381,10 +3396,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
3381{ 3396{
3382 unsigned long usemapsize = usemap_size(zonesize); 3397 unsigned long usemapsize = usemap_size(zonesize);
3383 zone->pageblock_flags = NULL; 3398 zone->pageblock_flags = NULL;
3384 if (usemapsize) { 3399 if (usemapsize)
3385 zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); 3400 zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
3386 memset(zone->pageblock_flags, 0, usemapsize);
3387 }
3388} 3401}
3389#else 3402#else
3390static void inline setup_usemap(struct pglist_data *pgdat, 3403static void inline setup_usemap(struct pglist_data *pgdat,
@@ -3469,9 +3482,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3469 PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; 3482 PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
3470 if (realsize >= memmap_pages) { 3483 if (realsize >= memmap_pages) {
3471 realsize -= memmap_pages; 3484 realsize -= memmap_pages;
3472 printk(KERN_DEBUG 3485 if (memmap_pages)
3473 " %s zone: %lu pages used for memmap\n", 3486 printk(KERN_DEBUG
3474 zone_names[j], memmap_pages); 3487 " %s zone: %lu pages used for memmap\n",
3488 zone_names[j], memmap_pages);
3475 } else 3489 } else
3476 printk(KERN_WARNING 3490 printk(KERN_WARNING
3477 " %s zone: %lu pages exceeds realsize %lu\n", 3491 " %s zone: %lu pages exceeds realsize %lu\n",
@@ -4316,7 +4330,7 @@ void setup_per_zone_pages_min(void)
4316 * 1TB 101 10GB 4330 * 1TB 101 10GB
4317 * 10TB 320 32GB 4331 * 10TB 320 32GB
4318 */ 4332 */
4319void setup_per_zone_inactive_ratio(void) 4333static void setup_per_zone_inactive_ratio(void)
4320{ 4334{
4321 struct zone *zone; 4335 struct zone *zone;
4322 4336
@@ -4573,19 +4587,6 @@ void *__init alloc_large_system_hash(const char *tablename,
4573 return table; 4587 return table;
4574} 4588}
4575 4589
4576#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
4577struct page *pfn_to_page(unsigned long pfn)
4578{
4579 return __pfn_to_page(pfn);
4580}
4581unsigned long page_to_pfn(struct page *page)
4582{
4583 return __page_to_pfn(page);
4584}
4585EXPORT_SYMBOL(pfn_to_page);
4586EXPORT_SYMBOL(page_to_pfn);
4587#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
4588
4589/* Return a pointer to the bitmap storing bits affecting a block of pages */ 4590/* Return a pointer to the bitmap storing bits affecting a block of pages */
4590static inline unsigned long *get_pageblock_bitmap(struct zone *zone, 4591static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
4591 unsigned long pfn) 4592 unsigned long pfn)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index ab27ff750519..d6507a660ed6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -101,7 +101,7 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
101} 101}
102 102
103/* __alloc_bootmem...() is protected by !slab_available() */ 103/* __alloc_bootmem...() is protected by !slab_available() */
104int __init_refok init_section_page_cgroup(unsigned long pfn) 104static int __init_refok init_section_page_cgroup(unsigned long pfn)
105{ 105{
106 struct mem_section *section; 106 struct mem_section *section;
107 struct page_cgroup *base, *pc; 107 struct page_cgroup *base, *pc;
diff --git a/mm/page_io.c b/mm/page_io.c
index 065c4480eaf0..dc6ce0afbded 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -98,7 +98,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
98 struct bio *bio; 98 struct bio *bio;
99 int ret = 0, rw = WRITE; 99 int ret = 0, rw = WRITE;
100 100
101 if (remove_exclusive_swap_page(page)) { 101 if (try_to_free_swap(page)) {
102 unlock_page(page); 102 unlock_page(page);
103 goto out; 103 goto out;
104 } 104 }
@@ -125,8 +125,8 @@ int swap_readpage(struct file *file, struct page *page)
125 struct bio *bio; 125 struct bio *bio;
126 int ret = 0; 126 int ret = 0;
127 127
128 BUG_ON(!PageLocked(page)); 128 VM_BUG_ON(!PageLocked(page));
129 BUG_ON(PageUptodate(page)); 129 VM_BUG_ON(PageUptodate(page));
130 bio = get_swap_bio(GFP_KERNEL, page_private(page), page, 130 bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
131 end_swap_bio_read); 131 end_swap_bio_read);
132 if (bio == NULL) { 132 if (bio == NULL) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 10993942d6c9..ac4af8cffbf9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,9 +47,9 @@
47#include <linux/rmap.h> 47#include <linux/rmap.h>
48#include <linux/rcupdate.h> 48#include <linux/rcupdate.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kallsyms.h>
51#include <linux/memcontrol.h> 50#include <linux/memcontrol.h>
52#include <linux/mmu_notifier.h> 51#include <linux/mmu_notifier.h>
52#include <linux/migrate.h>
53 53
54#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
55 55
@@ -191,7 +191,7 @@ void __init anon_vma_init(void)
191 * Getting a lock on a stable anon_vma from a page off the LRU is 191 * Getting a lock on a stable anon_vma from a page off the LRU is
192 * tricky: page_lock_anon_vma rely on RCU to guard against the races. 192 * tricky: page_lock_anon_vma rely on RCU to guard against the races.
193 */ 193 */
194struct anon_vma *page_lock_anon_vma(struct page *page) 194static struct anon_vma *page_lock_anon_vma(struct page *page)
195{ 195{
196 struct anon_vma *anon_vma; 196 struct anon_vma *anon_vma;
197 unsigned long anon_mapping; 197 unsigned long anon_mapping;
@@ -211,7 +211,7 @@ out:
211 return NULL; 211 return NULL;
212} 212}
213 213
214void page_unlock_anon_vma(struct anon_vma *anon_vma) 214static void page_unlock_anon_vma(struct anon_vma *anon_vma)
215{ 215{
216 spin_unlock(&anon_vma->lock); 216 spin_unlock(&anon_vma->lock);
217 rcu_read_unlock(); 217 rcu_read_unlock();
@@ -359,8 +359,17 @@ static int page_referenced_one(struct page *page,
359 goto out_unmap; 359 goto out_unmap;
360 } 360 }
361 361
362 if (ptep_clear_flush_young_notify(vma, address, pte)) 362 if (ptep_clear_flush_young_notify(vma, address, pte)) {
363 referenced++; 363 /*
364 * Don't treat a reference through a sequentially read
365 * mapping as such. If the page has been used in
366 * another mapping, we will catch it; if this other
367 * mapping is already gone, the unmap path will have
368 * set PG_referenced or activated the page.
369 */
370 if (likely(!VM_SequentialReadHint(vma)))
371 referenced++;
372 }
364 373
365 /* Pretend the page is referenced if the task has the 374 /* Pretend the page is referenced if the task has the
366 swap token and is in the middle of a page fault. */ 375 swap token and is in the middle of a page fault. */
@@ -661,9 +670,14 @@ void page_add_anon_rmap(struct page *page,
661void page_add_new_anon_rmap(struct page *page, 670void page_add_new_anon_rmap(struct page *page,
662 struct vm_area_struct *vma, unsigned long address) 671 struct vm_area_struct *vma, unsigned long address)
663{ 672{
664 BUG_ON(address < vma->vm_start || address >= vma->vm_end); 673 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
665 atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */ 674 SetPageSwapBacked(page);
675 atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
666 __page_set_anon_rmap(page, vma, address); 676 __page_set_anon_rmap(page, vma, address);
677 if (page_evictable(page, vma))
678 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
679 else
680 add_page_to_unevictable_list(page);
667} 681}
668 682
669/** 683/**
@@ -693,7 +707,6 @@ void page_add_file_rmap(struct page *page)
693 */ 707 */
694void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) 708void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
695{ 709{
696 BUG_ON(page_mapcount(page) == 0);
697 if (PageAnon(page)) 710 if (PageAnon(page))
698 __page_check_anon_rmap(page, vma, address); 711 __page_check_anon_rmap(page, vma, address);
699 atomic_inc(&page->_mapcount); 712 atomic_inc(&page->_mapcount);
@@ -703,28 +716,12 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long
703/** 716/**
704 * page_remove_rmap - take down pte mapping from a page 717 * page_remove_rmap - take down pte mapping from a page
705 * @page: page to remove mapping from 718 * @page: page to remove mapping from
706 * @vma: the vm area in which the mapping is removed
707 * 719 *
708 * The caller needs to hold the pte lock. 720 * The caller needs to hold the pte lock.
709 */ 721 */
710void page_remove_rmap(struct page *page, struct vm_area_struct *vma) 722void page_remove_rmap(struct page *page)
711{ 723{
712 if (atomic_add_negative(-1, &page->_mapcount)) { 724 if (atomic_add_negative(-1, &page->_mapcount)) {
713 if (unlikely(page_mapcount(page) < 0)) {
714 printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
715 printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
716 printk (KERN_EMERG " page->flags = %lx\n", page->flags);
717 printk (KERN_EMERG " page->count = %x\n", page_count(page));
718 printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
719 print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
720 if (vma->vm_ops) {
721 print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
722 }
723 if (vma->vm_file && vma->vm_file->f_op)
724 print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
725 BUG();
726 }
727
728 /* 725 /*
729 * Now that the last pte has gone, s390 must transfer dirty 726 * Now that the last pte has gone, s390 must transfer dirty
730 * flag from storage key to struct page. We can usually skip 727 * flag from storage key to struct page. We can usually skip
@@ -818,8 +815,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
818 spin_unlock(&mmlist_lock); 815 spin_unlock(&mmlist_lock);
819 } 816 }
820 dec_mm_counter(mm, anon_rss); 817 dec_mm_counter(mm, anon_rss);
821#ifdef CONFIG_MIGRATION 818 } else if (PAGE_MIGRATION) {
822 } else {
823 /* 819 /*
824 * Store the pfn of the page in a special migration 820 * Store the pfn of the page in a special migration
825 * pte. do_swap_page() will wait until the migration 821 * pte. do_swap_page() will wait until the migration
@@ -827,23 +823,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
827 */ 823 */
828 BUG_ON(!migration); 824 BUG_ON(!migration);
829 entry = make_migration_entry(page, pte_write(pteval)); 825 entry = make_migration_entry(page, pte_write(pteval));
830#endif
831 } 826 }
832 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 827 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
833 BUG_ON(pte_file(*pte)); 828 BUG_ON(pte_file(*pte));
834 } else 829 } else if (PAGE_MIGRATION && migration) {
835#ifdef CONFIG_MIGRATION
836 if (migration) {
837 /* Establish migration entry for a file page */ 830 /* Establish migration entry for a file page */
838 swp_entry_t entry; 831 swp_entry_t entry;
839 entry = make_migration_entry(page, pte_write(pteval)); 832 entry = make_migration_entry(page, pte_write(pteval));
840 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 833 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
841 } else 834 } else
842#endif
843 dec_mm_counter(mm, file_rss); 835 dec_mm_counter(mm, file_rss);
844 836
845 837
846 page_remove_rmap(page, vma); 838 page_remove_rmap(page);
847 page_cache_release(page); 839 page_cache_release(page);
848 840
849out_unmap: 841out_unmap:
@@ -958,7 +950,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
958 if (pte_dirty(pteval)) 950 if (pte_dirty(pteval))
959 set_page_dirty(page); 951 set_page_dirty(page);
960 952
961 page_remove_rmap(page, vma); 953 page_remove_rmap(page);
962 page_cache_release(page); 954 page_cache_release(page);
963 dec_mm_counter(mm, file_rss); 955 dec_mm_counter(mm, file_rss);
964 (*mapcount)--; 956 (*mapcount)--;
diff --git a/mm/shmem.c b/mm/shmem.c
index f1b0d4871f3a..5941f9801363 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -14,31 +14,39 @@
14 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> 14 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
15 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> 15 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
16 * 16 *
17 * tiny-shmem:
18 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
19 *
17 * This file is released under the GPL. 20 * This file is released under the GPL.
18 */ 21 */
19 22
23#include <linux/fs.h>
24#include <linux/init.h>
25#include <linux/vfs.h>
26#include <linux/mount.h>
27#include <linux/file.h>
28#include <linux/mm.h>
29#include <linux/module.h>
30#include <linux/swap.h>
31
32static struct vfsmount *shm_mnt;
33
34#ifdef CONFIG_SHMEM
20/* 35/*
21 * This virtual memory filesystem is heavily based on the ramfs. It 36 * This virtual memory filesystem is heavily based on the ramfs. It
22 * extends ramfs by the ability to use swap and honor resource limits 37 * extends ramfs by the ability to use swap and honor resource limits
23 * which makes it a completely usable filesystem. 38 * which makes it a completely usable filesystem.
24 */ 39 */
25 40
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/fs.h>
29#include <linux/xattr.h> 41#include <linux/xattr.h>
30#include <linux/exportfs.h> 42#include <linux/exportfs.h>
31#include <linux/generic_acl.h> 43#include <linux/generic_acl.h>
32#include <linux/mm.h>
33#include <linux/mman.h> 44#include <linux/mman.h>
34#include <linux/file.h>
35#include <linux/swap.h>
36#include <linux/pagemap.h> 45#include <linux/pagemap.h>
37#include <linux/string.h> 46#include <linux/string.h>
38#include <linux/slab.h> 47#include <linux/slab.h>
39#include <linux/backing-dev.h> 48#include <linux/backing-dev.h>
40#include <linux/shmem_fs.h> 49#include <linux/shmem_fs.h>
41#include <linux/mount.h>
42#include <linux/writeback.h> 50#include <linux/writeback.h>
43#include <linux/vfs.h> 51#include <linux/vfs.h>
44#include <linux/blkdev.h> 52#include <linux/blkdev.h>
@@ -1444,7 +1452,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1444 if (error) 1452 if (error)
1445 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); 1453 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1446 1454
1447 mark_page_accessed(vmf->page);
1448 return ret | VM_FAULT_LOCKED; 1455 return ret | VM_FAULT_LOCKED;
1449} 1456}
1450 1457
@@ -2486,7 +2493,6 @@ static struct file_system_type tmpfs_fs_type = {
2486 .get_sb = shmem_get_sb, 2493 .get_sb = shmem_get_sb,
2487 .kill_sb = kill_litter_super, 2494 .kill_sb = kill_litter_super,
2488}; 2495};
2489static struct vfsmount *shm_mnt;
2490 2496
2491static int __init init_tmpfs(void) 2497static int __init init_tmpfs(void)
2492{ 2498{
@@ -2525,7 +2531,51 @@ out4:
2525 shm_mnt = ERR_PTR(error); 2531 shm_mnt = ERR_PTR(error);
2526 return error; 2532 return error;
2527} 2533}
2528module_init(init_tmpfs) 2534
2535#else /* !CONFIG_SHMEM */
2536
2537/*
2538 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
2539 *
2540 * This is intended for small system where the benefits of the full
2541 * shmem code (swap-backed and resource-limited) are outweighed by
2542 * their complexity. On systems without swap this code should be
2543 * effectively equivalent, but much lighter weight.
2544 */
2545
2546#include <linux/ramfs.h>
2547
2548static struct file_system_type tmpfs_fs_type = {
2549 .name = "tmpfs",
2550 .get_sb = ramfs_get_sb,
2551 .kill_sb = kill_litter_super,
2552};
2553
2554static int __init init_tmpfs(void)
2555{
2556 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
2557
2558 shm_mnt = kern_mount(&tmpfs_fs_type);
2559 BUG_ON(IS_ERR(shm_mnt));
2560
2561 return 0;
2562}
2563
2564int shmem_unuse(swp_entry_t entry, struct page *page)
2565{
2566 return 0;
2567}
2568
2569#define shmem_file_operations ramfs_file_operations
2570#define shmem_vm_ops generic_file_vm_ops
2571#define shmem_get_inode ramfs_get_inode
2572#define shmem_acct_size(a, b) 0
2573#define shmem_unacct_size(a, b) do {} while (0)
2574#define SHMEM_MAX_BYTES LLONG_MAX
2575
2576#endif /* CONFIG_SHMEM */
2577
2578/* common code */
2529 2579
2530/** 2580/**
2531 * shmem_file_setup - get an unlinked file living in tmpfs 2581 * shmem_file_setup - get an unlinked file living in tmpfs
@@ -2569,12 +2619,20 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2569 if (!inode) 2619 if (!inode)
2570 goto close_file; 2620 goto close_file;
2571 2621
2622#ifdef CONFIG_SHMEM
2572 SHMEM_I(inode)->flags = flags & VM_ACCOUNT; 2623 SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
2624#endif
2573 d_instantiate(dentry, inode); 2625 d_instantiate(dentry, inode);
2574 inode->i_size = size; 2626 inode->i_size = size;
2575 inode->i_nlink = 0; /* It is unlinked */ 2627 inode->i_nlink = 0; /* It is unlinked */
2576 init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, 2628 init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
2577 &shmem_file_operations); 2629 &shmem_file_operations);
2630
2631#ifndef CONFIG_MMU
2632 error = ramfs_nommu_expand_for_mapping(inode, size);
2633 if (error)
2634 goto close_file;
2635#endif
2578 return file; 2636 return file;
2579 2637
2580close_file: 2638close_file:
@@ -2606,3 +2664,5 @@ int shmem_zero_setup(struct vm_area_struct *vma)
2606 vma->vm_ops = &shmem_vm_ops; 2664 vma->vm_ops = &shmem_vm_ops;
2607 return 0; 2665 return 0;
2608} 2666}
2667
2668module_init(init_tmpfs)
diff --git a/mm/swap.c b/mm/swap.c
index b135ec90cdeb..ba2c0e8b8b54 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -246,25 +246,6 @@ void add_page_to_unevictable_list(struct page *page)
246 spin_unlock_irq(&zone->lru_lock); 246 spin_unlock_irq(&zone->lru_lock);
247} 247}
248 248
249/**
250 * lru_cache_add_active_or_unevictable
251 * @page: the page to be added to LRU
252 * @vma: vma in which page is mapped for determining reclaimability
253 *
254 * place @page on active or unevictable LRU list, depending on
255 * page_evictable(). Note that if the page is not evictable,
256 * it goes directly back onto it's zone's unevictable list. It does
257 * NOT use a per cpu pagevec.
258 */
259void lru_cache_add_active_or_unevictable(struct page *page,
260 struct vm_area_struct *vma)
261{
262 if (page_evictable(page, vma))
263 lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
264 else
265 add_page_to_unevictable_list(page);
266}
267
268/* 249/*
269 * Drain pages out of the cpu's pagevecs. 250 * Drain pages out of the cpu's pagevecs.
270 * Either "cpu" is the current CPU, and preemption has already been 251 * Either "cpu" is the current CPU, and preemption has already been
@@ -398,28 +379,6 @@ void __pagevec_release(struct pagevec *pvec)
398EXPORT_SYMBOL(__pagevec_release); 379EXPORT_SYMBOL(__pagevec_release);
399 380
400/* 381/*
401 * pagevec_release() for pages which are known to not be on the LRU
402 *
403 * This function reinitialises the caller's pagevec.
404 */
405void __pagevec_release_nonlru(struct pagevec *pvec)
406{
407 int i;
408 struct pagevec pages_to_free;
409
410 pagevec_init(&pages_to_free, pvec->cold);
411 for (i = 0; i < pagevec_count(pvec); i++) {
412 struct page *page = pvec->pages[i];
413
414 VM_BUG_ON(PageLRU(page));
415 if (put_page_testzero(page))
416 pagevec_add(&pages_to_free, page);
417 }
418 pagevec_free(&pages_to_free);
419 pagevec_reinit(pvec);
420}
421
422/*
423 * Add the passed pages to the LRU, then drop the caller's refcount 382 * Add the passed pages to the LRU, then drop the caller's refcount
424 * on them. Reinitialises the caller's pagevec. 383 * on them. Reinitialises the caller's pagevec.
425 */ 384 */
@@ -495,8 +454,7 @@ void pagevec_swap_free(struct pagevec *pvec)
495 struct page *page = pvec->pages[i]; 454 struct page *page = pvec->pages[i];
496 455
497 if (PageSwapCache(page) && trylock_page(page)) { 456 if (PageSwapCache(page) && trylock_page(page)) {
498 if (PageSwapCache(page)) 457 try_to_free_swap(page);
499 remove_exclusive_swap_page_ref(page);
500 unlock_page(page); 458 unlock_page(page);
501 } 459 }
502 } 460 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3353c9029cef..81c825f67a7f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -72,10 +72,10 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
72{ 72{
73 int error; 73 int error;
74 74
75 BUG_ON(!PageLocked(page)); 75 VM_BUG_ON(!PageLocked(page));
76 BUG_ON(PageSwapCache(page)); 76 VM_BUG_ON(PageSwapCache(page));
77 BUG_ON(PagePrivate(page)); 77 VM_BUG_ON(!PageSwapBacked(page));
78 BUG_ON(!PageSwapBacked(page)); 78
79 error = radix_tree_preload(gfp_mask); 79 error = radix_tree_preload(gfp_mask);
80 if (!error) { 80 if (!error) {
81 page_cache_get(page); 81 page_cache_get(page);
@@ -108,10 +108,9 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
108 */ 108 */
109void __delete_from_swap_cache(struct page *page) 109void __delete_from_swap_cache(struct page *page)
110{ 110{
111 BUG_ON(!PageLocked(page)); 111 VM_BUG_ON(!PageLocked(page));
112 BUG_ON(!PageSwapCache(page)); 112 VM_BUG_ON(!PageSwapCache(page));
113 BUG_ON(PageWriteback(page)); 113 VM_BUG_ON(PageWriteback(page));
114 BUG_ON(PagePrivate(page));
115 114
116 radix_tree_delete(&swapper_space.page_tree, page_private(page)); 115 radix_tree_delete(&swapper_space.page_tree, page_private(page));
117 set_page_private(page, 0); 116 set_page_private(page, 0);
@@ -129,13 +128,13 @@ void __delete_from_swap_cache(struct page *page)
129 * Allocate swap space for the page and add the page to the 128 * Allocate swap space for the page and add the page to the
130 * swap cache. Caller needs to hold the page lock. 129 * swap cache. Caller needs to hold the page lock.
131 */ 130 */
132int add_to_swap(struct page * page, gfp_t gfp_mask) 131int add_to_swap(struct page *page)
133{ 132{
134 swp_entry_t entry; 133 swp_entry_t entry;
135 int err; 134 int err;
136 135
137 BUG_ON(!PageLocked(page)); 136 VM_BUG_ON(!PageLocked(page));
138 BUG_ON(!PageUptodate(page)); 137 VM_BUG_ON(!PageUptodate(page));
139 138
140 for (;;) { 139 for (;;) {
141 entry = get_swap_page(); 140 entry = get_swap_page();
@@ -154,7 +153,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
154 * Add it to the swap cache and mark it dirty 153 * Add it to the swap cache and mark it dirty
155 */ 154 */
156 err = add_to_swap_cache(page, entry, 155 err = add_to_swap_cache(page, entry,
157 gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); 156 __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
158 157
159 switch (err) { 158 switch (err) {
160 case 0: /* Success */ 159 case 0: /* Success */
@@ -196,14 +195,14 @@ void delete_from_swap_cache(struct page *page)
196 * If we are the only user, then try to free up the swap cache. 195 * If we are the only user, then try to free up the swap cache.
197 * 196 *
198 * Its ok to check for PageSwapCache without the page lock 197 * Its ok to check for PageSwapCache without the page lock
199 * here because we are going to recheck again inside 198 * here because we are going to recheck again inside
200 * exclusive_swap_page() _with_ the lock. 199 * try_to_free_swap() _with_ the lock.
201 * - Marcelo 200 * - Marcelo
202 */ 201 */
203static inline void free_swap_cache(struct page *page) 202static inline void free_swap_cache(struct page *page)
204{ 203{
205 if (PageSwapCache(page) && trylock_page(page)) { 204 if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
206 remove_exclusive_swap_page(page); 205 try_to_free_swap(page);
207 unlock_page(page); 206 unlock_page(page);
208 } 207 }
209} 208}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 54a9f87e5162..eec5ca758a23 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -16,6 +16,7 @@
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/shm.h> 17#include <linux/shm.h>
18#include <linux/blkdev.h> 18#include <linux/blkdev.h>
19#include <linux/random.h>
19#include <linux/writeback.h> 20#include <linux/writeback.h>
20#include <linux/proc_fs.h> 21#include <linux/proc_fs.h>
21#include <linux/seq_file.h> 22#include <linux/seq_file.h>
@@ -35,6 +36,7 @@
35 36
36static DEFINE_SPINLOCK(swap_lock); 37static DEFINE_SPINLOCK(swap_lock);
37static unsigned int nr_swapfiles; 38static unsigned int nr_swapfiles;
39long nr_swap_pages;
38long total_swap_pages; 40long total_swap_pages;
39static int swap_overflow; 41static int swap_overflow;
40static int least_priority; 42static int least_priority;
@@ -83,15 +85,96 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
83 up_read(&swap_unplug_sem); 85 up_read(&swap_unplug_sem);
84} 86}
85 87
88/*
89 * swapon tell device that all the old swap contents can be discarded,
90 * to allow the swap device to optimize its wear-levelling.
91 */
92static int discard_swap(struct swap_info_struct *si)
93{
94 struct swap_extent *se;
95 int err = 0;
96
97 list_for_each_entry(se, &si->extent_list, list) {
98 sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
99 sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
100
101 if (se->start_page == 0) {
102 /* Do not discard the swap header page! */
103 start_block += 1 << (PAGE_SHIFT - 9);
104 nr_blocks -= 1 << (PAGE_SHIFT - 9);
105 if (!nr_blocks)
106 continue;
107 }
108
109 err = blkdev_issue_discard(si->bdev, start_block,
110 nr_blocks, GFP_KERNEL);
111 if (err)
112 break;
113
114 cond_resched();
115 }
116 return err; /* That will often be -EOPNOTSUPP */
117}
118
119/*
120 * swap allocation tell device that a cluster of swap can now be discarded,
121 * to allow the swap device to optimize its wear-levelling.
122 */
123static void discard_swap_cluster(struct swap_info_struct *si,
124 pgoff_t start_page, pgoff_t nr_pages)
125{
126 struct swap_extent *se = si->curr_swap_extent;
127 int found_extent = 0;
128
129 while (nr_pages) {
130 struct list_head *lh;
131
132 if (se->start_page <= start_page &&
133 start_page < se->start_page + se->nr_pages) {
134 pgoff_t offset = start_page - se->start_page;
135 sector_t start_block = se->start_block + offset;
136 sector_t nr_blocks = se->nr_pages - offset;
137
138 if (nr_blocks > nr_pages)
139 nr_blocks = nr_pages;
140 start_page += nr_blocks;
141 nr_pages -= nr_blocks;
142
143 if (!found_extent++)
144 si->curr_swap_extent = se;
145
146 start_block <<= PAGE_SHIFT - 9;
147 nr_blocks <<= PAGE_SHIFT - 9;
148 if (blkdev_issue_discard(si->bdev, start_block,
149 nr_blocks, GFP_NOIO))
150 break;
151 }
152
153 lh = se->list.next;
154 if (lh == &si->extent_list)
155 lh = lh->next;
156 se = list_entry(lh, struct swap_extent, list);
157 }
158}
159
160static int wait_for_discard(void *word)
161{
162 schedule();
163 return 0;
164}
165
86#define SWAPFILE_CLUSTER 256 166#define SWAPFILE_CLUSTER 256
87#define LATENCY_LIMIT 256 167#define LATENCY_LIMIT 256
88 168
89static inline unsigned long scan_swap_map(struct swap_info_struct *si) 169static inline unsigned long scan_swap_map(struct swap_info_struct *si)
90{ 170{
91 unsigned long offset, last_in_cluster; 171 unsigned long offset;
172 unsigned long scan_base;
173 unsigned long last_in_cluster = 0;
92 int latency_ration = LATENCY_LIMIT; 174 int latency_ration = LATENCY_LIMIT;
175 int found_free_cluster = 0;
93 176
94 /* 177 /*
95 * We try to cluster swap pages by allocating them sequentially 178 * We try to cluster swap pages by allocating them sequentially
96 * in swap. Once we've allocated SWAPFILE_CLUSTER pages this 179 * in swap. Once we've allocated SWAPFILE_CLUSTER pages this
97 * way, however, we resort to first-free allocation, starting 180 * way, however, we resort to first-free allocation, starting
@@ -99,16 +182,42 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
99 * all over the entire swap partition, so that we reduce 182 * all over the entire swap partition, so that we reduce
100 * overall disk seek times between swap pages. -- sct 183 * overall disk seek times between swap pages. -- sct
101 * But we do now try to find an empty cluster. -Andrea 184 * But we do now try to find an empty cluster. -Andrea
185 * And we let swap pages go all over an SSD partition. Hugh
102 */ 186 */
103 187
104 si->flags += SWP_SCANNING; 188 si->flags += SWP_SCANNING;
105 if (unlikely(!si->cluster_nr)) { 189 scan_base = offset = si->cluster_next;
106 si->cluster_nr = SWAPFILE_CLUSTER - 1; 190
107 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) 191 if (unlikely(!si->cluster_nr--)) {
108 goto lowest; 192 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
193 si->cluster_nr = SWAPFILE_CLUSTER - 1;
194 goto checks;
195 }
196 if (si->flags & SWP_DISCARDABLE) {
197 /*
198 * Start range check on racing allocations, in case
199 * they overlap the cluster we eventually decide on
200 * (we scan without swap_lock to allow preemption).
201 * It's hardly conceivable that cluster_nr could be
202 * wrapped during our scan, but don't depend on it.
203 */
204 if (si->lowest_alloc)
205 goto checks;
206 si->lowest_alloc = si->max;
207 si->highest_alloc = 0;
208 }
109 spin_unlock(&swap_lock); 209 spin_unlock(&swap_lock);
110 210
111 offset = si->lowest_bit; 211 /*
212 * If seek is expensive, start searching for new cluster from
213 * start of partition, to minimize the span of allocated swap.
214 * But if seek is cheap, search from our current position, so
215 * that swap is allocated from all over the partition: if the
216 * Flash Translation Layer only remaps within limited zones,
217 * we don't want to wear out the first zone too quickly.
218 */
219 if (!(si->flags & SWP_SOLIDSTATE))
220 scan_base = offset = si->lowest_bit;
112 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; 221 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
113 222
114 /* Locate the first empty (unaligned) cluster */ 223 /* Locate the first empty (unaligned) cluster */
@@ -117,43 +226,124 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
117 last_in_cluster = offset + SWAPFILE_CLUSTER; 226 last_in_cluster = offset + SWAPFILE_CLUSTER;
118 else if (offset == last_in_cluster) { 227 else if (offset == last_in_cluster) {
119 spin_lock(&swap_lock); 228 spin_lock(&swap_lock);
120 si->cluster_next = offset-SWAPFILE_CLUSTER+1; 229 offset -= SWAPFILE_CLUSTER - 1;
121 goto cluster; 230 si->cluster_next = offset;
231 si->cluster_nr = SWAPFILE_CLUSTER - 1;
232 found_free_cluster = 1;
233 goto checks;
122 } 234 }
123 if (unlikely(--latency_ration < 0)) { 235 if (unlikely(--latency_ration < 0)) {
124 cond_resched(); 236 cond_resched();
125 latency_ration = LATENCY_LIMIT; 237 latency_ration = LATENCY_LIMIT;
126 } 238 }
127 } 239 }
240
241 offset = si->lowest_bit;
242 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
243
244 /* Locate the first empty (unaligned) cluster */
245 for (; last_in_cluster < scan_base; offset++) {
246 if (si->swap_map[offset])
247 last_in_cluster = offset + SWAPFILE_CLUSTER;
248 else if (offset == last_in_cluster) {
249 spin_lock(&swap_lock);
250 offset -= SWAPFILE_CLUSTER - 1;
251 si->cluster_next = offset;
252 si->cluster_nr = SWAPFILE_CLUSTER - 1;
253 found_free_cluster = 1;
254 goto checks;
255 }
256 if (unlikely(--latency_ration < 0)) {
257 cond_resched();
258 latency_ration = LATENCY_LIMIT;
259 }
260 }
261
262 offset = scan_base;
128 spin_lock(&swap_lock); 263 spin_lock(&swap_lock);
129 goto lowest; 264 si->cluster_nr = SWAPFILE_CLUSTER - 1;
265 si->lowest_alloc = 0;
130 } 266 }
131 267
132 si->cluster_nr--; 268checks:
133cluster: 269 if (!(si->flags & SWP_WRITEOK))
134 offset = si->cluster_next;
135 if (offset > si->highest_bit)
136lowest: offset = si->lowest_bit;
137checks: if (!(si->flags & SWP_WRITEOK))
138 goto no_page; 270 goto no_page;
139 if (!si->highest_bit) 271 if (!si->highest_bit)
140 goto no_page; 272 goto no_page;
141 if (!si->swap_map[offset]) { 273 if (offset > si->highest_bit)
142 if (offset == si->lowest_bit) 274 scan_base = offset = si->lowest_bit;
143 si->lowest_bit++; 275 if (si->swap_map[offset])
144 if (offset == si->highest_bit) 276 goto scan;
145 si->highest_bit--; 277
146 si->inuse_pages++; 278 if (offset == si->lowest_bit)
147 if (si->inuse_pages == si->pages) { 279 si->lowest_bit++;
148 si->lowest_bit = si->max; 280 if (offset == si->highest_bit)
149 si->highest_bit = 0; 281 si->highest_bit--;
282 si->inuse_pages++;
283 if (si->inuse_pages == si->pages) {
284 si->lowest_bit = si->max;
285 si->highest_bit = 0;
286 }
287 si->swap_map[offset] = 1;
288 si->cluster_next = offset + 1;
289 si->flags -= SWP_SCANNING;
290
291 if (si->lowest_alloc) {
292 /*
293 * Only set when SWP_DISCARDABLE, and there's a scan
294 * for a free cluster in progress or just completed.
295 */
296 if (found_free_cluster) {
297 /*
298 * To optimize wear-levelling, discard the
299 * old data of the cluster, taking care not to
300 * discard any of its pages that have already
301 * been allocated by racing tasks (offset has
302 * already stepped over any at the beginning).
303 */
304 if (offset < si->highest_alloc &&
305 si->lowest_alloc <= last_in_cluster)
306 last_in_cluster = si->lowest_alloc - 1;
307 si->flags |= SWP_DISCARDING;
308 spin_unlock(&swap_lock);
309
310 if (offset < last_in_cluster)
311 discard_swap_cluster(si, offset,
312 last_in_cluster - offset + 1);
313
314 spin_lock(&swap_lock);
315 si->lowest_alloc = 0;
316 si->flags &= ~SWP_DISCARDING;
317
318 smp_mb(); /* wake_up_bit advises this */
319 wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));
320
321 } else if (si->flags & SWP_DISCARDING) {
322 /*
323 * Delay using pages allocated by racing tasks
324 * until the whole discard has been issued. We
325 * could defer that delay until swap_writepage,
326 * but it's easier to keep this self-contained.
327 */
328 spin_unlock(&swap_lock);
329 wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
330 wait_for_discard, TASK_UNINTERRUPTIBLE);
331 spin_lock(&swap_lock);
332 } else {
333 /*
334 * Note pages allocated by racing tasks while
335 * scan for a free cluster is in progress, so
336 * that its final discard can exclude them.
337 */
338 if (offset < si->lowest_alloc)
339 si->lowest_alloc = offset;
340 if (offset > si->highest_alloc)
341 si->highest_alloc = offset;
150 } 342 }
151 si->swap_map[offset] = 1;
152 si->cluster_next = offset + 1;
153 si->flags -= SWP_SCANNING;
154 return offset;
155 } 343 }
344 return offset;
156 345
346scan:
157 spin_unlock(&swap_lock); 347 spin_unlock(&swap_lock);
158 while (++offset <= si->highest_bit) { 348 while (++offset <= si->highest_bit) {
159 if (!si->swap_map[offset]) { 349 if (!si->swap_map[offset]) {
@@ -165,8 +355,18 @@ checks: if (!(si->flags & SWP_WRITEOK))
165 latency_ration = LATENCY_LIMIT; 355 latency_ration = LATENCY_LIMIT;
166 } 356 }
167 } 357 }
358 offset = si->lowest_bit;
359 while (++offset < scan_base) {
360 if (!si->swap_map[offset]) {
361 spin_lock(&swap_lock);
362 goto checks;
363 }
364 if (unlikely(--latency_ration < 0)) {
365 cond_resched();
366 latency_ration = LATENCY_LIMIT;
367 }
368 }
168 spin_lock(&swap_lock); 369 spin_lock(&swap_lock);
169 goto lowest;
170 370
171no_page: 371no_page:
172 si->flags -= SWP_SCANNING; 372 si->flags -= SWP_SCANNING;
@@ -268,7 +468,7 @@ bad_nofile:
268 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); 468 printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
269out: 469out:
270 return NULL; 470 return NULL;
271} 471}
272 472
273static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) 473static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
274{ 474{
@@ -326,97 +526,58 @@ static inline int page_swapcount(struct page *page)
326} 526}
327 527
328/* 528/*
329 * We can use this swap cache entry directly 529 * We can write to an anon page without COW if there are no other references
330 * if there are no other references to it. 530 * to it. And as a side-effect, free up its swap: because the old content
531 * on disk will never be read, and seeking back there to write new content
532 * later would only waste time away from clustering.
331 */ 533 */
332int can_share_swap_page(struct page *page) 534int reuse_swap_page(struct page *page)
333{ 535{
334 int count; 536 int count;
335 537
336 BUG_ON(!PageLocked(page)); 538 VM_BUG_ON(!PageLocked(page));
337 count = page_mapcount(page); 539 count = page_mapcount(page);
338 if (count <= 1 && PageSwapCache(page)) 540 if (count <= 1 && PageSwapCache(page)) {
339 count += page_swapcount(page); 541 count += page_swapcount(page);
542 if (count == 1 && !PageWriteback(page)) {
543 delete_from_swap_cache(page);
544 SetPageDirty(page);
545 }
546 }
340 return count == 1; 547 return count == 1;
341} 548}
342 549
343/* 550/*
344 * Work out if there are any other processes sharing this 551 * If swap is getting full, or if there are no more mappings of this page,
345 * swap cache page. Free it if you can. Return success. 552 * then try_to_free_swap is called to free its swap space.
346 */ 553 */
347static int remove_exclusive_swap_page_count(struct page *page, int count) 554int try_to_free_swap(struct page *page)
348{ 555{
349 int retval; 556 VM_BUG_ON(!PageLocked(page));
350 struct swap_info_struct * p;
351 swp_entry_t entry;
352
353 BUG_ON(PagePrivate(page));
354 BUG_ON(!PageLocked(page));
355 557
356 if (!PageSwapCache(page)) 558 if (!PageSwapCache(page))
357 return 0; 559 return 0;
358 if (PageWriteback(page)) 560 if (PageWriteback(page))
359 return 0; 561 return 0;
360 if (page_count(page) != count) /* us + cache + ptes */ 562 if (page_swapcount(page))
361 return 0;
362
363 entry.val = page_private(page);
364 p = swap_info_get(entry);
365 if (!p)
366 return 0; 563 return 0;
367 564
368 /* Is the only swap cache user the cache itself? */ 565 delete_from_swap_cache(page);
369 retval = 0; 566 SetPageDirty(page);
370 if (p->swap_map[swp_offset(entry)] == 1) { 567 return 1;
371 /* Recheck the page count with the swapcache lock held.. */
372 spin_lock_irq(&swapper_space.tree_lock);
373 if ((page_count(page) == count) && !PageWriteback(page)) {
374 __delete_from_swap_cache(page);
375 SetPageDirty(page);
376 retval = 1;
377 }
378 spin_unlock_irq(&swapper_space.tree_lock);
379 }
380 spin_unlock(&swap_lock);
381
382 if (retval) {
383 swap_free(entry);
384 page_cache_release(page);
385 }
386
387 return retval;
388}
389
390/*
391 * Most of the time the page should have two references: one for the
392 * process and one for the swap cache.
393 */
394int remove_exclusive_swap_page(struct page *page)
395{
396 return remove_exclusive_swap_page_count(page, 2);
397}
398
399/*
400 * The pageout code holds an extra reference to the page. That raises
401 * the reference count to test for to 2 for a page that is only in the
402 * swap cache plus 1 for each process that maps the page.
403 */
404int remove_exclusive_swap_page_ref(struct page *page)
405{
406 return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
407} 568}
408 569
409/* 570/*
410 * Free the swap entry like above, but also try to 571 * Free the swap entry like above, but also try to
411 * free the page cache entry if it is the last user. 572 * free the page cache entry if it is the last user.
412 */ 573 */
413void free_swap_and_cache(swp_entry_t entry) 574int free_swap_and_cache(swp_entry_t entry)
414{ 575{
415 struct swap_info_struct * p; 576 struct swap_info_struct *p;
416 struct page *page = NULL; 577 struct page *page = NULL;
417 578
418 if (is_migration_entry(entry)) 579 if (is_migration_entry(entry))
419 return; 580 return 1;
420 581
421 p = swap_info_get(entry); 582 p = swap_info_get(entry);
422 if (p) { 583 if (p) {
@@ -430,20 +591,19 @@ void free_swap_and_cache(swp_entry_t entry)
430 spin_unlock(&swap_lock); 591 spin_unlock(&swap_lock);
431 } 592 }
432 if (page) { 593 if (page) {
433 int one_user; 594 /*
434 595 * Not mapped elsewhere, or swap space full? Free it!
435 BUG_ON(PagePrivate(page)); 596 * Also recheck PageSwapCache now page is locked (above).
436 one_user = (page_count(page) == 2); 597 */
437 /* Only cache user (+us), or swap space full? Free it! */
438 /* Also recheck PageSwapCache after page is locked (above) */
439 if (PageSwapCache(page) && !PageWriteback(page) && 598 if (PageSwapCache(page) && !PageWriteback(page) &&
440 (one_user || vm_swap_full())) { 599 (!page_mapped(page) || vm_swap_full())) {
441 delete_from_swap_cache(page); 600 delete_from_swap_cache(page);
442 SetPageDirty(page); 601 SetPageDirty(page);
443 } 602 }
444 unlock_page(page); 603 unlock_page(page);
445 page_cache_release(page); 604 page_cache_release(page);
446 } 605 }
606 return p != NULL;
447} 607}
448 608
449#ifdef CONFIG_HIBERNATION 609#ifdef CONFIG_HIBERNATION
@@ -776,10 +936,10 @@ static int try_to_unuse(unsigned int type)
776 break; 936 break;
777 } 937 }
778 938
779 /* 939 /*
780 * Get a page for the entry, using the existing swap 940 * Get a page for the entry, using the existing swap
781 * cache page if there is one. Otherwise, get a clean 941 * cache page if there is one. Otherwise, get a clean
782 * page and read the swap into it. 942 * page and read the swap into it.
783 */ 943 */
784 swap_map = &si->swap_map[i]; 944 swap_map = &si->swap_map[i];
785 entry = swp_entry(type, i); 945 entry = swp_entry(type, i);
@@ -930,7 +1090,16 @@ static int try_to_unuse(unsigned int type)
930 lock_page(page); 1090 lock_page(page);
931 wait_on_page_writeback(page); 1091 wait_on_page_writeback(page);
932 } 1092 }
933 if (PageSwapCache(page)) 1093
1094 /*
1095 * It is conceivable that a racing task removed this page from
1096 * swap cache just before we acquired the page lock at the top,
1097 * or while we dropped it in unuse_mm(). The page might even
1098 * be back in swap cache on another swap area: that we must not
1099 * delete, since it may not have been written out to swap yet.
1100 */
1101 if (PageSwapCache(page) &&
1102 likely(page_private(page) == entry.val))
934 delete_from_swap_cache(page); 1103 delete_from_swap_cache(page);
935 1104
936 /* 1105 /*
@@ -1203,26 +1372,6 @@ out:
1203 return ret; 1372 return ret;
1204} 1373}
1205 1374
1206#if 0 /* We don't need this yet */
1207#include <linux/backing-dev.h>
1208int page_queue_congested(struct page *page)
1209{
1210 struct backing_dev_info *bdi;
1211
1212 BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */
1213
1214 if (PageSwapCache(page)) {
1215 swp_entry_t entry = { .val = page_private(page) };
1216 struct swap_info_struct *sis;
1217
1218 sis = get_swap_info_struct(swp_type(entry));
1219 bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
1220 } else
1221 bdi = page->mapping->backing_dev_info;
1222 return bdi_write_congested(bdi);
1223}
1224#endif
1225
1226asmlinkage long sys_swapoff(const char __user * specialfile) 1375asmlinkage long sys_swapoff(const char __user * specialfile)
1227{ 1376{
1228 struct swap_info_struct * p = NULL; 1377 struct swap_info_struct * p = NULL;
@@ -1233,7 +1382,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1233 char * pathname; 1382 char * pathname;
1234 int i, type, prev; 1383 int i, type, prev;
1235 int err; 1384 int err;
1236 1385
1237 if (!capable(CAP_SYS_ADMIN)) 1386 if (!capable(CAP_SYS_ADMIN))
1238 return -EPERM; 1387 return -EPERM;
1239 1388
@@ -1253,7 +1402,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1253 spin_lock(&swap_lock); 1402 spin_lock(&swap_lock);
1254 for (type = swap_list.head; type >= 0; type = swap_info[type].next) { 1403 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1255 p = swap_info + type; 1404 p = swap_info + type;
1256 if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { 1405 if (p->flags & SWP_WRITEOK) {
1257 if (p->swap_file->f_mapping == mapping) 1406 if (p->swap_file->f_mapping == mapping)
1258 break; 1407 break;
1259 } 1408 }
@@ -1426,12 +1575,12 @@ static int swap_show(struct seq_file *swap, void *v)
1426 file = ptr->swap_file; 1575 file = ptr->swap_file;
1427 len = seq_path(swap, &file->f_path, " \t\n\\"); 1576 len = seq_path(swap, &file->f_path, " \t\n\\");
1428 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", 1577 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
1429 len < 40 ? 40 - len : 1, " ", 1578 len < 40 ? 40 - len : 1, " ",
1430 S_ISBLK(file->f_path.dentry->d_inode->i_mode) ? 1579 S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
1431 "partition" : "file\t", 1580 "partition" : "file\t",
1432 ptr->pages << (PAGE_SHIFT - 10), 1581 ptr->pages << (PAGE_SHIFT - 10),
1433 ptr->inuse_pages << (PAGE_SHIFT - 10), 1582 ptr->inuse_pages << (PAGE_SHIFT - 10),
1434 ptr->prio); 1583 ptr->prio);
1435 return 0; 1584 return 0;
1436} 1585}
1437 1586
@@ -1487,12 +1636,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1487 int i, prev; 1636 int i, prev;
1488 int error; 1637 int error;
1489 union swap_header *swap_header = NULL; 1638 union swap_header *swap_header = NULL;
1490 int swap_header_version;
1491 unsigned int nr_good_pages = 0; 1639 unsigned int nr_good_pages = 0;
1492 int nr_extents = 0; 1640 int nr_extents = 0;
1493 sector_t span; 1641 sector_t span;
1494 unsigned long maxpages = 1; 1642 unsigned long maxpages = 1;
1495 int swapfilesize; 1643 unsigned long swapfilepages;
1496 unsigned short *swap_map = NULL; 1644 unsigned short *swap_map = NULL;
1497 struct page *page = NULL; 1645 struct page *page = NULL;
1498 struct inode *inode = NULL; 1646 struct inode *inode = NULL;
@@ -1570,7 +1718,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1570 goto bad_swap; 1718 goto bad_swap;
1571 } 1719 }
1572 1720
1573 swapfilesize = i_size_read(inode) >> PAGE_SHIFT; 1721 swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
1574 1722
1575 /* 1723 /*
1576 * Read the swap header. 1724 * Read the swap header.
@@ -1584,101 +1732,86 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1584 error = PTR_ERR(page); 1732 error = PTR_ERR(page);
1585 goto bad_swap; 1733 goto bad_swap;
1586 } 1734 }
1587 kmap(page); 1735 swap_header = kmap(page);
1588 swap_header = page_address(page);
1589 1736
1590 if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) 1737 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
1591 swap_header_version = 1;
1592 else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
1593 swap_header_version = 2;
1594 else {
1595 printk(KERN_ERR "Unable to find swap-space signature\n"); 1738 printk(KERN_ERR "Unable to find swap-space signature\n");
1596 error = -EINVAL; 1739 error = -EINVAL;
1597 goto bad_swap; 1740 goto bad_swap;
1598 } 1741 }
1599 1742
1600 switch (swap_header_version) { 1743 /* swap partition endianess hack... */
1601 case 1: 1744 if (swab32(swap_header->info.version) == 1) {
1602 printk(KERN_ERR "version 0 swap is no longer supported. " 1745 swab32s(&swap_header->info.version);
1603 "Use mkswap -v1 %s\n", name); 1746 swab32s(&swap_header->info.last_page);
1747 swab32s(&swap_header->info.nr_badpages);
1748 for (i = 0; i < swap_header->info.nr_badpages; i++)
1749 swab32s(&swap_header->info.badpages[i]);
1750 }
1751 /* Check the swap header's sub-version */
1752 if (swap_header->info.version != 1) {
1753 printk(KERN_WARNING
1754 "Unable to handle swap header version %d\n",
1755 swap_header->info.version);
1604 error = -EINVAL; 1756 error = -EINVAL;
1605 goto bad_swap; 1757 goto bad_swap;
1606 case 2: 1758 }
1607 /* swap partition endianess hack... */
1608 if (swab32(swap_header->info.version) == 1) {
1609 swab32s(&swap_header->info.version);
1610 swab32s(&swap_header->info.last_page);
1611 swab32s(&swap_header->info.nr_badpages);
1612 for (i = 0; i < swap_header->info.nr_badpages; i++)
1613 swab32s(&swap_header->info.badpages[i]);
1614 }
1615 /* Check the swap header's sub-version and the size of
1616 the swap file and bad block lists */
1617 if (swap_header->info.version != 1) {
1618 printk(KERN_WARNING
1619 "Unable to handle swap header version %d\n",
1620 swap_header->info.version);
1621 error = -EINVAL;
1622 goto bad_swap;
1623 }
1624 1759
1625 p->lowest_bit = 1; 1760 p->lowest_bit = 1;
1626 p->cluster_next = 1; 1761 p->cluster_next = 1;
1627 1762
1628 /* 1763 /*
1629 * Find out how many pages are allowed for a single swap 1764 * Find out how many pages are allowed for a single swap
1630 * device. There are two limiting factors: 1) the number of 1765 * device. There are two limiting factors: 1) the number of
1631 * bits for the swap offset in the swp_entry_t type and 1766 * bits for the swap offset in the swp_entry_t type and
1632 * 2) the number of bits in the a swap pte as defined by 1767 * 2) the number of bits in the a swap pte as defined by
1633 * the different architectures. In order to find the 1768 * the different architectures. In order to find the
1634 * largest possible bit mask a swap entry with swap type 0 1769 * largest possible bit mask a swap entry with swap type 0
1635 * and swap offset ~0UL is created, encoded to a swap pte, 1770 * and swap offset ~0UL is created, encoded to a swap pte,
1636 * decoded to a swp_entry_t again and finally the swap 1771 * decoded to a swp_entry_t again and finally the swap
1637 * offset is extracted. This will mask all the bits from 1772 * offset is extracted. This will mask all the bits from
1638 * the initial ~0UL mask that can't be encoded in either 1773 * the initial ~0UL mask that can't be encoded in either
1639 * the swp_entry_t or the architecture definition of a 1774 * the swp_entry_t or the architecture definition of a
1640 * swap pte. 1775 * swap pte.
1641 */ 1776 */
1642 maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1; 1777 maxpages = swp_offset(pte_to_swp_entry(
1643 if (maxpages > swap_header->info.last_page) 1778 swp_entry_to_pte(swp_entry(0, ~0UL)))) - 1;
1644 maxpages = swap_header->info.last_page; 1779 if (maxpages > swap_header->info.last_page)
1645 p->highest_bit = maxpages - 1; 1780 maxpages = swap_header->info.last_page;
1781 p->highest_bit = maxpages - 1;
1646 1782
1647 error = -EINVAL; 1783 error = -EINVAL;
1648 if (!maxpages) 1784 if (!maxpages)
1649 goto bad_swap; 1785 goto bad_swap;
1650 if (swapfilesize && maxpages > swapfilesize) { 1786 if (swapfilepages && maxpages > swapfilepages) {
1651 printk(KERN_WARNING 1787 printk(KERN_WARNING
1652 "Swap area shorter than signature indicates\n"); 1788 "Swap area shorter than signature indicates\n");
1653 goto bad_swap; 1789 goto bad_swap;
1654 } 1790 }
1655 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) 1791 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
1656 goto bad_swap; 1792 goto bad_swap;
1657 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) 1793 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1658 goto bad_swap; 1794 goto bad_swap;
1659 1795
1660 /* OK, set up the swap map and apply the bad block list */ 1796 /* OK, set up the swap map and apply the bad block list */
1661 swap_map = vmalloc(maxpages * sizeof(short)); 1797 swap_map = vmalloc(maxpages * sizeof(short));
1662 if (!swap_map) { 1798 if (!swap_map) {
1663 error = -ENOMEM; 1799 error = -ENOMEM;
1664 goto bad_swap; 1800 goto bad_swap;
1665 } 1801 }
1666 1802
1667 error = 0; 1803 memset(swap_map, 0, maxpages * sizeof(short));
1668 memset(swap_map, 0, maxpages * sizeof(short)); 1804 for (i = 0; i < swap_header->info.nr_badpages; i++) {
1669 for (i = 0; i < swap_header->info.nr_badpages; i++) { 1805 int page_nr = swap_header->info.badpages[i];
1670 int page_nr = swap_header->info.badpages[i]; 1806 if (page_nr <= 0 || page_nr >= swap_header->info.last_page) {
1671 if (page_nr <= 0 || page_nr >= swap_header->info.last_page) 1807 error = -EINVAL;
1672 error = -EINVAL;
1673 else
1674 swap_map[page_nr] = SWAP_MAP_BAD;
1675 }
1676 nr_good_pages = swap_header->info.last_page -
1677 swap_header->info.nr_badpages -
1678 1 /* header page */;
1679 if (error)
1680 goto bad_swap; 1808 goto bad_swap;
1809 }
1810 swap_map[page_nr] = SWAP_MAP_BAD;
1681 } 1811 }
1812 nr_good_pages = swap_header->info.last_page -
1813 swap_header->info.nr_badpages -
1814 1 /* header page */;
1682 1815
1683 if (nr_good_pages) { 1816 if (nr_good_pages) {
1684 swap_map[0] = SWAP_MAP_BAD; 1817 swap_map[0] = SWAP_MAP_BAD;
@@ -1697,6 +1830,13 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1697 goto bad_swap; 1830 goto bad_swap;
1698 } 1831 }
1699 1832
1833 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
1834 p->flags |= SWP_SOLIDSTATE;
1835 p->cluster_next = 1 + (random32() % p->highest_bit);
1836 }
1837 if (discard_swap(p) == 0)
1838 p->flags |= SWP_DISCARDABLE;
1839
1700 mutex_lock(&swapon_mutex); 1840 mutex_lock(&swapon_mutex);
1701 spin_lock(&swap_lock); 1841 spin_lock(&swap_lock);
1702 if (swap_flags & SWAP_FLAG_PREFER) 1842 if (swap_flags & SWAP_FLAG_PREFER)
@@ -1705,14 +1845,16 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1705 else 1845 else
1706 p->prio = --least_priority; 1846 p->prio = --least_priority;
1707 p->swap_map = swap_map; 1847 p->swap_map = swap_map;
1708 p->flags = SWP_ACTIVE; 1848 p->flags |= SWP_WRITEOK;
1709 nr_swap_pages += nr_good_pages; 1849 nr_swap_pages += nr_good_pages;
1710 total_swap_pages += nr_good_pages; 1850 total_swap_pages += nr_good_pages;
1711 1851
1712 printk(KERN_INFO "Adding %uk swap on %s. " 1852 printk(KERN_INFO "Adding %uk swap on %s. "
1713 "Priority:%d extents:%d across:%lluk\n", 1853 "Priority:%d extents:%d across:%lluk %s%s\n",
1714 nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, 1854 nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
1715 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10)); 1855 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
1856 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
1857 (p->flags & SWP_DISCARDABLE) ? "D" : "");
1716 1858
1717 /* insert swap space into swap_list: */ 1859 /* insert swap space into swap_list: */
1718 prev = -1; 1860 prev = -1;
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
deleted file mode 100644
index 3e67d575ee6e..000000000000
--- a/mm/tiny-shmem.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * tiny-shmem.c: simple shmemfs and tmpfs using ramfs code
3 *
4 * Matt Mackall <mpm@selenic.com> January, 2004
5 * derived from mm/shmem.c and fs/ramfs/inode.c
6 *
7 * This is intended for small system where the benefits of the full
8 * shmem code (swap-backed and resource-limited) are outweighed by
9 * their complexity. On systems without swap this code should be
10 * effectively equivalent, but much lighter weight.
11 */
12
13#include <linux/fs.h>
14#include <linux/init.h>
15#include <linux/vfs.h>
16#include <linux/mount.h>
17#include <linux/file.h>
18#include <linux/mm.h>
19#include <linux/module.h>
20#include <linux/swap.h>
21#include <linux/ramfs.h>
22
23static struct file_system_type tmpfs_fs_type = {
24 .name = "tmpfs",
25 .get_sb = ramfs_get_sb,
26 .kill_sb = kill_litter_super,
27};
28
29static struct vfsmount *shm_mnt;
30
31static int __init init_tmpfs(void)
32{
33 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
34
35 shm_mnt = kern_mount(&tmpfs_fs_type);
36 BUG_ON(IS_ERR(shm_mnt));
37
38 return 0;
39}
40module_init(init_tmpfs)
41
42/**
43 * shmem_file_setup - get an unlinked file living in tmpfs
44 * @name: name for dentry (to be seen in /proc/<pid>/maps
45 * @size: size to be set for the file
46 * @flags: vm_flags
47 */
48struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
49{
50 int error;
51 struct file *file;
52 struct inode *inode;
53 struct dentry *dentry, *root;
54 struct qstr this;
55
56 if (IS_ERR(shm_mnt))
57 return (void *)shm_mnt;
58
59 error = -ENOMEM;
60 this.name = name;
61 this.len = strlen(name);
62 this.hash = 0; /* will go */
63 root = shm_mnt->mnt_root;
64 dentry = d_alloc(root, &this);
65 if (!dentry)
66 goto put_memory;
67
68 error = -ENFILE;
69 file = get_empty_filp();
70 if (!file)
71 goto put_dentry;
72
73 error = -ENOSPC;
74 inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
75 if (!inode)
76 goto close_file;
77
78 d_instantiate(dentry, inode);
79 inode->i_size = size;
80 inode->i_nlink = 0; /* It is unlinked */
81 init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
82 &ramfs_file_operations);
83
84#ifndef CONFIG_MMU
85 error = ramfs_nommu_expand_for_mapping(inode, size);
86 if (error)
87 goto close_file;
88#endif
89 return file;
90
91close_file:
92 put_filp(file);
93put_dentry:
94 dput(dentry);
95put_memory:
96 return ERR_PTR(error);
97}
98EXPORT_SYMBOL_GPL(shmem_file_setup);
99
100/**
101 * shmem_zero_setup - setup a shared anonymous mapping
102 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
103 */
104int shmem_zero_setup(struct vm_area_struct *vma)
105{
106 struct file *file;
107 loff_t size = vma->vm_end - vma->vm_start;
108
109 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
110 if (IS_ERR(file))
111 return PTR_ERR(file);
112
113 if (vma->vm_file)
114 fput(vma->vm_file);
115 vma->vm_file = file;
116 vma->vm_ops = &generic_file_vm_ops;
117 return 0;
118}
119
120int shmem_unuse(swp_entry_t entry, struct page *page)
121{
122 return 0;
123}
124
125#ifndef CONFIG_MMU
126unsigned long shmem_get_unmapped_area(struct file *file,
127 unsigned long addr,
128 unsigned long len,
129 unsigned long pgoff,
130 unsigned long flags)
131{
132 return ramfs_nommu_get_unmapped_area(file, addr, len, pgoff, flags);
133}
134#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7465f22fec0c..c5db9a7264d9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -14,6 +14,7 @@
14#include <linux/highmem.h> 14#include <linux/highmem.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/mutex.h>
17#include <linux/interrupt.h> 18#include <linux/interrupt.h>
18#include <linux/proc_fs.h> 19#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 20#include <linux/seq_file.h>
@@ -381,8 +382,9 @@ found:
381 goto retry; 382 goto retry;
382 } 383 }
383 if (printk_ratelimit()) 384 if (printk_ratelimit())
384 printk(KERN_WARNING "vmap allocation failed: " 385 printk(KERN_WARNING
385 "use vmalloc=<size> to increase size.\n"); 386 "vmap allocation for size %lu failed: "
387 "use vmalloc=<size> to increase size.\n", size);
386 return ERR_PTR(-EBUSY); 388 return ERR_PTR(-EBUSY);
387 } 389 }
388 390
@@ -432,6 +434,27 @@ static void unmap_vmap_area(struct vmap_area *va)
432 vunmap_page_range(va->va_start, va->va_end); 434 vunmap_page_range(va->va_start, va->va_end);
433} 435}
434 436
437static void vmap_debug_free_range(unsigned long start, unsigned long end)
438{
439 /*
440 * Unmap page tables and force a TLB flush immediately if
441 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
442 * bugs similarly to those in linear kernel virtual address
443 * space after a page has been freed.
444 *
445 * All the lazy freeing logic is still retained, in order to
446 * minimise intrusiveness of this debugging feature.
447 *
448 * This is going to be *slow* (linear kernel virtual address
449 * debugging doesn't do a broadcast TLB flush so it is a lot
450 * faster).
451 */
452#ifdef CONFIG_DEBUG_PAGEALLOC
453 vunmap_page_range(start, end);
454 flush_tlb_kernel_range(start, end);
455#endif
456}
457
435/* 458/*
436 * lazy_max_pages is the maximum amount of virtual address space we gather up 459 * lazy_max_pages is the maximum amount of virtual address space we gather up
437 * before attempting to purge with a TLB flush. 460 * before attempting to purge with a TLB flush.
@@ -472,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
472static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, 495static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
473 int sync, int force_flush) 496 int sync, int force_flush)
474{ 497{
475 static DEFINE_SPINLOCK(purge_lock); 498 static DEFINE_MUTEX(purge_lock);
476 LIST_HEAD(valist); 499 LIST_HEAD(valist);
477 struct vmap_area *va; 500 struct vmap_area *va;
478 int nr = 0; 501 int nr = 0;
@@ -483,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
483 * the case that isn't actually used at the moment anyway. 506 * the case that isn't actually used at the moment anyway.
484 */ 507 */
485 if (!sync && !force_flush) { 508 if (!sync && !force_flush) {
486 if (!spin_trylock(&purge_lock)) 509 if (!mutex_trylock(&purge_lock))
487 return; 510 return;
488 } else 511 } else
489 spin_lock(&purge_lock); 512 mutex_lock(&purge_lock);
490 513
491 rcu_read_lock(); 514 rcu_read_lock();
492 list_for_each_entry_rcu(va, &vmap_area_list, list) { 515 list_for_each_entry_rcu(va, &vmap_area_list, list) {
@@ -518,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
518 __free_vmap_area(va); 541 __free_vmap_area(va);
519 spin_unlock(&vmap_area_lock); 542 spin_unlock(&vmap_area_lock);
520 } 543 }
521 spin_unlock(&purge_lock); 544 mutex_unlock(&purge_lock);
522} 545}
523 546
524/* 547/*
@@ -912,6 +935,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
912 BUG_ON(addr & (PAGE_SIZE-1)); 935 BUG_ON(addr & (PAGE_SIZE-1));
913 936
914 debug_check_no_locks_freed(mem, size); 937 debug_check_no_locks_freed(mem, size);
938 vmap_debug_free_range(addr, addr+size);
915 939
916 if (likely(count <= VMAP_MAX_ALLOC)) 940 if (likely(count <= VMAP_MAX_ALLOC))
917 vb_free(mem, size); 941 vb_free(mem, size);
@@ -1128,6 +1152,8 @@ struct vm_struct *remove_vm_area(const void *addr)
1128 if (va && va->flags & VM_VM_AREA) { 1152 if (va && va->flags & VM_VM_AREA) {
1129 struct vm_struct *vm = va->private; 1153 struct vm_struct *vm = va->private;
1130 struct vm_struct *tmp, **p; 1154 struct vm_struct *tmp, **p;
1155
1156 vmap_debug_free_range(va->va_start, va->va_end);
1131 free_unmap_vmap_area(va); 1157 free_unmap_vmap_area(va);
1132 vm->size -= PAGE_SIZE; 1158 vm->size -= PAGE_SIZE;
1133 1159
@@ -1375,7 +1401,8 @@ void *vmalloc_user(unsigned long size)
1375 struct vm_struct *area; 1401 struct vm_struct *area;
1376 void *ret; 1402 void *ret;
1377 1403
1378 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); 1404 ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1405 PAGE_KERNEL, -1, __builtin_return_address(0));
1379 if (ret) { 1406 if (ret) {
1380 area = find_vm_area(ret); 1407 area = find_vm_area(ret);
1381 area->flags |= VM_USERMAP; 1408 area->flags |= VM_USERMAP;
@@ -1420,7 +1447,8 @@ EXPORT_SYMBOL(vmalloc_node);
1420 1447
1421void *vmalloc_exec(unsigned long size) 1448void *vmalloc_exec(unsigned long size)
1422{ 1449{
1423 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 1450 return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1451 -1, __builtin_return_address(0));
1424} 1452}
1425 1453
1426#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) 1454#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
@@ -1440,7 +1468,8 @@ void *vmalloc_exec(unsigned long size)
1440 */ 1468 */
1441void *vmalloc_32(unsigned long size) 1469void *vmalloc_32(unsigned long size)
1442{ 1470{
1443 return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL); 1471 return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
1472 -1, __builtin_return_address(0));
1444} 1473}
1445EXPORT_SYMBOL(vmalloc_32); 1474EXPORT_SYMBOL(vmalloc_32);
1446 1475
@@ -1456,7 +1485,8 @@ void *vmalloc_32_user(unsigned long size)
1456 struct vm_struct *area; 1485 struct vm_struct *area;
1457 void *ret; 1486 void *ret;
1458 1487
1459 ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); 1488 ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1489 -1, __builtin_return_address(0));
1460 if (ret) { 1490 if (ret) {
1461 area = find_vm_area(ret); 1491 area = find_vm_area(ret);
1462 area->flags |= VM_USERMAP; 1492 area->flags |= VM_USERMAP;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d196f46c8808..b07c48b09a93 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,6 +52,9 @@ struct scan_control {
52 /* Incremented by the number of inactive pages that were scanned */ 52 /* Incremented by the number of inactive pages that were scanned */
53 unsigned long nr_scanned; 53 unsigned long nr_scanned;
54 54
55 /* Number of pages freed so far during a call to shrink_zones() */
56 unsigned long nr_reclaimed;
57
55 /* This context's GFP mask */ 58 /* This context's GFP mask */
56 gfp_t gfp_mask; 59 gfp_t gfp_mask;
57 60
@@ -617,7 +620,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
617 referenced && page_mapping_inuse(page)) 620 referenced && page_mapping_inuse(page))
618 goto activate_locked; 621 goto activate_locked;
619 622
620#ifdef CONFIG_SWAP
621 /* 623 /*
622 * Anonymous process memory has backing store? 624 * Anonymous process memory has backing store?
623 * Try to allocate it some swap space here. 625 * Try to allocate it some swap space here.
@@ -625,20 +627,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
625 if (PageAnon(page) && !PageSwapCache(page)) { 627 if (PageAnon(page) && !PageSwapCache(page)) {
626 if (!(sc->gfp_mask & __GFP_IO)) 628 if (!(sc->gfp_mask & __GFP_IO))
627 goto keep_locked; 629 goto keep_locked;
628 switch (try_to_munlock(page)) { 630 if (!add_to_swap(page))
629 case SWAP_FAIL: /* shouldn't happen */
630 case SWAP_AGAIN:
631 goto keep_locked;
632 case SWAP_MLOCK:
633 goto cull_mlocked;
634 case SWAP_SUCCESS:
635 ; /* fall thru'; add to swap cache */
636 }
637 if (!add_to_swap(page, GFP_ATOMIC))
638 goto activate_locked; 631 goto activate_locked;
639 may_enter_fs = 1; 632 may_enter_fs = 1;
640 } 633 }
641#endif /* CONFIG_SWAP */
642 634
643 mapping = page_mapping(page); 635 mapping = page_mapping(page);
644 636
@@ -752,6 +744,8 @@ free_it:
752 continue; 744 continue;
753 745
754cull_mlocked: 746cull_mlocked:
747 if (PageSwapCache(page))
748 try_to_free_swap(page);
755 unlock_page(page); 749 unlock_page(page);
756 putback_lru_page(page); 750 putback_lru_page(page);
757 continue; 751 continue;
@@ -759,7 +753,7 @@ cull_mlocked:
759activate_locked: 753activate_locked:
760 /* Not a candidate for swapping, so reclaim swap space. */ 754 /* Not a candidate for swapping, so reclaim swap space. */
761 if (PageSwapCache(page) && vm_swap_full()) 755 if (PageSwapCache(page) && vm_swap_full())
762 remove_exclusive_swap_page_ref(page); 756 try_to_free_swap(page);
763 VM_BUG_ON(PageActive(page)); 757 VM_BUG_ON(PageActive(page));
764 SetPageActive(page); 758 SetPageActive(page);
765 pgactivate++; 759 pgactivate++;
@@ -1173,11 +1167,6 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
1173 zone->prev_priority = priority; 1167 zone->prev_priority = priority;
1174} 1168}
1175 1169
1176static inline int zone_is_near_oom(struct zone *zone)
1177{
1178 return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
1179}
1180
1181/* 1170/*
1182 * This moves pages from the active list to the inactive list. 1171 * This moves pages from the active list to the inactive list.
1183 * 1172 *
@@ -1248,6 +1237,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1248 list_add(&page->lru, &l_inactive); 1237 list_add(&page->lru, &l_inactive);
1249 } 1238 }
1250 1239
1240 /*
1241 * Move the pages to the [file or anon] inactive list.
1242 */
1243 pagevec_init(&pvec, 1);
1244 pgmoved = 0;
1245 lru = LRU_BASE + file * LRU_FILE;
1246
1251 spin_lock_irq(&zone->lru_lock); 1247 spin_lock_irq(&zone->lru_lock);
1252 /* 1248 /*
1253 * Count referenced pages from currently used mappings as 1249 * Count referenced pages from currently used mappings as
@@ -1255,15 +1251,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1255 * This helps balance scan pressure between file and anonymous 1251 * This helps balance scan pressure between file and anonymous
1256 * pages in get_scan_ratio. 1252 * pages in get_scan_ratio.
1257 */ 1253 */
1258 zone->recent_rotated[!!file] += pgmoved; 1254 if (scan_global_lru(sc))
1259 1255 zone->recent_rotated[!!file] += pgmoved;
1260 /*
1261 * Move the pages to the [file or anon] inactive list.
1262 */
1263 pagevec_init(&pvec, 1);
1264 1256
1265 pgmoved = 0;
1266 lru = LRU_BASE + file * LRU_FILE;
1267 while (!list_empty(&l_inactive)) { 1257 while (!list_empty(&l_inactive)) {
1268 page = lru_to_page(&l_inactive); 1258 page = lru_to_page(&l_inactive);
1269 prefetchw_prev_lru_page(page, &l_inactive, flags); 1259 prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1336,12 +1326,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1336 unsigned long anon_prio, file_prio; 1326 unsigned long anon_prio, file_prio;
1337 unsigned long ap, fp; 1327 unsigned long ap, fp;
1338 1328
1339 anon = zone_page_state(zone, NR_ACTIVE_ANON) +
1340 zone_page_state(zone, NR_INACTIVE_ANON);
1341 file = zone_page_state(zone, NR_ACTIVE_FILE) +
1342 zone_page_state(zone, NR_INACTIVE_FILE);
1343 free = zone_page_state(zone, NR_FREE_PAGES);
1344
1345 /* If we have no swap space, do not bother scanning anon pages. */ 1329 /* If we have no swap space, do not bother scanning anon pages. */
1346 if (nr_swap_pages <= 0) { 1330 if (nr_swap_pages <= 0) {
1347 percent[0] = 0; 1331 percent[0] = 0;
@@ -1349,6 +1333,12 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1349 return; 1333 return;
1350 } 1334 }
1351 1335
1336 anon = zone_page_state(zone, NR_ACTIVE_ANON) +
1337 zone_page_state(zone, NR_INACTIVE_ANON);
1338 file = zone_page_state(zone, NR_ACTIVE_FILE) +
1339 zone_page_state(zone, NR_INACTIVE_FILE);
1340 free = zone_page_state(zone, NR_FREE_PAGES);
1341
1352 /* If we have very few page cache pages, force-scan anon pages. */ 1342 /* If we have very few page cache pages, force-scan anon pages. */
1353 if (unlikely(file + free <= zone->pages_high)) { 1343 if (unlikely(file + free <= zone->pages_high)) {
1354 percent[0] = 100; 1344 percent[0] = 100;
@@ -1408,14 +1398,15 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1408/* 1398/*
1409 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 1399 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
1410 */ 1400 */
1411static unsigned long shrink_zone(int priority, struct zone *zone, 1401static void shrink_zone(int priority, struct zone *zone,
1412 struct scan_control *sc) 1402 struct scan_control *sc)
1413{ 1403{
1414 unsigned long nr[NR_LRU_LISTS]; 1404 unsigned long nr[NR_LRU_LISTS];
1415 unsigned long nr_to_scan; 1405 unsigned long nr_to_scan;
1416 unsigned long nr_reclaimed = 0;
1417 unsigned long percent[2]; /* anon @ 0; file @ 1 */ 1406 unsigned long percent[2]; /* anon @ 0; file @ 1 */
1418 enum lru_list l; 1407 enum lru_list l;
1408 unsigned long nr_reclaimed = sc->nr_reclaimed;
1409 unsigned long swap_cluster_max = sc->swap_cluster_max;
1419 1410
1420 get_scan_ratio(zone, sc, percent); 1411 get_scan_ratio(zone, sc, percent);
1421 1412
@@ -1431,7 +1422,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
1431 } 1422 }
1432 zone->lru[l].nr_scan += scan; 1423 zone->lru[l].nr_scan += scan;
1433 nr[l] = zone->lru[l].nr_scan; 1424 nr[l] = zone->lru[l].nr_scan;
1434 if (nr[l] >= sc->swap_cluster_max) 1425 if (nr[l] >= swap_cluster_max)
1435 zone->lru[l].nr_scan = 0; 1426 zone->lru[l].nr_scan = 0;
1436 else 1427 else
1437 nr[l] = 0; 1428 nr[l] = 0;
@@ -1450,16 +1441,28 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
1450 nr[LRU_INACTIVE_FILE]) { 1441 nr[LRU_INACTIVE_FILE]) {
1451 for_each_evictable_lru(l) { 1442 for_each_evictable_lru(l) {
1452 if (nr[l]) { 1443 if (nr[l]) {
1453 nr_to_scan = min(nr[l], 1444 nr_to_scan = min(nr[l], swap_cluster_max);
1454 (unsigned long)sc->swap_cluster_max);
1455 nr[l] -= nr_to_scan; 1445 nr[l] -= nr_to_scan;
1456 1446
1457 nr_reclaimed += shrink_list(l, nr_to_scan, 1447 nr_reclaimed += shrink_list(l, nr_to_scan,
1458 zone, sc, priority); 1448 zone, sc, priority);
1459 } 1449 }
1460 } 1450 }
1451 /*
1452 * On large memory systems, scan >> priority can become
1453 * really large. This is fine for the starting priority;
1454 * we want to put equal scanning pressure on each zone.
1455 * However, if the VM has a harder time of freeing pages,
1456 * with multiple processes reclaiming pages, the total
1457 * freeing target can get unreasonably large.
1458 */
1459 if (nr_reclaimed > swap_cluster_max &&
1460 priority < DEF_PRIORITY && !current_is_kswapd())
1461 break;
1461 } 1462 }
1462 1463
1464 sc->nr_reclaimed = nr_reclaimed;
1465
1463 /* 1466 /*
1464 * Even if we did not try to evict anon pages at all, we want to 1467 * Even if we did not try to evict anon pages at all, we want to
1465 * rebalance the anon lru active/inactive ratio. 1468 * rebalance the anon lru active/inactive ratio.
@@ -1470,7 +1473,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
1470 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); 1473 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1471 1474
1472 throttle_vm_writeout(sc->gfp_mask); 1475 throttle_vm_writeout(sc->gfp_mask);
1473 return nr_reclaimed;
1474} 1476}
1475 1477
1476/* 1478/*
@@ -1484,16 +1486,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
1484 * b) The zones may be over pages_high but they must go *over* pages_high to 1486 * b) The zones may be over pages_high but they must go *over* pages_high to
1485 * satisfy the `incremental min' zone defense algorithm. 1487 * satisfy the `incremental min' zone defense algorithm.
1486 * 1488 *
1487 * Returns the number of reclaimed pages.
1488 *
1489 * If a zone is deemed to be full of pinned pages then just give it a light 1489 * If a zone is deemed to be full of pinned pages then just give it a light
1490 * scan then give up on it. 1490 * scan then give up on it.
1491 */ 1491 */
1492static unsigned long shrink_zones(int priority, struct zonelist *zonelist, 1492static void shrink_zones(int priority, struct zonelist *zonelist,
1493 struct scan_control *sc) 1493 struct scan_control *sc)
1494{ 1494{
1495 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); 1495 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1496 unsigned long nr_reclaimed = 0;
1497 struct zoneref *z; 1496 struct zoneref *z;
1498 struct zone *zone; 1497 struct zone *zone;
1499 1498
@@ -1524,10 +1523,8 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
1524 priority); 1523 priority);
1525 } 1524 }
1526 1525
1527 nr_reclaimed += shrink_zone(priority, zone, sc); 1526 shrink_zone(priority, zone, sc);
1528 } 1527 }
1529
1530 return nr_reclaimed;
1531} 1528}
1532 1529
1533/* 1530/*
@@ -1552,7 +1549,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1552 int priority; 1549 int priority;
1553 unsigned long ret = 0; 1550 unsigned long ret = 0;
1554 unsigned long total_scanned = 0; 1551 unsigned long total_scanned = 0;
1555 unsigned long nr_reclaimed = 0;
1556 struct reclaim_state *reclaim_state = current->reclaim_state; 1552 struct reclaim_state *reclaim_state = current->reclaim_state;
1557 unsigned long lru_pages = 0; 1553 unsigned long lru_pages = 0;
1558 struct zoneref *z; 1554 struct zoneref *z;
@@ -1580,7 +1576,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1580 sc->nr_scanned = 0; 1576 sc->nr_scanned = 0;
1581 if (!priority) 1577 if (!priority)
1582 disable_swap_token(); 1578 disable_swap_token();
1583 nr_reclaimed += shrink_zones(priority, zonelist, sc); 1579 shrink_zones(priority, zonelist, sc);
1584 /* 1580 /*
1585 * Don't shrink slabs when reclaiming memory from 1581 * Don't shrink slabs when reclaiming memory from
1586 * over limit cgroups 1582 * over limit cgroups
@@ -1588,13 +1584,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1588 if (scan_global_lru(sc)) { 1584 if (scan_global_lru(sc)) {
1589 shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); 1585 shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
1590 if (reclaim_state) { 1586 if (reclaim_state) {
1591 nr_reclaimed += reclaim_state->reclaimed_slab; 1587 sc->nr_reclaimed += reclaim_state->reclaimed_slab;
1592 reclaim_state->reclaimed_slab = 0; 1588 reclaim_state->reclaimed_slab = 0;
1593 } 1589 }
1594 } 1590 }
1595 total_scanned += sc->nr_scanned; 1591 total_scanned += sc->nr_scanned;
1596 if (nr_reclaimed >= sc->swap_cluster_max) { 1592 if (sc->nr_reclaimed >= sc->swap_cluster_max) {
1597 ret = nr_reclaimed; 1593 ret = sc->nr_reclaimed;
1598 goto out; 1594 goto out;
1599 } 1595 }
1600 1596
@@ -1617,7 +1613,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1617 } 1613 }
1618 /* top priority shrink_zones still had more to do? don't OOM, then */ 1614 /* top priority shrink_zones still had more to do? don't OOM, then */
1619 if (!sc->all_unreclaimable && scan_global_lru(sc)) 1615 if (!sc->all_unreclaimable && scan_global_lru(sc))
1620 ret = nr_reclaimed; 1616 ret = sc->nr_reclaimed;
1621out: 1617out:
1622 /* 1618 /*
1623 * Now that we've scanned all the zones at this priority level, note 1619 * Now that we've scanned all the zones at this priority level, note
@@ -1712,7 +1708,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1712 int priority; 1708 int priority;
1713 int i; 1709 int i;
1714 unsigned long total_scanned; 1710 unsigned long total_scanned;
1715 unsigned long nr_reclaimed;
1716 struct reclaim_state *reclaim_state = current->reclaim_state; 1711 struct reclaim_state *reclaim_state = current->reclaim_state;
1717 struct scan_control sc = { 1712 struct scan_control sc = {
1718 .gfp_mask = GFP_KERNEL, 1713 .gfp_mask = GFP_KERNEL,
@@ -1731,7 +1726,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1731 1726
1732loop_again: 1727loop_again:
1733 total_scanned = 0; 1728 total_scanned = 0;
1734 nr_reclaimed = 0; 1729 sc.nr_reclaimed = 0;
1735 sc.may_writepage = !laptop_mode; 1730 sc.may_writepage = !laptop_mode;
1736 count_vm_event(PAGEOUTRUN); 1731 count_vm_event(PAGEOUTRUN);
1737 1732
@@ -1817,11 +1812,11 @@ loop_again:
1817 */ 1812 */
1818 if (!zone_watermark_ok(zone, order, 8*zone->pages_high, 1813 if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
1819 end_zone, 0)) 1814 end_zone, 0))
1820 nr_reclaimed += shrink_zone(priority, zone, &sc); 1815 shrink_zone(priority, zone, &sc);
1821 reclaim_state->reclaimed_slab = 0; 1816 reclaim_state->reclaimed_slab = 0;
1822 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, 1817 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
1823 lru_pages); 1818 lru_pages);
1824 nr_reclaimed += reclaim_state->reclaimed_slab; 1819 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
1825 total_scanned += sc.nr_scanned; 1820 total_scanned += sc.nr_scanned;
1826 if (zone_is_all_unreclaimable(zone)) 1821 if (zone_is_all_unreclaimable(zone))
1827 continue; 1822 continue;
@@ -1835,7 +1830,7 @@ loop_again:
1835 * even in laptop mode 1830 * even in laptop mode
1836 */ 1831 */
1837 if (total_scanned > SWAP_CLUSTER_MAX * 2 && 1832 if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
1838 total_scanned > nr_reclaimed + nr_reclaimed / 2) 1833 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
1839 sc.may_writepage = 1; 1834 sc.may_writepage = 1;
1840 } 1835 }
1841 if (all_zones_ok) 1836 if (all_zones_ok)
@@ -1853,7 +1848,7 @@ loop_again:
1853 * matches the direct reclaim path behaviour in terms of impact 1848 * matches the direct reclaim path behaviour in terms of impact
1854 * on zone->*_priority. 1849 * on zone->*_priority.
1855 */ 1850 */
1856 if (nr_reclaimed >= SWAP_CLUSTER_MAX) 1851 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
1857 break; 1852 break;
1858 } 1853 }
1859out: 1854out:
@@ -1872,10 +1867,27 @@ out:
1872 1867
1873 try_to_freeze(); 1868 try_to_freeze();
1874 1869
1870 /*
1871 * Fragmentation may mean that the system cannot be
1872 * rebalanced for high-order allocations in all zones.
1873 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
1874 * it means the zones have been fully scanned and are still
1875 * not balanced. For high-order allocations, there is
1876 * little point trying all over again as kswapd may
1877 * infinite loop.
1878 *
1879 * Instead, recheck all watermarks at order-0 as they
1880 * are the most important. If watermarks are ok, kswapd will go
1881 * back to sleep. High-order users can still perform direct
1882 * reclaim if they wish.
1883 */
1884 if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
1885 order = sc.order = 0;
1886
1875 goto loop_again; 1887 goto loop_again;
1876 } 1888 }
1877 1889
1878 return nr_reclaimed; 1890 return sc.nr_reclaimed;
1879} 1891}
1880 1892
1881/* 1893/*
@@ -2227,7 +2239,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2227 struct task_struct *p = current; 2239 struct task_struct *p = current;
2228 struct reclaim_state reclaim_state; 2240 struct reclaim_state reclaim_state;
2229 int priority; 2241 int priority;
2230 unsigned long nr_reclaimed = 0;
2231 struct scan_control sc = { 2242 struct scan_control sc = {
2232 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), 2243 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
2233 .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), 2244 .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -2260,9 +2271,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2260 priority = ZONE_RECLAIM_PRIORITY; 2271 priority = ZONE_RECLAIM_PRIORITY;
2261 do { 2272 do {
2262 note_zone_scanning_priority(zone, priority); 2273 note_zone_scanning_priority(zone, priority);
2263 nr_reclaimed += shrink_zone(priority, zone, &sc); 2274 shrink_zone(priority, zone, &sc);
2264 priority--; 2275 priority--;
2265 } while (priority >= 0 && nr_reclaimed < nr_pages); 2276 } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
2266 } 2277 }
2267 2278
2268 slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE); 2279 slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -2286,13 +2297,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
2286 * Update nr_reclaimed by the number of slab pages we 2297 * Update nr_reclaimed by the number of slab pages we
2287 * reclaimed from this zone. 2298 * reclaimed from this zone.
2288 */ 2299 */
2289 nr_reclaimed += slab_reclaimable - 2300 sc.nr_reclaimed += slab_reclaimable -
2290 zone_page_state(zone, NR_SLAB_RECLAIMABLE); 2301 zone_page_state(zone, NR_SLAB_RECLAIMABLE);
2291 } 2302 }
2292 2303
2293 p->reclaim_state = NULL; 2304 p->reclaim_state = NULL;
2294 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); 2305 current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
2295 return nr_reclaimed >= nr_pages; 2306 return sc.nr_reclaimed >= nr_pages;
2296} 2307}
2297 2308
2298int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 2309int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -2472,7 +2483,7 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
2472 * back onto @zone's unevictable list. 2483 * back onto @zone's unevictable list.
2473 */ 2484 */
2474#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ 2485#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
2475void scan_zone_unevictable_pages(struct zone *zone) 2486static void scan_zone_unevictable_pages(struct zone *zone)
2476{ 2487{
2477 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; 2488 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
2478 unsigned long scan; 2489 unsigned long scan;
@@ -2514,7 +2525,7 @@ void scan_zone_unevictable_pages(struct zone *zone)
2514 * that has possibly/probably made some previously unevictable pages 2525 * that has possibly/probably made some previously unevictable pages
2515 * evictable. 2526 * evictable.
2516 */ 2527 */
2517void scan_all_zones_unevictable_pages(void) 2528static void scan_all_zones_unevictable_pages(void)
2518{ 2529{
2519 struct zone *zone; 2530 struct zone *zone;
2520 2531
diff --git a/samples/firmware_class/firmware_sample_driver.c b/samples/firmware_class/firmware_sample_driver.c
index 11114f389c49..219a29896603 100644
--- a/samples/firmware_class/firmware_sample_driver.c
+++ b/samples/firmware_class/firmware_sample_driver.c
@@ -100,7 +100,7 @@ static void sample_probe_async(void)
100 " request_firmware_nowait failed\n"); 100 " request_firmware_nowait failed\n");
101} 101}
102 102
103static int sample_init(void) 103static int __init sample_init(void)
104{ 104{
105 device_initialize(&ghost_device); 105 device_initialize(&ghost_device);
106 /* since there is no real hardware insertion I just call the 106 /* since there is no real hardware insertion I just call the
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 08d0d3ff3263..8d9b55a12023 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c
@@ -101,7 +101,7 @@ static struct attribute_group attr_group = {
101 101
102static struct kobject *example_kobj; 102static struct kobject *example_kobj;
103 103
104static int example_init(void) 104static int __init example_init(void)
105{ 105{
106 int retval; 106 int retval;
107 107
@@ -126,7 +126,7 @@ static int example_init(void)
126 return retval; 126 return retval;
127} 127}
128 128
129static void example_exit(void) 129static void __exit example_exit(void)
130{ 130{
131 kobject_put(example_kobj); 131 kobject_put(example_kobj);
132} 132}
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 7395c0bbae18..45b7d56fb541 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -229,7 +229,7 @@ static void destroy_foo_obj(struct foo_obj *foo)
229 kobject_put(&foo->kobj); 229 kobject_put(&foo->kobj);
230} 230}
231 231
232static int example_init(void) 232static int __init example_init(void)
233{ 233{
234 /* 234 /*
235 * Create a kset with the name of "kset_example", 235 * Create a kset with the name of "kset_example",
@@ -264,7 +264,7 @@ foo_error:
264 return -EINVAL; 264 return -EINVAL;
265} 265}
266 266
267static void example_exit(void) 267static void __exit example_exit(void)
268{ 268{
269 destroy_foo_obj(baz_obj); 269 destroy_foo_obj(baz_obj);
270 destroy_foo_obj(bar_obj); 270 destroy_foo_obj(bar_obj);
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
index e90dc5d04392..e9cd9c0bc84f 100644
--- a/samples/markers/marker-example.c
+++ b/samples/markers/marker-example.c
@@ -30,7 +30,7 @@ static struct file_operations mark_ops = {
30 .open = my_open, 30 .open = my_open,
31}; 31};
32 32
33static int example_init(void) 33static int __init example_init(void)
34{ 34{
35 printk(KERN_ALERT "example init\n"); 35 printk(KERN_ALERT "example init\n");
36 pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); 36 pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops);
@@ -39,7 +39,7 @@ static int example_init(void)
39 return 0; 39 return 0;
40} 40}
41 41
42static void example_exit(void) 42static void __exit example_exit(void)
43{ 43{
44 printk(KERN_ALERT "example exit\n"); 44 printk(KERN_ALERT "example exit\n");
45 remove_proc_entry("marker-example", NULL); 45 remove_proc_entry("marker-example", NULL);
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
index e3a964889dc7..9e60eb6ca2d8 100644
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -28,7 +28,7 @@ static void probe_subsys_eventb(void)
28 printk(KERN_INFO "Event B is encountered\n"); 28 printk(KERN_INFO "Event B is encountered\n");
29} 29}
30 30
31int __init tp_sample_trace_init(void) 31static int __init tp_sample_trace_init(void)
32{ 32{
33 int ret; 33 int ret;
34 34
@@ -42,7 +42,7 @@ int __init tp_sample_trace_init(void)
42 42
43module_init(tp_sample_trace_init); 43module_init(tp_sample_trace_init);
44 44
45void __exit tp_sample_trace_exit(void) 45static void __exit tp_sample_trace_exit(void)
46{ 46{
47 unregister_trace_subsys_eventb(probe_subsys_eventb); 47 unregister_trace_subsys_eventb(probe_subsys_eventb);
48 unregister_trace_subsys_event(probe_subsys_event); 48 unregister_trace_subsys_event(probe_subsys_event);
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
index 685a5acb4562..be2a960573f1 100644
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -18,7 +18,7 @@ static void probe_subsys_event(struct inode *inode, struct file *file)
18 inode->i_ino); 18 inode->i_ino);
19} 19}
20 20
21int __init tp_sample_trace_init(void) 21static int __init tp_sample_trace_init(void)
22{ 22{
23 int ret; 23 int ret;
24 24
@@ -30,7 +30,7 @@ int __init tp_sample_trace_init(void)
30 30
31module_init(tp_sample_trace_init); 31module_init(tp_sample_trace_init);
32 32
33void __exit tp_sample_trace_exit(void) 33static void __exit tp_sample_trace_exit(void)
34{ 34{
35 unregister_trace_subsys_event(probe_subsys_event); 35 unregister_trace_subsys_event(probe_subsys_event);
36 tracepoint_synchronize_unregister(); 36 tracepoint_synchronize_unregister();
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 00d169792a3e..68d5dc0310e4 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -32,7 +32,7 @@ static struct file_operations mark_ops = {
32 .open = my_open, 32 .open = my_open,
33}; 33};
34 34
35static int example_init(void) 35static int __init example_init(void)
36{ 36{
37 printk(KERN_ALERT "example init\n"); 37 printk(KERN_ALERT "example init\n");
38 pentry_example = proc_create("tracepoint-example", 0444, NULL, 38 pentry_example = proc_create("tracepoint-example", 0444, NULL,
@@ -42,7 +42,7 @@ static int example_init(void)
42 return 0; 42 return 0;
43} 43}
44 44
45static void example_exit(void) 45static void __exit example_exit(void)
46{ 46{
47 printk(KERN_ALERT "example exit\n"); 47 printk(KERN_ALERT "example exit\n");
48 remove_proc_entry("tracepoint-example", NULL); 48 remove_proc_entry("tracepoint-example", NULL);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f88bb3e21cda..7bed4ed2c519 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,7 +1,8 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit) 2# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
3# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) 3# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
4# (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc) 4# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
5# (c) 2008, Andy Whitcroft <apw@canonical.com>
5# Licensed under the terms of the GNU GPL License version 2 6# Licensed under the terms of the GNU GPL License version 2
6 7
7use strict; 8use strict;
@@ -9,7 +10,7 @@ use strict;
9my $P = $0; 10my $P = $0;
10$P =~ s@.*/@@g; 11$P =~ s@.*/@@g;
11 12
12my $V = '0.24'; 13my $V = '0.26';
13 14
14use Getopt::Long qw(:config no_auto_abbrev); 15use Getopt::Long qw(:config no_auto_abbrev);
15 16
@@ -68,7 +69,9 @@ my $dbg_possible = 0;
68my $dbg_type = 0; 69my $dbg_type = 0;
69my $dbg_attr = 0; 70my $dbg_attr = 0;
70for my $key (keys %debug) { 71for my $key (keys %debug) {
71 eval "\${dbg_$key} = '$debug{$key}';" 72 ## no critic
73 eval "\${dbg_$key} = '$debug{$key}';";
74 die "$@" if ($@);
72} 75}
73 76
74if ($terse) { 77if ($terse) {
@@ -116,7 +119,8 @@ our $Attribute = qr{
116 __(?:mem|cpu|dev|)(?:initdata|init)| 119 __(?:mem|cpu|dev|)(?:initdata|init)|
117 ____cacheline_aligned| 120 ____cacheline_aligned|
118 ____cacheline_aligned_in_smp| 121 ____cacheline_aligned_in_smp|
119 ____cacheline_internodealigned_in_smp 122 ____cacheline_internodealigned_in_smp|
123 __weak
120 }x; 124 }x;
121our $Modifier; 125our $Modifier;
122our $Inline = qr{inline|__always_inline|noinline}; 126our $Inline = qr{inline|__always_inline|noinline};
@@ -125,6 +129,7 @@ our $Lval = qr{$Ident(?:$Member)*};
125 129
126our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*}; 130our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*};
127our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)}; 131our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)};
132our $Compare = qr{<=|>=|==|!=|<|>};
128our $Operators = qr{ 133our $Operators = qr{
129 <=|>=|==|!=| 134 <=|>=|==|!=|
130 =>|->|<<|>>|<|>|!|~| 135 =>|->|<<|>>|<|>|!|~|
@@ -190,7 +195,7 @@ sub build_types {
190 }x; 195 }x;
191 $Type = qr{ 196 $Type = qr{
192 $NonptrType 197 $NonptrType
193 (?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)? 198 (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)?
194 (?:\s+$Inline|\s+$Modifier)* 199 (?:\s+$Inline|\s+$Modifier)*
195 }x; 200 }x;
196 $Declare = qr{(?:$Storage\s+)?$Type}; 201 $Declare = qr{(?:$Storage\s+)?$Type};
@@ -203,9 +208,9 @@ my @dep_includes = ();
203my @dep_functions = (); 208my @dep_functions = ();
204my $removal = "Documentation/feature-removal-schedule.txt"; 209my $removal = "Documentation/feature-removal-schedule.txt";
205if ($tree && -f "$root/$removal") { 210if ($tree && -f "$root/$removal") {
206 open(REMOVE, "<$root/$removal") || 211 open(my $REMOVE, '<', "$root/$removal") ||
207 die "$P: $removal: open failed - $!\n"; 212 die "$P: $removal: open failed - $!\n";
208 while (<REMOVE>) { 213 while (<$REMOVE>) {
209 if (/^Check:\s+(.*\S)/) { 214 if (/^Check:\s+(.*\S)/) {
210 for my $entry (split(/[, ]+/, $1)) { 215 for my $entry (split(/[, ]+/, $1)) {
211 if ($entry =~ m@include/(.*)@) { 216 if ($entry =~ m@include/(.*)@) {
@@ -217,17 +222,21 @@ if ($tree && -f "$root/$removal") {
217 } 222 }
218 } 223 }
219 } 224 }
225 close($REMOVE);
220} 226}
221 227
222my @rawlines = (); 228my @rawlines = ();
223my @lines = (); 229my @lines = ();
224my $vname; 230my $vname;
225for my $filename (@ARGV) { 231for my $filename (@ARGV) {
232 my $FILE;
226 if ($file) { 233 if ($file) {
227 open(FILE, "diff -u /dev/null $filename|") || 234 open($FILE, '-|', "diff -u /dev/null $filename") ||
228 die "$P: $filename: diff failed - $!\n"; 235 die "$P: $filename: diff failed - $!\n";
236 } elsif ($filename eq '-') {
237 open($FILE, '<&STDIN');
229 } else { 238 } else {
230 open(FILE, "<$filename") || 239 open($FILE, '<', "$filename") ||
231 die "$P: $filename: open failed - $!\n"; 240 die "$P: $filename: open failed - $!\n";
232 } 241 }
233 if ($filename eq '-') { 242 if ($filename eq '-') {
@@ -235,11 +244,11 @@ for my $filename (@ARGV) {
235 } else { 244 } else {
236 $vname = $filename; 245 $vname = $filename;
237 } 246 }
238 while (<FILE>) { 247 while (<$FILE>) {
239 chomp; 248 chomp;
240 push(@rawlines, $_); 249 push(@rawlines, $_);
241 } 250 }
242 close(FILE); 251 close($FILE);
243 if (!process($filename)) { 252 if (!process($filename)) {
244 $exit = 1; 253 $exit = 1;
245 } 254 }
@@ -366,7 +375,7 @@ sub sanitise_line {
366 } 375 }
367 } 376 }
368 377
369 #print "SQ:$sanitise_quote\n"; 378 #print "c<$c> SQ<$sanitise_quote>\n";
370 if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { 379 if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
371 substr($res, $off, 1, $;); 380 substr($res, $off, 1, $;);
372 } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { 381 } elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
@@ -402,6 +411,7 @@ sub ctx_statement_block {
402 411
403 my $type = ''; 412 my $type = '';
404 my $level = 0; 413 my $level = 0;
414 my @stack = ([$type, $level]);
405 my $p; 415 my $p;
406 my $c; 416 my $c;
407 my $len = 0; 417 my $len = 0;
@@ -433,6 +443,16 @@ sub ctx_statement_block {
433 $remainder = substr($blk, $off); 443 $remainder = substr($blk, $off);
434 444
435 #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n"; 445 #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
446
447 # Handle nested #if/#else.
448 if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
449 push(@stack, [ $type, $level ]);
450 } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
451 ($type, $level) = @{$stack[$#stack - 1]};
452 } elsif ($remainder =~ /^#\s*endif\b/) {
453 ($type, $level) = @{pop(@stack)};
454 }
455
436 # Statement ends at the ';' or a close '}' at the 456 # Statement ends at the ';' or a close '}' at the
437 # outermost level. 457 # outermost level.
438 if ($level == 0 && $c eq ';') { 458 if ($level == 0 && $c eq ';') {
@@ -579,11 +599,22 @@ sub ctx_block_get {
579 my @res = (); 599 my @res = ();
580 600
581 my $level = 0; 601 my $level = 0;
602 my @stack = ($level);
582 for ($line = $start; $remain > 0; $line++) { 603 for ($line = $start; $remain > 0; $line++) {
583 next if ($rawlines[$line] =~ /^-/); 604 next if ($rawlines[$line] =~ /^-/);
584 $remain--; 605 $remain--;
585 606
586 $blk .= $rawlines[$line]; 607 $blk .= $rawlines[$line];
608
609 # Handle nested #if/#else.
610 if ($rawlines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
611 push(@stack, $level);
612 } elsif ($rawlines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
613 $level = $stack[$#stack - 1];
614 } elsif ($rawlines[$line] =~ /^.\s*#\s*endif\b/) {
615 $level = pop(@stack);
616 }
617
587 foreach my $c (split(//, $rawlines[$line])) { 618 foreach my $c (split(//, $rawlines[$line])) {
588 ##print "C<$c>L<$level><$open$close>O<$off>\n"; 619 ##print "C<$c>L<$level><$open$close>O<$off>\n";
589 if ($off > 0) { 620 if ($off > 0) {
@@ -843,11 +874,11 @@ sub annotate_values {
843 $type = 'V'; 874 $type = 'V';
844 $av_pending = 'V'; 875 $av_pending = 'V';
845 876
846 } elsif ($cur =~ /^($Ident\s*):/) { 877 } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) {
847 if ($type eq 'E') { 878 if (defined $2 && $type eq 'C' || $type eq 'T') {
848 $av_pend_colon = 'L';
849 } elsif ($type eq 'T') {
850 $av_pend_colon = 'B'; 879 $av_pend_colon = 'B';
880 } elsif ($type eq 'E') {
881 $av_pend_colon = 'L';
851 } 882 }
852 print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1); 883 print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
853 $type = 'V'; 884 $type = 'V';
@@ -865,6 +896,10 @@ sub annotate_values {
865 $type = 'E'; 896 $type = 'E';
866 $av_pend_colon = 'O'; 897 $av_pend_colon = 'O';
867 898
899 } elsif ($cur =~/^(,)/) {
900 print "COMMA($1)\n" if ($dbg_values > 1);
901 $type = 'C';
902
868 } elsif ($cur =~ /^(\?)/o) { 903 } elsif ($cur =~ /^(\?)/o) {
869 print "QUESTION($1)\n" if ($dbg_values > 1); 904 print "QUESTION($1)\n" if ($dbg_values > 1);
870 $type = 'N'; 905 $type = 'N';
@@ -880,7 +915,7 @@ sub annotate_values {
880 } 915 }
881 $av_pend_colon = 'O'; 916 $av_pend_colon = 'O';
882 917
883 } elsif ($cur =~ /^(;|\[)/o) { 918 } elsif ($cur =~ /^(\[)/o) {
884 print "CLOSE($1)\n" if ($dbg_values > 1); 919 print "CLOSE($1)\n" if ($dbg_values > 1);
885 $type = 'N'; 920 $type = 'N';
886 921
@@ -1051,6 +1086,7 @@ sub process {
1051 my $in_comment = 0; 1086 my $in_comment = 0;
1052 my $comment_edge = 0; 1087 my $comment_edge = 0;
1053 my $first_line = 0; 1088 my $first_line = 0;
1089 my $p1_prefix = '';
1054 1090
1055 my $prev_values = 'E'; 1091 my $prev_values = 'E';
1056 1092
@@ -1097,9 +1133,12 @@ sub process {
1097 $rawlines[$ln - 1] =~ /^-/); 1133 $rawlines[$ln - 1] =~ /^-/);
1098 $cnt--; 1134 $cnt--;
1099 #print "RAW<$rawlines[$ln - 1]>\n"; 1135 #print "RAW<$rawlines[$ln - 1]>\n";
1100 ($edge) = (defined $rawlines[$ln - 1] && 1136 last if (!defined $rawlines[$ln - 1]);
1101 $rawlines[$ln - 1] =~ m@(/\*|\*/)@); 1137 if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ &&
1102 last if (defined $edge); 1138 $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) {
1139 ($edge) = $1;
1140 last;
1141 }
1103 } 1142 }
1104 if (defined $edge && $edge eq '*/') { 1143 if (defined $edge && $edge eq '*/') {
1105 $in_comment = 1; 1144 $in_comment = 1;
@@ -1109,7 +1148,7 @@ sub process {
1109 # is the start of a diff block and this line starts 1148 # is the start of a diff block and this line starts
1110 # ' *' then it is very likely a comment. 1149 # ' *' then it is very likely a comment.
1111 if (!defined $edge && 1150 if (!defined $edge &&
1112 $rawlines[$linenr] =~ m@^.\s* \*(?:\s|$)@) 1151 $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@)
1113 { 1152 {
1114 $in_comment = 1; 1153 $in_comment = 1;
1115 } 1154 }
@@ -1196,7 +1235,12 @@ sub process {
1196 # extract the filename as it passes 1235 # extract the filename as it passes
1197 if ($line=~/^\+\+\+\s+(\S+)/) { 1236 if ($line=~/^\+\+\+\s+(\S+)/) {
1198 $realfile = $1; 1237 $realfile = $1;
1199 $realfile =~ s@^[^/]*/@@; 1238 $realfile =~ s@^([^/]*)/@@;
1239
1240 $p1_prefix = $1;
1241 if ($tree && $p1_prefix ne '' && -e "$root/$p1_prefix") {
1242 WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
1243 }
1200 1244
1201 if ($realfile =~ m@^include/asm/@) { 1245 if ($realfile =~ m@^include/asm/@) {
1202 ERROR("do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n"); 1246 ERROR("do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n");
@@ -1336,7 +1380,7 @@ sub process {
1336 } 1380 }
1337 1381
1338 # any (foo ... *) is a pointer cast, and foo is a type 1382 # any (foo ... *) is a pointer cast, and foo is a type
1339 while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/sg) { 1383 while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) {
1340 possible($1, "C:" . $s); 1384 possible($1, "C:" . $s);
1341 } 1385 }
1342 1386
@@ -1594,7 +1638,7 @@ sub process {
1594 $herecurr); 1638 $herecurr);
1595 } 1639 }
1596# check for static initialisers. 1640# check for static initialisers.
1597 if ($line =~ /\s*static\s.*=\s*(0|NULL|false)\s*;/) { 1641 if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
1598 ERROR("do not initialise statics to 0 or NULL\n" . 1642 ERROR("do not initialise statics to 0 or NULL\n" .
1599 $herecurr); 1643 $herecurr);
1600 } 1644 }
@@ -1602,7 +1646,7 @@ sub process {
1602# check for new typedefs, only function parameters and sparse annotations 1646# check for new typedefs, only function parameters and sparse annotations
1603# make sense. 1647# make sense.
1604 if ($line =~ /\btypedef\s/ && 1648 if ($line =~ /\btypedef\s/ &&
1605 $line !~ /\btypedef\s+$Type\s+\(\s*\*?$Ident\s*\)\s*\(/ && 1649 $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ &&
1606 $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && 1650 $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ &&
1607 $line !~ /\b$typeTypedefs\b/ && 1651 $line !~ /\b$typeTypedefs\b/ &&
1608 $line !~ /\b__bitwise(?:__|)\b/) { 1652 $line !~ /\b__bitwise(?:__|)\b/) {
@@ -1610,21 +1654,39 @@ sub process {
1610 } 1654 }
1611 1655
1612# * goes on variable not on type 1656# * goes on variable not on type
1613 if ($line =~ m{\($NonptrType(\*+)(?:\s+const)?\)}) { 1657 # (char*[ const])
1614 ERROR("\"(foo$1)\" should be \"(foo $1)\"\n" . 1658 if ($line =~ m{\($NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)*)\)}) {
1615 $herecurr); 1659 my ($from, $to) = ($1, $1);
1616 1660
1617 } elsif ($line =~ m{\($NonptrType\s+(\*+)(?!\s+const)\s+\)}) { 1661 # Should start with a space.
1618 ERROR("\"(foo $1 )\" should be \"(foo $1)\"\n" . 1662 $to =~ s/^(\S)/ $1/;
1619 $herecurr); 1663 # Should not end with a space.
1664 $to =~ s/\s+$//;
1665 # '*'s should not have spaces between.
1666 while ($to =~ s/(.)\s\*/$1\*/) {
1667 }
1620 1668
1621 } elsif ($line =~ m{\b$NonptrType(\*+)(?:\s+(?:$Attribute|$Sparse))?\s+[A-Za-z\d_]+}) { 1669 #print "from<$from> to<$to>\n";
1622 ERROR("\"foo$1 bar\" should be \"foo $1bar\"\n" . 1670 if ($from ne $to) {
1623 $herecurr); 1671 ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr);
1672 }
1673 } elsif ($line =~ m{\b$NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)?)($Ident)}) {
1674 my ($from, $to, $ident) = ($1, $1, $2);
1624 1675
1625 } elsif ($line =~ m{\b$NonptrType\s+(\*+)(?!\s+(?:$Attribute|$Sparse))\s+[A-Za-z\d_]+}) { 1676 # Should start with a space.
1626 ERROR("\"foo $1 bar\" should be \"foo $1bar\"\n" . 1677 $to =~ s/^(\S)/ $1/;
1627 $herecurr); 1678 # Should not end with a space.
1679 $to =~ s/\s+$//;
1680 # '*'s should not have spaces between.
1681 while ($to =~ s/(.)\s\*/$1\*/) {
1682 }
1683 # Modifiers should have spaces.
1684 $to =~ s/(\b$Modifier$)/$1 /;
1685
1686 #print "from<$from> to<$to>\n";
1687 if ($from ne $to) {
1688 ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr);
1689 }
1628 } 1690 }
1629 1691
1630# # no BUG() or BUG_ON() 1692# # no BUG() or BUG_ON()
@@ -1759,7 +1821,7 @@ sub process {
1759 $c = 'C' if ($elements[$n + 2] =~ /^$;/); 1821 $c = 'C' if ($elements[$n + 2] =~ /^$;/);
1760 $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/); 1822 $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
1761 $c = 'O' if ($elements[$n + 2] eq ''); 1823 $c = 'O' if ($elements[$n + 2] eq '');
1762 $c = 'E' if ($elements[$n + 2] =~ /\s*\\$/); 1824 $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/);
1763 } else { 1825 } else {
1764 $c = 'E'; 1826 $c = 'E';
1765 } 1827 }
@@ -1950,9 +2012,9 @@ sub process {
1950 my $spacing = $1; 2012 my $spacing = $1;
1951 my $value = $2; 2013 my $value = $2;
1952 2014
1953 # Flatten any parentheses and braces 2015 # Flatten any parentheses
1954 $value =~ s/\)\(/\) \(/g; 2016 $value =~ s/\)\(/\) \(/g;
1955 while ($value =~ s/\([^\(\)]*\)/1/) { 2017 while ($value !~ /(?:$Ident|-?$Constant)\s*$Compare\s*(?:$Ident|-?$Constant)/ && $value =~ s/\([^\(\)]*\)/1/) {
1956 } 2018 }
1957 2019
1958 if ($value =~ /^(?:$Ident|-?$Constant)$/) { 2020 if ($value =~ /^(?:$Ident|-?$Constant)$/) {
@@ -1992,7 +2054,7 @@ sub process {
1992 $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { 2054 $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
1993 my ($s, $c) = ($stat, $cond); 2055 my ($s, $c) = ($stat, $cond);
1994 2056
1995 if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/) { 2057 if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) {
1996 ERROR("do not use assignment in if condition\n" . $herecurr); 2058 ERROR("do not use assignment in if condition\n" . $herecurr);
1997 } 2059 }
1998 2060
@@ -2167,9 +2229,10 @@ sub process {
2167 MODULE_PARAM_DESC| 2229 MODULE_PARAM_DESC|
2168 DECLARE_PER_CPU| 2230 DECLARE_PER_CPU|
2169 DEFINE_PER_CPU| 2231 DEFINE_PER_CPU|
2170 __typeof__\( 2232 __typeof__\(|
2233 \.$Ident\s*=\s*
2171 }x; 2234 }x;
2172 #print "REST<$rest>\n"; 2235 #print "REST<$rest> dstat<$dstat>\n";
2173 if ($rest ne '') { 2236 if ($rest ne '') {
2174 if ($rest !~ /while\s*\(/ && 2237 if ($rest !~ /while\s*\(/ &&
2175 $dstat !~ /$exceptions/) 2238 $dstat !~ /$exceptions/)
@@ -2189,6 +2252,15 @@ sub process {
2189 } 2252 }
2190 } 2253 }
2191 2254
2255# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
2256# all assignments may have only one of the following with an assignment:
2257# .
2258# ALIGN(...)
2259# VMLINUX_SYMBOL(...)
2260 if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
2261 WARN("vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
2262 }
2263
2192# check for redundant bracing round if etc 2264# check for redundant bracing round if etc
2193 if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) { 2265 if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) {
2194 my ($level, $endln, @chunks) = 2266 my ($level, $endln, @chunks) =
@@ -2443,6 +2515,11 @@ sub process {
2443 if ($line =~ /^.\s*__initcall\s*\(/) { 2515 if ($line =~ /^.\s*__initcall\s*\(/) {
2444 WARN("please use device_initcall() instead of __initcall()\n" . $herecurr); 2516 WARN("please use device_initcall() instead of __initcall()\n" . $herecurr);
2445 } 2517 }
2518# check for struct file_operations, ensure they are const.
2519 if ($line =~ /\bstruct\s+file_operations\b/ &&
2520 $line !~ /\bconst\b/) {
2521 WARN("struct file_operations should normally be const\n" . $herecurr);
2522 }
2446 2523
2447# use of NR_CPUS is usually wrong 2524# use of NR_CPUS is usually wrong
2448# ignore definitions of NR_CPUS and usage to define arrays as likely right 2525# ignore definitions of NR_CPUS and usage to define arrays as likely right
@@ -2466,6 +2543,15 @@ sub process {
2466 last; 2543 last;
2467 } 2544 }
2468 } 2545 }
2546
2547# whine mightly about in_atomic
2548 if ($line =~ /\bin_atomic\s*\(/) {
2549 if ($realfile =~ m@^drivers/@) {
2550 ERROR("do not use in_atomic in drivers\n" . $herecurr);
2551 } else {
2552 WARN("use of in_atomic() is incorrect outside core kernel code\n" . $herecurr);
2553 }
2554 }
2469 } 2555 }
2470 2556
2471 # If we have no input at all, then there is nothing to report on 2557 # If we have no input at all, then there is nothing to report on
diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
new file mode 100644
index 000000000000..700a7a654a3f
--- /dev/null
+++ b/scripts/markup_oops.pl
@@ -0,0 +1,162 @@
1#!/usr/bin/perl -w
2
3# Copyright 2008, Intel Corporation
4#
5# This file is part of the Linux kernel
6#
7# This program file is free software; you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by the
9# Free Software Foundation; version 2 of the License.
10#
11# Authors:
12# Arjan van de Ven <arjan@linux.intel.com>
13
14
15my $vmlinux_name = $ARGV[0];
16
17#
18# Step 1: Parse the oops to find the EIP value
19#
20
21my $target = "0";
22while (<STDIN>) {
23 if ($_ =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) {
24 $target = $1;
25 }
26}
27
28if ($target =~ /^f8/) {
29 print "This script does not work on modules ... \n";
30 exit;
31}
32
33if ($target eq "0") {
34 print "No oops found!\n";
35 print "Usage: \n";
36 print " dmesg | perl scripts/markup_oops.pl vmlinux\n";
37 exit;
38}
39
40my $counter = 0;
41my $state = 0;
42my $center = 0;
43my @lines;
44
45sub InRange {
46 my ($address, $target) = @_;
47 my $ad = "0x".$address;
48 my $ta = "0x".$target;
49 my $delta = hex($ad) - hex($ta);
50
51 if (($delta > -4096) && ($delta < 4096)) {
52 return 1;
53 }
54 return 0;
55}
56
57
58
59# first, parse the input into the lines array, but to keep size down,
60# we only do this for 4Kb around the sweet spot
61
62my $filename;
63
64open(FILE, "objdump -dS $vmlinux_name |") || die "Cannot start objdump";
65
66while (<FILE>) {
67 my $line = $_;
68 chomp($line);
69 if ($state == 0) {
70 if ($line =~ /^([a-f0-9]+)\:/) {
71 if (InRange($1, $target)) {
72 $state = 1;
73 }
74 }
75 } else {
76 if ($line =~ /^([a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]+)\:/) {
77 my $val = $1;
78 if (!InRange($val, $target)) {
79 last;
80 }
81 if ($val eq $target) {
82 $center = $counter;
83 }
84 }
85 $lines[$counter] = $line;
86
87 $counter = $counter + 1;
88 }
89}
90
91close(FILE);
92
93if ($counter == 0) {
94 print "No matching code found \n";
95 exit;
96}
97
98if ($center == 0) {
99 print "No matching code found \n";
100 exit;
101}
102
103my $start;
104my $finish;
105my $codelines = 0;
106my $binarylines = 0;
107# now we go up and down in the array to find how much we want to print
108
109$start = $center;
110
111while ($start > 1) {
112 $start = $start - 1;
113 my $line = $lines[$start];
114 if ($line =~ /^([a-f0-9]+)\:/) {
115 $binarylines = $binarylines + 1;
116 } else {
117 $codelines = $codelines + 1;
118 }
119 if ($codelines > 10) {
120 last;
121 }
122 if ($binarylines > 20) {
123 last;
124 }
125}
126
127
128$finish = $center;
129$codelines = 0;
130$binarylines = 0;
131while ($finish < $counter) {
132 $finish = $finish + 1;
133 my $line = $lines[$finish];
134 if ($line =~ /^([a-f0-9]+)\:/) {
135 $binarylines = $binarylines + 1;
136 } else {
137 $codelines = $codelines + 1;
138 }
139 if ($codelines > 10) {
140 last;
141 }
142 if ($binarylines > 20) {
143 last;
144 }
145}
146
147
148my $i;
149
150my $fulltext = "";
151$i = $start;
152while ($i < $finish) {
153 if ($i == $center) {
154 $fulltext = $fulltext . "*$lines[$i] <----- faulting instruction\n";
155 } else {
156 $fulltext = $fulltext . " $lines[$i]\n";
157 }
158 $i = $i +1;
159}
160
161print $fulltext;
162
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 44a69bb8d4f0..7872a02f6ca9 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -152,6 +152,10 @@ static int __snd_open(struct inode *inode, struct file *file)
152 } 152 }
153 old_fops = file->f_op; 153 old_fops = file->f_op;
154 file->f_op = fops_get(mptr->f_ops); 154 file->f_op = fops_get(mptr->f_ops);
155 if (file->f_op == NULL) {
156 file->f_op = old_fops;
157 return -ENODEV;
158 }
155 if (file->f_op->open) 159 if (file->f_op->open)
156 err = file->f_op->open(inode, file); 160 err = file->f_op->open(inode, file);
157 if (err) { 161 if (err) {