-rw-r--r-- CREDITS | 7
-rw-r--r-- Documentation/ABI/testing/sysfs-fs-nilfs2 | 269
-rw-r--r-- Documentation/cgroups/memcg_test.txt | 160
-rw-r--r-- Documentation/devicetree/bindings/i2c/trivial-devices.txt | 1
-rw-r--r-- Documentation/oops-tracing.txt | 2
-rw-r--r-- Documentation/rapidio/tsi721.txt | 19
-rw-r--r-- Documentation/sysctl/kernel.txt | 1
-rw-r--r-- MAINTAINERS | 73
-rw-r--r-- arch/alpha/include/asm/Kbuild | 1
-rw-r--r-- arch/alpha/include/asm/scatterlist.h | 6
-rw-r--r-- arch/arm/Kconfig | 3
-rw-r--r-- arch/arm/include/asm/Kbuild | 1
-rw-r--r-- arch/arm/include/asm/scatterlist.h | 12
-rw-r--r-- arch/arm/mach-omap2/board-omap3touchbook.c | 2
-rw-r--r-- arch/arm/mach-omap2/mux.c | 22
-rw-r--r-- arch/arm/mach-pxa/balloon3.c | 2
-rw-r--r-- arch/arm/mach-pxa/viper.c | 2
-rw-r--r-- arch/arm/mach-s3c24xx/mach-jive.c | 2
-rw-r--r-- arch/arm/mach-w90x900/cpu.c | 3
-rw-r--r-- arch/arm64/Kconfig | 1
-rw-r--r-- arch/arm64/include/asm/page.h | 3
-rw-r--r-- arch/arm64/kernel/vdso.c | 19
-rw-r--r-- arch/cris/include/asm/Kbuild | 1
-rw-r--r-- arch/cris/include/asm/scatterlist.h | 6
-rw-r--r-- arch/frv/include/asm/Kbuild | 1
-rw-r--r-- arch/frv/include/asm/scatterlist.h | 6
-rw-r--r-- arch/ia64/Kconfig | 3
-rw-r--r-- arch/ia64/include/asm/Kbuild | 1
-rw-r--r-- arch/ia64/include/asm/page.h | 2
-rw-r--r-- arch/ia64/include/asm/scatterlist.h | 7
-rw-r--r-- arch/ia64/kernel/time.c | 15
-rw-r--r-- arch/ia64/mm/init.c | 31
-rw-r--r-- arch/m32r/include/asm/Kbuild | 1
-rw-r--r-- arch/m32r/include/asm/scatterlist.h | 6
-rw-r--r-- arch/m68k/Kconfig | 2
-rw-r--r-- arch/microblaze/include/asm/Kbuild | 1
-rw-r--r-- arch/microblaze/include/asm/scatterlist.h | 1
-rw-r--r-- arch/mips/Kconfig | 2
-rw-r--r-- arch/mn10300/include/asm/Kbuild | 1
-rw-r--r-- arch/mn10300/include/asm/scatterlist.h | 16
-rw-r--r-- arch/powerpc/Kconfig | 3
-rw-r--r-- arch/powerpc/include/asm/Kbuild | 1
-rw-r--r-- arch/powerpc/include/asm/page.h | 3
-rw-r--r-- arch/powerpc/include/asm/scatterlist.h | 17
-rw-r--r-- arch/powerpc/kernel/setup_64.c | 6
-rw-r--r-- arch/powerpc/kernel/vdso.c | 16
-rw-r--r-- arch/powerpc/kernel/vio.c | 2
-rw-r--r-- arch/powerpc/mm/dma-noncoherent.c | 1
-rw-r--r-- arch/powerpc/platforms/44x/warp.c | 1
-rw-r--r-- arch/powerpc/platforms/52xx/efika.c | 1
-rw-r--r-- arch/powerpc/platforms/amigaone/setup.c | 1
-rw-r--r-- arch/powerpc/platforms/pseries/dlpar.c | 4
-rw-r--r-- arch/powerpc/platforms/pseries/mobility.c | 2
-rw-r--r-- arch/s390/Kconfig | 3
-rw-r--r-- arch/s390/include/asm/Kbuild | 1
-rw-r--r-- arch/s390/include/asm/page.h | 2
-rw-r--r-- arch/s390/include/asm/scatterlist.h | 3
-rw-r--r-- arch/s390/kernel/vdso.c | 15
-rw-r--r-- arch/score/include/asm/Kbuild | 1
-rw-r--r-- arch/score/include/asm/scatterlist.h | 6
-rw-r--r-- arch/sh/Kconfig | 2
-rw-r--r-- arch/sh/include/asm/page.h | 5
-rw-r--r-- arch/sh/kernel/vsyscall/vsyscall.c | 15
-rw-r--r-- arch/sparc/Kconfig | 1
-rw-r--r-- arch/sparc/include/asm/Kbuild | 1
-rw-r--r-- arch/sparc/include/asm/scatterlist.h | 8
-rw-r--r-- arch/tile/Kconfig | 2
-rw-r--r-- arch/tile/include/asm/hardwall.h | 2
-rw-r--r-- arch/tile/include/asm/page.h | 6
-rw-r--r-- arch/tile/kernel/hardwall.c | 6
-rw-r--r-- arch/tile/kernel/vdso.c | 15
-rw-r--r-- arch/um/include/asm/Kbuild | 1
-rw-r--r-- arch/um/include/asm/page.h | 5
-rw-r--r-- arch/x86/Kbuild | 4
-rw-r--r-- arch/x86/Kconfig | 26
-rw-r--r-- arch/x86/Makefile | 8
-rw-r--r-- arch/x86/include/asm/Kbuild | 3
-rw-r--r-- arch/x86/include/asm/crash.h | 9
-rw-r--r-- arch/x86/include/asm/kexec-bzimage64.h | 6
-rw-r--r-- arch/x86/include/asm/kexec.h | 45
-rw-r--r-- arch/x86/include/asm/page.h | 1
-rw-r--r-- arch/x86/include/asm/page_64.h | 2
-rw-r--r-- arch/x86/include/asm/scatterlist.h | 8
-rw-r--r-- arch/x86/kernel/Makefile | 1
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c | 4
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 6
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4
-rw-r--r-- arch/x86/kernel/crash.c | 563
-rw-r--r-- arch/x86/kernel/kexec-bzimage64.c | 553
-rw-r--r-- arch/x86/kernel/machine_kexec_64.c | 239
-rw-r--r-- arch/x86/kvm/mmu_audit.c | 2
-rw-r--r-- arch/x86/platform/uv/tlb_uv.c | 2
-rw-r--r-- arch/x86/purgatory/Makefile | 30
-rw-r--r-- arch/x86/purgatory/entry64.S | 101
-rw-r--r-- arch/x86/purgatory/purgatory.c | 72
-rw-r--r-- arch/x86/purgatory/setup-x86_64.S | 58
-rw-r--r-- arch/x86/purgatory/sha256.c | 283
-rw-r--r-- arch/x86/purgatory/sha256.h | 22
-rw-r--r-- arch/x86/purgatory/stack.S | 19
-rw-r--r-- arch/x86/purgatory/string.c | 13
-rw-r--r-- arch/x86/syscalls/syscall_32.tbl | 1
-rw-r--r-- arch/x86/syscalls/syscall_64.tbl | 2
-rw-r--r-- arch/x86/um/asm/elf.h | 1
-rw-r--r-- arch/x86/um/mem_64.c | 15
-rw-r--r-- arch/x86/vdso/vdso32-setup.c | 19
-rw-r--r-- crypto/zlib.c | 8
-rw-r--r-- drivers/atm/he.c | 31
-rw-r--r-- drivers/atm/idt77252.c | 15
-rw-r--r-- drivers/block/DAC960.c | 18
-rw-r--r-- drivers/block/cciss.c | 11
-rw-r--r-- drivers/block/skd_main.c | 25
-rw-r--r-- drivers/crypto/hifn_795x.c | 5
-rw-r--r-- drivers/firmware/efi/runtime-map.c | 21
-rw-r--r-- drivers/gpu/drm/i810/i810_dma.c | 5
-rw-r--r-- drivers/infiniband/hw/amso1100/c2.c | 6
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.c | 12
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 5
-rw-r--r-- drivers/media/common/saa7146/saa7146_core.c | 15
-rw-r--r-- drivers/media/common/saa7146/saa7146_fops.c | 5
-rw-r--r-- drivers/media/pci/bt8xx/bt878.c | 16
-rw-r--r-- drivers/media/pci/ngene/ngene-core.c | 7
-rw-r--r-- drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c | 11
-rw-r--r-- drivers/media/usb/ttusb-dec/ttusb_dec.c | 11
-rw-r--r-- drivers/net/ethernet/amd/pcnet32.c | 45
-rw-r--r-- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 7
-rw-r--r-- drivers/net/ethernet/cisco/enic/vnic_dev.c | 8
-rw-r--r-- drivers/net/ethernet/marvell/sky2.c | 5
-rw-r--r-- drivers/net/ethernet/micrel/ksz884x.c | 7
-rw-r--r-- drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c | 4
-rw-r--r-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 11
-rw-r--r-- drivers/net/irda/vlsi_ir.c | 4
-rw-r--r-- drivers/net/wireless/ipw2x00/ipw2100.c | 16
-rw-r--r-- drivers/net/wireless/mwl8k.c | 6
-rw-r--r-- drivers/net/wireless/rtl818x/rtl8180/dev.c | 11
-rw-r--r-- drivers/net/wireless/rtlwifi/pci.c | 17
-rw-r--r-- drivers/parport/parport_ip32.c | 2
-rw-r--r-- drivers/rapidio/devices/tsi721.h | 12
-rw-r--r-- drivers/rapidio/devices/tsi721_dma.c | 718
-rw-r--r-- drivers/rapidio/rio.c | 66
-rw-r--r-- drivers/rtc/Kconfig | 29
-rw-r--r-- drivers/rtc/Makefile | 5
-rw-r--r-- drivers/rtc/class.c | 16
-rw-r--r-- drivers/rtc/interface.c | 2
-rw-r--r-- drivers/rtc/rtc-ds1343.c | 75
-rw-r--r-- drivers/rtc/rtc-ds1742.c | 2
-rw-r--r-- drivers/rtc/rtc-efi-platform.c | 31
-rw-r--r-- drivers/rtc/rtc-efi.c | 32
-rw-r--r-- drivers/rtc/rtc-isl12022.c | 12
-rw-r--r-- drivers/rtc/rtc-pcf85063.c | 204
-rw-r--r-- drivers/rtc/rtc-pcf8563.c | 231
-rw-r--r-- drivers/rtc/rtc-tps65910.c | 4
-rw-r--r-- drivers/scsi/3w-sas.c | 5
-rw-r--r-- drivers/scsi/a100u2w.c | 8
-rw-r--r-- drivers/scsi/be2iscsi/be_main.c | 10
-rw-r--r-- drivers/scsi/be2iscsi/be_mgmt.c | 3
-rw-r--r-- drivers/scsi/csiostor/csio_wr.c | 8
-rw-r--r-- drivers/scsi/eata.c | 5
-rw-r--r-- drivers/scsi/hpsa.c | 8
-rw-r--r-- drivers/scsi/megaraid/megaraid_mbox.c | 16
-rw-r--r-- drivers/scsi/megaraid/megaraid_sas_base.c | 8
-rw-r--r-- drivers/scsi/mesh.c | 6
-rw-r--r-- drivers/scsi/mvumi.c | 9
-rw-r--r-- drivers/scsi/pm8001/pm8001_sas.c | 5
-rw-r--r-- drivers/scsi/pmcraid.c | 4
-rw-r--r-- drivers/scsi/scsi_sysfs.c | 4
-rw-r--r-- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 15
-rw-r--r-- drivers/staging/rtl8192ee/pci.c | 37
-rw-r--r-- drivers/staging/rtl8821ae/pci.c | 36
-rw-r--r-- drivers/staging/slicoss/slicoss.c | 9
-rw-r--r-- drivers/staging/vt6655/device_main.c | 40
-rw-r--r-- drivers/tty/synclink_gt.c | 5
-rw-r--r-- drivers/vme/bridges/vme_ca91cx42.c | 6
-rw-r--r-- drivers/vme/bridges/vme_tsi148.c | 6
-rw-r--r-- fs/adfs/adfs.h | 1
-rw-r--r-- fs/adfs/dir.c | 2
-rw-r--r-- fs/adfs/dir_fplus.c | 9
-rw-r--r-- fs/autofs4/autofs_i.h | 63
-rw-r--r-- fs/autofs4/expire.c | 1
-rw-r--r-- fs/autofs4/root.c | 10
-rw-r--r-- fs/befs/linuxvfs.c | 6
-rw-r--r-- fs/bfs/bfs.h | 1
-rw-r--r-- fs/bfs/dir.c | 4
-rw-r--r-- fs/bfs/inode.c | 8
-rw-r--r-- fs/coda/cache.c | 2
-rw-r--r-- fs/coda/coda_linux.c | 2
-rw-r--r-- fs/coda/dir.c | 3
-rw-r--r-- fs/coda/file.c | 2
-rw-r--r-- fs/coda/inode.c | 4
-rw-r--r-- fs/coda/pioctl.c | 2
-rw-r--r-- fs/coda/psdev.c | 2
-rw-r--r-- fs/coda/upcall.c | 2
-rw-r--r-- fs/cramfs/inode.c | 45
-rw-r--r-- fs/cramfs/uncompress.c | 10
-rw-r--r-- fs/dlm/debug_fs.c | 15
-rw-r--r-- fs/efs/namei.c | 11
-rw-r--r-- fs/exec.c | 4
-rw-r--r-- fs/exofs/ore_raid.c | 2
-rw-r--r-- fs/fcntl.c | 5
-rw-r--r-- fs/hpfs/dnode.c | 17
-rw-r--r-- fs/inode.c | 1
-rw-r--r-- fs/isofs/compress.c | 4
-rw-r--r-- fs/jffs2/compr_zlib.c | 7
-rw-r--r-- fs/minix/bitmap.c | 2
-rw-r--r-- fs/minix/inode.c | 4
-rw-r--r-- fs/nilfs2/Makefile | 2
-rw-r--r-- fs/nilfs2/nilfs.h | 8
-rw-r--r-- fs/nilfs2/super.c | 9
-rw-r--r-- fs/nilfs2/sysfs.c | 1137
-rw-r--r-- fs/nilfs2/sysfs.h | 176
-rw-r--r-- fs/nilfs2/the_nilfs.c | 17
-rw-r--r-- fs/nilfs2/the_nilfs.h | 20
-rw-r--r-- fs/omfs/inode.c | 2
-rw-r--r-- fs/proc/base.c | 181
-rw-r--r-- fs/proc/fd.c | 2
-rw-r--r-- fs/proc/generic.c | 32
-rw-r--r-- fs/proc/internal.h | 9
-rw-r--r-- fs/proc/kcore.c | 2
-rw-r--r-- fs/proc/proc_sysctl.c | 2
-rw-r--r-- fs/proc/proc_tty.c | 4
-rw-r--r-- fs/proc/root.c | 4
-rw-r--r-- fs/proc/vmcore.c | 82
-rw-r--r-- fs/pstore/ram_core.c | 2
-rw-r--r-- fs/qnx6/Makefile | 1
-rw-r--r-- fs/qnx6/dir.c | 26
-rw-r--r-- fs/qnx6/inode.c | 99
-rw-r--r-- fs/qnx6/namei.c | 6
-rw-r--r-- fs/qnx6/qnx6.h | 12
-rw-r--r-- fs/qnx6/super_mmi.c | 22
-rw-r--r-- fs/ramfs/file-nommu.c | 2
-rw-r--r-- fs/reiserfs/dir.c | 2
-rw-r--r-- fs/reiserfs/do_balan.c | 2
-rw-r--r-- fs/reiserfs/file.c | 2
-rw-r--r-- fs/reiserfs/ibalance.c | 2
-rw-r--r-- fs/reiserfs/inode.c | 2
-rw-r--r-- fs/reiserfs/ioctl.c | 2
-rw-r--r-- fs/reiserfs/item_ops.c | 4
-rw-r--r-- fs/reiserfs/lbalance.c | 2
-rw-r--r-- fs/reiserfs/prints.c | 4
-rw-r--r-- fs/reiserfs/procfs.c | 2
-rw-r--r-- fs/reiserfs/stree.c | 2
-rw-r--r-- fs/reiserfs/super.c | 9
-rw-r--r-- fs/reiserfs/xattr.c | 22
-rw-r--r-- fs/reiserfs/xattr_acl.c | 2
-rw-r--r-- fs/reiserfs/xattr_security.c | 2
-rw-r--r-- fs/reiserfs/xattr_trusted.c | 2
-rw-r--r-- fs/reiserfs/xattr_user.c | 2
-rw-r--r-- fs/romfs/super.c | 23
-rw-r--r-- fs/ufs/Makefile | 1
-rw-r--r-- fs/ufs/inode.c | 32
-rw-r--r-- fs/ufs/super.c | 304
-rw-r--r-- fs/ufs/ufs.h | 10
-rw-r--r-- include/asm-generic/pci-dma-compat.h | 8
-rw-r--r-- include/linux/decompress/bunzip2.h | 8
-rw-r--r-- include/linux/decompress/generic.h | 10
-rw-r--r-- include/linux/decompress/inflate.h | 8
-rw-r--r-- include/linux/decompress/unlz4.h | 8
-rw-r--r-- include/linux/decompress/unlzma.h | 8
-rw-r--r-- include/linux/decompress/unlzo.h | 8
-rw-r--r-- include/linux/decompress/unxz.h | 8
-rw-r--r-- include/linux/efi.h | 19
-rw-r--r-- include/linux/fs.h | 29
-rw-r--r-- include/linux/ioport.h | 6
-rw-r--r-- include/linux/kernel.h | 1
-rw-r--r-- include/linux/kexec.h | 104
-rw-r--r-- include/linux/memcontrol.h | 98
-rw-r--r-- include/linux/mm.h | 17
-rw-r--r-- include/linux/mm_types.h | 1
-rw-r--r-- include/linux/page_cgroup.h | 70
-rw-r--r-- include/linux/rio_drv.h | 5
-rw-r--r-- include/linux/scatterlist.h | 2
-rw-r--r-- include/linux/sched.h | 13
-rw-r--r-- include/linux/shm.h | 18
-rw-r--r-- include/linux/shmem_fs.h | 17
-rw-r--r-- include/linux/swap.h | 15
-rw-r--r-- include/linux/syscalls.h | 5
-rw-r--r-- include/linux/sysctl.h | 2
-rw-r--r-- include/linux/user_namespace.h | 6
-rw-r--r-- include/linux/zlib.h | 4
-rw-r--r-- include/scsi/scsi.h | 2
-rw-r--r-- include/uapi/linux/fcntl.h | 15
-rw-r--r-- include/uapi/linux/kexec.h | 11
-rw-r--r-- include/uapi/linux/memfd.h | 8
-rw-r--r-- init/Kconfig | 5
-rw-r--r-- init/do_mounts.c | 12
-rw-r--r-- init/do_mounts_rd.c | 10
-rw-r--r-- init/initramfs.c | 60
-rw-r--r-- init/main.c | 23
-rw-r--r-- ipc/shm.c | 75
-rw-r--r-- kernel/Makefile | 2
-rw-r--r-- kernel/acct.c | 30
-rw-r--r-- kernel/bounds.c | 2
-rw-r--r-- kernel/events/uprobes.c | 15
-rw-r--r-- kernel/exit.c | 49
-rw-r--r-- kernel/fork.c | 79
-rw-r--r-- kernel/gcov/fs.c | 3
-rw-r--r-- kernel/kallsyms.c | 2
-rw-r--r-- kernel/kexec.c | 1291
-rw-r--r-- kernel/panic.c | 1
-rw-r--r-- kernel/resource.c | 101
-rw-r--r-- kernel/sys_ni.c | 2
-rw-r--r-- kernel/test_kprobes.c | 87
-rw-r--r-- kernel/user_namespace.c | 6
-rw-r--r-- kernel/watchdog.c | 1
-rw-r--r-- lib/Kconfig | 7
-rw-r--r-- lib/decompress.c | 2
-rw-r--r-- lib/decompress_bunzip2.c | 26
-rw-r--r-- lib/decompress_inflate.c | 12
-rw-r--r-- lib/decompress_unlz4.c | 83
-rw-r--r-- lib/decompress_unlzma.c | 28
-rw-r--r-- lib/decompress_unlzo.c | 12
-rw-r--r-- lib/decompress_unxz.c | 10
-rw-r--r-- lib/idr.c | 25
-rw-r--r-- lib/kfifo.c | 6
-rw-r--r-- lib/rbtree.c | 2
-rw-r--r-- lib/scatterlist.c | 4
-rw-r--r-- mm/filemap.c | 25
-rw-r--r-- mm/huge_memory.c | 57
-rw-r--r-- mm/memcontrol.c | 1310
-rw-r--r-- mm/memory.c | 81
-rw-r--r-- mm/migrate.c | 38
-rw-r--r-- mm/mmap.c | 30
-rw-r--r-- mm/nommu.c | 5
-rw-r--r-- mm/rmap.c | 20
-rw-r--r-- mm/shmem.c | 369
-rw-r--r-- mm/slab.c | 4
-rw-r--r-- mm/swap.c | 36
-rw-r--r-- mm/swap_state.c | 9
-rw-r--r-- mm/swapfile.c | 21
-rw-r--r-- mm/truncate.c | 9
-rw-r--r-- mm/util.c | 9
-rw-r--r-- mm/vmscan.c | 12
-rw-r--r-- mm/zswap.c | 6
-rw-r--r-- scripts/.gitignore | 1
-rw-r--r-- scripts/Makefile | 1
-rw-r--r-- scripts/basic/.gitignore | 1
-rw-r--r-- scripts/basic/Makefile | 1
-rw-r--r-- scripts/basic/bin2c.c (renamed from scripts/bin2c.c) | 7
-rwxr-xr-x scripts/checkstack.pl | 12
-rw-r--r-- scripts/coccinelle/free/ifnullfree.cocci | 53
-rwxr-xr-x scripts/tags.sh | 2
-rw-r--r-- tools/testing/selftests/Makefile | 1
-rw-r--r-- tools/testing/selftests/memfd/.gitignore | 4
-rw-r--r-- tools/testing/selftests/memfd/Makefile | 41
-rw-r--r-- tools/testing/selftests/memfd/fuse_mnt.c | 110
-rw-r--r-- tools/testing/selftests/memfd/fuse_test.c | 311
-rw-r--r-- tools/testing/selftests/memfd/memfd_test.c | 913
-rw-r--r-- tools/testing/selftests/memfd/run_fuse_test.sh | 14
-rw-r--r-- tools/testing/selftests/ptrace/peeksiginfo.c | 4
348 files changed, 10189 insertions, 3536 deletions
diff --git a/CREDITS b/CREDITS
index a80b66718f66..bb6278884f89 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1381,6 +1381,9 @@ S: 17 rue Danton
 S: F - 94270 Le Kremlin-Bicêtre
 S: France
 
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
 N: Greg Hankins
 E: gregh@cc.gatech.edu
 D: fixed keyboard driver to separate LED and locking status
@@ -1691,6 +1694,10 @@ S: Reading
 S: RG6 2NU
 S: United Kingdom
 
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
 N: Jakub Jelinek
 E: jakub@redhat.com
 W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644
index 000000000000..304ba84a973a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What:		/sys/fs/nilfs2/features/revision
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show current revision of NILFS file system driver.
+		This value informs about file system revision that
+		driver is ready to support.
+
+What:		/sys/fs/nilfs2/features/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/features group.
+
+What:		/sys/fs/nilfs2/<device>/revision
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show NILFS file system revision on volume.
+		This value informs about metadata structures'
+		revision on mounted volume.
+
+What:		/sys/fs/nilfs2/<device>/blocksize
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show volume's block size in bytes.
+
+What:		/sys/fs/nilfs2/<device>/device_size
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show volume size in bytes.
+
+What:		/sys/fs/nilfs2/<device>/free_blocks
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show count of free blocks on volume.
+
+What:		/sys/fs/nilfs2/<device>/uuid
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show volume's UUID (Universally Unique Identifier).
+
+What:		/sys/fs/nilfs2/<device>/volume_name
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show volume's label.
+
+What:		/sys/fs/nilfs2/<device>/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What:		/sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show last write time of super block in human-readable
+		format.
+
+What:		/sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show last write time of super block in seconds.
+
+What:		/sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show current write count of super block.
+
+What:		/sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show/Set interval of periodical update of superblock
+		(in seconds).
+
+What:		/sys/fs/nilfs2/<device>/superblock/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device>/superblock
+		group.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show start block number of the latest segment.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show sequence value of the latest segment.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show checkpoint number of the latest segment.
+
+What:		/sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show segment sequence counter.
+
+What:		/sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show index number of the latest full segment.
+
+What:		/sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show index number of the full segment index
+		to be used next.
+
+What:		/sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show offset of next partial segment in the current
+		full segment.
+
+What:		/sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show next checkpoint number.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show write time of the last segment in
+		human-readable format.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show write time of the last segment in seconds.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show write time of the last segment not for cleaner
+		operation in human-readable format.
+
+What:		/sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show write time of the last segment not for cleaner
+		operation in seconds.
+
+What:		/sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of dirty data blocks.
+
+What:		/sys/fs/nilfs2/<device>/segctor/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device>/segctor
+		group.
+
+What:		/sys/fs/nilfs2/<device>/segments/segments_number
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of segments on a volume.
+
+What:		/sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of blocks in segment.
+
+What:		/sys/fs/nilfs2/<device>/segments/clean_segments
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show count of clean segments.
+
+What:		/sys/fs/nilfs2/<device>/segments/dirty_segments
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show count of dirty segments.
+
+What:		/sys/fs/nilfs2/<device>/segments/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device>/segments
+		group.
+
+What:		/sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of checkpoints on volume.
+
+What:		/sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of snapshots on volume.
+
+What:		/sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show checkpoint number of the latest segment.
+
+What:		/sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show next checkpoint number.
+
+What:		/sys/fs/nilfs2/<device>/checkpoints/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+		group.
+
+What:		/sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+		group.
+
+What:		/sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of inodes for snapshot.
+
+What:		/sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Show number of blocks for snapshot.
+
+What:		/sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date:		April 2014
+Contact:	"Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+		Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+		group.
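
All of these attributes are plain text files, so they can be consumed with ordinary file I/O. A minimal userspace sketch of reading one of them (the device name "sda1" is a placeholder, not part of the ABI above):

    #include <stdio.h>

    int main(void)
    {
    	unsigned long long free_blocks;
    	/* attribute path documented above; "sda1" is a placeholder */
    	FILE *f = fopen("/sys/fs/nilfs2/sda1/free_blocks", "r");

    	if (!f)
    		return 1;
    	if (fscanf(f, "%llu", &free_blocks) == 1)
    		printf("free blocks: %llu\n", free_blocks);
    	fclose(f);
    	return 0;
    }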
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..8870b0212150 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,64 +24,27 @@ Please note that implementation details can be changed.
 
 	a page/swp_entry may be charged (usage += PAGE_SIZE) at
 
-	mem_cgroup_charge_anon()
-	  Called at new page fault and Copy-On-Write.
-
-	mem_cgroup_try_charge_swapin()
-	  Called at do_swap_page() (page fault on swap entry) and swapoff.
-	  Followed by charge-commit-cancel protocol. (With swap accounting)
-	  At commit, a charge recorded in swap_cgroup is removed.
-
-	mem_cgroup_charge_file()
-	  Called at add_to_page_cache()
-
-	mem_cgroup_cache_charge_swapin()
-	  Called at shmem's swapin.
-
-	mem_cgroup_prepare_migration()
-	  Called before migration. "extra" charge is done and followed by
-	  charge-commit-cancel protocol.
-	  At commit, charge against oldpage or newpage will be committed.
+	mem_cgroup_try_charge()
 
 2. Uncharge
 	a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
 
-	mem_cgroup_uncharge_page()
-	  Called when an anonymous page is fully unmapped. I.e., mapcount goes
-	  to 0. If the page is SwapCache, uncharge is delayed until
-	  mem_cgroup_uncharge_swapcache().
-
-	mem_cgroup_uncharge_cache_page()
-	  Called when a page-cache is deleted from radix-tree. If the page is
-	  SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
-	mem_cgroup_uncharge_swapcache()
-	  Called when SwapCache is removed from radix-tree. The charge itself
-	  is moved to swap_cgroup. (If mem+swap controller is disabled, no
-	  charge to swap occurs.)
+	mem_cgroup_uncharge()
+	  Called when a page's refcount goes down to 0.
 
 	mem_cgroup_uncharge_swap()
 	  Called when swp_entry's refcnt goes down to 0. A charge against swap
 	  disappears.
 
-	mem_cgroup_end_migration(old, new)
-	  At success of migration old is uncharged (if necessary), a charge
-	  to new page is committed. At failure, charge to old page is committed.
-
 3. charge-commit-cancel
-	In some case, we can't know this "charge" is valid or not at charging
-	(because of races).
-	To handle such case, there are charge-commit-cancel functions.
-		mem_cgroup_try_charge_XXX
-		mem_cgroup_commit_charge_XXX
-		mem_cgroup_cancel_charge_XXX
-	these are used in swap-in and migration.
+	Memcg pages are charged in two steps:
+		mem_cgroup_try_charge()
+		mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
 
 	At try_charge(), there are no flags to say "this page is charged".
 	at this point, usage += PAGE_SIZE.
 
-	At commit(), the function checks the page should be charged or not
-	and set flags or avoid charging.(usage -= PAGE_SIZE)
+	At commit(), the page is associated with the memcg.
 
 	At cancel(), simply usage -= PAGE_SIZE.
 
@@ -91,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	Anonymous page is newly allocated at
 		  - page fault into MAP_ANONYMOUS mapping.
 		  - Copy-On-Write.
-	It is charged right after it's allocated before doing any page table
-	related operations. Of course, it's uncharged when another page is used
-	for the fault address.
-
-	At freeing anonymous page (by exit() or munmap()), zap_pte() is called
-	and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
-	are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
-	Another page freeing is by page-reclaim (vmscan.c) and anonymous
-	pages are swapped out. In this case, the page is marked as
-	PageSwapCache(). uncharge() routine doesn't uncharge the page marked
-	as SwapCache(). It's delayed until __delete_from_swap_cache().
 
 	4.1 Swap-in.
 	At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -111,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	(b) If the SwapCache has been mapped by processes, it has been
 	    charged already.
 
-	This swap-in is one of the most complicated work. In do_swap_page(),
-	following events occur when pte is unchanged.
-
-	(1) the page (SwapCache) is looked up.
-	(2) lock_page()
-	(3) try_charge_swapin()
-	(4) reuse_swap_page() (may call delete_swap_cache())
-	(5) commit_charge_swapin()
-	(6) swap_free().
-
-	Considering following situation for example.
-
-	(A) The page has not been charged before (2) and reuse_swap_page()
-	    doesn't call delete_from_swap_cache().
-	(B) The page has not been charged before (2) and reuse_swap_page()
-	    calls delete_from_swap_cache().
-	(C) The page has been charged before (2) and reuse_swap_page() doesn't
-	    call delete_from_swap_cache().
-	(D) The page has been charged before (2) and reuse_swap_page() calls
-	    delete_from_swap_cache().
-
-	    memory.usage/memsw.usage changes to this page/swp_entry will be
-	 Case          (A)      (B)       (C)     (D)
-         Event
-      Before (2)     0/ 1     0/ 1      1/ 1    1/ 1
-          ===========================================
-          (3)       +1/+1    +1/+1     +1/+1   +1/+1
-          (4)          -     0/ 0        -    -1/ 0
-          (5)        0/-1     0/ 0     -1/-1    0/ 0
-          (6)          -     0/-1        -     0/-1
-          ===========================================
-       Result        1/ 1     1/ 1      1/ 1    1/ 1
-
-       In any cases, charges to this page should be 1/ 1.
-
 	4.2 Swap-out.
 	At swap-out, typical state transition is below.
 
@@ -158,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	    swp_entry's refcnt -= 1.
 
 
-	At (b), the page is marked as SwapCache and not uncharged.
-	At (d), the page is removed from SwapCache and a charge in page_cgroup
-	is moved to swap_cgroup.
-
 	Finally, at task exit,
 	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-	Here, a charge in swap_cgroup disappears.
 
 5. Page Cache
 	Page Cache is charged at
 	- add_to_page_cache_locked().
 
-	uncharged at
-	- __remove_from_page_cache().
-
 	The logic is very clear. (About migration, see below)
 	Note: __remove_from_page_cache() is called by remove_from_page_cache()
 	and __remove_mapping().
 
 6. Shmem(tmpfs) Page Cache
-	Memcg's charge/uncharge have special handlers of shmem. The best way
-	to understand shmem's page state transition is to read mm/shmem.c.
+	The best way to understand shmem's page state transition is to read
+	mm/shmem.c.
 	But brief explanation of the behavior of memcg around shmem will be
 	helpful to understand the logic.
 
@@ -192,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	It's charged when...
 	- A new page is added to shmem's radix-tree.
 	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-	It's uncharged when
-	- A page is removed from radix-tree and not SwapCache.
-	- When SwapCache is removed, a charge is moved to swap_cgroup.
-	- When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
-	  disappears.
 
 7. Page Migration
-	One of the most complicated functions is page-migration-handler.
-	Memcg has 2 routines. Assume that we are migrating a page's contents
-	from OLDPAGE to NEWPAGE.
-
-	Usual migration logic is..
-	(a) remove the page from LRU.
-	(b) allocate NEWPAGE (migration target)
-	(c) lock by lock_page().
-	(d) unmap all mappings.
-	(e-1) If necessary, replace entry in radix-tree.
-	(e-2) move contents of a page.
-	(f) map all mappings again.
-	(g) pushback the page to LRU.
-	(-) OLDPAGE will be freed.
-
-	Before (g), memcg should complete all necessary charge/uncharge to
-	NEWPAGE/OLDPAGE.
-
-	The point is....
-	- If OLDPAGE is anonymous, all charges will be dropped at (d) because
-	  try_to_unmap() drops all mapcount and the page will not be
-	  SwapCache.
-
-	- If OLDPAGE is SwapCache, charges will be kept at (g) because
-	  __delete_from_swap_cache() isn't called at (e-1)
-
-	- If OLDPAGE is page-cache, charges will be kept at (g) because
-	  remove_from_swap_cache() isn't called at (e-1)
-
-	memcg provides following hooks.
-
-	- mem_cgroup_prepare_migration(OLDPAGE)
-	  Called after (b) to account a charge (usage += PAGE_SIZE) against
-	  memcg which OLDPAGE belongs to.
-
-	- mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
-	  Called after (f) before (g).
-	  If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
-	  charged, a charge by prepare_migration() is automatically canceled.
-	  If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
-	But zap_pte() (by exit or munmap) can be called while migration,
-	we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+	mem_cgroup_migrate()
 
 8. LRU
 	Each memcg has its own private LRU. Now, its handling is under global
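
For a caller, the charge-commit-cancel protocol documented above reduces to roughly the following shape. This is a hedged sketch against the 3.17-era signatures this patch documents; establish_mapping() is a hypothetical stand-in for whatever work the caller does between the two steps:

    static int example_charge(struct page *page, struct mm_struct *mm)
    {
    	struct mem_cgroup *memcg;
    	int ret;

    	/* step 1: usage += PAGE_SIZE; may reclaim or fail */
    	ret = mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg);
    	if (ret)
    		return ret;

    	if (establish_mapping(page) == 0) {	/* hypothetical helper */
    		/* step 2a: associate the page with the memcg */
    		mem_cgroup_commit_charge(page, memcg, false);
    		return 0;
    	}

    	/* step 2b: back out; usage -= PAGE_SIZE */
    	mem_cgroup_cancel_charge(page, memcg);
    	return -ENOMEM;
    }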
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
index 37803eb5521e..6af570ec53b4 100644
--- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -70,6 +70,7 @@ nuvoton,npct501 i2c trusted platform module (TPM)
 nxp,pca9556		Octal SMBus and I2C registered interface
 nxp,pca9557		8-bit I2C-bus and SMBus I/O port with reset
 nxp,pcf8563		Real-time clock/calendar
+nxp,pcf85063		Tiny Real-Time Clock
 ovti,ov5642		OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus
 pericom,pt7c4338	Real-time Clock Module
 plx,pex8648		48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd8..beefb9f82902 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
  14: 'E' if an unsigned module has been loaded in a kernel supporting
      module signature.
 
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
 The primary reason for the 'Tainted: ' string is to tell kernel
 debuggers if this is a clean kernel or if anything unusual has
 occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
index 335f3c6087dc..626052f403bb 100644
--- a/Documentation/rapidio/tsi721.txt
+++ b/Documentation/rapidio/tsi721.txt
@@ -20,13 +20,26 @@ II. Known problems
 
   None.
 
-III. To do
+III. DMA Engine Support
 
-  Add DMA data transfers (non-messaging).
-  Add inbound region (SRIO-to-PCIe) mapping.
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to SLAVE
+mode API defined by common Linux kernel DMA Engine framework.
+
+Depending on system requirements RapidIO DMA operations can be included/excluded
+by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
+out of eight available BDMA channels to support DMA data transfers.
+One BDMA channel is reserved for generation of maintenance read/write requests.
+
+If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
+this driver will accept DMA-specific module parameter:
+  "dma_desc_per_channel" - defines number of hardware buffer descriptors used by
+      each BDMA channel of Tsi721 (by default - 128).
 
 IV. Version History
 
+  1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
+          than hardware buffer descriptors ring.
   1.0.0 - Initial driver release.
 
 V. License
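
For reference, a client would drive these BDMA channels through the generic dmaengine slave calls the document refers to. A hedged sketch only: the rdev, sgl and nents values are assumed to exist in the caller, and the RapidIO target-address setup that the rio-layer wrapper (rio_dma_prep_slave_sg() in drivers/rapidio/rio.c) handles is elided:

    /* request one of the seven Tsi721 BDMA channels */
    struct dma_chan *chan = rio_request_dma(rdev);
    struct dma_async_tx_descriptor *txd;

    if (chan) {
    	txd = dmaengine_prep_slave_sg(chan, sgl, nents,
    				      DMA_MEM_TO_DEV, 0);
    	if (txd) {
    		dmaengine_submit(txd);
    		dma_async_issue_pending(chan);
    	}
    	/* ... wait for completion ... */
    	rio_release_dma(chan);
    }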
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775..f79eb9666379 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -826,6 +826,7 @@ can be ORed together:
 4096 - An out-of-tree module has been loaded.
 8192 - An unsigned module has been loaded in a kernel supporting module
        signature.
+16384 - A soft lockup has previously occurred on the system.
 
 ==============================================================
 
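
The values in this sysctl are bit flags, so the new 16384 entry is bit 14 (16384 == 1 << 14), the same condition the oops-tracing change earlier in this series reports as 'L'. A hedged illustration of testing it in kernel code (the pr_info() wrapper is illustrative; TAINT_SOFTLOCKUP is the constant this series adds to include/linux/kernel.h):

    /* 1 << TAINT_SOFTLOCKUP == 1 << 14 == 16384 */
    if (get_taint() & (1 << TAINT_SOFTLOCKUP))
    	pr_info("a soft lockup has previously occurred\n");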
diff --git a/MAINTAINERS b/MAINTAINERS
index e065c3881626..30873e781dfa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -597,7 +597,7 @@ AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
 M:	Thomas Dahlmann <dahlmann.thomas@arcor.de>
 L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
-F:	drivers/usb/gadget/amd5536udc.*
+F:	drivers/usb/gadget/udc/amd5536udc.*
 
 AMD GEODE PROCESSOR/CHIPSET SUPPORT
 P:	Andres Salomon <dilinger@queued.net>
@@ -621,7 +621,7 @@ AMD MICROCODE UPDATE SUPPORT
 M:	Andreas Herrmann <herrmann.der.user@googlemail.com>
 L:	amd64-microcode@amd64.org
 S:	Maintained
-F:	arch/x86/kernel/microcode_amd.c
+F:	arch/x86/kernel/cpu/microcode/amd*
 
 AMD XGBE DRIVER
 M:	Tom Lendacky <thomas.lendacky@amd.com>
@@ -911,7 +911,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:	Maintained
 F:	arch/arm/mach-prima2/
-F:	drivers/clk/clk-prima2.c
+F:	drivers/clk/sirf/
 F:	drivers/clocksource/timer-prima2.c
 F:	drivers/clocksource/timer-marco.c
 N:	[^a-z]sirf
@@ -1164,6 +1164,7 @@ M: Linus Walleij <linus.walleij@linaro.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-nomadik/
+F:	drivers/pinctrl/nomadik/
 F:	drivers/i2c/busses/i2c-nomadik.c
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
 
@@ -1185,8 +1186,7 @@ F: drivers/mmc/host/msm_sdcc.h
 F:	drivers/tty/serial/msm_serial.h
 F:	drivers/tty/serial/msm_serial.c
 F:	drivers/*/pm8???-*
-F:	drivers/mfd/ssbi/
-F:	include/linux/mfd/pm8xxx/
+F:	drivers/mfd/ssbi.c
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git
 S:	Maintained
 
@@ -1443,7 +1443,8 @@ F: drivers/mfd/abx500*
 F:	drivers/mfd/ab8500*
 F:	drivers/mfd/dbx500*
 F:	drivers/mfd/db8500*
-F:	drivers/pinctrl/pinctrl-nomadik*
+F:	drivers/pinctrl/nomadik/pinctrl-ab*
+F:	drivers/pinctrl/nomadik/pinctrl-nomadik*
 F:	drivers/rtc/rtc-ab8500.c
 F:	drivers/rtc/rtc-pl031.c
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
@@ -1699,7 +1700,7 @@ ATMEL USBA UDC DRIVER
 M:	Nicolas Ferre <nicolas.ferre@atmel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
-F:	drivers/usb/gadget/atmel_usba_udc.*
+F:	drivers/usb/gadget/udc/atmel_usba_udc.*
 
 ATMEL WIRELESS DRIVER
 M:	Simon Kelley <simon@thekelleys.org.uk>
@@ -1991,7 +1992,7 @@ F: arch/arm/boot/dts/bcm113*
 F:	arch/arm/boot/dts/bcm216*
 F:	arch/arm/boot/dts/bcm281*
 F:	arch/arm/configs/bcm_defconfig
-F:	drivers/mmc/host/sdhci_bcm_kona.c
+F:	drivers/mmc/host/sdhci-bcm-kona.c
 F:	drivers/clocksource/bcm_kona_timer.c
 
 BROADCOM BCM2835 ARM ARCHICTURE
@@ -2341,12 +2342,6 @@ L: netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/cirrus/ep93xx_eth.c
 
-CIRRUS LOGIC EP93XX OHCI USB HOST DRIVER
-M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-usb@vger.kernel.org
-S:	Maintained
-F:	drivers/usb/host/ohci-ep93xx.c
-
 CIRRUS LOGIC AUDIO CODEC DRIVERS
 M:	Brian Austin <brian.austin@cirrus.com>
 M:	Paul Handrigan <Paul.Handrigan@cirrus.com>
@@ -2431,7 +2426,7 @@ W: http://linux-cifs.samba.org/
 Q:	http://patchwork.ozlabs.org/project/linux-cifs-client/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
 S:	Supported
-F:	Documentation/filesystems/cifs.txt
+F:	Documentation/filesystems/cifs/
 F:	fs/cifs/
 
 COMPACTPCI HOTPLUG CORE
@@ -2966,7 +2961,9 @@ L: linux-media@vger.kernel.org
 L:	dri-devel@lists.freedesktop.org
 L:	linaro-mm-sig@lists.linaro.org
 F:	drivers/dma-buf/
-F:	include/linux/dma-buf* include/linux/reservation.h include/linux/*fence.h
+F:	include/linux/dma-buf*
+F:	include/linux/reservation.h
+F:	include/linux/*fence.h
 F:	Documentation/dma-buf-sharing.txt
 T:	git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
 
@@ -3061,7 +3058,6 @@ L: dri-devel@lists.freedesktop.org
 T:	git git://people.freedesktop.org/~agd5f/linux
 S:	Supported
 F:	drivers/gpu/drm/radeon/
-F:	include/drm/radeon*
 F:	include/uapi/drm/radeon*
 
 DRM PANEL DRIVERS
@@ -3255,26 +3251,12 @@ T: git git://linuxtv.org/anttip/media_tree.git
 S:	Maintained
 F:	drivers/media/tuners/e4000*
 
-EATA-DMA SCSI DRIVER
-M:	Michael Neuffer <mike@i-Connect.Net>
-L:	linux-eata@i-connect.net
-L:	linux-scsi@vger.kernel.org
-S:	Maintained
-F:	drivers/scsi/eata*
-
 EATA ISA/EISA/PCI SCSI DRIVER
 M:	Dario Ballabio <ballabio_dario@emc.com>
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
 F:	drivers/scsi/eata.c
 
-EATA-PIO SCSI DRIVER
-M:	Michael Neuffer <mike@i-Connect.Net>
-L:	linux-eata@i-connect.net
-L:	linux-scsi@vger.kernel.org
-S:	Maintained
-F:	drivers/scsi/eata_pio.*
-
 EC100 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -3449,7 +3431,7 @@ M: Matt Fleming <matt.fleming@intel.com>
 L:	linux-efi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
 S:	Maintained
-F:	Documentation/x86/efi-stub.txt
+F:	Documentation/efi-stub.txt
 F:	arch/ia64/kernel/efi.c
 F:	arch/x86/boot/compressed/eboot.[ch]
 F:	arch/x86/include/asm/efi.h
@@ -3836,7 +3818,7 @@ M: Li Yang <leoli@freescale.com>
 L:	linux-usb@vger.kernel.org
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
-F:	drivers/usb/gadget/fsl*
+F:	drivers/usb/gadget/udc/fsl*
 
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
 M:	Li Yang <leoli@freescale.com>
@@ -4525,10 +4507,7 @@ S: Supported
 F:	drivers/scsi/ibmvscsi/ibmvfc*
 
 IBM ServeRAID RAID DRIVER
-P:	Jack Hammer
-M:	Dave Jeffery <ipslinux@adaptec.com>
-W:	http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S:	Supported
+S:	Orphan
 F:	drivers/scsi/ips.*
 
 ICH LPC AND GPIO DRIVER
@@ -4725,8 +4704,8 @@ F: drivers/platform/x86/intel_menlow.c
 INTEL IA32 MICROCODE UPDATE SUPPORT
 M:	Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
 S:	Maintained
-F:	arch/x86/kernel/microcode_core.c
-F:	arch/x86/kernel/microcode_intel.c
+F:	arch/x86/kernel/cpu/microcode/core*
+F:	arch/x86/kernel/cpu/microcode/intel*
 
 INTEL I/OAT DMA DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
@@ -5185,7 +5164,6 @@ L: linux-nfs@vger.kernel.org
 W:	http://nfs.sourceforge.net/
 S:	Supported
 F:	fs/nfsd/
-F:	include/linux/nfsd/
 F:	include/uapi/linux/nfsd/
 F:	fs/lockd/
 F:	fs/nfs_common/
@@ -5906,7 +5884,6 @@ F: drivers/clocksource/metag_generic.c
 F:	drivers/irqchip/irq-metag.c
 F:	drivers/irqchip/irq-metag-ext.c
 F:	drivers/tty/metag_da.c
-F:	fs/imgdafs/
 
 MICROBLAZE ARCHITECTURE
 M:	Michal Simek <monstr@monstr.eu>
@@ -6997,9 +6974,9 @@ M: Jamie Iles <jamie@jamieiles.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://github.com/jamieiles/linux-2.6-ji.git
 S:	Supported
+F:	arch/arm/boot/dts/picoxcell*
 F:	arch/arm/mach-picoxcell/
-F:	drivers/*/picoxcell*
-F:	drivers/*/*/picoxcell*
+F:	drivers/crypto/picoxcell*
 
 PIN CONTROL SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
@@ -7224,7 +7201,7 @@ F: drivers/ptp/*
 F:	include/linux/ptp_cl*
 
 PTRACE SUPPORT
-M:	Roland McGrath <roland@redhat.com>
+M:	Roland McGrath <roland@hack.frob.com>
 M:	Oleg Nesterov <oleg@redhat.com>
 S:	Maintained
 F:	include/asm-generic/syscall.h
@@ -7274,7 +7251,7 @@ S: Maintained
 F:	arch/arm/mach-pxa/
 F:	drivers/pcmcia/pxa2xx*
 F:	drivers/spi/spi-pxa2xx*
-F:	drivers/usb/gadget/pxa2*
+F:	drivers/usb/gadget/udc/pxa2*
 F:	include/sound/pxa2xx-lib.h
 F:	sound/arm/pxa*
 F:	sound/soc/pxa/
@@ -7283,7 +7260,7 @@ PXA3xx NAND FLASH DRIVER
 M:	Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
-F:	drivers/mtd/nand/pxa3xx-nand.c
+F:	drivers/mtd/nand/pxa3xx_nand.c
 
 MMP SUPPORT
 M:	Eric Miao <eric.y.miao@gmail.com>
@@ -9628,8 +9605,8 @@ USB WEBCAM GADGET
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
-F:	drivers/usb/gadget/*uvc*.c
-F:	drivers/usb/gadget/webcam.c
+F:	drivers/usb/gadget/function/*uvc*.c
+F:	drivers/usb/gadget/legacy/webcam.c
 
 USB WIRELESS RNDIS DRIVER (rndis_wlan)
 M:	Jussi Kivilinna <jussi.kivilinna@iki.fi>
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed5088..e858aa0ad8af 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644
index 017d7471c3c4..000000000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 916cedbd7a67..c49a775937db 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
 	  <http://www.arm.linux.org.uk/>.
 
 config ARM_HAS_SG_CHAIN
+	select ARCH_HAS_SG_CHAIN
 	bool
 
 config NEED_SG_DMA_LENGTH
@@ -1982,6 +1983,8 @@ config XIP_PHYS_ADDR
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a357601983..70cd84eb7fda 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
 generic-y += rwsem.h
+generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
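
The matching "generic-y += scatterlist.h" Kbuild additions in this series are what make these per-arch header deletions safe: for every name listed in generic-y, Kbuild emits a one-line wrapper header under arch/$ARCH/include/generated/asm/. The generated file is, in effect (a sketch of the generated output, not a file in this patch):

    /* arch/arm/include/generated/asm/scatterlist.h (generated by Kbuild) */
    #include <asm-generic/scatterlist.h>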
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 7da48bc42bbf..70b904c010c6 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -336,7 +336,7 @@ static int __init early_touchbook_revision(char *p)
 	if (!p)
 		return 0;
 
-	return strict_strtoul(p, 10, &touchbook_revision);
+	return kstrtoul(p, 10, &touchbook_revision);
 }
 early_param("tbr", early_touchbook_revision);
 
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index f62f7537d899..ac8a249779f2 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -681,29 +681,19 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
 					 const char __user *user_buf,
 					 size_t count, loff_t *ppos)
 {
-	char buf[OMAP_MUX_MAX_ARG_CHAR];
 	struct seq_file *seqf;
 	struct omap_mux *m;
-	unsigned long val;
-	int buf_size, ret;
+	u16 val;
+	int ret;
 	struct omap_mux_partition *partition;
 
 	if (count > OMAP_MUX_MAX_ARG_CHAR)
 		return -EINVAL;
 
-	memset(buf, 0, sizeof(buf));
-	buf_size = min(count, sizeof(buf) - 1);
-
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-
-	ret = strict_strtoul(buf, 0x10, &val);
+	ret = kstrtou16_from_user(user_buf, count, 0x10, &val);
 	if (ret < 0)
 		return ret;
 
-	if (val > 0xffff)
-		return -EINVAL;
-
 	seqf = file->private_data;
 	m = seqf->private;
 
@@ -711,7 +701,7 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
 	if (!partition)
 		return -ENODEV;
 
-	omap_mux_write(partition, (u16)val, m->reg_offset);
+	omap_mux_write(partition, val, m->reg_offset);
 	*ppos += count;
 
 	return count;
@@ -917,14 +907,14 @@ static void __init omap_mux_set_cmdline_signals(void)
 
 	while ((token = strsep(&next_opt, ",")) != NULL) {
 		char *keyval, *name;
-		unsigned long val;
+		u16 val;
 
 		keyval = token;
 		name = strsep(&keyval, "=");
 		if (name) {
 			int res;
 
-			res = strict_strtoul(keyval, 0x10, &val);
+			res = kstrtou16(keyval, 0x10, &val);
 			if (res < 0)
 				continue;
 
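
The conversion above works because kstrtou16_from_user() folds the whole removed sequence into one call: it bounds the copy from userspace, parses in the requested base (0x10 here), and rejects values that do not fit in a u16, which is why the explicit "val > 0xffff" check could be dropped. A hedged, self-contained sketch of the same pattern (demo_write() and demo_apply() are illustrative names, not from the patch):

    static ssize_t demo_write(struct file *file, const char __user *ubuf,
    			  size_t count, loff_t *ppos)
    {
    	u16 val;
    	int ret;

    	/* copy, parse (base 16) and range-check in one step */
    	ret = kstrtou16_from_user(ubuf, count, 16, &val);
    	if (ret)
    		return ret;	/* -EINVAL, -ERANGE or -EFAULT */

    	demo_apply(val);	/* hypothetical consumer of the value */
    	*ppos += count;
    	return count;
    }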
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 43596e0ed051..d897292712eb 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -90,7 +90,7 @@ int __init parse_balloon3_features(char *arg)
 	if (!arg)
 		return 0;
 
-	return strict_strtoul(arg, 0, &balloon3_features_present);
+	return kstrtoul(arg, 0, &balloon3_features_present);
 }
 early_param("balloon3_features", parse_balloon3_features);
 
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 41f27f667ca8..de3b08073fe7 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -769,7 +769,7 @@ static unsigned long viper_tpm;
769 769
770static int __init viper_tpm_setup(char *str) 770static int __init viper_tpm_setup(char *str)
771{ 771{
772 return strict_strtoul(str, 10, &viper_tpm) >= 0; 772 return kstrtoul(str, 10, &viper_tpm) >= 0;
773} 773}
774 774
775__setup("tpm=", viper_tpm_setup); 775__setup("tpm=", viper_tpm_setup);
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index e647b47244a9..7804d3c6991b 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -242,7 +242,7 @@ static int __init jive_mtdset(char *options)
242 if (options == NULL || options[0] == '\0') 242 if (options == NULL || options[0] == '\0')
243 return 0; 243 return 0;
244 244
245 if (strict_strtoul(options, 10, &set)) { 245 if (kstrtoul(options, 10, &set)) {
246 printk(KERN_ERR "failed to parse mtdset=%s\n", options); 246 printk(KERN_ERR "failed to parse mtdset=%s\n", options);
247 return 0; 247 return 0;
248 } 248 }
diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c
index b1eabaad50a5..213230ee57d1 100644
--- a/arch/arm/mach-w90x900/cpu.c
+++ b/arch/arm/mach-w90x900/cpu.c
@@ -178,7 +178,8 @@ static int __init nuc900_set_cpufreq(char *str)
178 if (!*str) 178 if (!*str)
179 return 0; 179 return 0;
180 180
181 strict_strtoul(str, 0, &cpufreq); 181 if (kstrtoul(str, 0, &cpufreq))
182 return 0;
182 183
183 nuc900_clock_source(NULL, "ext"); 184 nuc900_clock_source(NULL, "ext");
184 185
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b0f9c9db9590..fd4e81a4e1ce 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
1config ARM64 1config ARM64
2 def_bool y 2 def_bool y
3 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE 3 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
4 select ARCH_HAS_SG_CHAIN
4 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 5 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
5 select ARCH_USE_CMPXCHG_LOCKREF 6 select ARCH_USE_CMPXCHG_LOCKREF
6 select ARCH_SUPPORTS_ATOMIC_RMW 7 select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7a3f462133b0..22b16232bd60 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -28,9 +28,6 @@
28#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 28#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
29#define PAGE_MASK (~(PAGE_SIZE-1)) 29#define PAGE_MASK (~(PAGE_SIZE-1))
30 30
31/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
32#define __HAVE_ARCH_GATE_AREA 1
33
34/* 31/*
35 * The idmap and swapper page tables need some space reserved in the kernel 32 * The idmap and swapper page tables need some space reserved in the kernel
36 * image. Both require pgd, pud (4 levels only) and pmd tables to (section) 33 * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a81a446a5786..32aeea083d93 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -195,25 +195,6 @@ up_fail:
195} 195}
196 196
197/* 197/*
198 * We define AT_SYSINFO_EHDR, so we need these function stubs to keep
199 * Linux happy.
200 */
201int in_gate_area_no_mm(unsigned long addr)
202{
203 return 0;
204}
205
206int in_gate_area(struct mm_struct *mm, unsigned long addr)
207{
208 return 0;
209}
210
211struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
212{
213 return NULL;
214}
215
216/*
217 * Update the vDSO data page to keep in sync with kernel timekeeping. 198 * Update the vDSO data page to keep in sync with kernel timekeeping.
218 */ 199 */
219void update_vsyscall(struct timekeeper *tk) 200void update_vsyscall(struct timekeeper *tk)
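The stub deletions here (and in the powerpc, s390, sh, and tile hunks below) rely on a companion generic-mm change; presumably <linux/mm.h> now supplies fallbacks along these lines whenever an architecture does not define __HAVE_ARCH_GATE_AREA:

#ifndef __HAVE_ARCH_GATE_AREA
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
	return NULL;
}
static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
	return 0;
}
static inline int in_gate_area_no_mm(unsigned long addr)
{
	return 0;
}
#endif
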
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d..31742dfadff9 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += linkage.h
13generic-y += mcs_spinlock.h 13generic-y += mcs_spinlock.h
14generic-y += module.h 14generic-y += module.h
15generic-y += preempt.h 15generic-y += preempt.h
16generic-y += scatterlist.h
16generic-y += trace_clock.h 17generic-y += trace_clock.h
17generic-y += vga.h 18generic-y += vga.h
18generic-y += xor.h 19generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644
index f11f8f40ec4a..000000000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __ASM_CRIS_SCATTERLIST_H
2#define __ASM_CRIS_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee5..5b73921b6e9d 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += exec.h
5generic-y += hash.h 5generic-y += hash.h
6generic-y += mcs_spinlock.h 6generic-y += mcs_spinlock.h
7generic-y += preempt.h 7generic-y += preempt.h
8generic-y += scatterlist.h
8generic-y += trace_clock.h 9generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644
index 0e5eb3018468..000000000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_SCATTERLIST_H
2#define _ASM_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 44a6915ab13d..64aefb76bd69 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
28 select HAVE_MEMBLOCK 28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP 29 select HAVE_MEMBLOCK_NODE_MAP
30 select HAVE_VIRT_CPU_ACCOUNTING 30 select HAVE_VIRT_CPU_ACCOUNTING
31 select ARCH_HAS_SG_CHAIN
31 select VIRT_TO_BUS 32 select VIRT_TO_BUS
32 select ARCH_DISCARD_MEMBLOCK 33 select ARCH_DISCARD_MEMBLOCK
33 select GENERIC_IRQ_PROBE 34 select GENERIC_IRQ_PROBE
@@ -548,6 +549,8 @@ source "drivers/sn/Kconfig"
548config KEXEC 549config KEXEC
549 bool "kexec system call" 550 bool "kexec system call"
550 depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) 551 depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
552 select CRYPTO
553 select CRYPTO_SHA256
551 help 554 help
552 kexec is a system call that implements the ability to shutdown your 555 kexec is a system call that implements the ability to shutdown your
553 current kernel, and to start another kernel. It is like a reboot 556 current kernel, and to start another kernel. It is like a reboot
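The new CRYPTO / CRYPTO_SHA256 selects (repeated for m68k, mips, powerpc, s390, sh, tile, and x86 below) exist because the kexec_file_load() machinery digests the loaded segments with SHA-256 so purgatory can verify them before jumping into the new kernel. As an illustration of the facility these selects pull in (not necessarily the exact call path kexec uses), computing a SHA-256 digest through the kernel crypto API looks like:

#include <crypto/hash.h>
#include <linux/slab.h>

static int example_sha256(const void *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int ret;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	ret = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}
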
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
5generic-y += kvm_para.h 5generic-y += kvm_para.h
6generic-y += mcs_spinlock.h 6generic-y += mcs_spinlock.h
7generic-y += preempt.h 7generic-y += preempt.h
8generic-y += scatterlist.h
8generic-y += trace_clock.h 9generic-y += trace_clock.h
9generic-y += vtime.h 10generic-y += vtime.h
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index f1e1b2e3cdb3..1f1bf144fe62 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -231,4 +231,6 @@ get_order (unsigned long size)
231#define PERCPU_ADDR (-PERCPU_PAGE_SIZE) 231#define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
232#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) 232#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
233 233
234#define __HAVE_ARCH_GATE_AREA 1
235
234#endif /* _ASM_IA64_PAGE_H */ 236#endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef _ASM_IA64_SCATTERLIST_H
2#define _ASM_IA64_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5#define ARCH_HAS_SG_CHAIN
6
7#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 3e71ef85e439..9a0104a38cd3 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -384,21 +384,6 @@ static struct irqaction timer_irqaction = {
384 .name = "timer" 384 .name = "timer"
385}; 385};
386 386
387static struct platform_device rtc_efi_dev = {
388 .name = "rtc-efi",
389 .id = -1,
390};
391
392static int __init rtc_init(void)
393{
394 if (platform_device_register(&rtc_efi_dev) < 0)
395 printk(KERN_ERR "unable to register rtc device...\n");
396
397 /* not necessarily an error */
398 return 0;
399}
400module_init(rtc_init);
401
402void read_persistent_clock(struct timespec *ts) 387void read_persistent_clock(struct timespec *ts)
403{ 388{
404 efi_gettimeofday(ts); 389 efi_gettimeofday(ts);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 892d43e32f3b..6b3345758d3e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -278,6 +278,37 @@ setup_gate (void)
278 ia64_patch_gate(); 278 ia64_patch_gate();
279} 279}
280 280
281static struct vm_area_struct gate_vma;
282
283static int __init gate_vma_init(void)
284{
285 gate_vma.vm_mm = NULL;
286 gate_vma.vm_start = FIXADDR_USER_START;
287 gate_vma.vm_end = FIXADDR_USER_END;
288 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
289 gate_vma.vm_page_prot = __P101;
290
291 return 0;
292}
293__initcall(gate_vma_init);
294
295struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
296{
297 return &gate_vma;
298}
299
300int in_gate_area_no_mm(unsigned long addr)
301{
302 if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
303 return 1;
304 return 0;
305}
306
307int in_gate_area(struct mm_struct *mm, unsigned long addr)
308{
309 return in_gate_area_no_mm(addr);
310}
311
281void ia64_mmu_init(void *my_cpu_data) 312void ia64_mmu_init(void *my_cpu_data)
282{ 313{
283 unsigned long pta, impl_va_bits; 314 unsigned long pta, impl_va_bits;
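With ia64 now providing a real gate_vma, core code can resolve lookups on the FIXADDR_USER range. A sketch (an assumption, modeled on the usual get_user_pages()-style fallback) of how a lookup path consults it:

#include <linux/mm.h>

static struct vm_area_struct *lookup_vma_or_gate(struct mm_struct *mm,
						 unsigned long addr)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	/* find_vma() returns the first VMA ending above addr; if addr
	 * falls before it (or nothing matches), try the gate area. */
	if ((!vma || addr < vma->vm_start) && in_gate_area(mm, addr))
		return get_gate_vma(mm);
	return vma;
}
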
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62d..accc10a3dc78 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += hash.h
6generic-y += mcs_spinlock.h 6generic-y += mcs_spinlock.h
7generic-y += module.h 7generic-y += module.h
8generic-y += preempt.h 8generic-y += preempt.h
9generic-y += scatterlist.h
9generic-y += trace_clock.h 10generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644
index 7370b8b6243e..000000000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_M32R_SCATTERLIST_H
2#define _ASM_M32R_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..3ff8c9a25335 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -91,6 +91,8 @@ config MMU_SUN3
91config KEXEC 91config KEXEC
92 bool "kexec system call" 92 bool "kexec system call"
93 depends on M68KCLASSIC 93 depends on M68KCLASSIC
94 select CRYPTO
95 select CRYPTO_SHA256
94 help 96 help
95 kexec is a system call that implements the ability to shutdown your 97 kexec is a system call that implements the ability to shutdown your
96 current kernel, and to start another kernel. It is like a reboot 98 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 35b3ecaf25d5..27a3acda6c19 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -7,5 +7,6 @@ generic-y += exec.h
7generic-y += hash.h 7generic-y += hash.h
8generic-y += mcs_spinlock.h 8generic-y += mcs_spinlock.h
9generic-y += preempt.h 9generic-y += preempt.h
10generic-y += scatterlist.h
10generic-y += syscalls.h 11generic-y += syscalls.h
11generic-y += trace_clock.h 12generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/microblaze/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/scatterlist.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 900c7e5333b6..df51e78a72cc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt"
2396 2396
2397config KEXEC 2397config KEXEC
2398 bool "Kexec system call" 2398 bool "Kexec system call"
2399 select CRYPTO
2400 select CRYPTO_SHA256
2399 help 2401 help
2400 kexec is a system call that implements the ability to shutdown your 2402 kexec is a system call that implements the ability to shutdown your
2401 current kernel, and to start another kernel. It is like a reboot 2403 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 654d5ba6e310..ecbd6676bd33 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
6generic-y += hash.h 6generic-y += hash.h
7generic-y += mcs_spinlock.h 7generic-y += mcs_spinlock.h
8generic-y += preempt.h 8generic-y += preempt.h
9generic-y += scatterlist.h
9generic-y += trace_clock.h 10generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644
index 7baa4006008a..000000000000
--- a/arch/mn10300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,16 +0,0 @@
1/* MN10300 Scatterlist definitions
2 *
3 * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11#ifndef _ASM_SCATTERLIST_H
12#define _ASM_SCATTERLIST_H
13
14#include <asm-generic/scatterlist.h>
15
16#endif /* _ASM_SCATTERLIST_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 80b94b0add1f..a577609f8ed6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
111 select HAVE_DMA_API_DEBUG 111 select HAVE_DMA_API_DEBUG
112 select HAVE_OPROFILE 112 select HAVE_OPROFILE
113 select HAVE_DEBUG_KMEMLEAK 113 select HAVE_DEBUG_KMEMLEAK
114 select ARCH_HAS_SG_CHAIN
114 select GENERIC_ATOMIC64 if PPC32 115 select GENERIC_ATOMIC64 if PPC32
115 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE 116 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
116 select HAVE_PERF_EVENTS 117 select HAVE_PERF_EVENTS
@@ -398,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
398config KEXEC 399config KEXEC
399 bool "kexec system call" 400 bool "kexec system call"
400 depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) 401 depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
402 select CRYPTO
403 select CRYPTO_SHA256
401 help 404 help
402 kexec is a system call that implements the ability to shutdown your 405 kexec is a system call that implements the ability to shutdown your
403 current kernel, and to start another kernel. It is like a reboot 406 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
4generic-y += mcs_spinlock.h 4generic-y += mcs_spinlock.h
5generic-y += preempt.h 5generic-y += preempt.h
6generic-y += rwsem.h 6generic-y += rwsem.h
7generic-y += scatterlist.h
7generic-y += trace_clock.h 8generic-y += trace_clock.h
8generic-y += vtime.h 9generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 32e4e212b9c1..26fe1ae15212 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT;
48#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) 48#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
49#endif 49#endif
50 50
51/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
52#define __HAVE_ARCH_GATE_AREA 1
53
54/* 51/*
55 * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we 52 * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
56 * assign PAGE_MASK to a larger type it gets extended the way we want 53 * assign PAGE_MASK to a larger type it gets extended the way we want
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
1#ifndef _ASM_POWERPC_SCATTERLIST_H
2#define _ASM_POWERPC_SCATTERLIST_H
3/*
4 * Copyright (C) 2001 PPC64 Team, IBM Corp
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <asm/dma.h>
13#include <asm-generic/scatterlist.h>
14
15#define ARCH_HAS_SG_CHAIN
16
17#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d0225572faa1..75d62d63fe68 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -149,13 +149,13 @@ static void check_smt_enabled(void)
149 else if (!strcmp(smt_enabled_cmdline, "off")) 149 else if (!strcmp(smt_enabled_cmdline, "off"))
150 smt_enabled_at_boot = 0; 150 smt_enabled_at_boot = 0;
151 else { 151 else {
152 long smt; 152 int smt;
153 int rc; 153 int rc;
154 154
155 rc = strict_strtol(smt_enabled_cmdline, 10, &smt); 155 rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
156 if (!rc) 156 if (!rc)
157 smt_enabled_at_boot = 157 smt_enabled_at_boot =
158 min(threads_per_core, (int)smt); 158 min(threads_per_core, smt);
159 } 159 }
160 } else { 160 } else {
161 dn = of_find_node_by_path("/options"); 161 dn = of_find_node_by_path("/options");
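Narrowing smt from long to int is what lets the (int) cast disappear: kstrtoint() range-checks into the target width, and min() then compares like types. A reduced sketch with hypothetical names:

#include <linux/kernel.h>

static int parse_smt_threads(const char *s, int threads_per_core)
{
	int smt;

	if (kstrtoint(s, 10, &smt))
		return threads_per_core;	/* keep the default on bad input */
	return min(threads_per_core, smt);	/* both int, no cast needed */
}
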
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ce74c335a6a4..f174351842cf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -840,19 +840,3 @@ static int __init vdso_init(void)
840 return 0; 840 return 0;
841} 841}
842arch_initcall(vdso_init); 842arch_initcall(vdso_init);
843
844int in_gate_area_no_mm(unsigned long addr)
845{
846 return 0;
847}
848
849int in_gate_area(struct mm_struct *mm, unsigned long addr)
850{
851 return 0;
852}
853
854struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
855{
856 return NULL;
857}
858
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 904c66128fae..5bfdab9047be 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -977,7 +977,7 @@ static ssize_t viodev_cmo_desired_set(struct device *dev,
977 size_t new_desired; 977 size_t new_desired;
978 int ret; 978 int ret;
979 979
980 ret = strict_strtoul(buf, 10, &new_desired); 980 ret = kstrtoul(buf, 10, &new_desired);
981 if (ret) 981 if (ret)
982 return ret; 982 return ret;
983 983
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
33#include <linux/export.h> 33#include <linux/export.h>
34 34
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/dma.h>
36 37
37#include "mmu_decl.h" 38#include "mmu_decl.h"
38 39
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
25#include <asm/time.h> 25#include <asm/time.h>
26#include <asm/uic.h> 26#include <asm/uic.h>
27#include <asm/ppc4xx.h> 27#include <asm/ppc4xx.h>
28#include <asm/dma.h>
28 29
29 30
30static __initdata struct of_device_id warp_of_bus[] = { 31static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
13#include <generated/utsrelease.h> 13#include <generated/utsrelease.h>
14#include <linux/pci.h> 14#include <linux/pci.h>
15#include <linux/of.h> 15#include <linux/of.h>
16#include <asm/dma.h>
16#include <asm/prom.h> 17#include <asm/prom.h>
17#include <asm/time.h> 18#include <asm/time.h>
18#include <asm/machdep.h> 19#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
24#include <asm/i8259.h> 24#include <asm/i8259.h>
25#include <asm/time.h> 25#include <asm/time.h>
26#include <asm/udbg.h> 26#include <asm/udbg.h>
27#include <asm/dma.h>
27 28
28extern void __flush_disable_L1(void); 29extern void __flush_disable_L1(void);
29 30
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 2d0b4d68a40a..a2450b8a50a5 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -400,10 +400,10 @@ out:
400static ssize_t dlpar_cpu_probe(const char *buf, size_t count) 400static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
401{ 401{
402 struct device_node *dn, *parent; 402 struct device_node *dn, *parent;
403 unsigned long drc_index; 403 u32 drc_index;
404 int rc; 404 int rc;
405 405
406 rc = strict_strtoul(buf, 0, &drc_index); 406 rc = kstrtou32(buf, 0, &drc_index);
407 if (rc) 407 if (rc)
408 return -EINVAL; 408 return -EINVAL;
409 409
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index d146fef038b8..e7cb6d4a871a 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -320,7 +320,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
320 u64 streamid; 320 u64 streamid;
321 int rc; 321 int rc;
322 322
323 rc = strict_strtoull(buf, 0, &streamid); 323 rc = kstrtou64(buf, 0, &streamid);
324 if (rc) 324 if (rc)
325 return rc; 325 return rc;
326 326
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8ca60f8d5683..ab39ceb89ecf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
48 48
49config KEXEC 49config KEXEC
50 def_bool y 50 def_bool y
51 select CRYPTO
52 select CRYPTO_SHA256
51 53
52config AUDIT_ARCH 54config AUDIT_ARCH
53 def_bool y 55 def_bool y
@@ -145,6 +147,7 @@ config S390
145 select TTY 147 select TTY
146 select VIRT_CPU_ACCOUNTING 148 select VIRT_CPU_ACCOUNTING
147 select VIRT_TO_BUS 149 select VIRT_TO_BUS
150 select ARCH_HAS_SG_CHAIN
148 151
149config SCHED_OMIT_FRAME_POINTER 152config SCHED_OMIT_FRAME_POINTER
150 def_bool y 153 def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
4generic-y += hash.h 4generic-y += hash.h
5generic-y += mcs_spinlock.h 5generic-y += mcs_spinlock.h
6generic-y += preempt.h 6generic-y += preempt.h
7generic-y += scatterlist.h
7generic-y += trace_clock.h 8generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 114258eeaacd..7b2ac6e44166 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn)
162#include <asm-generic/memory_model.h> 162#include <asm-generic/memory_model.h>
163#include <asm-generic/getorder.h> 163#include <asm-generic/getorder.h>
164 164
165#define __HAVE_ARCH_GATE_AREA 1
166
167#endif /* _S390_PAGE_H */ 165#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
1#include <asm-generic/scatterlist.h>
2
3#define ARCH_HAS_SG_CHAIN
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 613649096783..0bbb7e027c5a 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -316,18 +316,3 @@ static int __init vdso_init(void)
316 return 0; 316 return 0;
317} 317}
318early_initcall(vdso_init); 318early_initcall(vdso_init);
319
320int in_gate_area_no_mm(unsigned long addr)
321{
322 return 0;
323}
324
325int in_gate_area(struct mm_struct *mm, unsigned long addr)
326{
327 return 0;
328}
329
330struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
331{
332 return NULL;
333}
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 2f947aba4bd4..aad209199f7e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += cputime.h
8generic-y += hash.h 8generic-y += hash.h
9generic-y += mcs_spinlock.h 9generic-y += mcs_spinlock.h
10generic-y += preempt.h 10generic-y += preempt.h
11generic-y += scatterlist.h
11generic-y += trace_clock.h 12generic-y += trace_clock.h
12generic-y += xor.h 13generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644
index 9f533b8362c7..000000000000
--- a/arch/score/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_SCORE_SCATTERLIST_H
2#define _ASM_SCORE_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#endif /* _ASM_SCORE_SCATTERLIST_H */
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index aa2df3eaeb29..453fa5c09550 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -595,6 +595,8 @@ source kernel/Kconfig.hz
595config KEXEC 595config KEXEC
596 bool "kexec system call (EXPERIMENTAL)" 596 bool "kexec system call (EXPERIMENTAL)"
597 depends on SUPERH32 && MMU 597 depends on SUPERH32 && MMU
598 select CRYPTO
599 select CRYPTO_SHA256
598 help 600 help
599 kexec is a system call that implements the ability to shutdown your 601 kexec is a system call that implements the ability to shutdown your
600 current kernel, and to start another kernel. It is like a reboot 602 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 15d970328f71..fe20d14ae051 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -186,11 +186,6 @@ typedef struct page *pgtable_t;
186#include <asm-generic/memory_model.h> 186#include <asm-generic/memory_model.h>
187#include <asm-generic/getorder.h> 187#include <asm-generic/getorder.h>
188 188
189/* vDSO support */
190#ifdef CONFIG_VSYSCALL
191#define __HAVE_ARCH_GATE_AREA
192#endif
193
194/* 189/*
195 * Some drivers need to perform DMA into kmalloc'ed buffers 190 * Some drivers need to perform DMA into kmalloc'ed buffers
196 * and so we have to increase the kmalloc minalign for this. 191 * and so we have to increase the kmalloc minalign for this.
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 5ca579720a09..ea2aa1393b87 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
92 92
93 return NULL; 93 return NULL;
94} 94}
95
96struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
97{
98 return NULL;
99}
100
101int in_gate_area(struct mm_struct *mm, unsigned long address)
102{
103 return 0;
104}
105
106int in_gate_area_no_mm(unsigned long address)
107{
108 return 0;
109}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4692c90936f1..a537816613f9 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
42 select MODULES_USE_ELF_RELA 42 select MODULES_USE_ELF_RELA
43 select ODD_RT_SIGACTION 43 select ODD_RT_SIGACTION
44 select OLD_SIGSUSPEND 44 select OLD_SIGSUSPEND
45 select ARCH_HAS_SG_CHAIN
45 46
46config SPARC32 47config SPARC32
47 def_bool !64BIT 48 def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
15generic-y += module.h 15generic-y += module.h
16generic-y += mutex.h 16generic-y += mutex.h
17generic-y += preempt.h 17generic-y += preempt.h
18generic-y += scatterlist.h
18generic-y += serial.h 19generic-y += serial.h
19generic-y += trace_clock.h 20generic-y += trace_clock.h
20generic-y += types.h 21generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _SPARC_SCATTERLIST_H
2#define _SPARC_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#define ARCH_HAS_SG_CHAIN
7
8#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7fcd492adbfc..a3ffe2dd4832 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -191,6 +191,8 @@ source "kernel/Kconfig.hz"
191 191
192config KEXEC 192config KEXEC
193 bool "kexec system call" 193 bool "kexec system call"
194 select CRYPTO
195 select CRYPTO_SHA256
194 ---help--- 196 ---help---
195 kexec is a system call that implements the ability to shutdown your 197 kexec is a system call that implements the ability to shutdown your
196 current kernel, and to start another kernel. It is like a reboot 198 current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 2f572b6b7bc2..44d2765bde2b 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h
@@ -23,7 +23,7 @@
23struct proc_dir_entry; 23struct proc_dir_entry;
24#ifdef CONFIG_HARDWALL 24#ifdef CONFIG_HARDWALL
25void proc_tile_hardwall_init(struct proc_dir_entry *root); 25void proc_tile_hardwall_init(struct proc_dir_entry *root);
26int proc_pid_hardwall(struct task_struct *task, char *buffer); 26int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
27#else 27#else
28static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} 28static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
29#endif 29#endif
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 672768008618..a213a8d84a95 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -39,12 +39,6 @@
39#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 39#define HPAGE_MASK (~(HPAGE_SIZE - 1))
40 40
41/* 41/*
42 * We do define AT_SYSINFO_EHDR to support vDSO,
43 * but don't use the gate mechanism.
44 */
45#define __HAVE_ARCH_GATE_AREA 1
46
47/*
48 * If the Kconfig doesn't specify, set a maximum zone order that 42 * If the Kconfig doesn't specify, set a maximum zone order that
49 * is enough so that we can create huge pages from small pages given 43 * is enough so that we can create huge pages from small pages given
50 * the respective sizes of the two page types. See <linux/mmzone.h>. 44 * the respective sizes of the two page types. See <linux/mmzone.h>.
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 531f4c365351..aca6000bca75 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -947,15 +947,15 @@ static void hardwall_remove_proc(struct hardwall_info *info)
947 remove_proc_entry(buf, info->type->proc_dir); 947 remove_proc_entry(buf, info->type->proc_dir);
948} 948}
949 949
950int proc_pid_hardwall(struct task_struct *task, char *buffer) 950int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns,
951 struct pid *pid, struct task_struct *task)
951{ 952{
952 int i; 953 int i;
953 int n = 0; 954 int n = 0;
954 for (i = 0; i < HARDWALL_TYPES; ++i) { 955 for (i = 0; i < HARDWALL_TYPES; ++i) {
955 struct hardwall_info *info = task->thread.hardwall[i].info; 956 struct hardwall_info *info = task->thread.hardwall[i].info;
956 if (info) 957 if (info)
957 n += sprintf(&buffer[n], "%s: %d\n", 958 seq_printf(m, "%s: %d\n", info->type->name, info->id);
958 info->type->name, info->id);
959 } 959 }
960 return n; 960 return n;
961} 961}
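The hardwall conversion mirrors a tree-wide switch of single-value /proc handlers from "format into a caller-supplied buffer" to seq_file. For reference, the minimal shape of such a show handler:

#include <linux/seq_file.h>

/* The seq_file core owns and resizes the output buffer; the handler
 * only formats, so bookkeeping like 'n += sprintf(...)' goes away. */
static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s: %d\n", "example", 42);
	return 0;
}
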
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
index 1533af24106e..5bc51d7dfdcb 100644
--- a/arch/tile/kernel/vdso.c
+++ b/arch/tile/kernel/vdso.c
@@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma)
121 return NULL; 121 return NULL;
122} 122}
123 123
124struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
125{
126 return NULL;
127}
128
129int in_gate_area(struct mm_struct *mm, unsigned long address)
130{
131 return 0;
132}
133
134int in_gate_area_no_mm(unsigned long address)
135{
136 return 0;
137}
138
139int setup_vdso_pages(void) 124int setup_vdso_pages(void)
140{ 125{
141 struct page **pagelist; 126 struct page **pagelist;
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
21generic-y += pci.h 21generic-y += pci.h
22generic-y += percpu.h 22generic-y += percpu.h
23generic-y += preempt.h 23generic-y += preempt.h
24generic-y += scatterlist.h
24generic-y += sections.h 25generic-y += sections.h
25generic-y += switch_to.h 26generic-y += switch_to.h
26generic-y += topology.h 27generic-y += topology.h
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 5ff53d9185f7..71c5d132062a 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -119,4 +119,9 @@ extern unsigned long uml_physmem;
119#include <asm-generic/getorder.h> 119#include <asm-generic/getorder.h>
120 120
121#endif /* __ASSEMBLY__ */ 121#endif /* __ASSEMBLY__ */
122
123#ifdef CONFIG_X86_32
124#define __HAVE_ARCH_GATE_AREA 1
125#endif
126
122#endif /* __UM_PAGE_H */ 127#endif /* __UM_PAGE_H */
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index e5287d8517aa..61b6d51866f8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/
16 16
17obj-y += platform/ 17obj-y += platform/
18obj-y += net/ 18obj-y += net/
19
20ifeq ($(CONFIG_X86_64),y)
21obj-$(CONFIG_KEXEC) += purgatory/
22endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf2405053af5..4aafd322e21e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@ config X86
96 select IRQ_FORCED_THREADING 96 select IRQ_FORCED_THREADING
97 select HAVE_BPF_JIT if X86_64 97 select HAVE_BPF_JIT if X86_64
98 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 98 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
99 select ARCH_HAS_SG_CHAIN
99 select CLKEVT_I8253 100 select CLKEVT_I8253
100 select ARCH_HAVE_NMI_SAFE_CMPXCHG 101 select ARCH_HAVE_NMI_SAFE_CMPXCHG
101 select GENERIC_IOMAP 102 select GENERIC_IOMAP
@@ -1581,6 +1582,9 @@ source kernel/Kconfig.hz
1581 1582
1582config KEXEC 1583config KEXEC
1583 bool "kexec system call" 1584 bool "kexec system call"
1585 select BUILD_BIN2C
1586 select CRYPTO
1587 select CRYPTO_SHA256
1584 ---help--- 1588 ---help---
1585 kexec is a system call that implements the ability to shutdown your 1589 kexec is a system call that implements the ability to shutdown your
1586 current kernel, and to start another kernel. It is like a reboot 1590 current kernel, and to start another kernel. It is like a reboot
@@ -1595,6 +1599,28 @@ config KEXEC
1595 interface is strongly in flux, so no good recommendation can be 1599 interface is strongly in flux, so no good recommendation can be
1596 made. 1600 made.
1597 1601
1602config KEXEC_VERIFY_SIG
1603 bool "Verify kernel signature during kexec_file_load() syscall"
1604 depends on KEXEC
1605 ---help---
1606 This option makes kernel signature verification mandatory for
 1607 kexec_file_load() syscall. If the kernel signature cannot be
 1608 verified, kexec_file_load() will fail.
1609
1610 This option enforces signature verification at generic level.
 1611 One needs to enable signature verification for the type of kernel
1612 image being loaded to make sure it works. For example, enable
1613 bzImage signature verification option to be able to load and
1614 verify signatures of bzImage. Otherwise kernel loading will fail.
1615
1616config KEXEC_BZIMAGE_VERIFY_SIG
1617 bool "Enable bzImage signature verification support"
1618 depends on KEXEC_VERIFY_SIG
1619 depends on SIGNED_PE_FILE_VERIFICATION
1620 select SYSTEM_TRUSTED_KEYRING
1621 ---help---
1622 Enable bzImage signature verification support.
1623
1598config CRASH_DUMP 1624config CRASH_DUMP
1599 bool "kernel crash dumps" 1625 bool "kernel crash dumps"
1600 depends on X86_64 || (X86_32 && HIGHMEM) 1626 depends on X86_64 || (X86_32 && HIGHMEM)
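For orientation, KEXEC_VERIFY_SIG gates the new file-based load interface rather than the classic kexec_load(). A user-space sketch of invoking it (assuming a libc that exposes SYS_kexec_file_load; error handling elided):

#include <fcntl.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	int kernel_fd = open("/boot/bzImage", O_RDONLY);
	int initrd_fd = open("/boot/initrd.img", O_RDONLY);
	const char *cmdline = "root=/dev/sda1";

	/* With KEXEC_VERIFY_SIG=y the kernel refuses images whose
	 * signature cannot be verified. Length includes the NUL. */
	return syscall(SYS_kexec_file_load, kernel_fd, initrd_fd,
		       strlen(cmdline) + 1, cmdline, 0UL);
}
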
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c65fd9650467..c1aa36887843 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -183,6 +183,14 @@ archscripts: scripts_basic
183archheaders: 183archheaders:
184 $(Q)$(MAKE) $(build)=arch/x86/syscalls all 184 $(Q)$(MAKE) $(build)=arch/x86/syscalls all
185 185
186archprepare:
187ifeq ($(CONFIG_KEXEC),y)
188# Build only for 64bit. No loaders for 32bit yet.
189 ifeq ($(CONFIG_X86_64),y)
190 $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
191 endif
192endif
193
186### 194###
187# Kernel objects 195# Kernel objects
188 196
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
5genhdr-y += unistd_x32.h 5genhdr-y += unistd_x32.h
6 6
7generic-y += clkdev.h 7generic-y += clkdev.h
8generic-y += early_ioremap.h
9generic-y += cputime.h 8generic-y += cputime.h
9generic-y += early_ioremap.h
10generic-y += mcs_spinlock.h 10generic-y += mcs_spinlock.h
11generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644
index 000000000000..f498411f2500
--- /dev/null
+++ b/arch/x86/include/asm/crash.h
@@ -0,0 +1,9 @@
1#ifndef _ASM_X86_CRASH_H
2#define _ASM_X86_CRASH_H
3
4int crash_load_segments(struct kimage *image);
5int crash_copy_backup_region(struct kimage *image);
6int crash_setup_memmap_entries(struct kimage *image,
7 struct boot_params *params);
8
9#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644
index 000000000000..d1b5d194e31d
--- /dev/null
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_KEXEC_BZIMAGE64_H
2#define _ASM_KEXEC_BZIMAGE64_H
3
4extern struct kexec_file_ops kexec_bzImage64_ops;
5
 6#endif /* _ASM_KEXEC_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 17483a492f18..d2434c1cad05 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -23,6 +23,9 @@
23 23
24#include <asm/page.h> 24#include <asm/page.h>
25#include <asm/ptrace.h> 25#include <asm/ptrace.h>
26#include <asm/bootparam.h>
27
28struct kimage;
26 29
27/* 30/*
28 * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. 31 * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
@@ -61,6 +64,10 @@
61# define KEXEC_ARCH KEXEC_ARCH_X86_64 64# define KEXEC_ARCH KEXEC_ARCH_X86_64
62#endif 65#endif
63 66
67/* Memory to backup during crash kdump */
68#define KEXEC_BACKUP_SRC_START (0UL)
69#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
70
64/* 71/*
65 * CPU does not save ss and sp on stack if execution is already 72 * CPU does not save ss and sp on stack if execution is already
66 * running in kernel mode at the time of NMI occurrence. This code 73 * running in kernel mode at the time of NMI occurrence. This code
@@ -160,6 +167,44 @@ struct kimage_arch {
160 pud_t *pud; 167 pud_t *pud;
161 pmd_t *pmd; 168 pmd_t *pmd;
162 pte_t *pte; 169 pte_t *pte;
170 /* Details of backup region */
171 unsigned long backup_src_start;
172 unsigned long backup_src_sz;
173
174 /* Physical address of backup segment */
175 unsigned long backup_load_addr;
176
177 /* Core ELF header buffer */
178 void *elf_headers;
179 unsigned long elf_headers_sz;
180 unsigned long elf_load_addr;
181};
182#endif /* CONFIG_X86_32 */
183
184#ifdef CONFIG_X86_64
185/*
186 * Number of elements and order of elements in this structure should match
187 * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
188 * make an appropriate change in purgatory too.
189 */
190struct kexec_entry64_regs {
191 uint64_t rax;
192 uint64_t rcx;
193 uint64_t rdx;
194 uint64_t rbx;
195 uint64_t rsp;
196 uint64_t rbp;
197 uint64_t rsi;
198 uint64_t rdi;
199 uint64_t r8;
200 uint64_t r9;
201 uint64_t r10;
202 uint64_t r11;
203 uint64_t r12;
204 uint64_t r13;
205 uint64_t r14;
206 uint64_t r15;
207 uint64_t rip;
163}; 208};
164#endif 209#endif
165 210
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 775873d3be55..802dde30c928 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr);
70#include <asm-generic/memory_model.h> 70#include <asm-generic/memory_model.h>
71#include <asm-generic/getorder.h> 71#include <asm-generic/getorder.h>
72 72
73#define __HAVE_ARCH_GATE_AREA 1
74#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 73#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
75 74
76#endif /* __KERNEL__ */ 75#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 0f1ddee6a0ce..f408caf73430 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,4 +39,6 @@ void copy_page(void *to, void *from);
39 39
40#endif /* !__ASSEMBLY__ */ 40#endif /* !__ASSEMBLY__ */
41 41
42#define __HAVE_ARCH_GATE_AREA 1
43
42#endif /* _ASM_X86_PAGE_64_H */ 44#endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _ASM_X86_SCATTERLIST_H
2#define _ASM_X86_SCATTERLIST_H
3
4#include <asm-generic/scatterlist.h>
5
6#define ARCH_HAS_SG_CHAIN
7
8#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bde3993624f1..b5ea75c4a4b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y)
118 118
119 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o 119 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
120 obj-y += vsmp_64.o 120 obj-y += vsmp_64.o
121 obj-$(CONFIG_KEXEC) += kexec-bzimage64.o
121endif 122endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9c8f7394c612..c7035073dfc1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
461 461
462 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 462 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
463 463
464 if (strict_strtoul(buf, 10, &val) < 0) 464 if (kstrtoul(buf, 10, &val) < 0)
465 return -EINVAL; 465 return -EINVAL;
466 466
467 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); 467 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
@@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
511 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 511 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
512 return -EINVAL; 512 return -EINVAL;
513 513
514 if (strict_strtoul(buf, 16, &val) < 0) 514 if (kstrtoul(buf, 16, &val) < 0)
515 return -EINVAL; 515 return -EINVAL;
516 516
517 if (amd_set_subcaches(cpu, val)) 517 if (amd_set_subcaches(cpu, val))
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4fc57975acc1..bd9ccda8087f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
2136{ 2136{
2137 u64 new; 2137 u64 new;
2138 2138
2139 if (strict_strtoull(buf, 0, &new) < 0) 2139 if (kstrtou64(buf, 0, &new) < 0)
2140 return -EINVAL; 2140 return -EINVAL;
2141 2141
2142 attr_to_bank(attr)->ctl = new; 2142 attr_to_bank(attr)->ctl = new;
@@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s,
2174{ 2174{
2175 u64 new; 2175 u64 new;
2176 2176
2177 if (strict_strtoull(buf, 0, &new) < 0) 2177 if (kstrtou64(buf, 0, &new) < 0)
2178 return -EINVAL; 2178 return -EINVAL;
2179 2179
2180 if (mca_cfg.ignore_ce ^ !!new) { 2180 if (mca_cfg.ignore_ce ^ !!new) {
@@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s,
2198{ 2198{
2199 u64 new; 2199 u64 new;
2200 2200
2201 if (strict_strtoull(buf, 0, &new) < 0) 2201 if (kstrtou64(buf, 0, &new) < 0)
2202 return -EINVAL; 2202 return -EINVAL;
2203 2203
2204 if (mca_cfg.cmci_disabled ^ !!new) { 2204 if (mca_cfg.cmci_disabled ^ !!new) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 603df4f74640..1e49f8f41276 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
353 if (!b->interrupt_capable) 353 if (!b->interrupt_capable)
354 return -EINVAL; 354 return -EINVAL;
355 355
356 if (strict_strtoul(buf, 0, &new) < 0) 356 if (kstrtoul(buf, 0, &new) < 0)
357 return -EINVAL; 357 return -EINVAL;
358 358
359 b->interrupt_enable = !!new; 359 b->interrupt_enable = !!new;
@@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
372 struct thresh_restart tr; 372 struct thresh_restart tr;
373 unsigned long new; 373 unsigned long new;
374 374
375 if (strict_strtoul(buf, 0, &new) < 0) 375 if (kstrtoul(buf, 0, &new) < 0)
376 return -EINVAL; 376 return -EINVAL;
377 377
378 if (new > THRESHOLD_MAX) 378 if (new > THRESHOLD_MAX)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..0553a34fa0df 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) 4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 * 5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved. 6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 * Copyright (C) Red Hat Inc., 2014. All rights reserved.
8 * Authors:
9 * Vivek Goyal <vgoyal@redhat.com>
7 * 10 *
8 */ 11 */
9 12
13#define pr_fmt(fmt) "kexec: " fmt
14
10#include <linux/types.h> 15#include <linux/types.h>
11#include <linux/kernel.h> 16#include <linux/kernel.h>
12#include <linux/smp.h> 17#include <linux/smp.h>
@@ -16,6 +21,7 @@
16#include <linux/elf.h> 21#include <linux/elf.h>
17#include <linux/elfcore.h> 22#include <linux/elfcore.h>
18#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/slab.h>
19 25
20#include <asm/processor.h> 26#include <asm/processor.h>
21#include <asm/hardirq.h> 27#include <asm/hardirq.h>
@@ -28,6 +34,45 @@
28#include <asm/reboot.h> 34#include <asm/reboot.h>
29#include <asm/virtext.h> 35#include <asm/virtext.h>
30 36
37/* Alignment required for elf header segment */
38#define ELF_CORE_HEADER_ALIGN 4096
39
 40/* This primarily represents the number of split ranges due to exclusion */
41#define CRASH_MAX_RANGES 16
42
43struct crash_mem_range {
44 u64 start, end;
45};
46
47struct crash_mem {
48 unsigned int nr_ranges;
49 struct crash_mem_range ranges[CRASH_MAX_RANGES];
50};
51
52/* Misc data about ram ranges needed to prepare elf headers */
53struct crash_elf_data {
54 struct kimage *image;
55 /*
56 * Total number of ram ranges we have after various adjustments for
57 * GART, crash reserved region etc.
58 */
59 unsigned int max_nr_ranges;
60 unsigned long gart_start, gart_end;
61
62 /* Pointer to elf header */
63 void *ehdr;
64 /* Pointer to next phdr */
65 void *bufp;
66 struct crash_mem mem;
67};
68
69/* Used while preparing memory map entries for second kernel */
70struct crash_memmap_data {
71 struct boot_params *params;
72 /* Type of memory */
73 unsigned int type;
74};
75
31int in_crash_kexec; 76int in_crash_kexec;
32 77
33/* 78/*
@@ -39,6 +84,7 @@ int in_crash_kexec;
39 */ 84 */
40crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; 85crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
41EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); 86EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
87unsigned long crash_zero_bytes;
42 88
43static inline void cpu_crash_vmclear_loaded_vmcss(void) 89static inline void cpu_crash_vmclear_loaded_vmcss(void)
44{ 90{
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
135#endif 181#endif
136 crash_save_cpu(regs, safe_smp_processor_id()); 182 crash_save_cpu(regs, safe_smp_processor_id());
137} 183}
184
185#ifdef CONFIG_X86_64
186
187static int get_nr_ram_ranges_callback(unsigned long start_pfn,
188 unsigned long nr_pfn, void *arg)
189{
190 int *nr_ranges = arg;
191
192 (*nr_ranges)++;
193 return 0;
194}
195
196static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
197{
198 struct crash_elf_data *ced = arg;
199
200 ced->gart_start = start;
201 ced->gart_end = end;
202
203 /* Not expecting more than 1 gart aperture */
204 return 1;
205}
206
207
208/* Gather all the required information to prepare elf headers for ram regions */
209static void fill_up_crash_elf_data(struct crash_elf_data *ced,
210 struct kimage *image)
211{
212 unsigned int nr_ranges = 0;
213
214 ced->image = image;
215
216 walk_system_ram_range(0, -1, &nr_ranges,
217 get_nr_ram_ranges_callback);
218
219 ced->max_nr_ranges = nr_ranges;
220
221 /*
 222 * We don't create ELF headers for the GART aperture, as an attempt
 223 * to dump that memory in the second kernel leads to a hang/crash.
 224 * If a GART aperture is present, one needs to exclude that region,
 225 * and the exclusion may require an extra phdr.
226 */
227 walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
228 ced, get_gart_ranges_callback);
229
230 /*
 231 * If we have a GART region, excluding it could split
 232 * a memory range, resulting in an extra header. Account for that.
233 */
234 if (ced->gart_end)
235 ced->max_nr_ranges++;
236
237 /* Exclusion of crash region could split memory ranges */
238 ced->max_nr_ranges++;
239
240 /* If crashk_low_res is not 0, another range split possible */
241 if (crashk_low_res.end != 0)
242 ced->max_nr_ranges++;
243}
244
245static int exclude_mem_range(struct crash_mem *mem,
246 unsigned long long mstart, unsigned long long mend)
247{
248 int i, j;
249 unsigned long long start, end;
250 struct crash_mem_range temp_range = {0, 0};
251
252 for (i = 0; i < mem->nr_ranges; i++) {
253 start = mem->ranges[i].start;
254 end = mem->ranges[i].end;
255
256 if (mstart > end || mend < start)
257 continue;
258
259 /* Truncate any area outside of range */
260 if (mstart < start)
261 mstart = start;
262 if (mend > end)
263 mend = end;
264
265 /* Found completely overlapping range */
266 if (mstart == start && mend == end) {
267 mem->ranges[i].start = 0;
268 mem->ranges[i].end = 0;
269 if (i < mem->nr_ranges - 1) {
270 /* Shift rest of the ranges to left */
271 for (j = i; j < mem->nr_ranges - 1; j++) {
272 mem->ranges[j].start =
273 mem->ranges[j+1].start;
274 mem->ranges[j].end =
275 mem->ranges[j+1].end;
276 }
277 }
278 mem->nr_ranges--;
279 return 0;
280 }
281
282 if (mstart > start && mend < end) {
283 /* Split original range */
284 mem->ranges[i].end = mstart - 1;
285 temp_range.start = mend + 1;
286 temp_range.end = end;
287 } else if (mstart != start)
288 mem->ranges[i].end = mstart - 1;
289 else
290 mem->ranges[i].start = mend + 1;
291 break;
292 }
293
 294 /* If a split happened, add the new range to the array */
295 if (!temp_range.end)
296 return 0;
297
298 /* Split happened */
299 if (i == CRASH_MAX_RANGES - 1) {
300 pr_err("Too many crash ranges after split\n");
301 return -ENOMEM;
302 }
303
304 /* Location where new range should go */
305 j = i + 1;
306 if (j < mem->nr_ranges) {
307 /* Move over all ranges one slot towards the end */
308 for (i = mem->nr_ranges - 1; i >= j; i--)
309 mem->ranges[i + 1] = mem->ranges[i];
310 }
311
312 mem->ranges[j].start = temp_range.start;
313 mem->ranges[j].end = temp_range.end;
314 mem->nr_ranges++;
315 return 0;
316}
317
318/*
 319 * Look for any unwanted ranges between mstart and mend and remove them. This
 320 * might lead to splits; split ranges are put in the ced->mem.ranges[] array
321 */
322static int elf_header_exclude_ranges(struct crash_elf_data *ced,
323 unsigned long long mstart, unsigned long long mend)
324{
325 struct crash_mem *cmem = &ced->mem;
326 int ret = 0;
327
328 memset(cmem->ranges, 0, sizeof(cmem->ranges));
329
330 cmem->ranges[0].start = mstart;
331 cmem->ranges[0].end = mend;
332 cmem->nr_ranges = 1;
333
334 /* Exclude crashkernel region */
335 ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
336 if (ret)
337 return ret;
338
339 ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
340 if (ret)
341 return ret;
342
343 /* Exclude GART region */
344 if (ced->gart_end) {
345 ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
346 if (ret)
347 return ret;
348 }
349
350 return ret;
351}
352
353static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
354{
355 struct crash_elf_data *ced = arg;
356 Elf64_Ehdr *ehdr;
357 Elf64_Phdr *phdr;
358 unsigned long mstart, mend;
359 struct kimage *image = ced->image;
360 struct crash_mem *cmem;
361 int ret, i;
362
363 ehdr = ced->ehdr;
364
365 /* Exclude unwanted mem ranges */
366 ret = elf_header_exclude_ranges(ced, start, end);
367 if (ret)
368 return ret;
369
370 /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
371 cmem = &ced->mem;
372
373 for (i = 0; i < cmem->nr_ranges; i++) {
374 mstart = cmem->ranges[i].start;
375 mend = cmem->ranges[i].end;
376
377 phdr = ced->bufp;
378 ced->bufp += sizeof(Elf64_Phdr);
379
380 phdr->p_type = PT_LOAD;
381 phdr->p_flags = PF_R|PF_W|PF_X;
382 phdr->p_offset = mstart;
383
384 /*
385 * If a range matches backup region, adjust offset to backup
386 * segment.
387 */
388 if (mstart == image->arch.backup_src_start &&
389 (mend - mstart + 1) == image->arch.backup_src_sz)
390 phdr->p_offset = image->arch.backup_load_addr;
391
392 phdr->p_paddr = mstart;
393 phdr->p_vaddr = (unsigned long long) __va(mstart);
394 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
395 phdr->p_align = 0;
396 ehdr->e_phnum++;
397 pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
398 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
399 ehdr->e_phnum, phdr->p_offset);
400 }
401
402 return ret;
403}
404
405static int prepare_elf64_headers(struct crash_elf_data *ced,
406 void **addr, unsigned long *sz)
407{
408 Elf64_Ehdr *ehdr;
409 Elf64_Phdr *phdr;
410 unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
411 unsigned char *buf, *bufp;
412 unsigned int cpu;
413 unsigned long long notes_addr;
414 int ret;
415
416 /* extra phdr for vmcoreinfo elf note */
417 nr_phdr = nr_cpus + 1;
418 nr_phdr += ced->max_nr_ranges;
419
420 /*
421 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
422 * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
 423 * I think this is required by tools like gdb. So the same physical
 424 * memory will be mapped in two elf headers. One will contain kernel
 425 * text virtual addresses and the other will have __va(physical) addresses.
426 */
427
428 nr_phdr++;
429 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
430 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
431
432 buf = vzalloc(elf_sz);
433 if (!buf)
434 return -ENOMEM;
435
436 bufp = buf;
437 ehdr = (Elf64_Ehdr *)bufp;
438 bufp += sizeof(Elf64_Ehdr);
439 memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
440 ehdr->e_ident[EI_CLASS] = ELFCLASS64;
441 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
442 ehdr->e_ident[EI_VERSION] = EV_CURRENT;
443 ehdr->e_ident[EI_OSABI] = ELF_OSABI;
444 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
445 ehdr->e_type = ET_CORE;
446 ehdr->e_machine = ELF_ARCH;
447 ehdr->e_version = EV_CURRENT;
448 ehdr->e_phoff = sizeof(Elf64_Ehdr);
449 ehdr->e_ehsize = sizeof(Elf64_Ehdr);
450 ehdr->e_phentsize = sizeof(Elf64_Phdr);
451
452 /* Prepare one phdr of type PT_NOTE for each present cpu */
453 for_each_present_cpu(cpu) {
454 phdr = (Elf64_Phdr *)bufp;
455 bufp += sizeof(Elf64_Phdr);
456 phdr->p_type = PT_NOTE;
457 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
458 phdr->p_offset = phdr->p_paddr = notes_addr;
459 phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
460 (ehdr->e_phnum)++;
461 }
462
463 /* Prepare one PT_NOTE header for vmcoreinfo */
464 phdr = (Elf64_Phdr *)bufp;
465 bufp += sizeof(Elf64_Phdr);
466 phdr->p_type = PT_NOTE;
467 phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
468 phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
469 (ehdr->e_phnum)++;
470
471#ifdef CONFIG_X86_64
472 /* Prepare PT_LOAD type program header for kernel text region */
473 phdr = (Elf64_Phdr *)bufp;
474 bufp += sizeof(Elf64_Phdr);
475 phdr->p_type = PT_LOAD;
476 phdr->p_flags = PF_R|PF_W|PF_X;
477 phdr->p_vaddr = (Elf64_Addr)_text;
478 phdr->p_filesz = phdr->p_memsz = _end - _text;
479 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
480 (ehdr->e_phnum)++;
481#endif
482
483 /* Prepare PT_LOAD headers for system ram chunks. */
484 ced->ehdr = ehdr;
485 ced->bufp = bufp;
486 ret = walk_system_ram_res(0, -1, ced,
487 prepare_elf64_ram_headers_callback);
488 if (ret < 0)
489 return ret;
490
491 *addr = buf;
492 *sz = elf_sz;
493 return 0;
494}
495
496/* Prepare elf headers. Return addr and size */
497static int prepare_elf_headers(struct kimage *image, void **addr,
498 unsigned long *sz)
499{
500 struct crash_elf_data *ced;
501 int ret;
502
503 ced = kzalloc(sizeof(*ced), GFP_KERNEL);
504 if (!ced)
505 return -ENOMEM;
506
507 fill_up_crash_elf_data(ced, image);
508
509 /* By default prepare 64bit headers */
510 ret = prepare_elf64_headers(ced, addr, sz);
511 kfree(ced);
512 return ret;
513}
514
515static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
516{
517 unsigned int nr_e820_entries;
518
519 nr_e820_entries = params->e820_entries;
520 if (nr_e820_entries >= E820MAX)
521 return 1;
522
523 memcpy(&params->e820_map[nr_e820_entries], entry,
524 sizeof(struct e820entry));
525 params->e820_entries++;
526 return 0;
527}
528
529static int memmap_entry_callback(u64 start, u64 end, void *arg)
530{
531 struct crash_memmap_data *cmd = arg;
532 struct boot_params *params = cmd->params;
533 struct e820entry ei;
534
535 ei.addr = start;
536 ei.size = end - start + 1;
537 ei.type = cmd->type;
538 add_e820_entry(params, &ei);
539
540 return 0;
541}
542
543static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
544 unsigned long long mstart,
545 unsigned long long mend)
546{
547 unsigned long start, end;
548 int ret = 0;
549
550 cmem->ranges[0].start = mstart;
551 cmem->ranges[0].end = mend;
552 cmem->nr_ranges = 1;
553
554 /* Exclude Backup region */
555 start = image->arch.backup_load_addr;
556 end = start + image->arch.backup_src_sz - 1;
557 ret = exclude_mem_range(cmem, start, end);
558 if (ret)
559 return ret;
560
561 /* Exclude elf header region */
562 start = image->arch.elf_load_addr;
563 end = start + image->arch.elf_headers_sz - 1;
564 return exclude_mem_range(cmem, start, end);
565}
566
567/* Prepare memory map for crash dump kernel */
568int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
569{
570 int i, ret = 0;
571 unsigned long flags;
572 struct e820entry ei;
573 struct crash_memmap_data cmd;
574 struct crash_mem *cmem;
575
576 cmem = vzalloc(sizeof(struct crash_mem));
577 if (!cmem)
578 return -ENOMEM;
579
580 memset(&cmd, 0, sizeof(struct crash_memmap_data));
581 cmd.params = params;
582
583 /* Add first 640K segment */
584 ei.addr = image->arch.backup_src_start;
585 ei.size = image->arch.backup_src_sz;
586 ei.type = E820_RAM;
587 add_e820_entry(params, &ei);
588
589 /* Add ACPI tables */
590 cmd.type = E820_ACPI;
591 flags = IORESOURCE_MEM | IORESOURCE_BUSY;
592 walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
593 memmap_entry_callback);
594
595 /* Add ACPI Non-volatile Storage */
596 cmd.type = E820_NVS;
597 walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
598 memmap_entry_callback);
599
600 /* Add crashk_low_res region */
601 if (crashk_low_res.end) {
602 ei.addr = crashk_low_res.start;
603 ei.size = crashk_low_res.end - crashk_low_res.start + 1;
604 ei.type = E820_RAM;
605 add_e820_entry(params, &ei);
606 }
607
608 /* Exclude some ranges from crashk_res and add rest to memmap */
609 ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
610 crashk_res.end);
611 if (ret)
612 goto out;
613
614 for (i = 0; i < cmem->nr_ranges; i++) {
615 ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
616
617 /* If entry is less than a page, skip it */
618 if (ei.size < PAGE_SIZE)
619 continue;
620 ei.addr = cmem->ranges[i].start;
621 ei.type = E820_RAM;
622 add_e820_entry(params, &ei);
623 }
624
625out:
626 vfree(cmem);
627 return ret;
628}
629
630static int determine_backup_region(u64 start, u64 end, void *arg)
631{
632 struct kimage *image = arg;
633
634 image->arch.backup_src_start = start;
635 image->arch.backup_src_sz = end - start + 1;
636
637 /* Expecting only one range for backup region */
638 return 1;
639}
640
641int crash_load_segments(struct kimage *image)
642{
643 unsigned long src_start, src_sz, elf_sz;
644 void *elf_addr;
645 int ret;
646
647 /*
648 * Determine and load a segment for the backup area. The first 640K
649 * RAM region is the backup source.
650 */
651
652 ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
653 image, determine_backup_region);
654
655 /* Zero or positive return values are ok */
656 if (ret < 0)
657 return ret;
658
659 src_start = image->arch.backup_src_start;
660 src_sz = image->arch.backup_src_sz;
661
662 /* Add backup segment. */
663 if (src_sz) {
664 /*
665 * Ideally there is no source for the backup segment. It is
666 * copied in purgatory after a crash. Just add a zero-filled
667 * segment for now to make sure the checksum logic works fine.
668 */
669 ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
670 sizeof(crash_zero_bytes), src_sz,
671 PAGE_SIZE, 0, -1, 0,
672 &image->arch.backup_load_addr);
673 if (ret)
674 return ret;
675 pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
676 image->arch.backup_load_addr, src_start, src_sz);
677 }
678
679 /* Prepare elf headers and add a segment */
680 ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
681 if (ret)
682 return ret;
683
684 image->arch.elf_headers = elf_addr;
685 image->arch.elf_headers_sz = elf_sz;
686
687 ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
688 ELF_CORE_HEADER_ALIGN, 0, -1, 0,
689 &image->arch.elf_load_addr);
690 if (ret) {
691 vfree((void *)image->arch.elf_headers);
692 return ret;
693 }
694 pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
695 image->arch.elf_load_addr, elf_sz, elf_sz);
696
697 return ret;
698}
699
700#endif /* CONFIG_X86_64 */
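
The crash.c code above leans entirely on exclude_mem_range() to punch holes (the crashkernel, GART, backup, and ELF-header regions) in a [start, end] range, collecting whatever survives in cmem->ranges[]. A minimal stand-alone sketch of that splitting rule may help; the struct and function names here are hypothetical, not the kernel's. A hole strictly inside a range yields two pieces, which is why the callers size their ranges[] arrays for splits:

    #include <stdio.h>

    struct range { unsigned long long start, end; };

    /*
     * Remove [estart, eend] from r, writing the surviving pieces to out.
     * Returns the number of pieces (0, 1, or 2) -- a hole in the middle
     * of a range splits it in two, mirroring what exclude_mem_range()
     * does to cmem->ranges[].
     */
    static int exclude(struct range r, unsigned long long estart,
                       unsigned long long eend, struct range *out)
    {
        int n = 0;

        if (eend < r.start || estart > r.end) {   /* no overlap */
            out[n++] = r;
            return n;
        }
        if (estart > r.start)                     /* piece below the hole */
            out[n++] = (struct range){ r.start, estart - 1 };
        if (eend < r.end)                         /* piece above the hole */
            out[n++] = (struct range){ eend + 1, r.end };
        return n;
    }

    int main(void)
    {
        struct range out[2];
        /* Exclude a crashkernel-like window from a RAM range */
        int n = exclude((struct range){ 0x100000, 0x7fffffff },
                        0x20000000, 0x2fffffff, out);

        for (int i = 0; i < n; i++)
            printf("0x%llx-0x%llx\n", out[i].start, out[i].end);
        return 0;
    }
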
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..9642b9b33655
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,553 @@
1/*
2 * Kexec bzImage loader
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 * Authors:
6 * Vivek Goyal <vgoyal@redhat.com>
7 *
8 * This source code is licensed under the GNU General Public License,
9 * Version 2. See the file COPYING for more details.
10 */
11
12#define pr_fmt(fmt) "kexec-bzImage64: " fmt
13
14#include <linux/string.h>
15#include <linux/printk.h>
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/kexec.h>
19#include <linux/kernel.h>
20#include <linux/mm.h>
21#include <linux/efi.h>
22#include <linux/verify_pefile.h>
23#include <keys/system_keyring.h>
24
25#include <asm/bootparam.h>
26#include <asm/setup.h>
27#include <asm/crash.h>
28#include <asm/efi.h>
29
30#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
31
32/*
33 * Defines lowest physical address for various segments. Not sure where
34 * exactly these limits came from. Current bzimage64 loader in kexec-tools
35 * uses these, so I am retaining them. They can be changed over time as we
36 * gain more insight.
37 */
38#define MIN_PURGATORY_ADDR 0x3000
39#define MIN_BOOTPARAM_ADDR 0x3000
40#define MIN_KERNEL_LOAD_ADDR 0x100000
41#define MIN_INITRD_LOAD_ADDR 0x1000000
42
43/*
44 * This is a placeholder for all bootloader-specific data structures which
45 * get allocated in one call but are freed much later, during cleanup
46 * time. Right now there is only one field, but it can grow as need be.
47 */
48struct bzimage64_data {
49 /*
50 * Temporary buffer to hold bootparams buffer. This should be
51 * freed once the bootparam segment has been loaded.
52 */
53 void *bootparams_buf;
54};
55
56static int setup_initrd(struct boot_params *params,
57 unsigned long initrd_load_addr, unsigned long initrd_len)
58{
59 params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
60 params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
61
62 params->ext_ramdisk_image = initrd_load_addr >> 32;
63 params->ext_ramdisk_size = initrd_len >> 32;
64
65 return 0;
66}
67
68static int setup_cmdline(struct kimage *image, struct boot_params *params,
69 unsigned long bootparams_load_addr,
70 unsigned long cmdline_offset, char *cmdline,
71 unsigned long cmdline_len)
72{
73 char *cmdline_ptr = ((char *)params) + cmdline_offset;
74 unsigned long cmdline_ptr_phys, len;
75 uint32_t cmdline_low_32, cmdline_ext_32;
76
77 memcpy(cmdline_ptr, cmdline, cmdline_len);
78 if (image->type == KEXEC_TYPE_CRASH) {
79 len = sprintf(cmdline_ptr + cmdline_len - 1,
80 " elfcorehdr=0x%lx", image->arch.elf_load_addr);
81 cmdline_len += len;
82 }
83 cmdline_ptr[cmdline_len - 1] = '\0';
84
85 pr_debug("Final command line is: %s\n", cmdline_ptr);
86 cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
87 cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
88 cmdline_ext_32 = cmdline_ptr_phys >> 32;
89
90 params->hdr.cmd_line_ptr = cmdline_low_32;
91 if (cmdline_ext_32)
92 params->ext_cmd_line_ptr = cmdline_ext_32;
93
94 return 0;
95}
96
97static int setup_e820_entries(struct boot_params *params)
98{
99 unsigned int nr_e820_entries;
100
101 nr_e820_entries = e820_saved.nr_map;
102
103 /* TODO: Pass entries more than E820MAX in bootparams setup data */
104 if (nr_e820_entries > E820MAX)
105 nr_e820_entries = E820MAX;
106
107 params->e820_entries = nr_e820_entries;
108 memcpy(&params->e820_map, &e820_saved.map,
109 nr_e820_entries * sizeof(struct e820entry));
110
111 return 0;
112}
113
114#ifdef CONFIG_EFI
115static int setup_efi_info_memmap(struct boot_params *params,
116 unsigned long params_load_addr,
117 unsigned int efi_map_offset,
118 unsigned int efi_map_sz)
119{
120 void *efi_map = (void *)params + efi_map_offset;
121 unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
122 struct efi_info *ei = &params->efi_info;
123
124 if (!efi_map_sz)
125 return 0;
126
127 efi_runtime_map_copy(efi_map, efi_map_sz);
128
129 ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
130 ei->efi_memmap_hi = efi_map_phys_addr >> 32;
131 ei->efi_memmap_size = efi_map_sz;
132
133 return 0;
134}
135
136static int
137prepare_add_efi_setup_data(struct boot_params *params,
138 unsigned long params_load_addr,
139 unsigned int efi_setup_data_offset)
140{
141 unsigned long setup_data_phys;
142 struct setup_data *sd = (void *)params + efi_setup_data_offset;
143 struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
144
145 esd->fw_vendor = efi.fw_vendor;
146 esd->runtime = efi.runtime;
147 esd->tables = efi.config_table;
148 esd->smbios = efi.smbios;
149
150 sd->type = SETUP_EFI;
151 sd->len = sizeof(struct efi_setup_data);
152
153 /* Add setup data */
154 setup_data_phys = params_load_addr + efi_setup_data_offset;
155 sd->next = params->hdr.setup_data;
156 params->hdr.setup_data = setup_data_phys;
157
158 return 0;
159}
160
161static int
162setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
163 unsigned int efi_map_offset, unsigned int efi_map_sz,
164 unsigned int efi_setup_data_offset)
165{
166 struct efi_info *current_ei = &boot_params.efi_info;
167 struct efi_info *ei = &params->efi_info;
168
169 if (!current_ei->efi_memmap_size)
170 return 0;
171
172 /*
173 * If 1:1 mapping is not enabled, the second kernel cannot set up EFI
174 * or use EFI runtime services. User space will have to pass
175 * acpi_rsdp=<addr> on the kernel command line to make the second
176 * kernel boot without EFI.
177 */
178 if (efi_enabled(EFI_OLD_MEMMAP))
179 return 0;
180
181 ei->efi_loader_signature = current_ei->efi_loader_signature;
182 ei->efi_systab = current_ei->efi_systab;
183 ei->efi_systab_hi = current_ei->efi_systab_hi;
184
185 ei->efi_memdesc_version = current_ei->efi_memdesc_version;
186 ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
187
188 setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
189 efi_map_sz);
190 prepare_add_efi_setup_data(params, params_load_addr,
191 efi_setup_data_offset);
192 return 0;
193}
194#endif /* CONFIG_EFI */
195
196static int
197setup_boot_parameters(struct kimage *image, struct boot_params *params,
198 unsigned long params_load_addr,
199 unsigned int efi_map_offset, unsigned int efi_map_sz,
200 unsigned int efi_setup_data_offset)
201{
202 unsigned int nr_e820_entries;
203 unsigned long long mem_k, start, end;
204 int i, ret = 0;
205
206 /* Get subarch from existing bootparams */
207 params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
208
209 /* Copying screen_info will do? */
210 memcpy(&params->screen_info, &boot_params.screen_info,
211 sizeof(struct screen_info));
212
213 /* Fill in memsize later */
214 params->screen_info.ext_mem_k = 0;
215 params->alt_mem_k = 0;
216
217 /* Default APM info */
218 memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
219
220 /* Default drive info */
221 memset(&params->hd0_info, 0, sizeof(params->hd0_info));
222 memset(&params->hd1_info, 0, sizeof(params->hd1_info));
223
224 /* Default sysdesc table */
225 params->sys_desc_table.length = 0;
226
227 if (image->type == KEXEC_TYPE_CRASH) {
228 ret = crash_setup_memmap_entries(image, params);
229 if (ret)
230 return ret;
231 } else
232 setup_e820_entries(params);
233
234 nr_e820_entries = params->e820_entries;
235
236 for (i = 0; i < nr_e820_entries; i++) {
237 if (params->e820_map[i].type != E820_RAM)
238 continue;
239 start = params->e820_map[i].addr;
240 end = params->e820_map[i].addr + params->e820_map[i].size - 1;
241
242 if ((start <= 0x100000) && end > 0x100000) {
243 mem_k = (end >> 10) - (0x100000 >> 10);
244 params->screen_info.ext_mem_k = mem_k;
245 params->alt_mem_k = mem_k;
246 if (mem_k > 0xfc00)
247 params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
248 if (mem_k > 0xffffffff)
249 params->alt_mem_k = 0xffffffff;
250 }
251 }
252
253#ifdef CONFIG_EFI
254 /* Setup EFI state */
255 setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
256 efi_setup_data_offset);
257#endif
258
259 /* Setup EDD info */
260 memcpy(params->eddbuf, boot_params.eddbuf,
261 EDDMAXNR * sizeof(struct edd_info));
262 params->eddbuf_entries = boot_params.eddbuf_entries;
263
264 memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
265 EDD_MBR_SIG_MAX * sizeof(unsigned int));
266
267 return ret;
268}
269
270int bzImage64_probe(const char *buf, unsigned long len)
271{
272 int ret = -ENOEXEC;
273 struct setup_header *header;
274
275 /* kernel should be at least two sectors long */
276 if (len < 2 * 512) {
277 pr_err("File is too short to be a bzImage\n");
278 return ret;
279 }
280
281 header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
282 if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
283 pr_err("Not a bzImage\n");
284 return ret;
285 }
286
287 if (header->boot_flag != 0xAA55) {
288 pr_err("No x86 boot sector present\n");
289 return ret;
290 }
291
292 if (header->version < 0x020C) {
293 pr_err("Must be at least protocol version 2.12\n");
294 return ret;
295 }
296
297 if (!(header->loadflags & LOADED_HIGH)) {
298 pr_err("zImage not a bzImage\n");
299 return ret;
300 }
301
302 if (!(header->xloadflags & XLF_KERNEL_64)) {
303 pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
304 return ret;
305 }
306
307 if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
308 pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
309 return ret;
310 }
311
312 /*
313 * Can't handle 32-bit EFI as it does not allow loading the kernel
314 * above 4G. This should be handled by a 32-bit bzImage loader.
315 */
316 if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
317 pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
318 return ret;
319 }
320
321 /* I've got a bzImage */
322 pr_debug("It's a relocatable bzImage64\n");
323 ret = 0;
324
325 return ret;
326}
327
328void *bzImage64_load(struct kimage *image, char *kernel,
329 unsigned long kernel_len, char *initrd,
330 unsigned long initrd_len, char *cmdline,
331 unsigned long cmdline_len)
332{
333
334 struct setup_header *header;
335 int setup_sects, kern16_size, ret = 0;
336 unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
337 struct boot_params *params;
338 unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
339 unsigned long purgatory_load_addr;
340 unsigned long kernel_bufsz, kernel_memsz, kernel_align;
341 char *kernel_buf;
342 struct bzimage64_data *ldata;
343 struct kexec_entry64_regs regs64;
344 void *stack;
345 unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
346 unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
347
348 header = (struct setup_header *)(kernel + setup_hdr_offset);
349 setup_sects = header->setup_sects;
350 if (setup_sects == 0)
351 setup_sects = 4;
352
353 kern16_size = (setup_sects + 1) * 512;
354 if (kernel_len < kern16_size) {
355 pr_err("bzImage truncated\n");
356 return ERR_PTR(-ENOEXEC);
357 }
358
359 if (cmdline_len > header->cmdline_size) {
360 pr_err("Kernel command line too long\n");
361 return ERR_PTR(-EINVAL);
362 }
363
364 /*
365 * In case of a crash dump, we will append elfcorehdr=<addr> to the
366 * command line. Make sure it does not overflow.
367 */
368 if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
369 pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
370 return ERR_PTR(-EINVAL);
371 }
372
373 /* Allocate and load backup region */
374 if (image->type == KEXEC_TYPE_CRASH) {
375 ret = crash_load_segments(image);
376 if (ret)
377 return ERR_PTR(ret);
378 }
379
380 /*
381 * Load purgatory. For 64bit entry point, purgatory code can be
382 * anywhere.
383 */
384 ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
385 &purgatory_load_addr);
386 if (ret) {
387 pr_err("Loading purgatory failed\n");
388 return ERR_PTR(ret);
389 }
390
391 pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
392
393
394 /*
395 * Load bootparams, cmdline, and space for EFI data.
396 *
397 * Allocate memory together for multiple data structures so
398 * that they can all go in a single area/segment and we don't
399 * have to create a separate segment for each. Keeps things a
400 * little bit simpler.
401 */
402 efi_map_sz = efi_get_runtime_map_size();
403 efi_map_sz = ALIGN(efi_map_sz, 16);
404 params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
405 MAX_ELFCOREHDR_STR_LEN;
406 params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
407 params_misc_sz = params_cmdline_sz + efi_map_sz +
408 sizeof(struct setup_data) +
409 sizeof(struct efi_setup_data);
410
411 params = kzalloc(params_misc_sz, GFP_KERNEL);
412 if (!params)
413 return ERR_PTR(-ENOMEM);
414 efi_map_offset = params_cmdline_sz;
415 efi_setup_data_offset = efi_map_offset + efi_map_sz;
416
417 /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
418 setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
419
420 /* Is there a limit on setup header size? */
421 memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
422
423 ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
424 params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
425 ULONG_MAX, 1, &bootparam_load_addr);
426 if (ret)
427 goto out_free_params;
428 pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
429 bootparam_load_addr, params_misc_sz, params_misc_sz);
430
431 /* Load kernel */
432 kernel_buf = kernel + kern16_size;
433 kernel_bufsz = kernel_len - kern16_size;
434 kernel_memsz = PAGE_ALIGN(header->init_size);
435 kernel_align = header->kernel_alignment;
436
437 ret = kexec_add_buffer(image, kernel_buf,
438 kernel_bufsz, kernel_memsz, kernel_align,
439 MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
440 &kernel_load_addr);
441 if (ret)
442 goto out_free_params;
443
444 pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
445 kernel_load_addr, kernel_memsz, kernel_memsz);
446
447 /* Load initrd high */
448 if (initrd) {
449 ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
450 PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
451 ULONG_MAX, 1, &initrd_load_addr);
452 if (ret)
453 goto out_free_params;
454
455 pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
456 initrd_load_addr, initrd_len, initrd_len);
457
458 setup_initrd(params, initrd_load_addr, initrd_len);
459 }
460
461 setup_cmdline(image, params, bootparam_load_addr,
462 sizeof(struct boot_params), cmdline, cmdline_len);
463
464 /* bootloader info. Do we need a separate ID for kexec kernel loader? */
465 params->hdr.type_of_loader = 0x0D << 4;
466 params->hdr.loadflags = 0;
467
468 /* Setup purgatory regs for entry */
469 ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
470 sizeof(regs64), 1);
471 if (ret)
472 goto out_free_params;
473
474 regs64.rbx = 0; /* Bootstrap Processor */
475 regs64.rsi = bootparam_load_addr;
476 regs64.rip = kernel_load_addr + 0x200;
477 stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
478 if (IS_ERR(stack)) {
479 pr_err("Could not find address of symbol stack_end\n");
480 ret = -EINVAL;
481 goto out_free_params;
482 }
483
484 regs64.rsp = (unsigned long)stack;
485 ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
486 sizeof(regs64), 0);
487 if (ret)
488 goto out_free_params;
489
490 ret = setup_boot_parameters(image, params, bootparam_load_addr,
491 efi_map_offset, efi_map_sz,
492 efi_setup_data_offset);
493 if (ret)
494 goto out_free_params;
495
496 /* Allocate loader specific data */
497 ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
498 if (!ldata) {
499 ret = -ENOMEM;
500 goto out_free_params;
501 }
502
503 /*
504 * Store a pointer to params so that it can be freed after the
505 * params segment has been loaded and its contents have been copied
506 * somewhere else.
507 */
508 ldata->bootparams_buf = params;
509 return ldata;
510
511out_free_params:
512 kfree(params);
513 return ERR_PTR(ret);
514}
515
516/* This cleanup function is called after various segments have been loaded */
517int bzImage64_cleanup(void *loader_data)
518{
519 struct bzimage64_data *ldata = loader_data;
520
521 if (!ldata)
522 return 0;
523
524 kfree(ldata->bootparams_buf);
525 ldata->bootparams_buf = NULL;
526
527 return 0;
528}
529
530#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
531int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
532{
533 bool trusted;
534 int ret;
535
536 ret = verify_pefile_signature(kernel, kernel_len,
537 system_trusted_keyring, &trusted);
538 if (ret < 0)
539 return ret;
540 if (!trusted)
541 return -EKEYREJECTED;
542 return 0;
543}
544#endif
545
546struct kexec_file_ops kexec_bzImage64_ops = {
547 .probe = bzImage64_probe,
548 .load = bzImage64_load,
549 .cleanup = bzImage64_cleanup,
550#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
551 .verify_sig = bzImage64_verify_sig,
552#endif
553};
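
For context, kexec_bzImage64_ops is the loader that ends up servicing the new kexec_file_load(2) syscall, which is wired up as number 320 in syscall_64.tbl further down. A hedged user-space sketch of loading a crash kernel through it; the file paths are placeholders, and the flag value is the one I believe the uapi kexec header of this era defines:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_kexec_file_load
    #define __NR_kexec_file_load 320   /* x86_64, per syscall_64.tbl below */
    #endif
    #define KEXEC_FILE_ON_CRASH 0x2    /* load into the crashkernel region */

    int main(void)
    {
        const char *cmdline = "root=/dev/sda1 maxcpus=1";
        int kernel_fd = open("/boot/vmlinuz", O_RDONLY);      /* placeholder path */
        int initrd_fd = open("/boot/initramfs.img", O_RDONLY); /* placeholder path */

        if (kernel_fd < 0 || initrd_fd < 0)
            return 1;

        /* cmdline_len must include the trailing NUL; setup_cmdline()
         * above writes the terminator at cmdline_ptr[cmdline_len - 1]. */
        if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                    strlen(cmdline) + 1, cmdline, KEXEC_FILE_ON_CRASH)) {
            perror("kexec_file_load");
            return 1;
        }
        return 0;
    }

Loading with KEXEC_FILE_ON_CRASH presumes a crashkernel= reservation exists, since crash_load_segments() places everything inside crashk_res.
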
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..8b04018e5d1f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
 * Version 2. See the file COPYING for more details.
 */

+#define pr_fmt(fmt) "kexec: " fmt
+
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/string.h>
@@ -21,6 +23,11 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+	&kexec_bzImage64_ops,
+};
 
 static void free_transition_pgtable(struct kimage *image)
 {
@@ -171,6 +178,38 @@ static void load_segments(void)
 	);
 }
 
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+	int ret = 0;
+
+	if (!image->file_mode)
+		return 0;
+
+	/* Setup copying of backup region */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+				&image->arch.backup_load_addr,
+				sizeof(image->arch.backup_load_addr), 0);
+		if (ret)
+			return ret;
+
+		ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+				&image->arch.backup_src_start,
+				sizeof(image->arch.backup_src_start), 0);
+		if (ret)
+			return ret;
+
+		ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+				&image->arch.backup_src_sz,
+				sizeof(image->arch.backup_src_sz), 0);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
 int machine_kexec_prepare(struct kimage *image)
 {
 	unsigned long start_pgtable;
@@ -184,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
 	if (result)
 		return result;
 
+	/* update purgatory as needed */
+	result = arch_update_purgatory(image);
+	if (result)
+		return result;
+
 	return 0;
 }
 
@@ -283,3 +327,198 @@ void arch_crash_save_vmcoreinfo(void)
 			(unsigned long)&_text - __START_KERNEL);
 }
 
330/* arch-dependent functionality related to kexec file-based syscall */
331
332int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
333 unsigned long buf_len)
334{
335 int i, ret = -ENOEXEC;
336 struct kexec_file_ops *fops;
337
338 for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
339 fops = kexec_file_loaders[i];
340 if (!fops || !fops->probe)
341 continue;
342
343 ret = fops->probe(buf, buf_len);
344 if (!ret) {
345 image->fops = fops;
346 return ret;
347 }
348 }
349
350 return ret;
351}
352
353void *arch_kexec_kernel_image_load(struct kimage *image)
354{
355 vfree(image->arch.elf_headers);
356 image->arch.elf_headers = NULL;
357
358 if (!image->fops || !image->fops->load)
359 return ERR_PTR(-ENOEXEC);
360
361 return image->fops->load(image, image->kernel_buf,
362 image->kernel_buf_len, image->initrd_buf,
363 image->initrd_buf_len, image->cmdline_buf,
364 image->cmdline_buf_len);
365}
366
367int arch_kimage_file_post_load_cleanup(struct kimage *image)
368{
369 if (!image->fops || !image->fops->cleanup)
370 return 0;
371
372 return image->fops->cleanup(image->image_loader_data);
373}
374
375int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
376 unsigned long kernel_len)
377{
378 if (!image->fops || !image->fops->verify_sig) {
379 pr_debug("kernel loader does not support signature verification.");
380 return -EKEYREJECTED;
381 }
382
383 return image->fops->verify_sig(kernel, kernel_len);
384}
385
386/*
387 * Apply purgatory relocations.
388 *
389 * ehdr: Pointer to elf headers
390 * sechdrs: Pointer to section headers.
391 * relsec: section index of SHT_RELA section.
392 *
393 * TODO: Some of the code belongs to generic code. Move that in kexec.c.
394 */
395int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
396 Elf64_Shdr *sechdrs, unsigned int relsec)
397{
398 unsigned int i;
399 Elf64_Rela *rel;
400 Elf64_Sym *sym;
401 void *location;
402 Elf64_Shdr *section, *symtabsec;
403 unsigned long address, sec_base, value;
404 const char *strtab, *name, *shstrtab;
405
406 /*
407 * ->sh_offset has been modified to keep the pointer to section
408 * contents in memory
409 */
410 rel = (void *)sechdrs[relsec].sh_offset;
411
412 /* Section to which relocations apply */
413 section = &sechdrs[sechdrs[relsec].sh_info];
414
415 pr_debug("Applying relocate section %u to %u\n", relsec,
416 sechdrs[relsec].sh_info);
417
418 /* Associated symbol table */
419 symtabsec = &sechdrs[sechdrs[relsec].sh_link];
420
421 /* String table */
422 if (symtabsec->sh_link >= ehdr->e_shnum) {
423 /* Invalid strtab section number */
424 pr_err("Invalid string table section index %d\n",
425 symtabsec->sh_link);
426 return -ENOEXEC;
427 }
428
429 strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
430
431 /* section header string table */
432 shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
433
434 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
435
436 /*
437 * rel[i].r_offset contains byte offset from beginning
438 * of section to the storage unit affected.
439 *
440 * This is location to update (->sh_offset). This is temporary
441 * buffer where section is currently loaded. This will finally
442 * be loaded to a different address later, pointed to by
443 * ->sh_addr. kexec takes care of moving it
444 * (kexec_load_segment()).
445 */
446 location = (void *)(section->sh_offset + rel[i].r_offset);
447
448 /* Final address of the location */
449 address = section->sh_addr + rel[i].r_offset;
450
451 /*
452 * rel[i].r_info contains information about symbol table index
453 * w.r.t which relocation must be made and type of relocation
454 * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
455 * these respectively.
456 */
457 sym = (Elf64_Sym *)symtabsec->sh_offset +
458 ELF64_R_SYM(rel[i].r_info);
459
460 if (sym->st_name)
461 name = strtab + sym->st_name;
462 else
463 name = shstrtab + sechdrs[sym->st_shndx].sh_name;
464
465 pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
466 name, sym->st_info, sym->st_shndx, sym->st_value,
467 sym->st_size);
468
469 if (sym->st_shndx == SHN_UNDEF) {
470 pr_err("Undefined symbol: %s\n", name);
471 return -ENOEXEC;
472 }
473
474 if (sym->st_shndx == SHN_COMMON) {
475 pr_err("symbol '%s' in common section\n", name);
476 return -ENOEXEC;
477 }
478
479 if (sym->st_shndx == SHN_ABS)
480 sec_base = 0;
481 else if (sym->st_shndx >= ehdr->e_shnum) {
482 pr_err("Invalid section %d for symbol %s\n",
483 sym->st_shndx, name);
484 return -ENOEXEC;
485 } else
486 sec_base = sechdrs[sym->st_shndx].sh_addr;
487
488 value = sym->st_value;
489 value += sec_base;
490 value += rel[i].r_addend;
491
492 switch (ELF64_R_TYPE(rel[i].r_info)) {
493 case R_X86_64_NONE:
494 break;
495 case R_X86_64_64:
496 *(u64 *)location = value;
497 break;
498 case R_X86_64_32:
499 *(u32 *)location = value;
500 if (value != *(u32 *)location)
501 goto overflow;
502 break;
503 case R_X86_64_32S:
504 *(s32 *)location = value;
505 if ((s64)value != *(s32 *)location)
506 goto overflow;
507 break;
508 case R_X86_64_PC32:
509 value -= (u64)address;
510 *(u32 *)location = value;
511 break;
512 default:
513 pr_err("Unknown rela relocation: %llu\n",
514 ELF64_R_TYPE(rel[i].r_info));
515 return -ENOEXEC;
516 }
517 }
518 return 0;
519
520overflow:
521 pr_err("Overflow in relocation type %d value 0x%lx\n",
522 (int)ELF64_R_TYPE(rel[i].r_info), value);
523 return -ENOEXEC;
524}
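
The relocation switch above implements the standard x86_64 RELA semantics: the resolved value is S + A (symbol value, here already folded with its section base, plus the addend), with P (the final address of the patched location) subtracted for PC-relative types. A small stand-alone sketch of just that arithmetic; the 32-bit absolute cases additionally need the range checks that the goto overflow paths above provide:

    #include <stdint.h>

    /* S + A: absolute relocations (R_X86_64_64, R_X86_64_32, R_X86_64_32S) */
    static uint64_t reloc_abs(uint64_t sym_value, uint64_t sec_base,
                              int64_t addend)
    {
        return sym_value + sec_base + addend;
    }

    /* S + A - P: PC-relative relocation (R_X86_64_PC32), where "place"
     * is the final runtime address of the storage unit being patched */
    static int32_t reloc_pc32(uint64_t sym_value, uint64_t sec_base,
                              int64_t addend, uint64_t place)
    {
        return (int32_t)(sym_value + sec_base + addend - place);
    }

Note the code writes through ->sh_offset (where the section currently sits) while computing addresses from ->sh_addr (where it will finally run); conflating the two would produce relocations that only work in the staging buffer.
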
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1185fe7a7f47..9ade5cfb5a4c 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -273,7 +273,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
 	int ret;
 	unsigned long enable;
 
-	ret = strict_strtoul(val, 10, &enable);
+	ret = kstrtoul(val, 10, &enable);
 	if (ret < 0)
 		return -EINVAL;
 
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ed161c6e278b..3968d67d366b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1479,7 +1479,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 		return count;
 	}
 
-	if (strict_strtol(optstr, 10, &input_arg) < 0) {
+	if (kstrtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
 	}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
new file mode 100644
index 000000000000..7fde9ee438a4
--- /dev/null
+++ b/arch/x86/purgatory/Makefile
@@ -0,0 +1,30 @@
1purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o
2
3targets += $(purgatory-y)
4PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
5
6LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
7targets += purgatory.ro
8
9# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
10# in turn leaves some undefined symbols like __fentry__ in purgatory and not
11# sure how to relocate those. Like kexec-tools, use custom flags.
12
13KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
14
15$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
16 $(call if_changed,ld)
17
18targets += kexec-purgatory.c
19
20quiet_cmd_bin2c = BIN2C $@
21 cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c
22
23$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
24 $(call if_changed,bin2c)
25
26
27# No loaders for 32bits yet.
28ifeq ($(CONFIG_X86_64),y)
29 obj-$(CONFIG_KEXEC) += kexec-purgatory.o
30endif
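
The bin2c step above converts the linked purgatory.ro blob into a C array named kexec_purgatory that is compiled into the kernel image. The generated kexec-purgatory.c is roughly of this shape (an illustrative sketch of the expected output, not the literal tool result; the byte values shown are just the ELF magic):

    #include <stddef.h>

    /* Generated from purgatory.ro; the real contents are the entire
     * ELF relocatable image, escaped as a string literal. */
    const char kexec_purgatory[] =
        "\x7f\x45\x4c\x46\x02\x01\x01\x00";
    const size_t kexec_purgatory_size = sizeof(kexec_purgatory) - 1;

The kexec file-load machinery then parses this embedded ELF, applies the relocations via arch_kexec_apply_relocations_add(), and copies it into the reserved segment.
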
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
new file mode 100644
index 000000000000..d1a4291d3568
--- /dev/null
+++ b/arch/x86/purgatory/entry64.S
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
3 * Copyright (C) 2014 Red Hat Inc.
4
5 * Author(s): Vivek Goyal <vgoyal@redhat.com>
6 *
7 * This code has been taken from kexec-tools.
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13 .text
14 .balign 16
15 .code64
16 .globl entry64, entry64_regs
17
18
19entry64:
20 /* Setup a gdt that should be preserved */
21 lgdt gdt(%rip)
22
23 /* load the data segments */
24 movl $0x18, %eax /* data segment */
25 movl %eax, %ds
26 movl %eax, %es
27 movl %eax, %ss
28 movl %eax, %fs
29 movl %eax, %gs
30
31 /* Setup new stack */
32 leaq stack_init(%rip), %rsp
33 pushq $0x10 /* CS */
34 leaq new_cs_exit(%rip), %rax
35 pushq %rax
36 lretq
37new_cs_exit:
38
39 /* Load the registers */
40 movq rax(%rip), %rax
41 movq rbx(%rip), %rbx
42 movq rcx(%rip), %rcx
43 movq rdx(%rip), %rdx
44 movq rsi(%rip), %rsi
45 movq rdi(%rip), %rdi
46 movq rsp(%rip), %rsp
47 movq rbp(%rip), %rbp
48 movq r8(%rip), %r8
49 movq r9(%rip), %r9
50 movq r10(%rip), %r10
51 movq r11(%rip), %r11
52 movq r12(%rip), %r12
53 movq r13(%rip), %r13
54 movq r14(%rip), %r14
55 movq r15(%rip), %r15
56
57 /* Jump to the new code... */
58 jmpq *rip(%rip)
59
60 .section ".rodata"
61 .balign 4
62entry64_regs:
63rax: .quad 0x0
64rcx: .quad 0x0
65rdx: .quad 0x0
66rbx: .quad 0x0
67rsp: .quad 0x0
68rbp: .quad 0x0
69rsi: .quad 0x0
70rdi: .quad 0x0
71r8: .quad 0x0
72r9: .quad 0x0
73r10: .quad 0x0
74r11: .quad 0x0
75r12: .quad 0x0
76r13: .quad 0x0
77r14: .quad 0x0
78r15: .quad 0x0
79rip: .quad 0x0
80 .size entry64_regs, . - entry64_regs
81
82 /* GDT */
83 .section ".rodata"
84 .balign 16
85gdt:
86 /* 0x00 unusable segment
87 * 0x08 unused
88 * so use them as gdt ptr
89 */
90 .word gdt_end - gdt - 1
91 .quad gdt
92 .word 0, 0, 0
93
94 /* 0x10 4GB flat code segment */
95 .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
96
97 /* 0x18 4GB flat data segment */
98 .word 0xFFFF, 0x0000, 0x9200, 0x00CF
99gdt_end:
100stack: .quad 0, 0
101stack_init:
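
entry64_regs is a bare block of quadwords that the loader fills in by symbol name (see the kexec_purgatory_get_set_symbol(image, "entry64_regs", ...) calls in kexec-bzimage64.c above), so the C side needs a struct whose field order matches this .rodata layout exactly. A sketch of the matching declaration, assuming the definition in asm/kexec.h mirrors the order used here:

    #include <stdint.h>

    /* Must match the rax..rip quadword layout of entry64_regs above */
    struct kexec_entry64_regs {
        uint64_t rax, rcx, rdx, rbx;
        uint64_t rsp, rbp, rsi, rdi;
        uint64_t r8, r9, r10, r11;
        uint64_t r12, r13, r14, r15;
        uint64_t rip;
    };

bzImage64_load() only needs to set rbx (0 for the bootstrap processor), rsi (the boot_params address), rsp (purgatory's stack_end), and rip (the 64-bit kernel entry at load address + 0x200); the rest stay zero.
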
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
new file mode 100644
index 000000000000..25e068ba3382
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.c
@@ -0,0 +1,72 @@
1/*
2 * purgatory: Runs between two kernels
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 *
6 * Author:
7 * Vivek Goyal <vgoyal@redhat.com>
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13#include "sha256.h"
14#include "../boot/string.h"
15
16struct sha_region {
17 unsigned long start;
18 unsigned long len;
19};
20
21unsigned long backup_dest = 0;
22unsigned long backup_src = 0;
23unsigned long backup_sz = 0;
24
25u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
26
27struct sha_region sha_regions[16] = {};
28
29/*
30 * On x86, the second kernel requires the first 640K of memory to boot.
31 * Copy the first 640K to a backup region in the reserved memory range so
32 * that the second kernel can use the first 640K.
33 */
34static int copy_backup_region(void)
35{
36 if (backup_dest)
37 memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
38
39 return 0;
40}
41
42int verify_sha256_digest(void)
43{
44 struct sha_region *ptr, *end;
45 u8 digest[SHA256_DIGEST_SIZE];
46 struct sha256_state sctx;
47
48 sha256_init(&sctx);
49 end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
50 for (ptr = sha_regions; ptr < end; ptr++)
51 sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
52
53 sha256_final(&sctx, digest);
54
55 if (memcmp(digest, sha256_digest, sizeof(digest)))
56 return 1;
57
58 return 0;
59}
60
61void purgatory(void)
62{
63 int ret;
64
65 ret = verify_sha256_digest();
66 if (ret) {
67 /* loop forever */
68 for (;;)
69 ;
70 }
71 copy_backup_region();
72}
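
sha256_digest and sha_regions are deliberately global (and zero-initialized) so that the first kernel can fill them in by name before the crash, the same way arch_update_purgatory() above seeds backup_dest/backup_src/backup_sz. A hedged sketch of the loader-side call, using the kexec_purgatory_get_set_symbol() helper this patch uses elsewhere; the wrapper name seed_purgatory_digest is hypothetical:

    /* Sketch (kernel context): how the first kernel would seed purgatory's
     * globals after hashing the loaded segments. The final 0 argument
     * means "set", as in arch_update_purgatory() above. */
    static int seed_purgatory_digest(struct kimage *image,
                                     const u8 *digest, size_t digest_sz,
                                     const struct sha_region *regions,
                                     size_t regions_sz)
    {
        int ret;

        ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
                                             (void *)digest, digest_sz, 0);
        if (ret)
            return ret;

        return kexec_purgatory_get_set_symbol(image, "sha_regions",
                                              (void *)regions, regions_sz, 0);
    }

With that in place, verify_sha256_digest() re-hashes the regions at crash time and purgatory() loops forever on a mismatch rather than jumping into a corrupted kernel.
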
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
new file mode 100644
index 000000000000..fe3c91ba1bd0
--- /dev/null
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -0,0 +1,58 @@
1/*
2 * purgatory: setup code
3 *
4 * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
5 * Copyright (C) 2014 Red Hat Inc.
6 *
7 * This code has been taken from kexec-tools.
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13 .text
14 .globl purgatory_start
15 .balign 16
16purgatory_start:
17 .code64
18
19 /* Load a gdt so I know what the segment registers are */
20 lgdt gdt(%rip)
21
22 /* load the data segments */
23 movl $0x18, %eax /* data segment */
24 movl %eax, %ds
25 movl %eax, %es
26 movl %eax, %ss
27 movl %eax, %fs
28 movl %eax, %gs
29
30 /* Setup a stack */
31 leaq lstack_end(%rip), %rsp
32
33 /* Call the C code */
34 call purgatory
35 jmp entry64
36
37 .section ".rodata"
38 .balign 16
39gdt: /* 0x00 unusable segment
40 * 0x08 unused
41 * so use them as the gdt ptr
42 */
43 .word gdt_end - gdt - 1
44 .quad gdt
45 .word 0, 0, 0
46
47 /* 0x10 4GB flat code segment */
48 .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
49
50 /* 0x18 4GB flat data segment */
51 .word 0xFFFF, 0x0000, 0x9200, 0x00CF
52gdt_end:
53
54 .bss
55 .balign 4096
56lstack:
57 .skip 4096
58lstack_end:
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
new file mode 100644
index 000000000000..548ca675a14a
--- /dev/null
+++ b/arch/x86/purgatory/sha256.c
@@ -0,0 +1,283 @@
1/*
2 * SHA-256, as specified in
3 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
4 *
5 * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
6 *
7 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
8 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
9 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
10 * Copyright (c) 2014 Red Hat Inc.
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 */
17
18#include <linux/bitops.h>
19#include <asm/byteorder.h>
20#include "sha256.h"
21#include "../boot/string.h"
22
23static inline u32 Ch(u32 x, u32 y, u32 z)
24{
25 return z ^ (x & (y ^ z));
26}
27
28static inline u32 Maj(u32 x, u32 y, u32 z)
29{
30 return (x & y) | (z & (x | y));
31}
32
33#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
34#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
35#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
36#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
37
38static inline void LOAD_OP(int I, u32 *W, const u8 *input)
39{
40 W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
41}
42
43static inline void BLEND_OP(int I, u32 *W)
44{
45 W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
46}
47
48static void sha256_transform(u32 *state, const u8 *input)
49{
50 u32 a, b, c, d, e, f, g, h, t1, t2;
51 u32 W[64];
52 int i;
53
54 /* load the input */
55 for (i = 0; i < 16; i++)
56 LOAD_OP(i, W, input);
57
58 /* now blend */
59 for (i = 16; i < 64; i++)
60 BLEND_OP(i, W);
61
62 /* load the state into our registers */
63 a = state[0]; b = state[1]; c = state[2]; d = state[3];
64 e = state[4]; f = state[5]; g = state[6]; h = state[7];
65
66 /* now iterate */
67 t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
68 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
69 t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
70 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
71 t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
72 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
73 t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
74 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
75 t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
76 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
77 t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
78 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
79 t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
80 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
81 t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
82 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
83
84 t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
85 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
86 t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
87 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
88 t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
89 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
90 t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
91 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
92 t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
93 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
94 t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
95 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
96 t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
97 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
98 t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
99 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
100
101 t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
102 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
103 t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
104 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
105 t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
106 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
107 t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
108 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
109 t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
110 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
111 t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
112 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
113 t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
114 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
115 t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
116 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
117
118 t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
119 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
120 t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
121 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
122 t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
123 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
124 t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
125 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
126 t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
127 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
128 t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
129 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
130 t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
131 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
132 t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
133 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
134
135 t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
136 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
137 t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
138 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
139 t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
140 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
141 t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
142 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
143 t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
144 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
145 t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
146 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
147 t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
148 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
149 t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
150 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
151
152 t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
153 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
154 t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
155 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
156 t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
157 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
158 t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
159 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
160 t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
161 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
162 t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
163 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
164 t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
165 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
166 t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
167 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
168
169 t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
170 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
171 t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
172 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
173 t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
174 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
175 t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
176 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
177 t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
178 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
179 t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
180 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
181 t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
182 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
183 t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
184 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
185
186 t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
187 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
188 t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
189 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
190 t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
191 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
192 t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
193 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
194 t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
195 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
196 t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
197 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
198 t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
199 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
200 t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
201 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
202
203 state[0] += a; state[1] += b; state[2] += c; state[3] += d;
204 state[4] += e; state[5] += f; state[6] += g; state[7] += h;
205
206 /* clear any sensitive info... */
207 a = b = c = d = e = f = g = h = t1 = t2 = 0;
208 memset(W, 0, 64 * sizeof(u32));
209}
210
211int sha256_init(struct sha256_state *sctx)
212{
213 sctx->state[0] = SHA256_H0;
214 sctx->state[1] = SHA256_H1;
215 sctx->state[2] = SHA256_H2;
216 sctx->state[3] = SHA256_H3;
217 sctx->state[4] = SHA256_H4;
218 sctx->state[5] = SHA256_H5;
219 sctx->state[6] = SHA256_H6;
220 sctx->state[7] = SHA256_H7;
221 sctx->count = 0;
222
223 return 0;
224}
225
226int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
227{
228 unsigned int partial, done;
229 const u8 *src;
230
231 partial = sctx->count & 0x3f;
232 sctx->count += len;
233 done = 0;
234 src = data;
235
236 if ((partial + len) > 63) {
237 if (partial) {
238 done = -partial;
239 memcpy(sctx->buf + partial, data, done + 64);
240 src = sctx->buf;
241 }
242
243 do {
244 sha256_transform(sctx->state, src);
245 done += 64;
246 src = data + done;
247 } while (done + 63 < len);
248
249 partial = 0;
250 }
251 memcpy(sctx->buf + partial, src, len - done);
252
253 return 0;
254}
255
256int sha256_final(struct sha256_state *sctx, u8 *out)
257{
258 __be32 *dst = (__be32 *)out;
259 __be64 bits;
260 unsigned int index, pad_len;
261 int i;
262 static const u8 padding[64] = { 0x80, };
263
264 /* Save number of bits */
265 bits = cpu_to_be64(sctx->count << 3);
266
267 /* Pad out to 56 mod 64. */
268 index = sctx->count & 0x3f;
269 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
270 sha256_update(sctx, padding, pad_len);
271
272 /* Append length (before padding) */
273 sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
274
275 /* Store state in digest */
276 for (i = 0; i < 8; i++)
277 dst[i] = cpu_to_be32(sctx->state[i]);
278
279 /* Zeroize sensitive information. */
280 memset(sctx, 0, sizeof(*sctx));
281
282 return 0;
283}
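
The init/update/final triple above follows the usual streaming-digest pattern: update() may be called any number of times over discontiguous buffers before final() pads, appends the bit length, and emits the digest. A minimal usage sketch against only the three functions declared in sha256.h:

    #include "sha256.h"

    /* Hash two discontiguous buffers in one stream, the way purgatory
     * feeds each sha_region to the same context. */
    static void digest_of(const u8 *a, unsigned int alen,
                          const u8 *b, unsigned int blen,
                          u8 out[SHA256_DIGEST_SIZE])
    {
        struct sha256_state sctx;

        sha256_init(&sctx);
        sha256_update(&sctx, a, alen);
        sha256_update(&sctx, b, blen);
        sha256_final(&sctx, out);
    }
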
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
new file mode 100644
index 000000000000..bd15a4127735
--- /dev/null
+++ b/arch/x86/purgatory/sha256.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) 2014 Red Hat Inc.
3 *
4 * Author: Vivek Goyal <vgoyal@redhat.com>
5 *
6 * This source code is licensed under the GNU General Public License,
7 * Version 2. See the file COPYING for more details.
8 */
9
10#ifndef SHA256_H
11#define SHA256_H
12
13
14#include <linux/types.h>
15#include <crypto/sha.h>
16
17extern int sha256_init(struct sha256_state *sctx);
18extern int sha256_update(struct sha256_state *sctx, const u8 *input,
19 unsigned int length);
20extern int sha256_final(struct sha256_state *sctx, u8 *hash);
21
22#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
new file mode 100644
index 000000000000..3cefba1fefc8
--- /dev/null
+++ b/arch/x86/purgatory/stack.S
@@ -0,0 +1,19 @@
1/*
2 * purgatory: stack
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 *
6 * This source code is licensed under the GNU General Public License,
7 * Version 2. See the file COPYING for more details.
8 */
9
10 /* A stack for the loaded kernel.
11 * Separate and in the data section so it can be prepopulated.
12 */
13 .data
14 .balign 4096
15 .globl stack, stack_end
16
17stack:
18 .skip 4096
19stack_end:
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
new file mode 100644
index 000000000000..d886b1fa36f0
--- /dev/null
+++ b/arch/x86/purgatory/string.c
@@ -0,0 +1,13 @@
1/*
2 * Simple string functions.
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 *
6 * Author:
7 * Vivek Goyal <vgoyal@redhat.com>
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13#include "../boot/string.c"
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d1b4a119d4a5..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -362,3 +362,4 @@
 353	i386	renameat2		sys_renameat2
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
+356	i386	memfd_create		sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 252c804bb1aa..35dd922727b9 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,6 +325,8 @@
 316	common	renameat2		sys_renameat2
 317	common	seccomp			sys_seccomp
 318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
+320	common	kexec_file_load		sys_kexec_file_load
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0feee2fd5077..25a1022dd793 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -216,6 +216,5 @@ extern long elf_aux_hwcap;
 #define ELF_HWCAP		(elf_aux_hwcap)
 
 #define SET_PERSONALITY(ex) do ; while(0)
-#define __HAVE_ARCH_GATE_AREA 1
 
 #endif
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index c6492e75797b..f8fecaddcc0d 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 	return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e4f7781ee162..e904c270573b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void)
 	return 0;
 }
 __initcall(ia32_binfmt_init);
-#endif
-
-#else /* CONFIG_X86_32 */
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
+#endif /* CONFIG_SYSCTL */
 
 #endif /* CONFIG_X86_64 */
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 06b62e5cdcc7..c9ee681d57fd 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
283 } 283 }
284 284
285 ret = req->avail_out - stream->avail_out; 285 ret = req->avail_out - stream->avail_out;
286 pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", 286 pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
287 stream->avail_in, stream->avail_out, 287 stream->avail_in, stream->avail_out,
288 req->avail_in - stream->avail_in, ret); 288 req->avail_in - stream->avail_in, ret);
289 req->next_in = stream->next_in; 289 req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
331 } 331 }
332 332
333 ret = req->avail_out - stream->avail_out; 333 ret = req->avail_out - stream->avail_out;
334 pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", 334 pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
335 stream->avail_in, stream->avail_out, 335 stream->avail_in, stream->avail_out,
336 req->avail_in - stream->avail_in, ret); 336 req->avail_in - stream->avail_in, ret);
337 req->next_in = stream->next_in; 337 req->next_in = stream->next_in;
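
Note: all four zlib hunks make the same format-string fix. In the kernel's include/linux/zlib.h (unlike userspace zlib) the z_stream avail_in and avail_out fields are uLong, and the computed "consumed" difference promotes to unsigned long as well, so %u was wrong for three of the four arguments; %lu matches, while the int-valued "produced" result keeps %u. A standalone illustration of the specifier pairing, with plain locals standing in for the stream fields (an assumption for the sketch):

    /*
     * Sketch only: unsigned long locals stand in for the kernel
     * z_stream's uLong avail_in/avail_out; produced stays a plain
     * unsigned int, hence the remaining %u.
     */
    #include <stdio.h>

    int main(void)
    {
            unsigned long avail_in = 100, avail_out = 4000;
            unsigned int produced = 96;

            printf("avail_in %lu, avail_out %lu (produced %u)\n",
                   avail_in, avail_out, produced);
            return 0;
    }
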
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index aa6be2698669..c39702bc279d 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -533,14 +533,13 @@ static void he_init_tx_lbfp(struct he_dev *he_dev)
533 533
534static int he_init_tpdrq(struct he_dev *he_dev) 534static int he_init_tpdrq(struct he_dev *he_dev)
535{ 535{
536 he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev, 536 he_dev->tpdrq_base = pci_zalloc_consistent(he_dev->pci_dev,
537 CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq), &he_dev->tpdrq_phys); 537 CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
538 &he_dev->tpdrq_phys);
538 if (he_dev->tpdrq_base == NULL) { 539 if (he_dev->tpdrq_base == NULL) {
539 hprintk("failed to alloc tpdrq\n"); 540 hprintk("failed to alloc tpdrq\n");
540 return -ENOMEM; 541 return -ENOMEM;
541 } 542 }
542 memset(he_dev->tpdrq_base, 0,
543 CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq));
544 543
545 he_dev->tpdrq_tail = he_dev->tpdrq_base; 544 he_dev->tpdrq_tail = he_dev->tpdrq_base;
546 he_dev->tpdrq_head = he_dev->tpdrq_base; 545 he_dev->tpdrq_head = he_dev->tpdrq_base;
@@ -804,13 +803,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
804 goto out_free_rbpl_virt; 803 goto out_free_rbpl_virt;
805 } 804 }
806 805
807 he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev, 806 he_dev->rbpl_base = pci_zalloc_consistent(he_dev->pci_dev,
808 CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys); 807 CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
808 &he_dev->rbpl_phys);
809 if (he_dev->rbpl_base == NULL) { 809 if (he_dev->rbpl_base == NULL) {
810 hprintk("failed to alloc rbpl_base\n"); 810 hprintk("failed to alloc rbpl_base\n");
811 goto out_destroy_rbpl_pool; 811 goto out_destroy_rbpl_pool;
812 } 812 }
813 memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
814 813
815 INIT_LIST_HEAD(&he_dev->rbpl_outstanding); 814 INIT_LIST_HEAD(&he_dev->rbpl_outstanding);
816 815
@@ -843,13 +842,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
843 842
844 /* rx buffer ready queue */ 843 /* rx buffer ready queue */
845 844
846 he_dev->rbrq_base = pci_alloc_consistent(he_dev->pci_dev, 845 he_dev->rbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
847 CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys); 846 CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
847 &he_dev->rbrq_phys);
848 if (he_dev->rbrq_base == NULL) { 848 if (he_dev->rbrq_base == NULL) {
849 hprintk("failed to allocate rbrq\n"); 849 hprintk("failed to allocate rbrq\n");
850 goto out_free_rbpl; 850 goto out_free_rbpl;
851 } 851 }
852 memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
853 852
854 he_dev->rbrq_head = he_dev->rbrq_base; 853 he_dev->rbrq_head = he_dev->rbrq_base;
855 he_writel(he_dev, he_dev->rbrq_phys, G0_RBRQ_ST + (group * 16)); 854 he_writel(he_dev, he_dev->rbrq_phys, G0_RBRQ_ST + (group * 16));
@@ -867,13 +866,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
867 866
868 /* tx buffer ready queue */ 867 /* tx buffer ready queue */
869 868
870 he_dev->tbrq_base = pci_alloc_consistent(he_dev->pci_dev, 869 he_dev->tbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
871 CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys); 870 CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
871 &he_dev->tbrq_phys);
872 if (he_dev->tbrq_base == NULL) { 872 if (he_dev->tbrq_base == NULL) {
873 hprintk("failed to allocate tbrq\n"); 873 hprintk("failed to allocate tbrq\n");
874 goto out_free_rbpq_base; 874 goto out_free_rbpq_base;
875 } 875 }
876 memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
877 876
878 he_dev->tbrq_head = he_dev->tbrq_base; 877 he_dev->tbrq_head = he_dev->tbrq_base;
879 878
@@ -1460,13 +1459,13 @@ static int he_start(struct atm_dev *dev)
1460 1459
1461 /* host status page */ 1460 /* host status page */
1462 1461
1463 he_dev->hsp = pci_alloc_consistent(he_dev->pci_dev, 1462 he_dev->hsp = pci_zalloc_consistent(he_dev->pci_dev,
1464 sizeof(struct he_hsp), &he_dev->hsp_phys); 1463 sizeof(struct he_hsp),
1464 &he_dev->hsp_phys);
1465 if (he_dev->hsp == NULL) { 1465 if (he_dev->hsp == NULL) {
1466 hprintk("failed to allocate host status page\n"); 1466 hprintk("failed to allocate host status page\n");
1467 return -ENOMEM; 1467 return -ENOMEM;
1468 } 1468 }
1469 memset(he_dev->hsp, 0, sizeof(struct he_hsp));
1470 he_writel(he_dev, he_dev->hsp_phys, HSP_BA); 1469 he_writel(he_dev, he_dev->hsp_phys, HSP_BA);
1471 1470
1472 /* initialize framer */ 1471 /* initialize framer */
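
Note: the he.c hunks are the first of roughly two dozen identical conversions in this section. Every pci_alloc_consistent() immediately followed by a memset(..., 0, size) of the whole buffer collapses into a single pci_zalloc_consistent() call, with argument lists reflowed while at it. The helper itself is introduced elsewhere in this series; a hedged sketch of its expected shape, assuming the pci-dma-compat style of the era:

    /*
     * Hedged sketch of the helper these conversions rely on; the real
     * definition is added to the PCI DMA compat header elsewhere in
     * the series. A NULL hwdev maps to a NULL struct device, which is
     * what lets the PCI-less ttusb USB drivers further down call it.
     */
    static inline void *
    pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
                          dma_addr_t *dma_handle)
    {
            return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev,
                                       size, dma_handle, GFP_ATOMIC);
    }

The remaining driver hunks in this section (ATM, block, crypto, DRM, InfiniBand, media, net and wireless) apply the same transformation, so they are not annotated individually; only variations on the pattern are called out below.
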
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b621f56a36be..2b24ed056728 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -641,13 +641,11 @@ alloc_scq(struct idt77252_dev *card, int class)
641 scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL); 641 scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
642 if (!scq) 642 if (!scq)
643 return NULL; 643 return NULL;
644 scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE, 644 scq->base = pci_zalloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr);
645 &scq->paddr);
646 if (scq->base == NULL) { 645 if (scq->base == NULL) {
647 kfree(scq); 646 kfree(scq);
648 return NULL; 647 return NULL;
649 } 648 }
650 memset(scq->base, 0, SCQ_SIZE);
651 649
652 scq->next = scq->base; 650 scq->next = scq->base;
653 scq->last = scq->base + (SCQ_ENTRIES - 1); 651 scq->last = scq->base + (SCQ_ENTRIES - 1);
@@ -972,13 +970,12 @@ init_rsq(struct idt77252_dev *card)
972{ 970{
973 struct rsq_entry *rsqe; 971 struct rsq_entry *rsqe;
974 972
975 card->rsq.base = pci_alloc_consistent(card->pcidev, RSQSIZE, 973 card->rsq.base = pci_zalloc_consistent(card->pcidev, RSQSIZE,
976 &card->rsq.paddr); 974 &card->rsq.paddr);
977 if (card->rsq.base == NULL) { 975 if (card->rsq.base == NULL) {
978 printk("%s: can't allocate RSQ.\n", card->name); 976 printk("%s: can't allocate RSQ.\n", card->name);
979 return -1; 977 return -1;
980 } 978 }
981 memset(card->rsq.base, 0, RSQSIZE);
982 979
983 card->rsq.last = card->rsq.base + RSQ_NUM_ENTRIES - 1; 980 card->rsq.last = card->rsq.base + RSQ_NUM_ENTRIES - 1;
984 card->rsq.next = card->rsq.last; 981 card->rsq.next = card->rsq.last;
@@ -3400,14 +3397,14 @@ static int init_card(struct atm_dev *dev)
3400 writel(0, SAR_REG_GP); 3397 writel(0, SAR_REG_GP);
3401 3398
3402 /* Initialize RAW Cell Handle Register */ 3399 /* Initialize RAW Cell Handle Register */
3403 card->raw_cell_hnd = pci_alloc_consistent(card->pcidev, 2 * sizeof(u32), 3400 card->raw_cell_hnd = pci_zalloc_consistent(card->pcidev,
3404 &card->raw_cell_paddr); 3401 2 * sizeof(u32),
3402 &card->raw_cell_paddr);
3405 if (!card->raw_cell_hnd) { 3403 if (!card->raw_cell_hnd) {
3406 printk("%s: memory allocation failure.\n", card->name); 3404 printk("%s: memory allocation failure.\n", card->name);
3407 deinit_card(card); 3405 deinit_card(card);
3408 return -1; 3406 return -1;
3409 } 3407 }
3410 memset(card->raw_cell_hnd, 0, 2 * sizeof(u32));
3411 writel(card->raw_cell_paddr, SAR_REG_RAWHND); 3408 writel(card->raw_cell_paddr, SAR_REG_RAWHND);
3412 IPRINTK("%s: raw cell handle is at 0x%p.\n", card->name, 3409 IPRINTK("%s: raw cell handle is at 0x%p.\n", card->name,
3413 card->raw_cell_hnd); 3410 card->raw_cell_hnd);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 125d84505738..811e11c82f32 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6741,11 +6741,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6741 ErrorCode = -ENOMEM; 6741 ErrorCode = -ENOMEM;
6742 if (DataTransferLength > 0) 6742 if (DataTransferLength > 0)
6743 { 6743 {
6744 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6744 DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
6745 DataTransferLength, &DataTransferBufferDMA); 6745 DataTransferLength,
6746 &DataTransferBufferDMA);
6746 if (DataTransferBuffer == NULL) 6747 if (DataTransferBuffer == NULL)
6747 break; 6748 break;
6748 memset(DataTransferBuffer, 0, DataTransferLength);
6749 } 6749 }
6750 else if (DataTransferLength < 0) 6750 else if (DataTransferLength < 0)
6751 { 6751 {
@@ -6877,11 +6877,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6877 ErrorCode = -ENOMEM; 6877 ErrorCode = -ENOMEM;
6878 if (DataTransferLength > 0) 6878 if (DataTransferLength > 0)
6879 { 6879 {
6880 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6880 DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
6881 DataTransferLength, &DataTransferBufferDMA); 6881 DataTransferLength,
6882 &DataTransferBufferDMA);
6882 if (DataTransferBuffer == NULL) 6883 if (DataTransferBuffer == NULL)
6883 break; 6884 break;
6884 memset(DataTransferBuffer, 0, DataTransferLength);
6885 } 6885 }
6886 else if (DataTransferLength < 0) 6886 else if (DataTransferLength < 0)
6887 { 6887 {
@@ -6899,14 +6899,14 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6899 RequestSenseLength = UserCommand.RequestSenseLength; 6899 RequestSenseLength = UserCommand.RequestSenseLength;
6900 if (RequestSenseLength > 0) 6900 if (RequestSenseLength > 0)
6901 { 6901 {
6902 RequestSenseBuffer = pci_alloc_consistent(Controller->PCIDevice, 6902 RequestSenseBuffer = pci_zalloc_consistent(Controller->PCIDevice,
6903 RequestSenseLength, &RequestSenseBufferDMA); 6903 RequestSenseLength,
6904 &RequestSenseBufferDMA);
6904 if (RequestSenseBuffer == NULL) 6905 if (RequestSenseBuffer == NULL)
6905 { 6906 {
6906 ErrorCode = -ENOMEM; 6907 ErrorCode = -ENOMEM;
6907 goto Failure2; 6908 goto Failure2;
6908 } 6909 }
6909 memset(RequestSenseBuffer, 0, RequestSenseLength);
6910 } 6910 }
6911 spin_lock_irqsave(&Controller->queue_lock, flags); 6911 spin_lock_irqsave(&Controller->queue_lock, flags);
6912 while ((Command = DAC960_AllocateCommand(Controller)) == NULL) 6912 while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4595c22f33f7..ff20f192b0f6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1014,24 +1014,21 @@ static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
1014 u64bit temp64; 1014 u64bit temp64;
1015 dma_addr_t cmd_dma_handle, err_dma_handle; 1015 dma_addr_t cmd_dma_handle, err_dma_handle;
1016 1016
1017 c = (CommandList_struct *) pci_alloc_consistent(h->pdev, 1017 c = pci_zalloc_consistent(h->pdev, sizeof(CommandList_struct),
1018 sizeof(CommandList_struct), &cmd_dma_handle); 1018 &cmd_dma_handle);
1019 if (c == NULL) 1019 if (c == NULL)
1020 return NULL; 1020 return NULL;
1021 memset(c, 0, sizeof(CommandList_struct));
1022 1021
1023 c->cmdindex = -1; 1022 c->cmdindex = -1;
1024 1023
1025 c->err_info = (ErrorInfo_struct *) 1024 c->err_info = pci_zalloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
1026 pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct), 1025 &err_dma_handle);
1027 &err_dma_handle);
1028 1026
1029 if (c->err_info == NULL) { 1027 if (c->err_info == NULL) {
1030 pci_free_consistent(h->pdev, 1028 pci_free_consistent(h->pdev,
1031 sizeof(CommandList_struct), c, cmd_dma_handle); 1029 sizeof(CommandList_struct), c, cmd_dma_handle);
1032 return NULL; 1030 return NULL;
1033 } 1031 }
1034 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
1035 1032
1036 INIT_LIST_HEAD(&c->list); 1033 INIT_LIST_HEAD(&c->list);
1037 c->busaddr = (__u32) cmd_dma_handle; 1034 c->busaddr = (__u32) cmd_dma_handle;
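
Note: beyond the zalloc conversion, the cciss hunk also drops the explicit casts on the allocator's return value. pci_zalloc_consistent() returns void *, which converts implicitly in C, so the (CommandList_struct *) and (ErrorInfo_struct *) casts were noise; the ipw2100 status-queue hunk later in this section sheds an equivalent cast.
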
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 608532d3f8c9..f0a089df85cc 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -4112,16 +4112,14 @@ static int skd_cons_skcomp(struct skd_device *skdev)
4112 skdev->name, __func__, __LINE__, 4112 skdev->name, __func__, __LINE__,
4113 nbytes, SKD_N_COMPLETION_ENTRY); 4113 nbytes, SKD_N_COMPLETION_ENTRY);
4114 4114
4115 skcomp = pci_alloc_consistent(skdev->pdev, nbytes, 4115 skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
4116 &skdev->cq_dma_address); 4116 &skdev->cq_dma_address);
4117 4117
4118 if (skcomp == NULL) { 4118 if (skcomp == NULL) {
4119 rc = -ENOMEM; 4119 rc = -ENOMEM;
4120 goto err_out; 4120 goto err_out;
4121 } 4121 }
4122 4122
4123 memset(skcomp, 0, nbytes);
4124
4125 skdev->skcomp_table = skcomp; 4123 skdev->skcomp_table = skcomp;
4126 skdev->skerr_table = (struct fit_comp_error_info *)((char *)skcomp + 4124 skdev->skerr_table = (struct fit_comp_error_info *)((char *)skcomp +
4127 sizeof(*skcomp) * 4125 sizeof(*skcomp) *
@@ -4304,15 +4302,14 @@ static int skd_cons_skspcl(struct skd_device *skdev)
4304 4302
4305 nbytes = SKD_N_SPECIAL_FITMSG_BYTES; 4303 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4306 4304
4307 skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes, 4305 skspcl->msg_buf =
4308 &skspcl->mb_dma_address); 4306 pci_zalloc_consistent(skdev->pdev, nbytes,
4307 &skspcl->mb_dma_address);
4309 if (skspcl->msg_buf == NULL) { 4308 if (skspcl->msg_buf == NULL) {
4310 rc = -ENOMEM; 4309 rc = -ENOMEM;
4311 goto err_out; 4310 goto err_out;
4312 } 4311 }
4313 4312
4314 memset(skspcl->msg_buf, 0, nbytes);
4315
4316 skspcl->req.sg = kzalloc(sizeof(struct scatterlist) * 4313 skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
4317 SKD_N_SG_PER_SPECIAL, GFP_KERNEL); 4314 SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
4318 if (skspcl->req.sg == NULL) { 4315 if (skspcl->req.sg == NULL) {
@@ -4353,25 +4350,21 @@ static int skd_cons_sksb(struct skd_device *skdev)
4353 4350
4354 nbytes = SKD_N_INTERNAL_BYTES; 4351 nbytes = SKD_N_INTERNAL_BYTES;
4355 4352
4356 skspcl->data_buf = pci_alloc_consistent(skdev->pdev, nbytes, 4353 skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
4357 &skspcl->db_dma_address); 4354 &skspcl->db_dma_address);
4358 if (skspcl->data_buf == NULL) { 4355 if (skspcl->data_buf == NULL) {
4359 rc = -ENOMEM; 4356 rc = -ENOMEM;
4360 goto err_out; 4357 goto err_out;
4361 } 4358 }
4362 4359
4363 memset(skspcl->data_buf, 0, nbytes);
4364
4365 nbytes = SKD_N_SPECIAL_FITMSG_BYTES; 4360 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4366 skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes, 4361 skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
4367 &skspcl->mb_dma_address); 4362 &skspcl->mb_dma_address);
4368 if (skspcl->msg_buf == NULL) { 4363 if (skspcl->msg_buf == NULL) {
4369 rc = -ENOMEM; 4364 rc = -ENOMEM;
4370 goto err_out; 4365 goto err_out;
4371 } 4366 }
4372 4367
4373 memset(skspcl->msg_buf, 0, nbytes);
4374
4375 skspcl->req.sksg_list = skd_cons_sg_list(skdev, 1, 4368 skspcl->req.sksg_list = skd_cons_sg_list(skdev, 1,
4376 &skspcl->req.sksg_dma_address); 4369 &skspcl->req.sksg_dma_address);
4377 if (skspcl->req.sksg_list == NULL) { 4370 if (skspcl->req.sksg_list == NULL) {
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 12fea3e22348..8d2a7728434d 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2617,14 +2617,13 @@ static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2617 } 2617 }
2618 } 2618 }
2619 2619
2620 dev->desc_virt = pci_alloc_consistent(pdev, sizeof(struct hifn_dma), 2620 dev->desc_virt = pci_zalloc_consistent(pdev, sizeof(struct hifn_dma),
2621 &dev->desc_dma); 2621 &dev->desc_dma);
2622 if (!dev->desc_virt) { 2622 if (!dev->desc_virt) {
2623 dprintk("Failed to allocate descriptor rings.\n"); 2623 dprintk("Failed to allocate descriptor rings.\n");
2624 err = -ENOMEM; 2624 err = -ENOMEM;
2625 goto err_out_unmap_bars; 2625 goto err_out_unmap_bars;
2626 } 2626 }
2627 memset(dev->desc_virt, 0, sizeof(struct hifn_dma));
2628 2627
2629 dev->pdev = pdev; 2628 dev->pdev = pdev;
2630 dev->irq = pdev->irq; 2629 dev->irq = pdev->irq;
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 97cdd16a2169..018c29a26615 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
138 return entry; 138 return entry;
139} 139}
140 140
141int efi_get_runtime_map_size(void)
142{
143 return nr_efi_runtime_map * efi_memdesc_size;
144}
145
146int efi_get_runtime_map_desc_size(void)
147{
148 return efi_memdesc_size;
149}
150
151int efi_runtime_map_copy(void *buf, size_t bufsz)
152{
153 size_t sz = efi_get_runtime_map_size();
154
155 if (sz > bufsz)
156 sz = bufsz;
157
158 memcpy(buf, efi_runtime_map, sz);
159 return 0;
160}
161
141void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) 162void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
142{ 163{
143 efi_runtime_map = map; 164 efi_runtime_map = map;
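
Note: the three new helpers expose the cached EFI runtime map to the kexec_file_load() implementation added earlier in this series, which must hand the firmware's runtime memory map to the second kernel: efi_get_runtime_map_size() sizes the copy buffer, efi_get_runtime_map_desc_size() gives the per-entry stride, and efi_runtime_map_copy() fills a caller-supplied buffer, truncating to bufsz rather than failing. A hedged caller sketch; the function and variable names here are invented for illustration, not taken from the kexec patches:

    /*
     * Sketch only: stash_efi_map(), efi_map and nr_entries are
     * hypothetical caller-side names.
     */
    static int stash_efi_map(void **efi_map, unsigned int *nr_entries)
    {
            int sz = efi_get_runtime_map_size();
            void *buf;

            if (!sz)
                    return 0;       /* nothing cached; not an error */

            buf = kzalloc(sz, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            efi_runtime_map_copy(buf, sz);
            *efi_map = buf;
            *nr_entries = sz / efi_get_runtime_map_desc_size();
            return 0;
    }
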
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index e88bac1d781f..bae897de9468 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -393,15 +393,14 @@ static int i810_dma_initialize(struct drm_device *dev,
393 393
394 /* Program Hardware Status Page */ 394 /* Program Hardware Status Page */
395 dev_priv->hw_status_page = 395 dev_priv->hw_status_page =
396 pci_alloc_consistent(dev->pdev, PAGE_SIZE, 396 pci_zalloc_consistent(dev->pdev, PAGE_SIZE,
397 &dev_priv->dma_status_page); 397 &dev_priv->dma_status_page);
398 if (!dev_priv->hw_status_page) { 398 if (!dev_priv->hw_status_page) {
399 dev->dev_private = (void *)dev_priv; 399 dev->dev_private = (void *)dev_priv;
400 i810_dma_cleanup(dev); 400 i810_dma_cleanup(dev);
401 DRM_ERROR("Can not allocate hardware status page\n"); 401 DRM_ERROR("Can not allocate hardware status page\n");
402 return -ENOMEM; 402 return -ENOMEM;
403 } 403 }
404 memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
405 DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page); 404 DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page);
406 405
407 I810_WRITE(0x02080, dev_priv->dma_status_page); 406 I810_WRITE(0x02080, dev_priv->dma_status_page);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 00400c352c1a..766a71ccefed 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -604,16 +604,14 @@ static int c2_up(struct net_device *netdev)
604 tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc); 604 tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
605 605
606 c2_port->mem_size = tx_size + rx_size; 606 c2_port->mem_size = tx_size + rx_size;
607 c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size, 607 c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
608 &c2_port->dma); 608 &c2_port->dma);
609 if (c2_port->mem == NULL) { 609 if (c2_port->mem == NULL) {
610 pr_debug("Unable to allocate memory for " 610 pr_debug("Unable to allocate memory for "
611 "host descriptor rings\n"); 611 "host descriptor rings\n");
612 return -ENOMEM; 612 return -ENOMEM;
613 } 613 }
614 614
615 memset(c2_port->mem, 0, c2_port->mem_size);
616
617 /* Create the Rx host descriptor ring */ 615 /* Create the Rx host descriptor ring */
618 if ((ret = 616 if ((ret =
619 c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma, 617 c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 90200245c5eb..02120d340d50 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1003,13 +1003,13 @@ int nes_init_cqp(struct nes_device *nesdev)
1003 (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) + 1003 (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
1004 sizeof(struct nes_hw_cqp_qp_context); 1004 sizeof(struct nes_hw_cqp_qp_context);
1005 1005
1006 nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size, 1006 nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
1007 &nesdev->cqp_pbase); 1007 nesdev->cqp_mem_size,
1008 &nesdev->cqp_pbase);
1008 if (!nesdev->cqp_vbase) { 1009 if (!nesdev->cqp_vbase) {
1009 nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n"); 1010 nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
1010 return -ENOMEM; 1011 return -ENOMEM;
1011 } 1012 }
1012 memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
1013 1013
1014 /* Allocate a twice the number of CQP requests as the SQ size */ 1014 /* Allocate a twice the number of CQP requests as the SQ size */
1015 nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * 1015 nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
@@ -1691,13 +1691,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
1691 (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) + 1691 (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
1692 sizeof(struct nes_hw_nic_qp_context); 1692 sizeof(struct nes_hw_nic_qp_context);
1693 1693
1694 nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size, 1694 nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
1695 &nesvnic->nic_pbase); 1695 nesvnic->nic_mem_size,
1696 &nesvnic->nic_pbase);
1696 if (!nesvnic->nic_vbase) { 1697 if (!nesvnic->nic_vbase) {
1697 nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n"); 1698 nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
1698 return -ENOMEM; 1699 return -ENOMEM;
1699 } 1700 }
1700 memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
1701 nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n", 1701 nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
1702 nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size); 1702 nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
1703 1703
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 218dd3574285..fef067c959fc 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1616,8 +1616,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1616 entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); 1616 entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
1617 1617
1618 /* allocate the physical buffer space */ 1618 /* allocate the physical buffer space */
1619 mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size, 1619 mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
1620 &nescq->hw_cq.cq_pbase); 1620 &nescq->hw_cq.cq_pbase);
1621 if (!mem) { 1621 if (!mem) {
1622 printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); 1622 printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
1623 nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); 1623 nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
@@ -1625,7 +1625,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1625 return ERR_PTR(-ENOMEM); 1625 return ERR_PTR(-ENOMEM);
1626 } 1626 }
1627 1627
1628 memset(mem, 0, nescq->cq_mem_size);
1629 nescq->hw_cq.cq_vbase = mem; 1628 nescq->hw_cq.cq_vbase = mem;
1630 nescq->hw_cq.cq_head = 0; 1629 nescq->hw_cq.cq_head = 0;
1631 nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", 1630 nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
diff --git a/drivers/media/common/saa7146/saa7146_core.c b/drivers/media/common/saa7146/saa7146_core.c
index 34b0d0ddeef3..97afee672d07 100644
--- a/drivers/media/common/saa7146/saa7146_core.c
+++ b/drivers/media/common/saa7146/saa7146_core.c
@@ -421,23 +421,20 @@ static int saa7146_init_one(struct pci_dev *pci, const struct pci_device_id *ent
421 err = -ENOMEM; 421 err = -ENOMEM;
422 422
423 /* get memory for various stuff */ 423 /* get memory for various stuff */
424 dev->d_rps0.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM, 424 dev->d_rps0.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
425 &dev->d_rps0.dma_handle); 425 &dev->d_rps0.dma_handle);
426 if (!dev->d_rps0.cpu_addr) 426 if (!dev->d_rps0.cpu_addr)
427 goto err_free_irq; 427 goto err_free_irq;
428 memset(dev->d_rps0.cpu_addr, 0x0, SAA7146_RPS_MEM);
429 428
430 dev->d_rps1.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM, 429 dev->d_rps1.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
431 &dev->d_rps1.dma_handle); 430 &dev->d_rps1.dma_handle);
432 if (!dev->d_rps1.cpu_addr) 431 if (!dev->d_rps1.cpu_addr)
433 goto err_free_rps0; 432 goto err_free_rps0;
434 memset(dev->d_rps1.cpu_addr, 0x0, SAA7146_RPS_MEM);
435 433
436 dev->d_i2c.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM, 434 dev->d_i2c.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
437 &dev->d_i2c.dma_handle); 435 &dev->d_i2c.dma_handle);
438 if (!dev->d_i2c.cpu_addr) 436 if (!dev->d_i2c.cpu_addr)
439 goto err_free_rps1; 437 goto err_free_rps1;
440 memset(dev->d_i2c.cpu_addr, 0x0, SAA7146_RPS_MEM);
441 438
442 /* the rest + print status message */ 439 /* the rest + print status message */
443 440
diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c
index d9e1d6395ed9..6c47f3fe9b0f 100644
--- a/drivers/media/common/saa7146/saa7146_fops.c
+++ b/drivers/media/common/saa7146/saa7146_fops.c
@@ -520,14 +520,15 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
520 configuration data) */ 520 configuration data) */
521 dev->ext_vv_data = ext_vv; 521 dev->ext_vv_data = ext_vv;
522 522
523 vv->d_clipping.cpu_addr = pci_alloc_consistent(dev->pci, SAA7146_CLIPPING_MEM, &vv->d_clipping.dma_handle); 523 vv->d_clipping.cpu_addr =
524 pci_zalloc_consistent(dev->pci, SAA7146_CLIPPING_MEM,
525 &vv->d_clipping.dma_handle);
524 if( NULL == vv->d_clipping.cpu_addr ) { 526 if( NULL == vv->d_clipping.cpu_addr ) {
525 ERR("out of memory. aborting.\n"); 527 ERR("out of memory. aborting.\n");
526 kfree(vv); 528 kfree(vv);
527 v4l2_ctrl_handler_free(hdl); 529 v4l2_ctrl_handler_free(hdl);
528 return -1; 530 return -1;
529 } 531 }
530 memset(vv->d_clipping.cpu_addr, 0x0, SAA7146_CLIPPING_MEM);
531 532
532 saa7146_video_uops.init(dev,vv); 533 saa7146_video_uops.init(dev,vv);
533 if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE) 534 if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE)
diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c
index d0c281f41a0a..11765835d7b2 100644
--- a/drivers/media/pci/bt8xx/bt878.c
+++ b/drivers/media/pci/bt8xx/bt878.c
@@ -101,28 +101,20 @@ static int bt878_mem_alloc(struct bt878 *bt)
101 if (!bt->buf_cpu) { 101 if (!bt->buf_cpu) {
102 bt->buf_size = 128 * 1024; 102 bt->buf_size = 128 * 1024;
103 103
104 bt->buf_cpu = 104 bt->buf_cpu = pci_zalloc_consistent(bt->dev, bt->buf_size,
105 pci_alloc_consistent(bt->dev, bt->buf_size, 105 &bt->buf_dma);
106 &bt->buf_dma);
107
108 if (!bt->buf_cpu) 106 if (!bt->buf_cpu)
109 return -ENOMEM; 107 return -ENOMEM;
110
111 memset(bt->buf_cpu, 0, bt->buf_size);
112 } 108 }
113 109
114 if (!bt->risc_cpu) { 110 if (!bt->risc_cpu) {
115 bt->risc_size = PAGE_SIZE; 111 bt->risc_size = PAGE_SIZE;
116 bt->risc_cpu = 112 bt->risc_cpu = pci_zalloc_consistent(bt->dev, bt->risc_size,
117 pci_alloc_consistent(bt->dev, bt->risc_size, 113 &bt->risc_dma);
118 &bt->risc_dma);
119
120 if (!bt->risc_cpu) { 114 if (!bt->risc_cpu) {
121 bt878_mem_free(bt); 115 bt878_mem_free(bt);
122 return -ENOMEM; 116 return -ENOMEM;
123 } 117 }
124
125 memset(bt->risc_cpu, 0, bt->risc_size);
126 } 118 }
127 119
128 return 0; 120 return 0;
diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c
index 826228c3800e..4930b55fd5f4 100644
--- a/drivers/media/pci/ngene/ngene-core.c
+++ b/drivers/media/pci/ngene/ngene-core.c
@@ -1075,12 +1075,11 @@ static int AllocCommonBuffers(struct ngene *dev)
1075 dev->ngenetohost = dev->FWInterfaceBuffer + 256; 1075 dev->ngenetohost = dev->FWInterfaceBuffer + 256;
1076 dev->EventBuffer = dev->FWInterfaceBuffer + 512; 1076 dev->EventBuffer = dev->FWInterfaceBuffer + 512;
1077 1077
1078 dev->OverflowBuffer = pci_alloc_consistent(dev->pci_dev, 1078 dev->OverflowBuffer = pci_zalloc_consistent(dev->pci_dev,
1079 OVERFLOW_BUFFER_SIZE, 1079 OVERFLOW_BUFFER_SIZE,
1080 &dev->PAOverflowBuffer); 1080 &dev->PAOverflowBuffer);
1081 if (!dev->OverflowBuffer) 1081 if (!dev->OverflowBuffer)
1082 return -ENOMEM; 1082 return -ENOMEM;
1083 memset(dev->OverflowBuffer, 0, OVERFLOW_BUFFER_SIZE);
1084 1083
1085 for (i = STREAM_VIDEOIN1; i < MAX_STREAM; i++) { 1084 for (i = STREAM_VIDEOIN1; i < MAX_STREAM; i++) {
1086 int type = dev->card_info->io_type[i]; 1085 int type = dev->card_info->io_type[i];
diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
index f166ffc9800a..cef7a00099ea 100644
--- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
+++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
@@ -803,11 +803,9 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
803{ 803{
804 int i; 804 int i;
805 805
806 ttusb->iso_buffer = pci_alloc_consistent(NULL, 806 ttusb->iso_buffer = pci_zalloc_consistent(NULL,
807 ISO_FRAME_SIZE * 807 ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT,
808 FRAMES_PER_ISO_BUF * 808 &ttusb->iso_dma_handle);
809 ISO_BUF_COUNT,
810 &ttusb->iso_dma_handle);
811 809
812 if (!ttusb->iso_buffer) { 810 if (!ttusb->iso_buffer) {
813 dprintk("%s: pci_alloc_consistent - not enough memory\n", 811 dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -815,9 +813,6 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
815 return -ENOMEM; 813 return -ENOMEM;
816 } 814 }
817 815
818 memset(ttusb->iso_buffer, 0,
819 ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT);
820
821 for (i = 0; i < ISO_BUF_COUNT; i++) { 816 for (i = 0; i < ISO_BUF_COUNT; i++) {
822 struct urb *urb; 817 struct urb *urb;
823 818
diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index 29724af9b9ab..15ab584cf265 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -1151,11 +1151,9 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
1151 1151
1152 dprintk("%s\n", __func__); 1152 dprintk("%s\n", __func__);
1153 1153
1154 dec->iso_buffer = pci_alloc_consistent(NULL, 1154 dec->iso_buffer = pci_zalloc_consistent(NULL,
1155 ISO_FRAME_SIZE * 1155 ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT),
1156 (FRAMES_PER_ISO_BUF * 1156 &dec->iso_dma_handle);
1157 ISO_BUF_COUNT),
1158 &dec->iso_dma_handle);
1159 1157
1160 if (!dec->iso_buffer) { 1158 if (!dec->iso_buffer) {
1161 dprintk("%s: pci_alloc_consistent - not enough memory\n", 1159 dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -1163,9 +1161,6 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
1163 return -ENOMEM; 1161 return -ENOMEM;
1164 } 1162 }
1165 1163
1166 memset(dec->iso_buffer, 0,
1167 ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT));
1168
1169 for (i = 0; i < ISO_BUF_COUNT; i++) { 1164 for (i = 0; i < ISO_BUF_COUNT; i++) {
1170 struct urb *urb; 1165 struct urb *urb;
1171 1166
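
Note: both TTUSB converters are USB drivers with no struct pci_dev, so they call the PCI DMA API with a NULL device; that continues to work after the conversion because the compat helper (sketched after the he.c hunks above) turns a NULL hwdev into a NULL struct device before calling dma_zalloc_coherent(). The untouched "pci_alloc_consistent - not enough memory" strings in both error paths are pre-existing driver text the patch deliberately leaves alone.
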
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e7cc9174e364..4a8fdc4721d5 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -481,37 +481,32 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev,
481 dma_addr_t *new_dma_addr_list; 481 dma_addr_t *new_dma_addr_list;
482 struct pcnet32_tx_head *new_tx_ring; 482 struct pcnet32_tx_head *new_tx_ring;
483 struct sk_buff **new_skb_list; 483 struct sk_buff **new_skb_list;
484 unsigned int entries = BIT(size);
484 485
485 pcnet32_purge_tx_ring(dev); 486 pcnet32_purge_tx_ring(dev);
486 487
487 new_tx_ring = pci_alloc_consistent(lp->pci_dev, 488 new_tx_ring =
488 sizeof(struct pcnet32_tx_head) * 489 pci_zalloc_consistent(lp->pci_dev,
489 (1 << size), 490 sizeof(struct pcnet32_tx_head) * entries,
490 &new_ring_dma_addr); 491 &new_ring_dma_addr);
491 if (new_tx_ring == NULL) { 492 if (new_tx_ring == NULL)
492 netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
493 return; 493 return;
494 }
495 memset(new_tx_ring, 0, sizeof(struct pcnet32_tx_head) * (1 << size));
496 494
497 new_dma_addr_list = kcalloc(1 << size, sizeof(dma_addr_t), 495 new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
498 GFP_ATOMIC);
499 if (!new_dma_addr_list) 496 if (!new_dma_addr_list)
500 goto free_new_tx_ring; 497 goto free_new_tx_ring;
501 498
502 new_skb_list = kcalloc(1 << size, sizeof(struct sk_buff *), 499 new_skb_list = kcalloc(entries, sizeof(struct sk_buff *), GFP_ATOMIC);
503 GFP_ATOMIC);
504 if (!new_skb_list) 500 if (!new_skb_list)
505 goto free_new_lists; 501 goto free_new_lists;
506 502
507 kfree(lp->tx_skbuff); 503 kfree(lp->tx_skbuff);
508 kfree(lp->tx_dma_addr); 504 kfree(lp->tx_dma_addr);
509 pci_free_consistent(lp->pci_dev, 505 pci_free_consistent(lp->pci_dev,
510 sizeof(struct pcnet32_tx_head) * 506 sizeof(struct pcnet32_tx_head) * lp->tx_ring_size,
511 lp->tx_ring_size, lp->tx_ring, 507 lp->tx_ring, lp->tx_ring_dma_addr);
512 lp->tx_ring_dma_addr);
513 508
514 lp->tx_ring_size = (1 << size); 509 lp->tx_ring_size = entries;
515 lp->tx_mod_mask = lp->tx_ring_size - 1; 510 lp->tx_mod_mask = lp->tx_ring_size - 1;
516 lp->tx_len_bits = (size << 12); 511 lp->tx_len_bits = (size << 12);
517 lp->tx_ring = new_tx_ring; 512 lp->tx_ring = new_tx_ring;
@@ -524,8 +519,7 @@ free_new_lists:
524 kfree(new_dma_addr_list); 519 kfree(new_dma_addr_list);
525free_new_tx_ring: 520free_new_tx_ring:
526 pci_free_consistent(lp->pci_dev, 521 pci_free_consistent(lp->pci_dev,
527 sizeof(struct pcnet32_tx_head) * 522 sizeof(struct pcnet32_tx_head) * entries,
528 (1 << size),
529 new_tx_ring, 523 new_tx_ring,
530 new_ring_dma_addr); 524 new_ring_dma_addr);
531} 525}
@@ -549,17 +543,14 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev,
549 struct pcnet32_rx_head *new_rx_ring; 543 struct pcnet32_rx_head *new_rx_ring;
550 struct sk_buff **new_skb_list; 544 struct sk_buff **new_skb_list;
551 int new, overlap; 545 int new, overlap;
552 unsigned int entries = 1 << size; 546 unsigned int entries = BIT(size);
553 547
554 new_rx_ring = pci_alloc_consistent(lp->pci_dev, 548 new_rx_ring =
555 sizeof(struct pcnet32_rx_head) * 549 pci_zalloc_consistent(lp->pci_dev,
556 entries, 550 sizeof(struct pcnet32_rx_head) * entries,
557 &new_ring_dma_addr); 551 &new_ring_dma_addr);
558 if (new_rx_ring == NULL) { 552 if (new_rx_ring == NULL)
559 netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
560 return; 553 return;
561 }
562 memset(new_rx_ring, 0, sizeof(struct pcnet32_rx_head) * entries);
563 554
564 new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC); 555 new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
565 if (!new_dma_addr_list) 556 if (!new_dma_addr_list)
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 4345332533ad..316e0c3fe048 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -831,17 +831,14 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter)
831 /* real ring DMA buffer */ 831 /* real ring DMA buffer */
832 832
833 size = adapter->ring_size; 833 size = adapter->ring_size;
834 adapter->ring_vir_addr = pci_alloc_consistent(pdev, 834 adapter->ring_vir_addr = pci_zalloc_consistent(pdev, adapter->ring_size,
835 adapter->ring_size, &adapter->ring_dma); 835 &adapter->ring_dma);
836
837 if (adapter->ring_vir_addr == NULL) { 836 if (adapter->ring_vir_addr == NULL) {
838 netdev_err(adapter->netdev, 837 netdev_err(adapter->netdev,
839 "pci_alloc_consistent failed, size = D%d\n", size); 838 "pci_alloc_consistent failed, size = D%d\n", size);
840 return -ENOMEM; 839 return -ENOMEM;
841 } 840 }
842 841
843 memset(adapter->ring_vir_addr, 0, adapter->ring_size);
844
845 rx_page_desc = rx_ring->rx_page_desc; 842 rx_page_desc = rx_ring->rx_page_desc;
846 843
847 /* Init TPD Ring */ 844 /* Init TPD Ring */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 5abc496bcf29..37472ce4fac3 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -432,14 +432,12 @@ int vnic_dev_fw_info(struct vnic_dev *vdev,
432 int err = 0; 432 int err = 0;
433 433
434 if (!vdev->fw_info) { 434 if (!vdev->fw_info) {
435 vdev->fw_info = pci_alloc_consistent(vdev->pdev, 435 vdev->fw_info = pci_zalloc_consistent(vdev->pdev,
436 sizeof(struct vnic_devcmd_fw_info), 436 sizeof(struct vnic_devcmd_fw_info),
437 &vdev->fw_info_pa); 437 &vdev->fw_info_pa);
438 if (!vdev->fw_info) 438 if (!vdev->fw_info)
439 return -ENOMEM; 439 return -ENOMEM;
440 440
441 memset(vdev->fw_info, 0, sizeof(struct vnic_devcmd_fw_info));
442
443 a0 = vdev->fw_info_pa; 441 a0 = vdev->fw_info_pa;
444 a1 = sizeof(struct vnic_devcmd_fw_info); 442 a1 = sizeof(struct vnic_devcmd_fw_info);
445 443
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 69693384b58c..59915144aabb 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1622,11 +1622,10 @@ static int sky2_alloc_buffers(struct sky2_port *sky2)
1622 if (!sky2->tx_ring) 1622 if (!sky2->tx_ring)
1623 goto nomem; 1623 goto nomem;
1624 1624
1625 sky2->rx_le = pci_alloc_consistent(hw->pdev, RX_LE_BYTES, 1625 sky2->rx_le = pci_zalloc_consistent(hw->pdev, RX_LE_BYTES,
1626 &sky2->rx_le_map); 1626 &sky2->rx_le_map);
1627 if (!sky2->rx_le) 1627 if (!sky2->rx_le)
1628 goto nomem; 1628 goto nomem;
1629 memset(sky2->rx_le, 0, RX_LE_BYTES);
1630 1629
1631 sky2->rx_ring = kcalloc(sky2->rx_pending, sizeof(struct rx_ring_info), 1630 sky2->rx_ring = kcalloc(sky2->rx_pending, sizeof(struct rx_ring_info),
1632 GFP_KERNEL); 1631 GFP_KERNEL);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 064a48d0c368..cd5f106306d9 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4409,14 +4409,13 @@ static int ksz_alloc_desc(struct dev_info *adapter)
4409 DESC_ALIGNMENT; 4409 DESC_ALIGNMENT;
4410 4410
4411 adapter->desc_pool.alloc_virt = 4411 adapter->desc_pool.alloc_virt =
4412 pci_alloc_consistent( 4412 pci_zalloc_consistent(adapter->pdev,
4413 adapter->pdev, adapter->desc_pool.alloc_size, 4413 adapter->desc_pool.alloc_size,
4414 &adapter->desc_pool.dma_addr); 4414 &adapter->desc_pool.dma_addr);
4415 if (adapter->desc_pool.alloc_virt == NULL) { 4415 if (adapter->desc_pool.alloc_virt == NULL) {
4416 adapter->desc_pool.alloc_size = 0; 4416 adapter->desc_pool.alloc_size = 0;
4417 return 1; 4417 return 1;
4418 } 4418 }
4419 memset(adapter->desc_pool.alloc_virt, 0, adapter->desc_pool.alloc_size);
4420 4419
4421 /* Align to the next cache line boundary. */ 4420 /* Align to the next cache line boundary. */
4422 offset = (((ulong) adapter->desc_pool.alloc_virt % DESC_ALIGNMENT) ? 4421 offset = (((ulong) adapter->desc_pool.alloc_virt % DESC_ALIGNMENT) ?
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index 6f6be57f4690..b8d5270359cd 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -129,14 +129,12 @@ netxen_get_minidump_template(struct netxen_adapter *adapter)
129 return NX_RCODE_INVALID_ARGS; 129 return NX_RCODE_INVALID_ARGS;
130 } 130 }
131 131
132 addr = pci_alloc_consistent(adapter->pdev, size, &md_template_addr); 132 addr = pci_zalloc_consistent(adapter->pdev, size, &md_template_addr);
133
134 if (!addr) { 133 if (!addr) {
135 dev_err(&adapter->pdev->dev, "Unable to allocate dmable memory for template.\n"); 134 dev_err(&adapter->pdev->dev, "Unable to allocate dmable memory for template.\n");
136 return -ENOMEM; 135 return -ENOMEM;
137 } 136 }
138 137
139 memset(addr, 0, size);
140 memset(&cmd, 0, sizeof(cmd)); 138 memset(&cmd, 0, sizeof(cmd));
141 memset(&cmd.rsp, 1, sizeof(struct _cdrp_cmd)); 139 memset(&cmd.rsp, 1, sizeof(struct _cdrp_cmd));
142 cmd.req.cmd = NX_CDRP_CMD_GET_TEMP_HDR; 140 cmd.req.cmd = NX_CDRP_CMD_GET_TEMP_HDR;
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index b40050e03a56..d836ace52277 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2727,23 +2727,22 @@ static void ql_free_shadow_space(struct ql_adapter *qdev)
2727static int ql_alloc_shadow_space(struct ql_adapter *qdev) 2727static int ql_alloc_shadow_space(struct ql_adapter *qdev)
2728{ 2728{
2729 qdev->rx_ring_shadow_reg_area = 2729 qdev->rx_ring_shadow_reg_area =
2730 pci_alloc_consistent(qdev->pdev, 2730 pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
2731 PAGE_SIZE, &qdev->rx_ring_shadow_reg_dma); 2731 &qdev->rx_ring_shadow_reg_dma);
2732 if (qdev->rx_ring_shadow_reg_area == NULL) { 2732 if (qdev->rx_ring_shadow_reg_area == NULL) {
2733 netif_err(qdev, ifup, qdev->ndev, 2733 netif_err(qdev, ifup, qdev->ndev,
2734 "Allocation of RX shadow space failed.\n"); 2734 "Allocation of RX shadow space failed.\n");
2735 return -ENOMEM; 2735 return -ENOMEM;
2736 } 2736 }
2737 memset(qdev->rx_ring_shadow_reg_area, 0, PAGE_SIZE); 2737
2738 qdev->tx_ring_shadow_reg_area = 2738 qdev->tx_ring_shadow_reg_area =
2739 pci_alloc_consistent(qdev->pdev, PAGE_SIZE, 2739 pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
2740 &qdev->tx_ring_shadow_reg_dma); 2740 &qdev->tx_ring_shadow_reg_dma);
2741 if (qdev->tx_ring_shadow_reg_area == NULL) { 2741 if (qdev->tx_ring_shadow_reg_area == NULL) {
2742 netif_err(qdev, ifup, qdev->ndev, 2742 netif_err(qdev, ifup, qdev->ndev,
2743 "Allocation of TX shadow space failed.\n"); 2743 "Allocation of TX shadow space failed.\n");
2744 goto err_wqp_sh_area; 2744 goto err_wqp_sh_area;
2745 } 2745 }
2746 memset(qdev->tx_ring_shadow_reg_area, 0, PAGE_SIZE);
2747 return 0; 2746 return 0;
2748 2747
2749err_wqp_sh_area: 2748err_wqp_sh_area:
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 485006604bbc..58ef59469dd0 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -485,13 +485,13 @@ static int vlsi_create_hwif(vlsi_irda_dev_t *idev)
485 idev->virtaddr = NULL; 485 idev->virtaddr = NULL;
486 idev->busaddr = 0; 486 idev->busaddr = 0;
487 487
488 ringarea = pci_alloc_consistent(idev->pdev, HW_RING_AREA_SIZE, &idev->busaddr); 488 ringarea = pci_zalloc_consistent(idev->pdev, HW_RING_AREA_SIZE,
489 &idev->busaddr);
489 if (!ringarea) { 490 if (!ringarea) {
490 IRDA_ERROR("%s: insufficient memory for descriptor rings\n", 491 IRDA_ERROR("%s: insufficient memory for descriptor rings\n",
491 __func__); 492 __func__);
492 goto out; 493 goto out;
493 } 494 }
494 memset(ringarea, 0, HW_RING_AREA_SIZE);
495 495
496 hwmap = (struct ring_descr_hw *)ringarea; 496 hwmap = (struct ring_descr_hw *)ringarea;
497 idev->rx_ring = vlsi_alloc_ring(idev->pdev, hwmap, ringsize[1], 497 idev->rx_ring = vlsi_alloc_ring(idev->pdev, hwmap, ringsize[1],
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index dfc6dfc56d52..1ab8e500fb77 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -3449,8 +3449,9 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
3449 return -ENOMEM; 3449 return -ENOMEM;
3450 3450
3451 for (i = 0; i < IPW_COMMAND_POOL_SIZE; i++) { 3451 for (i = 0; i < IPW_COMMAND_POOL_SIZE; i++) {
3452 v = pci_alloc_consistent(priv->pci_dev, 3452 v = pci_zalloc_consistent(priv->pci_dev,
3453 sizeof(struct ipw2100_cmd_header), &p); 3453 sizeof(struct ipw2100_cmd_header),
3454 &p);
3454 if (!v) { 3455 if (!v) {
3455 printk(KERN_ERR DRV_NAME ": " 3456 printk(KERN_ERR DRV_NAME ": "
3456 "%s: PCI alloc failed for msg " 3457 "%s: PCI alloc failed for msg "
@@ -3459,8 +3460,6 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
3459 break; 3460 break;
3460 } 3461 }
3461 3462
3462 memset(v, 0, sizeof(struct ipw2100_cmd_header));
3463
3464 priv->msg_buffers[i].type = COMMAND; 3463 priv->msg_buffers[i].type = COMMAND;
3465 priv->msg_buffers[i].info.c_struct.cmd = 3464 priv->msg_buffers[i].info.c_struct.cmd =
3466 (struct ipw2100_cmd_header *)v; 3465 (struct ipw2100_cmd_header *)v;
@@ -4336,16 +4335,12 @@ static int status_queue_allocate(struct ipw2100_priv *priv, int entries)
4336 IPW_DEBUG_INFO("enter\n"); 4335 IPW_DEBUG_INFO("enter\n");
4337 4336
4338 q->size = entries * sizeof(struct ipw2100_status); 4337 q->size = entries * sizeof(struct ipw2100_status);
4339 q->drv = 4338 q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
4340 (struct ipw2100_status *)pci_alloc_consistent(priv->pci_dev,
4341 q->size, &q->nic);
4342 if (!q->drv) { 4339 if (!q->drv) {
4343 IPW_DEBUG_WARNING("Can not allocate status queue.\n"); 4340 IPW_DEBUG_WARNING("Can not allocate status queue.\n");
4344 return -ENOMEM; 4341 return -ENOMEM;
4345 } 4342 }
4346 4343
4347 memset(q->drv, 0, q->size);
4348
4349 IPW_DEBUG_INFO("exit\n"); 4344 IPW_DEBUG_INFO("exit\n");
4350 4345
4351 return 0; 4346 return 0;
@@ -4374,13 +4369,12 @@ static int bd_queue_allocate(struct ipw2100_priv *priv,
4374 4369
4375 q->entries = entries; 4370 q->entries = entries;
4376 q->size = entries * sizeof(struct ipw2100_bd); 4371 q->size = entries * sizeof(struct ipw2100_bd);
4377 q->drv = pci_alloc_consistent(priv->pci_dev, q->size, &q->nic); 4372 q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
4378 if (!q->drv) { 4373 if (!q->drv) {
4379 IPW_DEBUG_INFO 4374 IPW_DEBUG_INFO
4380 ("can't allocate shared memory for buffer descriptors\n"); 4375 ("can't allocate shared memory for buffer descriptors\n");
4381 return -ENOMEM; 4376 return -ENOMEM;
4382 } 4377 }
4383 memset(q->drv, 0, q->size);
4384 4378
4385 IPW_DEBUG_INFO("exit\n"); 4379 IPW_DEBUG_INFO("exit\n");
4386 4380
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 9a3d4d6724f7..fc6cb215e761 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -1159,12 +1159,11 @@ static int mwl8k_rxq_init(struct ieee80211_hw *hw, int index)
1159 1159
1160 size = MWL8K_RX_DESCS * priv->rxd_ops->rxd_size; 1160 size = MWL8K_RX_DESCS * priv->rxd_ops->rxd_size;
1161 1161
1162 rxq->rxd = pci_alloc_consistent(priv->pdev, size, &rxq->rxd_dma); 1162 rxq->rxd = pci_zalloc_consistent(priv->pdev, size, &rxq->rxd_dma);
1163 if (rxq->rxd == NULL) { 1163 if (rxq->rxd == NULL) {
1164 wiphy_err(hw->wiphy, "failed to alloc RX descriptors\n"); 1164 wiphy_err(hw->wiphy, "failed to alloc RX descriptors\n");
1165 return -ENOMEM; 1165 return -ENOMEM;
1166 } 1166 }
1167 memset(rxq->rxd, 0, size);
1168 1167
1169 rxq->buf = kcalloc(MWL8K_RX_DESCS, sizeof(*rxq->buf), GFP_KERNEL); 1168 rxq->buf = kcalloc(MWL8K_RX_DESCS, sizeof(*rxq->buf), GFP_KERNEL);
1170 if (rxq->buf == NULL) { 1169 if (rxq->buf == NULL) {
@@ -1451,12 +1450,11 @@ static int mwl8k_txq_init(struct ieee80211_hw *hw, int index)
1451 1450
1452 size = MWL8K_TX_DESCS * sizeof(struct mwl8k_tx_desc); 1451 size = MWL8K_TX_DESCS * sizeof(struct mwl8k_tx_desc);
1453 1452
1454 txq->txd = pci_alloc_consistent(priv->pdev, size, &txq->txd_dma); 1453 txq->txd = pci_zalloc_consistent(priv->pdev, size, &txq->txd_dma);
1455 if (txq->txd == NULL) { 1454 if (txq->txd == NULL) {
1456 wiphy_err(hw->wiphy, "failed to alloc TX descriptors\n"); 1455 wiphy_err(hw->wiphy, "failed to alloc TX descriptors\n");
1457 return -ENOMEM; 1456 return -ENOMEM;
1458 } 1457 }
1459 memset(txq->txd, 0, size);
1460 1458
1461 txq->skb = kcalloc(MWL8K_TX_DESCS, sizeof(*txq->skb), GFP_KERNEL); 1459 txq->skb = kcalloc(MWL8K_TX_DESCS, sizeof(*txq->skb), GFP_KERNEL);
1462 if (txq->skb == NULL) { 1460 if (txq->skb == NULL) {
diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c
index 4b904f708184..fcc45e5bf50a 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c
@@ -972,16 +972,13 @@ static int rtl8180_init_rx_ring(struct ieee80211_hw *dev)
972 else 972 else
973 priv->rx_ring_sz = sizeof(struct rtl8180_rx_desc); 973 priv->rx_ring_sz = sizeof(struct rtl8180_rx_desc);
974 974
975 priv->rx_ring = pci_alloc_consistent(priv->pdev, 975 priv->rx_ring = pci_zalloc_consistent(priv->pdev, priv->rx_ring_sz * 32,
976 priv->rx_ring_sz * 32, 976 &priv->rx_ring_dma);
977 &priv->rx_ring_dma);
978
979 if (!priv->rx_ring || (unsigned long)priv->rx_ring & 0xFF) { 977 if (!priv->rx_ring || (unsigned long)priv->rx_ring & 0xFF) {
980 wiphy_err(dev->wiphy, "Cannot allocate RX ring\n"); 978 wiphy_err(dev->wiphy, "Cannot allocate RX ring\n");
981 return -ENOMEM; 979 return -ENOMEM;
982 } 980 }
983 981
984 memset(priv->rx_ring, 0, priv->rx_ring_sz * 32);
985 priv->rx_idx = 0; 982 priv->rx_idx = 0;
986 983
987 for (i = 0; i < 32; i++) { 984 for (i = 0; i < 32; i++) {
@@ -1040,14 +1037,14 @@ static int rtl8180_init_tx_ring(struct ieee80211_hw *dev,
1040 dma_addr_t dma; 1037 dma_addr_t dma;
1041 int i; 1038 int i;
1042 1039
1043 ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma); 1040 ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries,
1041 &dma);
1044 if (!ring || (unsigned long)ring & 0xFF) { 1042 if (!ring || (unsigned long)ring & 0xFF) {
1045 wiphy_err(dev->wiphy, "Cannot allocate TX ring (prio = %d)\n", 1043 wiphy_err(dev->wiphy, "Cannot allocate TX ring (prio = %d)\n",
1046 prio); 1044 prio);
1047 return -ENOMEM; 1045 return -ENOMEM;
1048 } 1046 }
1049 1047
1050 memset(ring, 0, sizeof(*ring)*entries);
1051 priv->tx_ring[prio].desc = ring; 1048 priv->tx_ring[prio].desc = ring;
1052 priv->tx_ring[prio].dma = dma; 1049 priv->tx_ring[prio].dma = dma;
1053 priv->tx_ring[prio].idx = 0; 1050 priv->tx_ring[prio].idx = 0;
diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index dae55257f0e8..67d1ee6edcad 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -1092,16 +1092,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1092 u32 nextdescaddress; 1092 u32 nextdescaddress;
1093 int i; 1093 int i;
1094 1094
1095 ring = pci_alloc_consistent(rtlpci->pdev, 1095 ring = pci_zalloc_consistent(rtlpci->pdev, sizeof(*ring) * entries,
1096 sizeof(*ring) * entries, &dma); 1096 &dma);
1097
1098 if (!ring || (unsigned long)ring & 0xFF) { 1097 if (!ring || (unsigned long)ring & 0xFF) {
1099 RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, 1098 RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
1100 "Cannot allocate TX ring (prio = %d)\n", prio); 1099 "Cannot allocate TX ring (prio = %d)\n", prio);
1101 return -ENOMEM; 1100 return -ENOMEM;
1102 } 1101 }
1103 1102
1104 memset(ring, 0, sizeof(*ring) * entries);
1105 rtlpci->tx_ring[prio].desc = ring; 1103 rtlpci->tx_ring[prio].desc = ring;
1106 rtlpci->tx_ring[prio].dma = dma; 1104 rtlpci->tx_ring[prio].dma = dma;
1107 rtlpci->tx_ring[prio].idx = 0; 1105 rtlpci->tx_ring[prio].idx = 0;
@@ -1139,10 +1137,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
1139 for (rx_queue_idx = 0; rx_queue_idx < RTL_PCI_MAX_RX_QUEUE; 1137 for (rx_queue_idx = 0; rx_queue_idx < RTL_PCI_MAX_RX_QUEUE;
1140 rx_queue_idx++) { 1138 rx_queue_idx++) {
1141 rtlpci->rx_ring[rx_queue_idx].desc = 1139 rtlpci->rx_ring[rx_queue_idx].desc =
1142 pci_alloc_consistent(rtlpci->pdev, 1140 pci_zalloc_consistent(rtlpci->pdev,
1143 sizeof(*rtlpci->rx_ring[rx_queue_idx]. 1141 sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) * rtlpci->rxringcount,
1144 desc) * rtlpci->rxringcount, 1142 &rtlpci->rx_ring[rx_queue_idx].dma);
1145 &rtlpci->rx_ring[rx_queue_idx].dma);
1146 1143
1147 if (!rtlpci->rx_ring[rx_queue_idx].desc || 1144 if (!rtlpci->rx_ring[rx_queue_idx].desc ||
1148 (unsigned long)rtlpci->rx_ring[rx_queue_idx].desc & 0xFF) { 1145 (unsigned long)rtlpci->rx_ring[rx_queue_idx].desc & 0xFF) {
@@ -1151,10 +1148,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
1151 return -ENOMEM; 1148 return -ENOMEM;
1152 } 1149 }
1153 1150
1154 memset(rtlpci->rx_ring[rx_queue_idx].desc, 0,
1155 sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) *
1156 rtlpci->rxringcount);
1157
1158 rtlpci->rx_ring[rx_queue_idx].idx = 0; 1151 rtlpci->rx_ring[rx_queue_idx].idx = 0;
1159 1152
1160 /* If amsdu_8k is disabled, set buffersize to 4096. This 1153 /* If amsdu_8k is disabled, set buffersize to 4096. This
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index c864f82bd37d..30e981be14c2 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -2204,7 +2204,7 @@ static int __init parport_ip32_init(void)
2204{ 2204{
2205 pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n"); 2205 pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
2206 this_port = parport_ip32_probe_port(); 2206 this_port = parport_ip32_probe_port();
2207 return IS_ERR(this_port) ? PTR_ERR(this_port) : 0; 2207 return PTR_ERR_OR_ZERO(this_port);
2208} 2208}
2209 2209
2210/** 2210/**
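
Note: PTR_ERR_OR_ZERO() is the err.h helper for exactly the pattern being replaced: return the encoded error if the pointer is an error value, otherwise 0. Its shape, a hedged paraphrase of include/linux/err.h:

    /* Hedged paraphrase of the include/linux/err.h helper: */
    static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            else
                    return 0;
    }
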
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 0305675270ee..a7b42680a06a 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -644,27 +644,26 @@ enum tsi721_smsg_int_flag {
644 644
645#ifdef CONFIG_RAPIDIO_DMA_ENGINE 645#ifdef CONFIG_RAPIDIO_DMA_ENGINE
646 646
647#define TSI721_BDMA_BD_RING_SZ 128
648#define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1) 647#define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1)
649 648
650struct tsi721_tx_desc { 649struct tsi721_tx_desc {
651 struct dma_async_tx_descriptor txd; 650 struct dma_async_tx_descriptor txd;
652 struct tsi721_dma_desc *hw_desc;
653 u16 destid; 651 u16 destid;
654 /* low 64-bits of 66-bit RIO address */ 652 /* low 64-bits of 66-bit RIO address */
655 u64 rio_addr; 653 u64 rio_addr;
656 /* upper 2-bits of 66-bit RIO address */ 654 /* upper 2-bits of 66-bit RIO address */
657 u8 rio_addr_u; 655 u8 rio_addr_u;
658 u32 bcount; 656 enum dma_rtype rtype;
659 bool interrupt;
660 struct list_head desc_node; 657 struct list_head desc_node;
661 struct list_head tx_list; 658 struct scatterlist *sg;
659 unsigned int sg_len;
660 enum dma_status status;
662}; 661};
663 662
664struct tsi721_bdma_chan { 663struct tsi721_bdma_chan {
665 int id; 664 int id;
666 void __iomem *regs; 665 void __iomem *regs;
667 int bd_num; /* number of buffer descriptors */ 666 int bd_num; /* number of HW buffer descriptors */
668 void *bd_base; /* start of DMA descriptors */ 667 void *bd_base; /* start of DMA descriptors */
669 dma_addr_t bd_phys; 668 dma_addr_t bd_phys;
670 void *sts_base; /* start of DMA BD status FIFO */ 669 void *sts_base; /* start of DMA BD status FIFO */
@@ -680,7 +679,6 @@ struct tsi721_bdma_chan {
680 struct list_head active_list; 679 struct list_head active_list;
681 struct list_head queue; 680 struct list_head queue;
682 struct list_head free_list; 681 struct list_head free_list;
683 dma_cookie_t completed_cookie;
684 struct tasklet_struct tasklet; 682 struct tasklet_struct tasklet;
685 bool active; 683 bool active;
686}; 684};
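
Note: the header changes track a rework of the Tsi721 DMA channel. Software transaction descriptors (tsi721_tx_desc) no longer map one-to-one onto hardware buffer descriptors, so the per-descriptor hw_desc, bcount, interrupt and tx_list fields give way to a scatterlist cursor (sg, sg_len), an rtype and a status; bd_num is re-commented to count hardware descriptors only; the fixed TSI721_BDMA_BD_RING_SZ sizing moves to a module parameter in tsi721_dma.c (see below); and the driver-private completed_cookie is dropped in favor of the dmaengine core's cookie bookkeeping, hence the new #include "../../dma/dmaengine.h" in the next file. Those core helpers look roughly like this (a hedged sketch of drivers/dma/dmaengine.h):

    /*
     * Hedged sketch of the dmaengine core cookie helper the driver now
     * uses instead of tracking completed_cookie itself.
     */
    static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
    {
            tx->chan->completed_cookie = tx->cookie;
            tx->cookie = 0;
    }
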
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 44341dc5b148..f64c5decb747 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge 2 * DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge
3 * 3 *
4 * Copyright 2011 Integrated Device Technology, Inc. 4 * Copyright (c) 2011-2014 Integrated Device Technology, Inc.
5 * Alexandre Bounine <alexandre.bounine@idt.com> 5 * Alexandre Bounine <alexandre.bounine@idt.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify it 7 * This program is free software; you can redistribute it and/or modify it
@@ -14,9 +14,8 @@
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details. 15 * more details.
16 * 16 *
17 * You should have received a copy of the GNU General Public License along with 17 * The full GNU General Public License is included in this distribution in the
18 * this program; if not, write to the Free Software Foundation, Inc., 59 18 * file called COPYING.
19 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */ 19 */
21 20
22#include <linux/io.h> 21#include <linux/io.h>
@@ -32,9 +31,22 @@
32#include <linux/interrupt.h> 31#include <linux/interrupt.h>
33#include <linux/kfifo.h> 32#include <linux/kfifo.h>
34#include <linux/delay.h> 33#include <linux/delay.h>
34#include "../../dma/dmaengine.h"
35 35
36#include "tsi721.h" 36#include "tsi721.h"
37 37
38#define TSI721_DMA_TX_QUEUE_SZ 16 /* number of transaction descriptors */
39
40#ifdef CONFIG_PCI_MSI
41static irqreturn_t tsi721_bdma_msix(int irq, void *ptr);
42#endif
43static int tsi721_submit_sg(struct tsi721_tx_desc *desc);
44
45static unsigned int dma_desc_per_channel = 128;
46module_param(dma_desc_per_channel, uint, S_IWUSR | S_IRUGO);
47MODULE_PARM_DESC(dma_desc_per_channel,
48 "Number of DMA descriptors per channel (default: 128)");
49
38static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan) 50static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan)
39{ 51{
40 return container_of(chan, struct tsi721_bdma_chan, dchan); 52 return container_of(chan, struct tsi721_bdma_chan, dchan);
@@ -59,7 +71,7 @@ struct tsi721_tx_desc *tsi721_dma_first_active(
59 struct tsi721_tx_desc, desc_node); 71 struct tsi721_tx_desc, desc_node);
60} 72}
61 73
62static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan) 74static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
63{ 75{
64 struct tsi721_dma_desc *bd_ptr; 76 struct tsi721_dma_desc *bd_ptr;
65 struct device *dev = bdma_chan->dchan.device->dev; 77 struct device *dev = bdma_chan->dchan.device->dev;
@@ -67,17 +79,23 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
67 dma_addr_t bd_phys; 79 dma_addr_t bd_phys;
68 dma_addr_t sts_phys; 80 dma_addr_t sts_phys;
69 int sts_size; 81 int sts_size;
70 int bd_num = bdma_chan->bd_num; 82#ifdef CONFIG_PCI_MSI
83 struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
84#endif
71 85
72 dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id); 86 dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id);
73 87
74 /* Allocate space for DMA descriptors */ 88 /*
89 * Allocate space for DMA descriptors
90 * (add an extra element for link descriptor)
91 */
75 bd_ptr = dma_zalloc_coherent(dev, 92 bd_ptr = dma_zalloc_coherent(dev,
76 bd_num * sizeof(struct tsi721_dma_desc), 93 (bd_num + 1) * sizeof(struct tsi721_dma_desc),
77 &bd_phys, GFP_KERNEL); 94 &bd_phys, GFP_KERNEL);
78 if (!bd_ptr) 95 if (!bd_ptr)
79 return -ENOMEM; 96 return -ENOMEM;
80 97
98 bdma_chan->bd_num = bd_num;
81 bdma_chan->bd_phys = bd_phys; 99 bdma_chan->bd_phys = bd_phys;
82 bdma_chan->bd_base = bd_ptr; 100 bdma_chan->bd_base = bd_ptr;
83 101
@@ -85,8 +103,8 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
85 bd_ptr, (unsigned long long)bd_phys); 103 bd_ptr, (unsigned long long)bd_phys);
86 104
87 /* Allocate space for descriptor status FIFO */ 105 /* Allocate space for descriptor status FIFO */
88 sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ? 106 sts_size = ((bd_num + 1) >= TSI721_DMA_MINSTSSZ) ?
89 bd_num : TSI721_DMA_MINSTSSZ; 107 (bd_num + 1) : TSI721_DMA_MINSTSSZ;
90 sts_size = roundup_pow_of_two(sts_size); 108 sts_size = roundup_pow_of_two(sts_size);
91 sts_ptr = dma_zalloc_coherent(dev, 109 sts_ptr = dma_zalloc_coherent(dev,
92 sts_size * sizeof(struct tsi721_dma_sts), 110 sts_size * sizeof(struct tsi721_dma_sts),
@@ -94,7 +112,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
94 if (!sts_ptr) { 112 if (!sts_ptr) {
95 /* Free space allocated for DMA descriptors */ 113 /* Free space allocated for DMA descriptors */
96 dma_free_coherent(dev, 114 dma_free_coherent(dev,
97 bd_num * sizeof(struct tsi721_dma_desc), 115 (bd_num + 1) * sizeof(struct tsi721_dma_desc),
98 bd_ptr, bd_phys); 116 bd_ptr, bd_phys);
99 bdma_chan->bd_base = NULL; 117 bdma_chan->bd_base = NULL;
100 return -ENOMEM; 118 return -ENOMEM;
@@ -108,11 +126,11 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
108 "desc status FIFO @ %p (phys = %llx) size=0x%x\n", 126 "desc status FIFO @ %p (phys = %llx) size=0x%x\n",
109 sts_ptr, (unsigned long long)sts_phys, sts_size); 127 sts_ptr, (unsigned long long)sts_phys, sts_size);
110 128
111 /* Initialize DMA descriptors ring */ 129 /* Initialize DMA descriptors ring using added link descriptor */
112 bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29); 130 bd_ptr[bd_num].type_id = cpu_to_le32(DTYPE3 << 29);
113 bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys & 131 bd_ptr[bd_num].next_lo = cpu_to_le32((u64)bd_phys &
114 TSI721_DMAC_DPTRL_MASK); 132 TSI721_DMAC_DPTRL_MASK);
115 bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32); 133 bd_ptr[bd_num].next_hi = cpu_to_le32((u64)bd_phys >> 32);
116 134
117 /* Setup DMA descriptor pointers */ 135 /* Setup DMA descriptor pointers */
118 iowrite32(((u64)bd_phys >> 32), 136 iowrite32(((u64)bd_phys >> 32),
@@ -134,6 +152,55 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
134 152
135 ioread32(bdma_chan->regs + TSI721_DMAC_INT); 153 ioread32(bdma_chan->regs + TSI721_DMAC_INT);
136 154
155#ifdef CONFIG_PCI_MSI
156 /* Request interrupt service if we are in MSI-X mode */
157 if (priv->flags & TSI721_USING_MSIX) {
158 int rc, idx;
159
160 idx = TSI721_VECT_DMA0_DONE + bdma_chan->id;
161
162 rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
163 priv->msix[idx].irq_name, (void *)bdma_chan);
164
165 if (rc) {
166 dev_dbg(dev, "Unable to get MSI-X for BDMA%d-DONE\n",
167 bdma_chan->id);
168 goto err_out;
169 }
170
171 idx = TSI721_VECT_DMA0_INT + bdma_chan->id;
172
173 rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
174 priv->msix[idx].irq_name, (void *)bdma_chan);
175
176 if (rc) {
177 dev_dbg(dev, "Unable to get MSI-X for BDMA%d-INT\n",
178 bdma_chan->id);
179 free_irq(
180 priv->msix[TSI721_VECT_DMA0_DONE +
181 bdma_chan->id].vector,
182 (void *)bdma_chan);
183 }
184
185err_out:
186 if (rc) {
187 /* Free space allocated for DMA descriptors */
188 dma_free_coherent(dev,
189 (bd_num + 1) * sizeof(struct tsi721_dma_desc),
190 bd_ptr, bd_phys);
191 bdma_chan->bd_base = NULL;
192
193 /* Free space allocated for status descriptors */
194 dma_free_coherent(dev,
195 sts_size * sizeof(struct tsi721_dma_sts),
196 sts_ptr, sts_phys);
197 bdma_chan->sts_base = NULL;
198
199 return -EIO;
200 }
201 }
202#endif /* CONFIG_PCI_MSI */
203
137 /* Toggle DMA channel initialization */ 204 /* Toggle DMA channel initialization */
138 iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL); 205 iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
139 ioread32(bdma_chan->regs + TSI721_DMAC_CTL); 206 ioread32(bdma_chan->regs + TSI721_DMAC_CTL);
@@ -147,6 +214,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
147static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan) 214static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
148{ 215{
149 u32 ch_stat; 216 u32 ch_stat;
217#ifdef CONFIG_PCI_MSI
218 struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
219#endif
150 220
151 if (bdma_chan->bd_base == NULL) 221 if (bdma_chan->bd_base == NULL)
152 return 0; 222 return 0;
@@ -159,9 +229,18 @@ static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
159 /* Put DMA channel into init state */ 229 /* Put DMA channel into init state */
160 iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL); 230 iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
161 231
232#ifdef CONFIG_PCI_MSI
233 if (priv->flags & TSI721_USING_MSIX) {
234 free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
235 bdma_chan->id].vector, (void *)bdma_chan);
236 free_irq(priv->msix[TSI721_VECT_DMA0_INT +
237 bdma_chan->id].vector, (void *)bdma_chan);
238 }
239#endif /* CONFIG_PCI_MSI */
240
162 /* Free space allocated for DMA descriptors */ 241 /* Free space allocated for DMA descriptors */
163 dma_free_coherent(bdma_chan->dchan.device->dev, 242 dma_free_coherent(bdma_chan->dchan.device->dev,
164 bdma_chan->bd_num * sizeof(struct tsi721_dma_desc), 243 (bdma_chan->bd_num + 1) * sizeof(struct tsi721_dma_desc),
165 bdma_chan->bd_base, bdma_chan->bd_phys); 244 bdma_chan->bd_base, bdma_chan->bd_phys);
166 bdma_chan->bd_base = NULL; 245 bdma_chan->bd_base = NULL;
167 246
@@ -243,8 +322,8 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
243 } 322 }
244 323
245 dev_dbg(bdma_chan->dchan.device->dev, 324 dev_dbg(bdma_chan->dchan.device->dev,
246 "tx_chan: %p, chan: %d, regs: %p\n", 325 "%s: chan_%d (wrc=%d)\n", __func__, bdma_chan->id,
247 bdma_chan, bdma_chan->dchan.chan_id, bdma_chan->regs); 326 bdma_chan->wr_count_next);
248 327
249 iowrite32(bdma_chan->wr_count_next, 328 iowrite32(bdma_chan->wr_count_next,
250 bdma_chan->regs + TSI721_DMAC_DWRCNT); 329 bdma_chan->regs + TSI721_DMAC_DWRCNT);
@@ -253,72 +332,19 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
253 bdma_chan->wr_count = bdma_chan->wr_count_next; 332 bdma_chan->wr_count = bdma_chan->wr_count_next;
254} 333}
255 334
256static void tsi721_desc_put(struct tsi721_bdma_chan *bdma_chan,
257 struct tsi721_tx_desc *desc)
258{
259 dev_dbg(bdma_chan->dchan.device->dev,
260 "Put desc: %p into free list\n", desc);
261
262 if (desc) {
263 spin_lock_bh(&bdma_chan->lock);
264 list_splice_init(&desc->tx_list, &bdma_chan->free_list);
265 list_add(&desc->desc_node, &bdma_chan->free_list);
266 bdma_chan->wr_count_next = bdma_chan->wr_count;
267 spin_unlock_bh(&bdma_chan->lock);
268 }
269}
270
271static
272struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
273{
274 struct tsi721_tx_desc *tx_desc, *_tx_desc;
275 struct tsi721_tx_desc *ret = NULL;
276 int i;
277
278 spin_lock_bh(&bdma_chan->lock);
279 list_for_each_entry_safe(tx_desc, _tx_desc,
280 &bdma_chan->free_list, desc_node) {
281 if (async_tx_test_ack(&tx_desc->txd)) {
282 list_del(&tx_desc->desc_node);
283 ret = tx_desc;
284 break;
285 }
286 dev_dbg(bdma_chan->dchan.device->dev,
287 "desc %p not ACKed\n", tx_desc);
288 }
289
290 if (ret == NULL) {
291 dev_dbg(bdma_chan->dchan.device->dev,
292 "%s: unable to obtain tx descriptor\n", __func__);
293 goto err_out;
294 }
295
296 i = bdma_chan->wr_count_next % bdma_chan->bd_num;
297 if (i == bdma_chan->bd_num - 1) {
298 i = 0;
299 bdma_chan->wr_count_next++; /* skip link descriptor */
300 }
301
302 bdma_chan->wr_count_next++;
303 tx_desc->txd.phys = bdma_chan->bd_phys +
304 i * sizeof(struct tsi721_dma_desc);
305 tx_desc->hw_desc = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[i];
306err_out:
307 spin_unlock_bh(&bdma_chan->lock);
308
309 return ret;
310}
311
312static int 335static int
313tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg, 336tsi721_desc_fill_init(struct tsi721_tx_desc *desc,
314 enum dma_rtype rtype, u32 sys_size) 337 struct tsi721_dma_desc *bd_ptr,
338 struct scatterlist *sg, u32 sys_size)
315{ 339{
316 struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
317 u64 rio_addr; 340 u64 rio_addr;
318 341
342 if (bd_ptr == NULL)
343 return -EINVAL;
344
319 /* Initialize DMA descriptor */ 345 /* Initialize DMA descriptor */
320 bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) | 346 bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
321 (rtype << 19) | desc->destid); 347 (desc->rtype << 19) | desc->destid);
322 bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) | 348 bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
323 (sys_size << 26)); 349 (sys_size << 26));
324 rio_addr = (desc->rio_addr >> 2) | 350 rio_addr = (desc->rio_addr >> 2) |
@@ -335,51 +361,32 @@ tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
335} 361}
336 362
337static int 363static int
338tsi721_desc_fill_end(struct tsi721_tx_desc *desc) 364tsi721_desc_fill_end(struct tsi721_dma_desc *bd_ptr, u32 bcount, bool interrupt)
339{ 365{
340 struct tsi721_dma_desc *bd_ptr = desc->hw_desc; 366 if (bd_ptr == NULL)
367 return -EINVAL;
341 368
342 /* Update DMA descriptor */ 369 /* Update DMA descriptor */
343 if (desc->interrupt) 370 if (interrupt)
344 bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF); 371 bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
345 bd_ptr->bcount |= cpu_to_le32(desc->bcount & TSI721_DMAD_BCOUNT1); 372 bd_ptr->bcount |= cpu_to_le32(bcount & TSI721_DMAD_BCOUNT1);
346 373
347 return 0; 374 return 0;
348} 375}
349 376
350 377static void tsi721_dma_tx_err(struct tsi721_bdma_chan *bdma_chan,
351static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan, 378 struct tsi721_tx_desc *desc)
352 struct tsi721_tx_desc *desc)
353{ 379{
354 struct dma_async_tx_descriptor *txd = &desc->txd; 380 struct dma_async_tx_descriptor *txd = &desc->txd;
355 dma_async_tx_callback callback = txd->callback; 381 dma_async_tx_callback callback = txd->callback;
356 void *param = txd->callback_param; 382 void *param = txd->callback_param;
357 383
358 list_splice_init(&desc->tx_list, &bdma_chan->free_list);
359 list_move(&desc->desc_node, &bdma_chan->free_list); 384 list_move(&desc->desc_node, &bdma_chan->free_list);
360 bdma_chan->completed_cookie = txd->cookie;
361 385
362 if (callback) 386 if (callback)
363 callback(param); 387 callback(param);
364} 388}
365 389
366static void tsi721_dma_complete_all(struct tsi721_bdma_chan *bdma_chan)
367{
368 struct tsi721_tx_desc *desc, *_d;
369 LIST_HEAD(list);
370
371 BUG_ON(!tsi721_dma_is_idle(bdma_chan));
372
373 if (!list_empty(&bdma_chan->queue))
374 tsi721_start_dma(bdma_chan);
375
376 list_splice_init(&bdma_chan->active_list, &list);
377 list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
378
379 list_for_each_entry_safe(desc, _d, &list, desc_node)
380 tsi721_dma_chain_complete(bdma_chan, desc);
381}
382
383static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan) 390static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
384{ 391{
385 u32 srd_ptr; 392 u32 srd_ptr;
@@ -403,20 +410,159 @@ static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
403 bdma_chan->sts_rdptr = srd_ptr; 410 bdma_chan->sts_rdptr = srd_ptr;
404} 411}
405 412
413/* Must be called with the channel spinlock held */
414static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
415{
416 struct dma_chan *dchan = desc->txd.chan;
417 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
418 u32 sys_size;
419 u64 rio_addr;
420 dma_addr_t next_addr;
421 u32 bcount;
422 struct scatterlist *sg;
423 unsigned int i;
424 int err = 0;
425 struct tsi721_dma_desc *bd_ptr = NULL;
426 u32 idx, rd_idx;
427 u32 add_count = 0;
428
429 if (!tsi721_dma_is_idle(bdma_chan)) {
430 dev_err(bdma_chan->dchan.device->dev,
431 "BUG: Attempt to use non-idle channel\n");
432 return -EIO;
433 }
434
435 /*
436 * Fill DMA channel's hardware buffer descriptors.
437 * (NOTE: RapidIO destination address is limited to 64 bits for now)
438 */
439 rio_addr = desc->rio_addr;
440 next_addr = -1;
441 bcount = 0;
442 sys_size = dma_to_mport(bdma_chan->dchan.device)->sys_size;
443
444 rd_idx = ioread32(bdma_chan->regs + TSI721_DMAC_DRDCNT);
445 rd_idx %= (bdma_chan->bd_num + 1);
446
447 idx = bdma_chan->wr_count_next % (bdma_chan->bd_num + 1);
448 if (idx == bdma_chan->bd_num) {
449 /* wrap around link descriptor */
450 idx = 0;
451 add_count++;
452 }
453
454 dev_dbg(dchan->device->dev, "%s: BD ring status: rdi=%d wri=%d\n",
455 __func__, rd_idx, idx);
456
457 for_each_sg(desc->sg, sg, desc->sg_len, i) {
458
459 dev_dbg(dchan->device->dev, "sg%d/%d addr: 0x%llx len: %d\n",
460 i, desc->sg_len,
461 (unsigned long long)sg_dma_address(sg), sg_dma_len(sg));
462
463 if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
464 dev_err(dchan->device->dev,
465 "%s: SG entry %d is too large\n", __func__, i);
466 err = -EINVAL;
467 break;
468 }
469
470 /*
471 * If this sg entry forms contiguous block with previous one,
472 * try to merge it into existing DMA descriptor
473 */
474 if (next_addr == sg_dma_address(sg) &&
475 bcount + sg_dma_len(sg) <= TSI721_BDMA_MAX_BCOUNT) {
476 /* Adjust byte count of the descriptor */
477 bcount += sg_dma_len(sg);
478 goto entry_done;
479 } else if (next_addr != -1) {
480 /* Finalize descriptor using total byte count value */
481 tsi721_desc_fill_end(bd_ptr, bcount, 0);
482 dev_dbg(dchan->device->dev,
483 "%s: prev desc final len: %d\n",
484 __func__, bcount);
485 }
486
487 desc->rio_addr = rio_addr;
488
489 if (i && idx == rd_idx) {
490 dev_dbg(dchan->device->dev,
491 "%s: HW descriptor ring is full @ %d\n",
492 __func__, i);
493 desc->sg = sg;
494 desc->sg_len -= i;
495 break;
496 }
497
498 bd_ptr = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[idx];
499 err = tsi721_desc_fill_init(desc, bd_ptr, sg, sys_size);
500 if (err) {
501 dev_err(dchan->device->dev,
502 "Failed to build desc: err=%d\n", err);
503 break;
504 }
505
506 dev_dbg(dchan->device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
507 bd_ptr, desc->destid, desc->rio_addr);
508
509 next_addr = sg_dma_address(sg);
510 bcount = sg_dma_len(sg);
511
512 add_count++;
513 if (++idx == bdma_chan->bd_num) {
514 /* wrap around link descriptor */
515 idx = 0;
516 add_count++;
517 }
518
519entry_done:
520 if (sg_is_last(sg)) {
521 tsi721_desc_fill_end(bd_ptr, bcount, 0);
522 dev_dbg(dchan->device->dev, "%s: last desc final len: %d\n",
523 __func__, bcount);
524 desc->sg_len = 0;
525 } else {
526 rio_addr += sg_dma_len(sg);
527 next_addr += sg_dma_len(sg);
528 }
529 }
530
531 if (!err)
532 bdma_chan->wr_count_next += add_count;
533
534 return err;
535}
536
406static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan) 537static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan)
407{ 538{
408 if (list_empty(&bdma_chan->active_list) || 539 struct tsi721_tx_desc *desc;
409 list_is_singular(&bdma_chan->active_list)) { 540 int err;
410 dev_dbg(bdma_chan->dchan.device->dev, 541
411 "%s: Active_list empty\n", __func__); 542 dev_dbg(bdma_chan->dchan.device->dev, "%s: Enter\n", __func__);
412 tsi721_dma_complete_all(bdma_chan); 543
413 } else { 544 /*
414 dev_dbg(bdma_chan->dchan.device->dev, 545 * If there are any new transactions in the queue add them
415 "%s: Active_list NOT empty\n", __func__); 546 * into the processing list
416 tsi721_dma_chain_complete(bdma_chan, 547 */
417 tsi721_dma_first_active(bdma_chan)); 548 if (!list_empty(&bdma_chan->queue))
418 tsi721_start_dma(bdma_chan); 549 list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
550
551 /* Start new transaction (if available) */
552 if (!list_empty(&bdma_chan->active_list)) {
553 desc = tsi721_dma_first_active(bdma_chan);
554 err = tsi721_submit_sg(desc);
555 if (!err)
556 tsi721_start_dma(bdma_chan);
557 else {
558 tsi721_dma_tx_err(bdma_chan, desc);
559 dev_dbg(bdma_chan->dchan.device->dev,
560 "ERR: tsi721_submit_sg failed with err=%d\n",
561 err);
562 }
419 } 563 }
564
565 dev_dbg(bdma_chan->dchan.device->dev, "%s: Exit\n", __func__);
420} 566}
421 567
422static void tsi721_dma_tasklet(unsigned long data) 568static void tsi721_dma_tasklet(unsigned long data)
@@ -444,8 +590,29 @@ static void tsi721_dma_tasklet(unsigned long data)
444 } 590 }
445 591
446 if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) { 592 if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
593 struct tsi721_tx_desc *desc;
594
447 tsi721_clr_stat(bdma_chan); 595 tsi721_clr_stat(bdma_chan);
448 spin_lock(&bdma_chan->lock); 596 spin_lock(&bdma_chan->lock);
597 desc = tsi721_dma_first_active(bdma_chan);
598
599 if (desc->sg_len == 0) {
600 dma_async_tx_callback callback = NULL;
601 void *param = NULL;
602
603 desc->status = DMA_COMPLETE;
604 dma_cookie_complete(&desc->txd);
605 if (desc->txd.flags & DMA_PREP_INTERRUPT) {
606 callback = desc->txd.callback;
607 param = desc->txd.callback_param;
608 }
609 list_move(&desc->desc_node, &bdma_chan->free_list);
610 spin_unlock(&bdma_chan->lock);
611 if (callback)
612 callback(param);
613 spin_lock(&bdma_chan->lock);
614 }
615
449 tsi721_advance_work(bdma_chan); 616 tsi721_advance_work(bdma_chan);
450 spin_unlock(&bdma_chan->lock); 617 spin_unlock(&bdma_chan->lock);
451 } 618 }
@@ -460,21 +627,24 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
460 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(txd->chan); 627 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(txd->chan);
461 dma_cookie_t cookie; 628 dma_cookie_t cookie;
462 629
463 spin_lock_bh(&bdma_chan->lock); 630 /* Check if the descriptor is detached from any lists */
631 if (!list_empty(&desc->desc_node)) {
632 dev_err(bdma_chan->dchan.device->dev,
633 "%s: wrong state of descriptor %p\n", __func__, txd);
634 return -EIO;
635 }
464 636
465 cookie = txd->chan->cookie; 637 spin_lock_bh(&bdma_chan->lock);
466 if (++cookie < 0)
467 cookie = 1;
468 txd->chan->cookie = cookie;
469 txd->cookie = cookie;
470 638
471 if (list_empty(&bdma_chan->active_list)) { 639 if (!bdma_chan->active) {
472 list_add_tail(&desc->desc_node, &bdma_chan->active_list); 640 spin_unlock_bh(&bdma_chan->lock);
473 tsi721_start_dma(bdma_chan); 641 return -ENODEV;
474 } else {
475 list_add_tail(&desc->desc_node, &bdma_chan->queue);
476 } 642 }
477 643
644 cookie = dma_cookie_assign(txd);
645 desc->status = DMA_IN_PROGRESS;
646 list_add_tail(&desc->desc_node, &bdma_chan->queue);
647
478 spin_unlock_bh(&bdma_chan->lock); 648 spin_unlock_bh(&bdma_chan->lock);
479 return cookie; 649 return cookie;
480} 650}
@@ -482,115 +652,52 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
482static int tsi721_alloc_chan_resources(struct dma_chan *dchan) 652static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
483{ 653{
484 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 654 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
485#ifdef CONFIG_PCI_MSI
486 struct tsi721_device *priv = to_tsi721(dchan->device);
487#endif
488 struct tsi721_tx_desc *desc = NULL; 655 struct tsi721_tx_desc *desc = NULL;
489 LIST_HEAD(tmp_list);
490 int i; 656 int i;
491 int rc; 657
658 dev_dbg(dchan->device->dev, "%s: for channel %d\n",
659 __func__, bdma_chan->id);
492 660
493 if (bdma_chan->bd_base) 661 if (bdma_chan->bd_base)
494 return bdma_chan->bd_num - 1; 662 return TSI721_DMA_TX_QUEUE_SZ;
495 663
496 /* Initialize BDMA channel */ 664 /* Initialize BDMA channel */
497 if (tsi721_bdma_ch_init(bdma_chan)) { 665 if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) {
498 dev_err(dchan->device->dev, "Unable to initialize data DMA" 666 dev_err(dchan->device->dev, "Unable to initialize data DMA"
499 " channel %d, aborting\n", bdma_chan->id); 667 " channel %d, aborting\n", bdma_chan->id);
500 return -ENOMEM; 668 return -ENODEV;
501 } 669 }
502 670
503 /* Alocate matching number of logical descriptors */ 671 /* Allocate queue of transaction descriptors */
504 desc = kcalloc((bdma_chan->bd_num - 1), sizeof(struct tsi721_tx_desc), 672 desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc),
505 GFP_KERNEL); 673 GFP_KERNEL);
506 if (!desc) { 674 if (!desc) {
507 dev_err(dchan->device->dev, 675 dev_err(dchan->device->dev,
508 "Failed to allocate logical descriptors\n"); 676 "Failed to allocate logical descriptors\n");
509 rc = -ENOMEM; 677 tsi721_bdma_ch_free(bdma_chan);
510 goto err_out; 678 return -ENOMEM;
511 } 679 }
512 680
513 bdma_chan->tx_desc = desc; 681 bdma_chan->tx_desc = desc;
514 682
515 for (i = 0; i < bdma_chan->bd_num - 1; i++) { 683 for (i = 0; i < TSI721_DMA_TX_QUEUE_SZ; i++) {
516 dma_async_tx_descriptor_init(&desc[i].txd, dchan); 684 dma_async_tx_descriptor_init(&desc[i].txd, dchan);
517 desc[i].txd.tx_submit = tsi721_tx_submit; 685 desc[i].txd.tx_submit = tsi721_tx_submit;
518 desc[i].txd.flags = DMA_CTRL_ACK; 686 desc[i].txd.flags = DMA_CTRL_ACK;
519 INIT_LIST_HEAD(&desc[i].tx_list); 687 list_add(&desc[i].desc_node, &bdma_chan->free_list);
520 list_add_tail(&desc[i].desc_node, &tmp_list);
521 } 688 }
522 689
523 spin_lock_bh(&bdma_chan->lock); 690 dma_cookie_init(dchan);
524 list_splice(&tmp_list, &bdma_chan->free_list);
525 bdma_chan->completed_cookie = dchan->cookie = 1;
526 spin_unlock_bh(&bdma_chan->lock);
527
528#ifdef CONFIG_PCI_MSI
529 if (priv->flags & TSI721_USING_MSIX) {
530 /* Request interrupt service if we are in MSI-X mode */
531 rc = request_irq(
532 priv->msix[TSI721_VECT_DMA0_DONE +
533 bdma_chan->id].vector,
534 tsi721_bdma_msix, 0,
535 priv->msix[TSI721_VECT_DMA0_DONE +
536 bdma_chan->id].irq_name,
537 (void *)bdma_chan);
538
539 if (rc) {
540 dev_dbg(dchan->device->dev,
541 "Unable to allocate MSI-X interrupt for "
542 "BDMA%d-DONE\n", bdma_chan->id);
543 goto err_out;
544 }
545
546 rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
547 bdma_chan->id].vector,
548 tsi721_bdma_msix, 0,
549 priv->msix[TSI721_VECT_DMA0_INT +
550 bdma_chan->id].irq_name,
551 (void *)bdma_chan);
552
553 if (rc) {
554 dev_dbg(dchan->device->dev,
555 "Unable to allocate MSI-X interrupt for "
556 "BDMA%d-INT\n", bdma_chan->id);
557 free_irq(
558 priv->msix[TSI721_VECT_DMA0_DONE +
559 bdma_chan->id].vector,
560 (void *)bdma_chan);
561 rc = -EIO;
562 goto err_out;
563 }
564 }
565#endif /* CONFIG_PCI_MSI */
566 691
567 bdma_chan->active = true; 692 bdma_chan->active = true;
568 tsi721_bdma_interrupt_enable(bdma_chan, 1); 693 tsi721_bdma_interrupt_enable(bdma_chan, 1);
569 694
570 return bdma_chan->bd_num - 1; 695 return TSI721_DMA_TX_QUEUE_SZ;
571
572err_out:
573 kfree(desc);
574 tsi721_bdma_ch_free(bdma_chan);
575 return rc;
576} 696}
577 697
578static void tsi721_free_chan_resources(struct dma_chan *dchan) 698static void tsi721_sync_dma_irq(struct tsi721_bdma_chan *bdma_chan)
579{ 699{
580 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 700 struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
581 struct tsi721_device *priv = to_tsi721(dchan->device);
582 LIST_HEAD(list);
583
584 dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
585
586 if (bdma_chan->bd_base == NULL)
587 return;
588
589 BUG_ON(!list_empty(&bdma_chan->active_list));
590 BUG_ON(!list_empty(&bdma_chan->queue));
591
592 tsi721_bdma_interrupt_enable(bdma_chan, 0);
593 bdma_chan->active = false;
594 701
595#ifdef CONFIG_PCI_MSI 702#ifdef CONFIG_PCI_MSI
596 if (priv->flags & TSI721_USING_MSIX) { 703 if (priv->flags & TSI721_USING_MSIX) {
@@ -601,64 +708,48 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan)
601 } else 708 } else
602#endif 709#endif
603 synchronize_irq(priv->pdev->irq); 710 synchronize_irq(priv->pdev->irq);
711}
604 712
605 tasklet_kill(&bdma_chan->tasklet); 713static void tsi721_free_chan_resources(struct dma_chan *dchan)
714{
715 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
606 716
607 spin_lock_bh(&bdma_chan->lock); 717 dev_dbg(dchan->device->dev, "%s: for channel %d\n",
608 list_splice_init(&bdma_chan->free_list, &list); 718 __func__, bdma_chan->id);
609 spin_unlock_bh(&bdma_chan->lock);
610 719
611#ifdef CONFIG_PCI_MSI 720 if (bdma_chan->bd_base == NULL)
612 if (priv->flags & TSI721_USING_MSIX) { 721 return;
613 free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
614 bdma_chan->id].vector, (void *)bdma_chan);
615 free_irq(priv->msix[TSI721_VECT_DMA0_INT +
616 bdma_chan->id].vector, (void *)bdma_chan);
617 }
618#endif /* CONFIG_PCI_MSI */
619 722
620 tsi721_bdma_ch_free(bdma_chan); 723 BUG_ON(!list_empty(&bdma_chan->active_list));
724 BUG_ON(!list_empty(&bdma_chan->queue));
725
726 tsi721_bdma_interrupt_enable(bdma_chan, 0);
727 bdma_chan->active = false;
728 tsi721_sync_dma_irq(bdma_chan);
729 tasklet_kill(&bdma_chan->tasklet);
730 INIT_LIST_HEAD(&bdma_chan->free_list);
621 kfree(bdma_chan->tx_desc); 731 kfree(bdma_chan->tx_desc);
732 tsi721_bdma_ch_free(bdma_chan);
622} 733}
623 734
624static 735static
625enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, 736enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
626 struct dma_tx_state *txstate) 737 struct dma_tx_state *txstate)
627{ 738{
628 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 739 return dma_cookie_status(dchan, cookie, txstate);
629 dma_cookie_t last_used;
630 dma_cookie_t last_completed;
631 int ret;
632
633 spin_lock_bh(&bdma_chan->lock);
634 last_completed = bdma_chan->completed_cookie;
635 last_used = dchan->cookie;
636 spin_unlock_bh(&bdma_chan->lock);
637
638 ret = dma_async_is_complete(cookie, last_completed, last_used);
639
640 dma_set_tx_state(txstate, last_completed, last_used, 0);
641
642 dev_dbg(dchan->device->dev,
643 "%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
644 __func__, ret, last_completed, last_used);
645
646 return ret;
647} 740}
648 741
649static void tsi721_issue_pending(struct dma_chan *dchan) 742static void tsi721_issue_pending(struct dma_chan *dchan)
650{ 743{
651 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 744 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
652 745
653 dev_dbg(dchan->device->dev, "%s: Entry\n", __func__); 746 dev_dbg(dchan->device->dev, "%s: Enter\n", __func__);
654 747
655 if (tsi721_dma_is_idle(bdma_chan)) { 748 if (tsi721_dma_is_idle(bdma_chan) && bdma_chan->active) {
656 spin_lock_bh(&bdma_chan->lock); 749 spin_lock_bh(&bdma_chan->lock);
657 tsi721_advance_work(bdma_chan); 750 tsi721_advance_work(bdma_chan);
658 spin_unlock_bh(&bdma_chan->lock); 751 spin_unlock_bh(&bdma_chan->lock);
659 } else 752 }
660 dev_dbg(dchan->device->dev,
661 "%s: DMA channel still busy\n", __func__);
662} 753}
663 754
664static 755static
@@ -668,21 +759,19 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
668 void *tinfo) 759 void *tinfo)
669{ 760{
670 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 761 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
671 struct tsi721_tx_desc *desc = NULL; 762 struct tsi721_tx_desc *desc, *_d;
672 struct tsi721_tx_desc *first = NULL;
673 struct scatterlist *sg;
674 struct rio_dma_ext *rext = tinfo; 763 struct rio_dma_ext *rext = tinfo;
675 u64 rio_addr = rext->rio_addr; /* limited to 64-bit rio_addr for now */
676 unsigned int i;
677 u32 sys_size = dma_to_mport(dchan->device)->sys_size;
678 enum dma_rtype rtype; 764 enum dma_rtype rtype;
679 dma_addr_t next_addr = -1; 765 struct dma_async_tx_descriptor *txd = NULL;
680 766
681 if (!sgl || !sg_len) { 767 if (!sgl || !sg_len) {
682 dev_err(dchan->device->dev, "%s: No SG list\n", __func__); 768 dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
683 return NULL; 769 return NULL;
684 } 770 }
685 771
772 dev_dbg(dchan->device->dev, "%s: %s\n", __func__,
 773		(dir == DMA_DEV_TO_MEM) ? "READ" : "WRITE");
774
686 if (dir == DMA_DEV_TO_MEM) 775 if (dir == DMA_DEV_TO_MEM)
687 rtype = NREAD; 776 rtype = NREAD;
688 else if (dir == DMA_MEM_TO_DEV) { 777 else if (dir == DMA_MEM_TO_DEV) {
@@ -704,97 +793,26 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
704 return NULL; 793 return NULL;
705 } 794 }
706 795
707 for_each_sg(sgl, sg, sg_len, i) { 796 spin_lock_bh(&bdma_chan->lock);
708 int err;
709
710 if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
711 dev_err(dchan->device->dev,
712 "%s: SG entry %d is too large\n", __func__, i);
713 goto err_desc_put;
714 }
715
716 /*
717 * If this sg entry forms contiguous block with previous one,
718 * try to merge it into existing DMA descriptor
719 */
720 if (desc) {
721 if (next_addr == sg_dma_address(sg) &&
722 desc->bcount + sg_dma_len(sg) <=
723 TSI721_BDMA_MAX_BCOUNT) {
724 /* Adjust byte count of the descriptor */
725 desc->bcount += sg_dma_len(sg);
726 goto entry_done;
727 }
728
729 /*
730 * Finalize this descriptor using total
731 * byte count value.
732 */
733 tsi721_desc_fill_end(desc);
734 dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
735 __func__, desc->bcount);
736 }
737
738 /*
739 * Obtain and initialize a new descriptor
740 */
741 desc = tsi721_desc_get(bdma_chan);
742 if (!desc) {
743 dev_err(dchan->device->dev,
744 "%s: Failed to get new descriptor for SG %d\n",
745 __func__, i);
746 goto err_desc_put;
747 }
748
749 desc->destid = rext->destid;
750 desc->rio_addr = rio_addr;
751 desc->rio_addr_u = 0;
752 desc->bcount = sg_dma_len(sg);
753
754 dev_dbg(dchan->device->dev,
755 "sg%d desc: 0x%llx, addr: 0x%llx len: %d\n",
756 i, (u64)desc->txd.phys,
757 (unsigned long long)sg_dma_address(sg),
758 sg_dma_len(sg));
759
760 dev_dbg(dchan->device->dev,
761 "bd_ptr = %p did=%d raddr=0x%llx\n",
762 desc->hw_desc, desc->destid, desc->rio_addr);
763
764 err = tsi721_desc_fill_init(desc, sg, rtype, sys_size);
765 if (err) {
766 dev_err(dchan->device->dev,
767 "Failed to build desc: %d\n", err);
768 goto err_desc_put;
769 }
770
771 next_addr = sg_dma_address(sg);
772
773 if (!first)
774 first = desc;
775 else
776 list_add_tail(&desc->desc_node, &first->tx_list);
777 797
778entry_done: 798 list_for_each_entry_safe(desc, _d, &bdma_chan->free_list, desc_node) {
779 if (sg_is_last(sg)) { 799 if (async_tx_test_ack(&desc->txd)) {
780 desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0; 800 list_del_init(&desc->desc_node);
781 tsi721_desc_fill_end(desc); 801 desc->destid = rext->destid;
782 dev_dbg(dchan->device->dev, "%s: desc final len: %d\n", 802 desc->rio_addr = rext->rio_addr;
783 __func__, desc->bcount); 803 desc->rio_addr_u = 0;
784 } else { 804 desc->rtype = rtype;
785 rio_addr += sg_dma_len(sg); 805 desc->sg_len = sg_len;
786 next_addr += sg_dma_len(sg); 806 desc->sg = sgl;
807 txd = &desc->txd;
808 txd->flags = flags;
809 break;
787 } 810 }
788 } 811 }
789 812
790 first->txd.cookie = -EBUSY; 813 spin_unlock_bh(&bdma_chan->lock);
791 desc->txd.flags = flags;
792
793 return &first->txd;
794 814
795err_desc_put: 815 return txd;
796 tsi721_desc_put(bdma_chan, first);
797 return NULL;
798} 816}
799 817
800static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd, 818static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
@@ -802,23 +820,34 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
802{ 820{
803 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); 821 struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
804 struct tsi721_tx_desc *desc, *_d; 822 struct tsi721_tx_desc *desc, *_d;
823 u32 dmac_int;
805 LIST_HEAD(list); 824 LIST_HEAD(list);
806 825
807 dev_dbg(dchan->device->dev, "%s: Entry\n", __func__); 826 dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
808 827
809 if (cmd != DMA_TERMINATE_ALL) 828 if (cmd != DMA_TERMINATE_ALL)
810 return -ENXIO; 829 return -ENOSYS;
811 830
812 spin_lock_bh(&bdma_chan->lock); 831 spin_lock_bh(&bdma_chan->lock);
813 832
814 /* make sure to stop the transfer */ 833 bdma_chan->active = false;
815 iowrite32(TSI721_DMAC_CTL_SUSP, bdma_chan->regs + TSI721_DMAC_CTL); 834
835 if (!tsi721_dma_is_idle(bdma_chan)) {
836 /* make sure to stop the transfer */
837 iowrite32(TSI721_DMAC_CTL_SUSP,
838 bdma_chan->regs + TSI721_DMAC_CTL);
839
840 /* Wait until DMA channel stops */
841 do {
842 dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
843 } while ((dmac_int & TSI721_DMAC_INT_SUSP) == 0);
844 }
816 845
817 list_splice_init(&bdma_chan->active_list, &list); 846 list_splice_init(&bdma_chan->active_list, &list);
818 list_splice_init(&bdma_chan->queue, &list); 847 list_splice_init(&bdma_chan->queue, &list);
819 848
820 list_for_each_entry_safe(desc, _d, &list, desc_node) 849 list_for_each_entry_safe(desc, _d, &list, desc_node)
821 tsi721_dma_chain_complete(bdma_chan, desc); 850 tsi721_dma_tx_err(bdma_chan, desc);
822 851
823 spin_unlock_bh(&bdma_chan->lock); 852 spin_unlock_bh(&bdma_chan->lock);
824 853
@@ -828,22 +857,18 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
828int tsi721_register_dma(struct tsi721_device *priv) 857int tsi721_register_dma(struct tsi721_device *priv)
829{ 858{
830 int i; 859 int i;
831 int nr_channels = TSI721_DMA_MAXCH; 860 int nr_channels = 0;
832 int err; 861 int err;
833 struct rio_mport *mport = priv->mport; 862 struct rio_mport *mport = priv->mport;
834 863
835 mport->dma.dev = &priv->pdev->dev;
836 mport->dma.chancnt = nr_channels;
837
838 INIT_LIST_HEAD(&mport->dma.channels); 864 INIT_LIST_HEAD(&mport->dma.channels);
839 865
840 for (i = 0; i < nr_channels; i++) { 866 for (i = 0; i < TSI721_DMA_MAXCH; i++) {
841 struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i]; 867 struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i];
842 868
843 if (i == TSI721_DMACH_MAINT) 869 if (i == TSI721_DMACH_MAINT)
844 continue; 870 continue;
845 871
846 bdma_chan->bd_num = TSI721_BDMA_BD_RING_SZ;
847 bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i); 872 bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
848 873
849 bdma_chan->dchan.device = &mport->dma; 874 bdma_chan->dchan.device = &mport->dma;
@@ -862,12 +887,15 @@ int tsi721_register_dma(struct tsi721_device *priv)
862 (unsigned long)bdma_chan); 887 (unsigned long)bdma_chan);
863 list_add_tail(&bdma_chan->dchan.device_node, 888 list_add_tail(&bdma_chan->dchan.device_node,
864 &mport->dma.channels); 889 &mport->dma.channels);
890 nr_channels++;
865 } 891 }
866 892
893 mport->dma.chancnt = nr_channels;
867 dma_cap_zero(mport->dma.cap_mask); 894 dma_cap_zero(mport->dma.cap_mask);
868 dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask); 895 dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);
869 dma_cap_set(DMA_SLAVE, mport->dma.cap_mask); 896 dma_cap_set(DMA_SLAVE, mport->dma.cap_mask);
870 897
898 mport->dma.dev = &priv->pdev->dev;
871 mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources; 899 mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources;
872 mport->dma.device_free_chan_resources = tsi721_free_chan_resources; 900 mport->dma.device_free_chan_resources = tsi721_free_chan_resources;
873 mport->dma.device_tx_status = tsi721_tx_status; 901 mport->dma.device_tx_status = tsi721_tx_status;
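The rework above also retires the driver-private completed_cookie in favor of
the dmaengine core helpers from drivers/dma/dmaengine.h. A minimal sketch of
the cookie lifecycle as the driver now uses it (the function wrapper is
illustrative, the helper calls are real):

    static void cookie_lifecycle_sketch(struct dma_chan *dchan,
                                        struct dma_async_tx_descriptor *txd)
    {
            dma_cookie_t cookie;

            dma_cookie_init(dchan);          /* alloc_chan_resources: reset state */
            cookie = dma_cookie_assign(txd); /* tx_submit: hand out next cookie */
            dma_cookie_complete(txd);        /* completion tasklet: mark done */
            /* device_tx_status then reduces to dma_cookie_status() */
    }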
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index a54ba0494dd3..d7b87c64b7cd 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table);
1509 1509
1510static bool rio_chan_filter(struct dma_chan *chan, void *arg) 1510static bool rio_chan_filter(struct dma_chan *chan, void *arg)
1511{ 1511{
1512 struct rio_dev *rdev = arg; 1512 struct rio_mport *mport = arg;
1513 1513
1514 /* Check that DMA device belongs to the right MPORT */ 1514 /* Check that DMA device belongs to the right MPORT */
1515 return (rdev->net->hport == 1515 return mport == container_of(chan->device, struct rio_mport, dma);
1516 container_of(chan->device, struct rio_mport, dma));
1517} 1516}
1518 1517
1519/** 1518/**
1520 * rio_request_dma - request RapidIO capable DMA channel that supports 1519 * rio_request_mport_dma - request RapidIO capable DMA channel associated
1521 * specified target RapidIO device. 1520 * with specified local RapidIO mport device.
1522 * @rdev: RIO device control structure 1521 * @mport: RIO mport to perform DMA data transfers
1523 * 1522 *
1524 * Returns pointer to allocated DMA channel or NULL if failed. 1523 * Returns pointer to allocated DMA channel or NULL if failed.
1525 */ 1524 */
1526struct dma_chan *rio_request_dma(struct rio_dev *rdev) 1525struct dma_chan *rio_request_mport_dma(struct rio_mport *mport)
1527{ 1526{
1528 dma_cap_mask_t mask; 1527 dma_cap_mask_t mask;
1529 struct dma_chan *dchan;
1530 1528
1531 dma_cap_zero(mask); 1529 dma_cap_zero(mask);
1532 dma_cap_set(DMA_SLAVE, mask); 1530 dma_cap_set(DMA_SLAVE, mask);
1533 dchan = dma_request_channel(mask, rio_chan_filter, rdev); 1531 return dma_request_channel(mask, rio_chan_filter, mport);
1532}
1533EXPORT_SYMBOL_GPL(rio_request_mport_dma);
1534 1534
1535 return dchan; 1535/**
1536 * rio_request_dma - request RapidIO capable DMA channel that supports
1537 * specified target RapidIO device.
1538 * @rdev: RIO device associated with DMA transfer
1539 *
1540 * Returns pointer to allocated DMA channel or NULL if failed.
1541 */
1542struct dma_chan *rio_request_dma(struct rio_dev *rdev)
1543{
1544 return rio_request_mport_dma(rdev->net->hport);
1536} 1545}
1537EXPORT_SYMBOL_GPL(rio_request_dma); 1546EXPORT_SYMBOL_GPL(rio_request_dma);
1538 1547
@@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan)
1547EXPORT_SYMBOL_GPL(rio_release_dma); 1556EXPORT_SYMBOL_GPL(rio_release_dma);
1548 1557
1549/** 1558/**
1550 * rio_dma_prep_slave_sg - RapidIO specific wrapper 1559 * rio_dma_prep_xfer - RapidIO specific wrapper
1551 * for device_prep_slave_sg callback defined by DMAENGINE. 1560 * for device_prep_slave_sg callback defined by DMAENGINE.
1552 * @rdev: RIO device control structure
1553 * @dchan: DMA channel to configure 1561 * @dchan: DMA channel to configure
1562 * @destid: target RapidIO device destination ID
1554 * @data: RIO specific data descriptor 1563 * @data: RIO specific data descriptor
1555 * @direction: DMA data transfer direction (TO or FROM the device) 1564 * @direction: DMA data transfer direction (TO or FROM the device)
1556 * @flags: dmaengine defined flags 1565 * @flags: dmaengine defined flags
@@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma);
1560 * target RIO device. 1569 * target RIO device.
1561 * Returns pointer to DMA transaction descriptor or NULL if failed. 1570 * Returns pointer to DMA transaction descriptor or NULL if failed.
1562 */ 1571 */
1563struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, 1572struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan,
1564 struct dma_chan *dchan, struct rio_dma_data *data, 1573 u16 destid, struct rio_dma_data *data,
1565 enum dma_transfer_direction direction, unsigned long flags) 1574 enum dma_transfer_direction direction, unsigned long flags)
1566{ 1575{
1567 struct dma_async_tx_descriptor *txd = NULL;
1568 struct rio_dma_ext rio_ext; 1576 struct rio_dma_ext rio_ext;
1569 1577
1570 if (dchan->device->device_prep_slave_sg == NULL) { 1578 if (dchan->device->device_prep_slave_sg == NULL) {
@@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
1572 return NULL; 1580 return NULL;
1573 } 1581 }
1574 1582
1575 rio_ext.destid = rdev->destid; 1583 rio_ext.destid = destid;
1576 rio_ext.rio_addr_u = data->rio_addr_u; 1584 rio_ext.rio_addr_u = data->rio_addr_u;
1577 rio_ext.rio_addr = data->rio_addr; 1585 rio_ext.rio_addr = data->rio_addr;
1578 rio_ext.wr_type = data->wr_type; 1586 rio_ext.wr_type = data->wr_type;
1579 1587
1580 txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len, 1588 return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
1581 direction, flags, &rio_ext); 1589 direction, flags, &rio_ext);
1590}
1591EXPORT_SYMBOL_GPL(rio_dma_prep_xfer);
1582 1592
1583 return txd; 1593/**
1594 * rio_dma_prep_slave_sg - RapidIO specific wrapper
1595 * for device_prep_slave_sg callback defined by DMAENGINE.
1596 * @rdev: RIO device control structure
1597 * @dchan: DMA channel to configure
1598 * @data: RIO specific data descriptor
1599 * @direction: DMA data transfer direction (TO or FROM the device)
1600 * @flags: dmaengine defined flags
1601 *
1602 * Initializes RapidIO capable DMA channel for the specified data transfer.
1603 * Uses DMA channel private extension to pass information related to remote
1604 * target RIO device.
1605 * Returns pointer to DMA transaction descriptor or NULL if failed.
1606 */
1607struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
1608 struct dma_chan *dchan, struct rio_dma_data *data,
1609 enum dma_transfer_direction direction, unsigned long flags)
1610{
1611 return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags);
1584} 1612}
1585EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg); 1613EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
1586 1614
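Putting the new mport-based entry points together, a caller no longer needs a
struct rio_dev to move data. A hedged usage sketch (destid, the rio_dma_data
contents, and the error handling are illustrative, not taken from this patch):

    static int rio_dma_write_sketch(struct rio_mport *mport, u16 destid,
                                    struct rio_dma_data *data)
    {
            struct dma_chan *dchan;
            struct dma_async_tx_descriptor *txd;
            dma_cookie_t cookie;

            dchan = rio_request_mport_dma(mport);
            if (!dchan)
                    return -ENODEV;

            txd = rio_dma_prep_xfer(dchan, destid, data,
                                    DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
            if (!txd) {
                    rio_release_dma(dchan);
                    return -EIO;
            }

            cookie = dmaengine_submit(txd);
            dma_async_issue_pending(dchan);
            /* ... wait for the completion callback before releasing ... */
            rio_release_dma(dchan);
            return dma_submit_error(cookie) ? -EIO : 0;
    }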
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0754f5c7cb3b..a168e96142b9 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -373,6 +373,14 @@ config RTC_DRV_PCF8563
373 This driver can also be built as a module. If so, the module 373 This driver can also be built as a module. If so, the module
374 will be called rtc-pcf8563. 374 will be called rtc-pcf8563.
375 375
376config RTC_DRV_PCF85063
 377	tristate "NXP PCF85063"
378 help
 379	  If you say yes here you get support for the PCF85063 RTC chip.
380
381 This driver can also be built as a module. If so, the module
382 will be called rtc-pcf85063.
383
376config RTC_DRV_PCF8583 384config RTC_DRV_PCF8583
377 tristate "Philips PCF8583" 385 tristate "Philips PCF8583"
378 help 386 help
@@ -760,6 +768,15 @@ config RTC_DRV_DS1742
760 This driver can also be built as a module. If so, the module 768 This driver can also be built as a module. If so, the module
761 will be called rtc-ds1742. 769 will be called rtc-ds1742.
762 770
771config RTC_DRV_DS2404
772 tristate "Maxim/Dallas DS2404"
773 help
774 If you say yes here you get support for the
775 Dallas DS2404 RTC chip.
776
777 This driver can also be built as a module. If so, the module
778 will be called rtc-ds2404.
779
763config RTC_DRV_DA9052 780config RTC_DRV_DA9052
764 tristate "Dialog DA9052/DA9053 RTC" 781 tristate "Dialog DA9052/DA9053 RTC"
765 depends on PMIC_DA9052 782 depends on PMIC_DA9052
@@ -789,7 +806,7 @@ config RTC_DRV_DA9063
789 806
790config RTC_DRV_EFI 807config RTC_DRV_EFI
791 tristate "EFI RTC" 808 tristate "EFI RTC"
792 depends on IA64 809 depends on EFI
793 help 810 help
794 If you say yes here you will get support for the EFI 811 If you say yes here you will get support for the EFI
795 Real Time Clock. 812 Real Time Clock.
@@ -873,15 +890,6 @@ config RTC_DRV_V3020
873 This driver can also be built as a module. If so, the module 890 This driver can also be built as a module. If so, the module
874 will be called rtc-v3020. 891 will be called rtc-v3020.
875 892
876config RTC_DRV_DS2404
877 tristate "Dallas DS2404"
878 help
879 If you say yes here you get support for the
880 Dallas DS2404 RTC chip.
881
882 This driver can also be built as a module. If so, the module
883 will be called rtc-ds2404.
884
885config RTC_DRV_WM831X 893config RTC_DRV_WM831X
886 tristate "Wolfson Microelectronics WM831x RTC" 894 tristate "Wolfson Microelectronics WM831x RTC"
887 depends on MFD_WM831X 895 depends on MFD_WM831X
@@ -1349,6 +1357,7 @@ config RTC_DRV_SIRFSOC
1349 1357
1350config RTC_DRV_MOXART 1358config RTC_DRV_MOXART
1351 tristate "MOXA ART RTC" 1359 tristate "MOXA ART RTC"
1360 depends on ARCH_MOXART || COMPILE_TEST
1352 help 1361 help
1353 If you say yes here you get support for the MOXA ART 1362 If you say yes here you get support for the MOXA ART
1354 RTC module. 1363 RTC module.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 70347d041d10..56f061c7c815 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -10,6 +10,10 @@ obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
10obj-$(CONFIG_RTC_CLASS) += rtc-core.o 10obj-$(CONFIG_RTC_CLASS) += rtc-core.o
11rtc-core-y := class.o interface.o 11rtc-core-y := class.o interface.o
12 12
13ifdef CONFIG_RTC_DRV_EFI
14rtc-core-y += rtc-efi-platform.o
15endif
16
13rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o 17rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o
14rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o 18rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
15rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o 19rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
@@ -93,6 +97,7 @@ obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o
93obj-$(CONFIG_RTC_DRV_PCF2127) += rtc-pcf2127.o 97obj-$(CONFIG_RTC_DRV_PCF2127) += rtc-pcf2127.o
94obj-$(CONFIG_RTC_DRV_PCF8523) += rtc-pcf8523.o 98obj-$(CONFIG_RTC_DRV_PCF8523) += rtc-pcf8523.o
95obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o 99obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
100obj-$(CONFIG_RTC_DRV_PCF85063) += rtc-pcf85063.o
96obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 101obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
97obj-$(CONFIG_RTC_DRV_PCF2123) += rtc-pcf2123.o 102obj-$(CONFIG_RTC_DRV_PCF2123) += rtc-pcf2123.o
98obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o 103obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 589351ef75d0..38e26be705be 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -53,6 +53,7 @@ static int rtc_suspend(struct device *dev)
53 struct rtc_device *rtc = to_rtc_device(dev); 53 struct rtc_device *rtc = to_rtc_device(dev);
54 struct rtc_time tm; 54 struct rtc_time tm;
55 struct timespec delta, delta_delta; 55 struct timespec delta, delta_delta;
56 int err;
56 57
57 if (has_persistent_clock()) 58 if (has_persistent_clock())
58 return 0; 59 return 0;
@@ -61,7 +62,12 @@ static int rtc_suspend(struct device *dev)
61 return 0; 62 return 0;
62 63
63 /* snapshot the current RTC and system time at suspend*/ 64 /* snapshot the current RTC and system time at suspend*/
64 rtc_read_time(rtc, &tm); 65 err = rtc_read_time(rtc, &tm);
66 if (err < 0) {
 67		pr_debug("%s: failed to read rtc time\n", dev_name(&rtc->dev));
68 return 0;
69 }
70
65 getnstimeofday(&old_system); 71 getnstimeofday(&old_system);
66 rtc_tm_to_time(&tm, &old_rtc.tv_sec); 72 rtc_tm_to_time(&tm, &old_rtc.tv_sec);
67 73
@@ -94,6 +100,7 @@ static int rtc_resume(struct device *dev)
94 struct rtc_time tm; 100 struct rtc_time tm;
95 struct timespec new_system, new_rtc; 101 struct timespec new_system, new_rtc;
96 struct timespec sleep_time; 102 struct timespec sleep_time;
103 int err;
97 104
98 if (has_persistent_clock()) 105 if (has_persistent_clock())
99 return 0; 106 return 0;
@@ -104,7 +111,12 @@ static int rtc_resume(struct device *dev)
104 111
105 /* snapshot the current rtc and system time at resume */ 112 /* snapshot the current rtc and system time at resume */
106 getnstimeofday(&new_system); 113 getnstimeofday(&new_system);
107 rtc_read_time(rtc, &tm); 114 err = rtc_read_time(rtc, &tm);
115 if (err < 0) {
116 pr_debug("%s: fail to read rtc time\n", dev_name(&rtc->dev));
117 return 0;
118 }
119
108 if (rtc_valid_tm(&tm) != 0) { 120 if (rtc_valid_tm(&tm) != 0) {
109 pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); 121 pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev));
110 return 0; 122 return 0;
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 5813fa52c3d4..5b2717f5dafa 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -348,6 +348,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
348 348
349 /* Make sure we're not setting alarms in the past */ 349 /* Make sure we're not setting alarms in the past */
350 err = __rtc_read_time(rtc, &tm); 350 err = __rtc_read_time(rtc, &tm);
351 if (err)
352 return err;
351 rtc_tm_to_time(&tm, &now); 353 rtc_tm_to_time(&tm, &now);
352 if (scheduled <= now) 354 if (scheduled <= now)
353 return -ETIME; 355 return -ETIME;
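The hunks above all apply the same defensive rule: never consume the output of
rtc_read_time()/__rtc_read_time() without checking its return value. A minimal
sketch of the pattern (the helper name is illustrative):

    static int rtc_read_time_checked(struct rtc_device *rtc, struct rtc_time *tm)
    {
            int err = rtc_read_time(rtc, tm);

            if (err < 0)             /* *tm must not be trusted on failure */
                    return err;
            return rtc_valid_tm(tm); /* also reject bogus contents, as rtc_resume() does */
    }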
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index c3719189dd96..ae9f997223b1 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -4,6 +4,7 @@
4 * Real Time Clock 4 * Real Time Clock
5 * 5 *
6 * Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com> 6 * Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
7 * Ankur Srivastava <sankurece@gmail.com> : DS1343 Nvram Support
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -45,6 +46,9 @@
45#define DS1343_CONTROL_REG 0x0F 46#define DS1343_CONTROL_REG 0x0F
46#define DS1343_STATUS_REG 0x10 47#define DS1343_STATUS_REG 0x10
47#define DS1343_TRICKLE_REG 0x11 48#define DS1343_TRICKLE_REG 0x11
49#define DS1343_NVRAM 0x20
50
51#define DS1343_NVRAM_LEN 96
48 52
49/* DS1343 Control Registers bits */ 53/* DS1343 Control Registers bits */
50#define DS1343_EOSC 0x80 54#define DS1343_EOSC 0x80
@@ -149,6 +153,64 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
149static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter, 153static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
150 ds1343_store_glitchfilter); 154 ds1343_store_glitchfilter);
151 155
156static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
157 struct bin_attribute *attr,
158 char *buf, loff_t off, size_t count)
159{
160 int ret;
161 unsigned char address;
162 struct device *dev = kobj_to_dev(kobj);
163 struct ds1343_priv *priv = dev_get_drvdata(dev);
164
165 if (unlikely(!count))
166 return count;
167
168 if ((count + off) > DS1343_NVRAM_LEN)
169 count = DS1343_NVRAM_LEN - off;
170
171 address = DS1343_NVRAM + off;
172
173 ret = regmap_bulk_write(priv->map, address, buf, count);
174 if (ret < 0)
175 dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
176
177 return (ret < 0) ? ret : count;
178}
179
180
181static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
182 struct bin_attribute *attr,
183 char *buf, loff_t off, size_t count)
184{
185 int ret;
186 unsigned char address;
187 struct device *dev = kobj_to_dev(kobj);
188 struct ds1343_priv *priv = dev_get_drvdata(dev);
189
190 if (unlikely(!count))
191 return count;
192
193 if ((count + off) > DS1343_NVRAM_LEN)
194 count = DS1343_NVRAM_LEN - off;
195
196 address = DS1343_NVRAM + off;
197
198 ret = regmap_bulk_read(priv->map, address, buf, count);
199 if (ret < 0)
200 dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
201
202 return (ret < 0) ? ret : count;
203}
204
205
206static struct bin_attribute nvram_attr = {
207 .attr.name = "nvram",
208 .attr.mode = S_IRUGO | S_IWUSR,
209 .read = ds1343_nvram_read,
210 .write = ds1343_nvram_write,
211 .size = DS1343_NVRAM_LEN,
212};
213
152static ssize_t ds1343_show_alarmstatus(struct device *dev, 214static ssize_t ds1343_show_alarmstatus(struct device *dev,
153 struct device_attribute *attr, char *buf) 215 struct device_attribute *attr, char *buf)
154{ 216{
@@ -274,12 +336,16 @@ static int ds1343_sysfs_register(struct device *dev)
274 if (err) 336 if (err)
275 goto error1; 337 goto error1;
276 338
339 err = device_create_bin_file(dev, &nvram_attr);
340 if (err)
341 goto error2;
342
277 if (priv->irq <= 0) 343 if (priv->irq <= 0)
278 return err; 344 return err;
279 345
280 err = device_create_file(dev, &dev_attr_alarm_mode); 346 err = device_create_file(dev, &dev_attr_alarm_mode);
281 if (err) 347 if (err)
282 goto error2; 348 goto error3;
283 349
284 err = device_create_file(dev, &dev_attr_alarm_status); 350 err = device_create_file(dev, &dev_attr_alarm_status);
285 if (!err) 351 if (!err)
@@ -287,6 +353,9 @@ static int ds1343_sysfs_register(struct device *dev)
287 353
288 device_remove_file(dev, &dev_attr_alarm_mode); 354 device_remove_file(dev, &dev_attr_alarm_mode);
289 355
356error3:
357 device_remove_bin_file(dev, &nvram_attr);
358
290error2: 359error2:
291 device_remove_file(dev, &dev_attr_trickle_charger); 360 device_remove_file(dev, &dev_attr_trickle_charger);
292 361
@@ -302,6 +371,7 @@ static void ds1343_sysfs_unregister(struct device *dev)
302 371
303 device_remove_file(dev, &dev_attr_glitch_filter); 372 device_remove_file(dev, &dev_attr_glitch_filter);
304 device_remove_file(dev, &dev_attr_trickle_charger); 373 device_remove_file(dev, &dev_attr_trickle_charger);
374 device_remove_bin_file(dev, &nvram_attr);
305 375
306 if (priv->irq <= 0) 376 if (priv->irq <= 0)
307 return; 377 return;
@@ -684,6 +754,7 @@ static struct spi_driver ds1343_driver = {
684module_spi_driver(ds1343_driver); 754module_spi_driver(ds1343_driver);
685 755
686MODULE_DESCRIPTION("DS1343 RTC SPI Driver"); 756MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
687MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>"); 757MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>,"
 758	      " Ankur Srivastava <sankurece@gmail.com>");
688MODULE_LICENSE("GPL v2"); 759MODULE_LICENSE("GPL v2");
689MODULE_VERSION(DS1343_DRV_VERSION); 760MODULE_VERSION(DS1343_DRV_VERSION);
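The new "nvram" bin attribute exposes the chip's 96 bytes of NVRAM as an
ordinary sysfs file. A hypothetical userspace sketch (the spi0.0 path depends
on the board's SPI topology and is only an example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            unsigned char buf[96];  /* DS1343_NVRAM_LEN */
            ssize_t n;
            int fd = open("/sys/bus/spi/devices/spi0.0/nvram", O_RDONLY);

            if (fd < 0)
                    return 1;
            n = read(fd, buf, sizeof(buf));
            if (n > 0)
                    fwrite(buf, 1, (size_t)n, stdout);
            close(fd);
            return 0;
    }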
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index c6b2191a4128..9822715db8ba 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -231,7 +231,7 @@ static struct platform_driver ds1742_rtc_driver = {
231 .driver = { 231 .driver = {
232 .name = "rtc-ds1742", 232 .name = "rtc-ds1742",
233 .owner = THIS_MODULE, 233 .owner = THIS_MODULE,
234 .of_match_table = ds1742_rtc_of_match, 234 .of_match_table = of_match_ptr(ds1742_rtc_of_match),
235 }, 235 },
236}; 236};
237 237
diff --git a/drivers/rtc/rtc-efi-platform.c b/drivers/rtc/rtc-efi-platform.c
new file mode 100644
index 000000000000..b40fbe332af4
--- /dev/null
+++ b/drivers/rtc/rtc-efi-platform.c
@@ -0,0 +1,31 @@
1/*
2 * Moved from arch/ia64/kernel/time.c
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
8 * Copyright (C) 1999-2000 VA Linux Systems
9 * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/efi.h>
15#include <linux/platform_device.h>
16
17static struct platform_device rtc_efi_dev = {
18 .name = "rtc-efi",
19 .id = -1,
20};
21
22static int __init rtc_init(void)
23{
24 if (efi_enabled(EFI_RUNTIME_SERVICES))
25 if (platform_device_register(&rtc_efi_dev) < 0)
26 pr_err("unable to register rtc device...\n");
27
28 /* not necessarily an error */
29 return 0;
30}
31module_init(rtc_init);
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c4c38431012e..8225b89de810 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -17,6 +17,7 @@
17 17
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/stringify.h>
20#include <linux/time.h> 21#include <linux/time.h>
21#include <linux/platform_device.h> 22#include <linux/platform_device.h>
22#include <linux/rtc.h> 23#include <linux/rtc.h>
@@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft)
48 int y; 49 int y;
49 int ndays = 0; 50 int ndays = 0;
50 51
51 if (eft->year < 1998) { 52 if (eft->year < EFI_RTC_EPOCH) {
52 pr_err("EFI year < 1998, invalid date\n"); 53 pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n");
53 return -1; 54 return -1;
54 } 55 }
55 56
@@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft)
78 eft->timezone = EFI_UNSPECIFIED_TIMEZONE; 79 eft->timezone = EFI_UNSPECIFIED_TIMEZONE;
79} 80}
80 81
81static void 82static bool
82convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) 83convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
83{ 84{
84 memset(wtime, 0, sizeof(*wtime)); 85 memset(wtime, 0, sizeof(*wtime));
86
87 if (eft->second >= 60)
88 return false;
85 wtime->tm_sec = eft->second; 89 wtime->tm_sec = eft->second;
90
91 if (eft->minute >= 60)
92 return false;
86 wtime->tm_min = eft->minute; 93 wtime->tm_min = eft->minute;
94
95 if (eft->hour >= 24)
96 return false;
87 wtime->tm_hour = eft->hour; 97 wtime->tm_hour = eft->hour;
98
99 if (!eft->day || eft->day > 31)
100 return false;
88 wtime->tm_mday = eft->day; 101 wtime->tm_mday = eft->day;
102
103 if (!eft->month || eft->month > 12)
104 return false;
89 wtime->tm_mon = eft->month - 1; 105 wtime->tm_mon = eft->month - 1;
90 wtime->tm_year = eft->year - 1900; 106 wtime->tm_year = eft->year - 1900;
91 107
92 /* day of the week [0-6], Sunday=0 */ 108 /* day of the week [0-6], Sunday=0 */
93 wtime->tm_wday = compute_wday(eft); 109 wtime->tm_wday = compute_wday(eft);
110 if (wtime->tm_wday < 0)
111 return false;
94 112
 95	/* day of the year [0-365] */ 113	/* day of the year [0-365] */
96 wtime->tm_yday = compute_yday(eft); 114 wtime->tm_yday = compute_yday(eft);
@@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
106 default: 124 default:
107 wtime->tm_isdst = -1; 125 wtime->tm_isdst = -1;
108 } 126 }
127
128 return true;
109} 129}
110 130
111static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) 131static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
@@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
122 if (status != EFI_SUCCESS) 142 if (status != EFI_SUCCESS)
123 return -EINVAL; 143 return -EINVAL;
124 144
125 convert_from_efi_time(&eft, &wkalrm->time); 145 if (!convert_from_efi_time(&eft, &wkalrm->time))
146 return -EIO;
126 147
127 return rtc_valid_tm(&wkalrm->time); 148 return rtc_valid_tm(&wkalrm->time);
128} 149}
@@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
163 return -EINVAL; 184 return -EINVAL;
164 } 185 }
165 186
166 convert_from_efi_time(&eft, tm); 187 if (!convert_from_efi_time(&eft, tm))
188 return -EIO;
167 189
168 return rtc_valid_tm(tm); 190 return rtc_valid_tm(tm);
169} 191}
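
The EFI_RTC_EPOCH change above folds the epoch constant into both the comparison and the error message at compile time. A sketch of the stringify idiom it relies on (mirroring linux/stringify.h; the two macro levels make the preprocessor expand the argument before '#' stringizes it):

#define __sketch_stringify_1(x...)	#x
#define __sketch_stringify(x...)	__sketch_stringify_1(x)

#define EFI_RTC_EPOCH	1998	/* matches the literal it replaces above */

/* "EFI year < " __sketch_stringify(EFI_RTC_EPOCH) ", invalid date\n"
 * concatenates to the single literal "EFI year < 1998, invalid date\n". */
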
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index 03b891129428..aa55f081c505 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -17,6 +17,8 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/err.h> 19#include <linux/err.h>
20#include <linux/of.h>
21#include <linux/of_device.h>
20 22
21#define DRV_VERSION "0.1" 23#define DRV_VERSION "0.1"
22 24
@@ -271,6 +273,13 @@ static int isl12022_probe(struct i2c_client *client,
271 return PTR_ERR_OR_ZERO(isl12022->rtc); 273 return PTR_ERR_OR_ZERO(isl12022->rtc);
272} 274}
273 275
276#ifdef CONFIG_OF
 277static const struct of_device_id isl12022_dt_match[] = {
278 { .compatible = "isl,isl12022" },
279 { },
280};
281#endif
282
274static const struct i2c_device_id isl12022_id[] = { 283static const struct i2c_device_id isl12022_id[] = {
275 { "isl12022", 0 }, 284 { "isl12022", 0 },
276 { } 285 { }
@@ -280,6 +289,9 @@ MODULE_DEVICE_TABLE(i2c, isl12022_id);
280static struct i2c_driver isl12022_driver = { 289static struct i2c_driver isl12022_driver = {
281 .driver = { 290 .driver = {
282 .name = "rtc-isl12022", 291 .name = "rtc-isl12022",
292#ifdef CONFIG_OF
293 .of_match_table = of_match_ptr(isl12022_dt_match),
294#endif
283 }, 295 },
284 .probe = isl12022_probe, 296 .probe = isl12022_probe,
285 .id_table = isl12022_id, 297 .id_table = isl12022_id,
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
new file mode 100644
index 000000000000..6a12bf62c504
--- /dev/null
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -0,0 +1,204 @@
1/*
2 * An I2C driver for the PCF85063 RTC
3 * Copyright 2014 Rose Technology
4 *
5 * Author: Søren Andersen <san@rosetechnology.dk>
6 * Maintainers: http://www.nslu2-linux.org/
7 *
8 * based on the other drivers in this same directory.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14#include <linux/i2c.h>
15#include <linux/bcd.h>
16#include <linux/rtc.h>
17#include <linux/module.h>
18
19#define DRV_VERSION "0.0.1"
20
21#define PCF85063_REG_CTRL1 0x00 /* status */
22#define PCF85063_REG_CTRL2 0x01
23
24#define PCF85063_REG_SC 0x04 /* datetime */
25#define PCF85063_REG_MN 0x05
26#define PCF85063_REG_HR 0x06
27#define PCF85063_REG_DM 0x07
28#define PCF85063_REG_DW 0x08
29#define PCF85063_REG_MO 0x09
30#define PCF85063_REG_YR 0x0A
31
32#define PCF85063_MO_C 0x80 /* century */
33
34static struct i2c_driver pcf85063_driver;
35
36struct pcf85063 {
37 struct rtc_device *rtc;
38 int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
39 int voltage_low; /* indicates if a low_voltage was detected */
40};
41
42/*
43 * In the routines that deal directly with the pcf85063 hardware, we use
44 * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
45 */
46static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
47{
48 struct pcf85063 *pcf85063 = i2c_get_clientdata(client);
49 unsigned char buf[13] = { PCF85063_REG_CTRL1 };
50 struct i2c_msg msgs[] = {
51 {/* setup read ptr */
52 .addr = client->addr,
53 .len = 1,
54 .buf = buf
55 },
56 {/* read status + date */
57 .addr = client->addr,
58 .flags = I2C_M_RD,
59 .len = 13,
60 .buf = buf
61 },
62 };
63
64 /* read registers */
65 if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
66 dev_err(&client->dev, "%s: read error\n", __func__);
67 return -EIO;
68 }
69
70 tm->tm_sec = bcd2bin(buf[PCF85063_REG_SC] & 0x7F);
71 tm->tm_min = bcd2bin(buf[PCF85063_REG_MN] & 0x7F);
72 tm->tm_hour = bcd2bin(buf[PCF85063_REG_HR] & 0x3F); /* rtc hr 0-23 */
73 tm->tm_mday = bcd2bin(buf[PCF85063_REG_DM] & 0x3F);
74 tm->tm_wday = buf[PCF85063_REG_DW] & 0x07;
75 tm->tm_mon = bcd2bin(buf[PCF85063_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
76 tm->tm_year = bcd2bin(buf[PCF85063_REG_YR]);
77 if (tm->tm_year < 70)
78 tm->tm_year += 100; /* assume we are in 1970...2069 */
79 /* detect the polarity heuristically. see note above. */
80 pcf85063->c_polarity = (buf[PCF85063_REG_MO] & PCF85063_MO_C) ?
81 (tm->tm_year >= 100) : (tm->tm_year < 100);
82
83 /* the clock can give out invalid datetime, but we cannot return
84 * -EINVAL otherwise hwclock will refuse to set the time on bootup.
85 */
86 if (rtc_valid_tm(tm) < 0)
87 dev_err(&client->dev, "retrieved date/time is not valid.\n");
88
89 return 0;
90}
91
92static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
93{
94 int i = 0, err = 0;
95 unsigned char buf[11];
96
97 /* Control & status */
98 buf[PCF85063_REG_CTRL1] = 0;
99 buf[PCF85063_REG_CTRL2] = 5;
 100	buf[2] = buf[3] = 0;	/* loop below writes all 11 bytes */
101 /* hours, minutes and seconds */
102 buf[PCF85063_REG_SC] = bin2bcd(tm->tm_sec) & 0x7F;
103
104 buf[PCF85063_REG_MN] = bin2bcd(tm->tm_min);
105 buf[PCF85063_REG_HR] = bin2bcd(tm->tm_hour);
106
107 /* Day of month, 1 - 31 */
108 buf[PCF85063_REG_DM] = bin2bcd(tm->tm_mday);
109
110 /* Day, 0 - 6 */
111 buf[PCF85063_REG_DW] = tm->tm_wday & 0x07;
112
113 /* month, 1 - 12 */
114 buf[PCF85063_REG_MO] = bin2bcd(tm->tm_mon + 1);
115
116 /* year and century */
117 buf[PCF85063_REG_YR] = bin2bcd(tm->tm_year % 100);
118
 119	/* write register data */
120 for (i = 0; i < sizeof(buf); i++) {
121 unsigned char data[2] = { i, buf[i] };
122
123 err = i2c_master_send(client, data, sizeof(data));
124 if (err != sizeof(data)) {
125 dev_err(&client->dev, "%s: err=%d addr=%02x, data=%02x\n",
126 __func__, err, data[0], data[1]);
127 return -EIO;
128 }
129 }
130
131 return 0;
132}
133
134static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
135{
136 return pcf85063_get_datetime(to_i2c_client(dev), tm);
137}
138
139static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
140{
141 return pcf85063_set_datetime(to_i2c_client(dev), tm);
142}
143
144static const struct rtc_class_ops pcf85063_rtc_ops = {
145 .read_time = pcf85063_rtc_read_time,
146 .set_time = pcf85063_rtc_set_time
147};
148
149static int pcf85063_probe(struct i2c_client *client,
150 const struct i2c_device_id *id)
151{
152 struct pcf85063 *pcf85063;
153
154 dev_dbg(&client->dev, "%s\n", __func__);
155
156 if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
157 return -ENODEV;
158
159 pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063),
160 GFP_KERNEL);
161 if (!pcf85063)
162 return -ENOMEM;
163
164 dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
165
166 i2c_set_clientdata(client, pcf85063);
167
168 pcf85063->rtc = devm_rtc_device_register(&client->dev,
169 pcf85063_driver.driver.name,
170 &pcf85063_rtc_ops, THIS_MODULE);
171
172 return PTR_ERR_OR_ZERO(pcf85063->rtc);
173}
174
175static const struct i2c_device_id pcf85063_id[] = {
176 { "pcf85063", 0 },
177 { }
178};
179MODULE_DEVICE_TABLE(i2c, pcf85063_id);
180
181#ifdef CONFIG_OF
182static const struct of_device_id pcf85063_of_match[] = {
183 { .compatible = "nxp,pcf85063" },
184 {}
185};
186MODULE_DEVICE_TABLE(of, pcf85063_of_match);
187#endif
188
189static struct i2c_driver pcf85063_driver = {
190 .driver = {
191 .name = "rtc-pcf85063",
192 .owner = THIS_MODULE,
193 .of_match_table = of_match_ptr(pcf85063_of_match),
194 },
195 .probe = pcf85063_probe,
196 .id_table = pcf85063_id,
197};
198
199module_i2c_driver(pcf85063_driver);
200
201MODULE_AUTHOR("Søren Andersen <san@rosetechnology.dk>");
202MODULE_DESCRIPTION("PCF85063 RTC driver");
203MODULE_LICENSE("GPL");
204MODULE_VERSION(DRV_VERSION);
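
Like the neighbouring drivers, pcf85063 stores every date/time field as binary-coded decimal, one decimal digit per register nibble. A sketch of the conversions it leans on (linux/bcd.h provides the real bcd2bin()/bin2bcd(); these illustrative copies match their semantics):

#include <linux/bcd.h>	/* the real helpers used by the driver */

/* Sketch: one decimal digit per nibble. */
static inline unsigned char sketch_bcd2bin(unsigned char val)
{
	return (val & 0x0f) + (val >> 4) * 10;	/* 0x59 -> 59 seconds */
}

static inline unsigned char sketch_bin2bcd(unsigned char val)
{
	return ((val / 10) << 4) | (val % 10);	/* 23 -> 0x23 hour reg */
}
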
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 63b558c48196..5a197d9dc7e7 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -26,6 +26,8 @@
26 26
27#define PCF8563_REG_ST1 0x00 /* status */ 27#define PCF8563_REG_ST1 0x00 /* status */
28#define PCF8563_REG_ST2 0x01 28#define PCF8563_REG_ST2 0x01
29#define PCF8563_BIT_AIE (1 << 1)
30#define PCF8563_BIT_AF (1 << 3)
29 31
30#define PCF8563_REG_SC 0x02 /* datetime */ 32#define PCF8563_REG_SC 0x02 /* datetime */
31#define PCF8563_REG_MN 0x03 33#define PCF8563_REG_MN 0x03
@@ -36,9 +38,6 @@
36#define PCF8563_REG_YR 0x08 38#define PCF8563_REG_YR 0x08
37 39
38#define PCF8563_REG_AMN 0x09 /* alarm */ 40#define PCF8563_REG_AMN 0x09 /* alarm */
39#define PCF8563_REG_AHR 0x0A
40#define PCF8563_REG_ADM 0x0B
41#define PCF8563_REG_ADW 0x0C
42 41
43#define PCF8563_REG_CLKO 0x0D /* clock out */ 42#define PCF8563_REG_CLKO 0x0D /* clock out */
44#define PCF8563_REG_TMRC 0x0E /* timer control */ 43#define PCF8563_REG_TMRC 0x0E /* timer control */
@@ -67,37 +66,133 @@ struct pcf8563 {
67 */ 66 */
68 int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */ 67 int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
 69	int voltage_low; /* indicates if a low_voltage was detected */ 68	int voltage_low; /* indicates if a low_voltage was detected */
69
70 struct i2c_client *client;
70}; 71};
71 72
72/* 73static int pcf8563_read_block_data(struct i2c_client *client, unsigned char reg,
73 * In the routines that deal directly with the pcf8563 hardware, we use 74 unsigned char length, unsigned char *buf)
74 * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
75 */
76static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
77{ 75{
78 struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
79 unsigned char buf[13] = { PCF8563_REG_ST1 };
80
81 struct i2c_msg msgs[] = { 76 struct i2c_msg msgs[] = {
82 {/* setup read ptr */ 77 {/* setup read ptr */
83 .addr = client->addr, 78 .addr = client->addr,
84 .len = 1, 79 .len = 1,
85 .buf = buf 80 .buf = &reg,
86 }, 81 },
87 {/* read status + date */ 82 {
88 .addr = client->addr, 83 .addr = client->addr,
89 .flags = I2C_M_RD, 84 .flags = I2C_M_RD,
90 .len = 13, 85 .len = length,
91 .buf = buf 86 .buf = buf
92 }, 87 },
93 }; 88 };
94 89
95 /* read registers */
96 if ((i2c_transfer(client->adapter, msgs, 2)) != 2) { 90 if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
97 dev_err(&client->dev, "%s: read error\n", __func__); 91 dev_err(&client->dev, "%s: read error\n", __func__);
98 return -EIO; 92 return -EIO;
99 } 93 }
100 94
95 return 0;
96}
97
98static int pcf8563_write_block_data(struct i2c_client *client,
99 unsigned char reg, unsigned char length,
100 unsigned char *buf)
101{
102 int i, err;
103
104 for (i = 0; i < length; i++) {
105 unsigned char data[2] = { reg + i, buf[i] };
106
107 err = i2c_master_send(client, data, sizeof(data));
108 if (err != sizeof(data)) {
109 dev_err(&client->dev,
110 "%s: err=%d addr=%02x, data=%02x\n",
111 __func__, err, data[0], data[1]);
112 return -EIO;
113 }
114 }
115
116 return 0;
117}
118
119static int pcf8563_set_alarm_mode(struct i2c_client *client, bool on)
120{
121 unsigned char buf[2];
122 int err;
123
124 err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
125 if (err < 0)
126 return err;
127
128 if (on)
129 buf[1] |= PCF8563_BIT_AIE;
130 else
131 buf[1] &= ~PCF8563_BIT_AIE;
132
133 buf[1] &= ~PCF8563_BIT_AF;
134 buf[0] = PCF8563_REG_ST2;
135
136 err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
137 if (err < 0) {
138 dev_err(&client->dev, "%s: write error\n", __func__);
139 return -EIO;
140 }
141
142 return 0;
143}
144
145static int pcf8563_get_alarm_mode(struct i2c_client *client, unsigned char *en,
146 unsigned char *pen)
147{
148 unsigned char buf;
149 int err;
150
151 err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, &buf);
152 if (err)
153 return err;
154
155 if (en)
156 *en = !!(buf & PCF8563_BIT_AIE);
157 if (pen)
158 *pen = !!(buf & PCF8563_BIT_AF);
159
160 return 0;
161}
162
163static irqreturn_t pcf8563_irq(int irq, void *dev_id)
164{
165 struct pcf8563 *pcf8563 = i2c_get_clientdata(dev_id);
166 int err;
 167	unsigned char pending;
168
169 err = pcf8563_get_alarm_mode(pcf8563->client, NULL, &pending);
170 if (err < 0)
 171		return IRQ_NONE;
172
173 if (pending) {
174 rtc_update_irq(pcf8563->rtc, 1, RTC_IRQF | RTC_AF);
175 pcf8563_set_alarm_mode(pcf8563->client, 1);
176 return IRQ_HANDLED;
177 }
178
179 return IRQ_NONE;
180}
181
182/*
183 * In the routines that deal directly with the pcf8563 hardware, we use
184 * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
185 */
186static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
187{
188 struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
189 unsigned char buf[9];
190 int err;
191
192 err = pcf8563_read_block_data(client, PCF8563_REG_ST1, 9, buf);
193 if (err)
194 return err;
195
101 if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) { 196 if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) {
102 pcf8563->voltage_low = 1; 197 pcf8563->voltage_low = 1;
103 dev_info(&client->dev, 198 dev_info(&client->dev,
@@ -144,7 +239,7 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
144static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm) 239static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
145{ 240{
146 struct pcf8563 *pcf8563 = i2c_get_clientdata(client); 241 struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
147 int i, err; 242 int err;
148 unsigned char buf[9]; 243 unsigned char buf[9];
149 244
150 dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, " 245 dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, "
@@ -170,19 +265,10 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
170 265
171 buf[PCF8563_REG_DW] = tm->tm_wday & 0x07; 266 buf[PCF8563_REG_DW] = tm->tm_wday & 0x07;
172 267
173 /* write register's data */ 268 err = pcf8563_write_block_data(client, PCF8563_REG_SC,
174 for (i = 0; i < 7; i++) { 269 9 - PCF8563_REG_SC, buf + PCF8563_REG_SC);
175 unsigned char data[2] = { PCF8563_REG_SC + i, 270 if (err)
176 buf[PCF8563_REG_SC + i] }; 271 return err;
177
178 err = i2c_master_send(client, data, sizeof(data));
179 if (err != sizeof(data)) {
180 dev_err(&client->dev,
181 "%s: err=%d addr=%02x, data=%02x\n",
182 __func__, err, data[0], data[1]);
183 return -EIO;
184 }
185 }
186 272
187 return 0; 273 return 0;
188} 274}
@@ -235,16 +321,83 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
235 return pcf8563_set_datetime(to_i2c_client(dev), tm); 321 return pcf8563_set_datetime(to_i2c_client(dev), tm);
236} 322}
237 323
324static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
325{
326 struct i2c_client *client = to_i2c_client(dev);
327 unsigned char buf[4];
328 int err;
329
330 err = pcf8563_read_block_data(client, PCF8563_REG_AMN, 4, buf);
331 if (err)
332 return err;
333
334 dev_dbg(&client->dev,
335 "%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n",
336 __func__, buf[0], buf[1], buf[2], buf[3]);
337
338 tm->time.tm_min = bcd2bin(buf[0] & 0x7F);
339 tm->time.tm_hour = bcd2bin(buf[1] & 0x7F);
340 tm->time.tm_mday = bcd2bin(buf[2] & 0x1F);
341 tm->time.tm_wday = bcd2bin(buf[3] & 0x7);
342 tm->time.tm_mon = -1;
343 tm->time.tm_year = -1;
344 tm->time.tm_yday = -1;
345 tm->time.tm_isdst = -1;
346
347 err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending);
348 if (err < 0)
349 return err;
350
351 dev_dbg(&client->dev, "%s: tm is mins=%d, hours=%d, mday=%d, wday=%d,"
352 " enabled=%d, pending=%d\n", __func__, tm->time.tm_min,
353 tm->time.tm_hour, tm->time.tm_mday, tm->time.tm_wday,
354 tm->enabled, tm->pending);
355
356 return 0;
357}
358
359static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
360{
361 struct i2c_client *client = to_i2c_client(dev);
362 unsigned char buf[4];
363 int err;
364
365 dev_dbg(dev, "%s, min=%d hour=%d wday=%d mday=%d "
366 "enabled=%d pending=%d\n", __func__,
367 tm->time.tm_min, tm->time.tm_hour, tm->time.tm_wday,
368 tm->time.tm_mday, tm->enabled, tm->pending);
369
370 buf[0] = bin2bcd(tm->time.tm_min);
371 buf[1] = bin2bcd(tm->time.tm_hour);
372 buf[2] = bin2bcd(tm->time.tm_mday);
373 buf[3] = tm->time.tm_wday & 0x07;
374
375 err = pcf8563_write_block_data(client, PCF8563_REG_AMN, 4, buf);
376 if (err)
377 return err;
378
379 return pcf8563_set_alarm_mode(client, 1);
380}
381
382static int pcf8563_irq_enable(struct device *dev, unsigned int enabled)
383{
384 return pcf8563_set_alarm_mode(to_i2c_client(dev), !!enabled);
385}
386
238static const struct rtc_class_ops pcf8563_rtc_ops = { 387static const struct rtc_class_ops pcf8563_rtc_ops = {
239 .ioctl = pcf8563_rtc_ioctl, 388 .ioctl = pcf8563_rtc_ioctl,
240 .read_time = pcf8563_rtc_read_time, 389 .read_time = pcf8563_rtc_read_time,
241 .set_time = pcf8563_rtc_set_time, 390 .set_time = pcf8563_rtc_set_time,
391 .read_alarm = pcf8563_rtc_read_alarm,
392 .set_alarm = pcf8563_rtc_set_alarm,
393 .alarm_irq_enable = pcf8563_irq_enable,
242}; 394};
243 395
244static int pcf8563_probe(struct i2c_client *client, 396static int pcf8563_probe(struct i2c_client *client,
245 const struct i2c_device_id *id) 397 const struct i2c_device_id *id)
246{ 398{
247 struct pcf8563 *pcf8563; 399 struct pcf8563 *pcf8563;
400 int err;
248 401
249 dev_dbg(&client->dev, "%s\n", __func__); 402 dev_dbg(&client->dev, "%s\n", __func__);
250 403
@@ -259,12 +412,30 @@ static int pcf8563_probe(struct i2c_client *client,
259 dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); 412 dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
260 413
261 i2c_set_clientdata(client, pcf8563); 414 i2c_set_clientdata(client, pcf8563);
415 pcf8563->client = client;
416 device_set_wakeup_capable(&client->dev, 1);
262 417
263 pcf8563->rtc = devm_rtc_device_register(&client->dev, 418 pcf8563->rtc = devm_rtc_device_register(&client->dev,
264 pcf8563_driver.driver.name, 419 pcf8563_driver.driver.name,
265 &pcf8563_rtc_ops, THIS_MODULE); 420 &pcf8563_rtc_ops, THIS_MODULE);
266 421
267 return PTR_ERR_OR_ZERO(pcf8563->rtc); 422 if (IS_ERR(pcf8563->rtc))
423 return PTR_ERR(pcf8563->rtc);
424
425 if (client->irq > 0) {
426 err = devm_request_threaded_irq(&client->dev, client->irq,
427 NULL, pcf8563_irq,
428 IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING,
429 pcf8563->rtc->name, client);
430 if (err) {
431 dev_err(&client->dev, "unable to request IRQ %d\n",
432 client->irq);
433 return err;
434 }
435
436 }
437
438 return 0;
268} 439}
269 440
270static const struct i2c_device_id pcf8563_id[] = { 441static const struct i2c_device_id pcf8563_id[] = {
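
With read_alarm/set_alarm/alarm_irq_enable wired into pcf8563_rtc_ops above, the standard RTC character device can arm the chip's alarm. A userspace sketch, assuming the chip registers as /dev/rtc0 (the one-minute arithmetic is deliberately naive and ignores hour rollover):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/rtc.h>

int main(void)
{
	struct rtc_wkalrm alm;
	int fd = open("/dev/rtc0", O_RDONLY);	/* assumed device node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, RTC_RD_TIME, &alm.time) < 0) {
		perror("RTC_RD_TIME");
		return 1;
	}
	alm.enabled = 1;
	alm.pending = 0;
	alm.time.tm_min = (alm.time.tm_min + 1) % 60;	/* fire in ~1 min */
	if (ioctl(fd, RTC_WKALM_SET, &alm) < 0)
		perror("RTC_WKALM_SET");
	close(fd);
	return 0;
}
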
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index 7af00208d637..2583349fbde5 100644
--- a/drivers/rtc/rtc-tps65910.c
+++ b/drivers/rtc/rtc-tps65910.c
@@ -258,6 +258,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
258 if (ret < 0) 258 if (ret < 0)
259 return ret; 259 return ret;
260 260
261 platform_set_drvdata(pdev, tps_rtc);
262
261 irq = platform_get_irq(pdev, 0); 263 irq = platform_get_irq(pdev, 0);
262 if (irq <= 0) { 264 if (irq <= 0) {
263 dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n", 265 dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n",
@@ -283,8 +285,6 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
283 return ret; 285 return ret;
284 } 286 }
285 287
286 platform_set_drvdata(pdev, tps_rtc);
287
288 return 0; 288 return 0;
289} 289}
290 290
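
The scsi and staging conversions that follow all replace pci_alloc_consistent() plus an explicit memset() with pci_zalloc_consistent(), which hands back the coherent buffer already zeroed. A sketch of the intended semantics (the in-tree helper lives in the PCI/DMA headers and may be implemented differently):

#include <linux/pci.h>
#include <linux/string.h>

/* Sketch only: allocate coherent DMA memory, pre-zeroed, so callers
 * can drop their follow-up memset(). */
static inline void *pci_zalloc_consistent_sketch(struct pci_dev *hwdev,
						 size_t size,
						 dma_addr_t *dma_handle)
{
	void *mem = pci_alloc_consistent(hwdev, size, dma_handle);

	if (mem)
		memset(mem, 0, size);
	return mem;
}
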
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 4de346017e9f..6da6cec9a651 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -683,14 +683,13 @@ static int twl_allocate_memory(TW_Device_Extension *tw_dev, int size, int which)
683 unsigned long *cpu_addr; 683 unsigned long *cpu_addr;
684 int retval = 1; 684 int retval = 1;
685 685
686 cpu_addr = pci_alloc_consistent(tw_dev->tw_pci_dev, size*TW_Q_LENGTH, &dma_handle); 686 cpu_addr = pci_zalloc_consistent(tw_dev->tw_pci_dev, size * TW_Q_LENGTH,
687 &dma_handle);
687 if (!cpu_addr) { 688 if (!cpu_addr) {
688 TW_PRINTK(tw_dev->host, TW_DRIVER, 0x5, "Memory allocation failed"); 689 TW_PRINTK(tw_dev->host, TW_DRIVER, 0x5, "Memory allocation failed");
689 goto out; 690 goto out;
690 } 691 }
691 692
692 memset(cpu_addr, 0, size*TW_Q_LENGTH);
693
694 for (i = 0; i < TW_Q_LENGTH; i++) { 693 for (i = 0; i < TW_Q_LENGTH; i++) {
695 switch(which) { 694 switch(which) {
696 case 0: 695 case 0:
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 522570d297ca..7e33a61c1ba4 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -1125,23 +1125,19 @@ static int inia100_probe_one(struct pci_dev *pdev,
1125 1125
1126 /* Get total memory needed for SCB */ 1126 /* Get total memory needed for SCB */
1127 sz = ORC_MAXQUEUE * sizeof(struct orc_scb); 1127 sz = ORC_MAXQUEUE * sizeof(struct orc_scb);
1128 host->scb_virt = pci_alloc_consistent(pdev, sz, 1128 host->scb_virt = pci_zalloc_consistent(pdev, sz, &host->scb_phys);
1129 &host->scb_phys);
1130 if (!host->scb_virt) { 1129 if (!host->scb_virt) {
1131 printk("inia100: SCB memory allocation error\n"); 1130 printk("inia100: SCB memory allocation error\n");
1132 goto out_host_put; 1131 goto out_host_put;
1133 } 1132 }
1134 memset(host->scb_virt, 0, sz);
1135 1133
1136 /* Get total memory needed for ESCB */ 1134 /* Get total memory needed for ESCB */
1137 sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb); 1135 sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb);
1138 host->escb_virt = pci_alloc_consistent(pdev, sz, 1136 host->escb_virt = pci_zalloc_consistent(pdev, sz, &host->escb_phys);
1139 &host->escb_phys);
1140 if (!host->escb_virt) { 1137 if (!host->escb_virt) {
1141 printk("inia100: ESCB memory allocation error\n"); 1138 printk("inia100: ESCB memory allocation error\n");
1142 goto out_free_scb_array; 1139 goto out_free_scb_array;
1143 } 1140 }
1144 memset(host->escb_virt, 0, sz);
1145 1141
1146 biosaddr = host->BIOScfg; 1142 biosaddr = host->BIOScfg;
1147 biosaddr = (biosaddr << 4); 1143 biosaddr = (biosaddr << 4);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 56467df3d6de..eb3e3e619155 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -3538,10 +3538,9 @@ static int be_queue_alloc(struct beiscsi_hba *phba, struct be_queue_info *q,
3538 q->len = len; 3538 q->len = len;
3539 q->entry_size = entry_size; 3539 q->entry_size = entry_size;
3540 mem->size = len * entry_size; 3540 mem->size = len * entry_size;
3541 mem->va = pci_alloc_consistent(phba->pcidev, mem->size, &mem->dma); 3541 mem->va = pci_zalloc_consistent(phba->pcidev, mem->size, &mem->dma);
3542 if (!mem->va) 3542 if (!mem->va)
3543 return -ENOMEM; 3543 return -ENOMEM;
3544 memset(mem->va, 0, mem->size);
3545 return 0; 3544 return 0;
3546} 3545}
3547 3546
@@ -4320,9 +4319,9 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
4320 "BM_%d : No boot session\n"); 4319 "BM_%d : No boot session\n");
4321 return ret; 4320 return ret;
4322 } 4321 }
4323 nonemb_cmd.va = pci_alloc_consistent(phba->ctrl.pdev, 4322 nonemb_cmd.va = pci_zalloc_consistent(phba->ctrl.pdev,
4324 sizeof(*session_resp), 4323 sizeof(*session_resp),
4325 &nonemb_cmd.dma); 4324 &nonemb_cmd.dma);
4326 if (nonemb_cmd.va == NULL) { 4325 if (nonemb_cmd.va == NULL) {
4327 beiscsi_log(phba, KERN_ERR, 4326 beiscsi_log(phba, KERN_ERR,
4328 BEISCSI_LOG_INIT | BEISCSI_LOG_CONFIG, 4327 BEISCSI_LOG_INIT | BEISCSI_LOG_CONFIG,
@@ -4332,7 +4331,6 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
4332 return -ENOMEM; 4331 return -ENOMEM;
4333 } 4332 }
4334 4333
4335 memset(nonemb_cmd.va, 0, sizeof(*session_resp));
4336 tag = mgmt_get_session_info(phba, s_handle, 4334 tag = mgmt_get_session_info(phba, s_handle,
4337 &nonemb_cmd); 4335 &nonemb_cmd);
4338 if (!tag) { 4336 if (!tag) {
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index a3e56487616c..665afcb74a56 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -900,13 +900,12 @@ free_cmd:
900static int mgmt_alloc_cmd_data(struct beiscsi_hba *phba, struct be_dma_mem *cmd, 900static int mgmt_alloc_cmd_data(struct beiscsi_hba *phba, struct be_dma_mem *cmd,
901 int iscsi_cmd, int size) 901 int iscsi_cmd, int size)
902{ 902{
903 cmd->va = pci_alloc_consistent(phba->ctrl.pdev, size, &cmd->dma); 903 cmd->va = pci_zalloc_consistent(phba->ctrl.pdev, size, &cmd->dma);
904 if (!cmd->va) { 904 if (!cmd->va) {
905 beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, 905 beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG,
906 "BG_%d : Failed to allocate memory for if info\n"); 906 "BG_%d : Failed to allocate memory for if info\n");
907 return -ENOMEM; 907 return -ENOMEM;
908 } 908 }
909 memset(cmd->va, 0, size);
910 cmd->size = size; 909 cmd->size = size;
911 be_cmd_hdr_prepare(cmd->va, CMD_SUBSYSTEM_ISCSI, iscsi_cmd, size); 910 be_cmd_hdr_prepare(cmd->va, CMD_SUBSYSTEM_ISCSI, iscsi_cmd, size);
912 return 0; 911 return 0;
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index 4255ce264abf..773da14cfa14 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -232,7 +232,7 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
232 232
233 q = wrm->q_arr[free_idx]; 233 q = wrm->q_arr[free_idx];
234 234
235 q->vstart = pci_alloc_consistent(hw->pdev, qsz, &q->pstart); 235 q->vstart = pci_zalloc_consistent(hw->pdev, qsz, &q->pstart);
236 if (!q->vstart) { 236 if (!q->vstart) {
237 csio_err(hw, 237 csio_err(hw,
238 "Failed to allocate DMA memory for " 238 "Failed to allocate DMA memory for "
@@ -240,12 +240,6 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
240 return -1; 240 return -1;
241 } 241 }
242 242
243 /*
244 * We need to zero out the contents, importantly for ingress,
245 * since we start with a generatiom bit of 1 for ingress.
246 */
247 memset(q->vstart, 0, qsz);
248
249 q->type = type; 243 q->type = type;
250 q->owner = owner; 244 q->owner = owner;
251 q->pidx = q->cidx = q->inc_idx = 0; 245 q->pidx = q->cidx = q->inc_idx = 0;
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index 03372cff38f3..813dd5c998e4 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1238,8 +1238,8 @@ static int port_detect(unsigned long port_base, unsigned int j,
1238 struct eata_config *cf; 1238 struct eata_config *cf;
1239 dma_addr_t cf_dma_addr; 1239 dma_addr_t cf_dma_addr;
1240 1240
1241 cf = pci_alloc_consistent(pdev, sizeof(struct eata_config), 1241 cf = pci_zalloc_consistent(pdev, sizeof(struct eata_config),
1242 &cf_dma_addr); 1242 &cf_dma_addr);
1243 1243
1244 if (!cf) { 1244 if (!cf) {
1245 printk 1245 printk
@@ -1249,7 +1249,6 @@ static int port_detect(unsigned long port_base, unsigned int j,
1249 } 1249 }
1250 1250
1251 /* Set board configuration */ 1251 /* Set board configuration */
1252 memset((char *)cf, 0, sizeof(struct eata_config));
1253 cf->len = (ushort) H2DEV16((ushort) 510); 1252 cf->len = (ushort) H2DEV16((ushort) 510);
1254 cf->ocena = 1; 1253 cf->ocena = 1;
1255 1254
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 8545d1826725..6b35d0dfe64c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4732,23 +4732,21 @@ static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
4732 union u64bit temp64; 4732 union u64bit temp64;
4733 dma_addr_t cmd_dma_handle, err_dma_handle; 4733 dma_addr_t cmd_dma_handle, err_dma_handle;
4734 4734
4735 c = pci_alloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle); 4735 c = pci_zalloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
4736 if (c == NULL) 4736 if (c == NULL)
4737 return NULL; 4737 return NULL;
4738 memset(c, 0, sizeof(*c));
4739 4738
4740 c->cmd_type = CMD_SCSI; 4739 c->cmd_type = CMD_SCSI;
4741 c->cmdindex = -1; 4740 c->cmdindex = -1;
4742 4741
4743 c->err_info = pci_alloc_consistent(h->pdev, sizeof(*c->err_info), 4742 c->err_info = pci_zalloc_consistent(h->pdev, sizeof(*c->err_info),
4744 &err_dma_handle); 4743 &err_dma_handle);
4745 4744
4746 if (c->err_info == NULL) { 4745 if (c->err_info == NULL) {
4747 pci_free_consistent(h->pdev, 4746 pci_free_consistent(h->pdev,
4748 sizeof(*c), c, cmd_dma_handle); 4747 sizeof(*c), c, cmd_dma_handle);
4749 return NULL; 4748 return NULL;
4750 } 4749 }
4751 memset(c->err_info, 0, sizeof(*c->err_info));
4752 4750
4753 INIT_LIST_HEAD(&c->list); 4751 INIT_LIST_HEAD(&c->list);
4754 c->busaddr = (u32) cmd_dma_handle; 4752 c->busaddr = (u32) cmd_dma_handle;
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index e2237a97cb9d..531dce419c18 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -998,8 +998,9 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
998 * Allocate the common 16-byte aligned memory for the handshake 998 * Allocate the common 16-byte aligned memory for the handshake
999 * mailbox. 999 * mailbox.
1000 */ 1000 */
1001 raid_dev->una_mbox64 = pci_alloc_consistent(adapter->pdev, 1001 raid_dev->una_mbox64 = pci_zalloc_consistent(adapter->pdev,
1002 sizeof(mbox64_t), &raid_dev->una_mbox64_dma); 1002 sizeof(mbox64_t),
1003 &raid_dev->una_mbox64_dma);
1003 1004
1004 if (!raid_dev->una_mbox64) { 1005 if (!raid_dev->una_mbox64) {
1005 con_log(CL_ANN, (KERN_WARNING 1006 con_log(CL_ANN, (KERN_WARNING
@@ -1007,7 +1008,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
1007 __LINE__)); 1008 __LINE__));
1008 return -1; 1009 return -1;
1009 } 1010 }
1010 memset(raid_dev->una_mbox64, 0, sizeof(mbox64_t));
1011 1011
1012 /* 1012 /*
1013 * Align the mailbox at 16-byte boundary 1013 * Align the mailbox at 16-byte boundary
@@ -1026,8 +1026,8 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
1026 align; 1026 align;
1027 1027
1028 // Allocate memory for commands issued internally 1028 // Allocate memory for commands issued internally
1029 adapter->ibuf = pci_alloc_consistent(pdev, MBOX_IBUF_SIZE, 1029 adapter->ibuf = pci_zalloc_consistent(pdev, MBOX_IBUF_SIZE,
1030 &adapter->ibuf_dma_h); 1030 &adapter->ibuf_dma_h);
1031 if (!adapter->ibuf) { 1031 if (!adapter->ibuf) {
1032 1032
1033 con_log(CL_ANN, (KERN_WARNING 1033 con_log(CL_ANN, (KERN_WARNING
@@ -1036,7 +1036,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
1036 1036
1037 goto out_free_common_mbox; 1037 goto out_free_common_mbox;
1038 } 1038 }
1039 memset(adapter->ibuf, 0, MBOX_IBUF_SIZE);
1040 1039
1041 // Allocate memory for our SCSI Command Blocks and their associated 1040 // Allocate memory for our SCSI Command Blocks and their associated
1042 // memory 1041 // memory
@@ -2972,8 +2971,8 @@ megaraid_mbox_product_info(adapter_t *adapter)
2972 * Issue an ENQUIRY3 command to find out certain adapter parameters, 2971 * Issue an ENQUIRY3 command to find out certain adapter parameters,
2973 * e.g., max channels, max commands etc. 2972 * e.g., max channels, max commands etc.
2974 */ 2973 */
2975 pinfo = pci_alloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t), 2974 pinfo = pci_zalloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
2976 &pinfo_dma_h); 2975 &pinfo_dma_h);
2977 2976
2978 if (pinfo == NULL) { 2977 if (pinfo == NULL) {
2979 con_log(CL_ANN, (KERN_WARNING 2978 con_log(CL_ANN, (KERN_WARNING
@@ -2982,7 +2981,6 @@ megaraid_mbox_product_info(adapter_t *adapter)
2982 2981
2983 return -1; 2982 return -1;
2984 } 2983 }
2985 memset(pinfo, 0, sizeof(mraid_pinfo_t));
2986 2984
2987 mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h; 2985 mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
2988 memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE); 2986 memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 112799b131a9..22a04e37b70a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2038,9 +2038,9 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
2038 2038
2039 if (initial) { 2039 if (initial) {
2040 instance->hb_host_mem = 2040 instance->hb_host_mem =
2041 pci_alloc_consistent(instance->pdev, 2041 pci_zalloc_consistent(instance->pdev,
2042 sizeof(struct MR_CTRL_HB_HOST_MEM), 2042 sizeof(struct MR_CTRL_HB_HOST_MEM),
2043 &instance->hb_host_mem_h); 2043 &instance->hb_host_mem_h);
2044 if (!instance->hb_host_mem) { 2044 if (!instance->hb_host_mem) {
2045 printk(KERN_DEBUG "megasas: SR-IOV: Couldn't allocate" 2045 printk(KERN_DEBUG "megasas: SR-IOV: Couldn't allocate"
2046 " memory for heartbeat host memory for " 2046 " memory for heartbeat host memory for "
@@ -2048,8 +2048,6 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
2048 retval = -ENOMEM; 2048 retval = -ENOMEM;
2049 goto out; 2049 goto out;
2050 } 2050 }
2051 memset(instance->hb_host_mem, 0,
2052 sizeof(struct MR_CTRL_HB_HOST_MEM));
2053 } 2051 }
2054 2052
2055 memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); 2053 memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 7a6160f172ce..57a95e2c3442 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1915,14 +1915,12 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
1915 /* We use the PCI APIs for now until the generic one gets fixed 1915 /* We use the PCI APIs for now until the generic one gets fixed
1916 * enough or until we get some macio-specific versions 1916 * enough or until we get some macio-specific versions
1917 */ 1917 */
1918 dma_cmd_space = pci_alloc_consistent(macio_get_pci_dev(mdev), 1918 dma_cmd_space = pci_zalloc_consistent(macio_get_pci_dev(mdev),
1919 ms->dma_cmd_size, 1919 ms->dma_cmd_size, &dma_cmd_bus);
1920 &dma_cmd_bus);
1921 if (dma_cmd_space == NULL) { 1920 if (dma_cmd_space == NULL) {
1922 printk(KERN_ERR "mesh: can't allocate DMA table\n"); 1921 printk(KERN_ERR "mesh: can't allocate DMA table\n");
1923 goto out_unmap; 1922 goto out_unmap;
1924 } 1923 }
1925 memset(dma_cmd_space, 0, ms->dma_cmd_size);
1926 1924
1927 ms->dma_cmds = (struct dbdma_cmd *) DBDMA_ALIGN(dma_cmd_space); 1925 ms->dma_cmds = (struct dbdma_cmd *) DBDMA_ALIGN(dma_cmd_space);
1928 ms->dma_cmd_space = dma_cmd_space; 1926 ms->dma_cmd_space = dma_cmd_space;
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index edbee8dc62c9..3e716b2f611a 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -142,8 +142,8 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
142 142
143 case RESOURCE_UNCACHED_MEMORY: 143 case RESOURCE_UNCACHED_MEMORY:
144 size = round_up(size, 8); 144 size = round_up(size, 8);
145 res->virt_addr = pci_alloc_consistent(mhba->pdev, size, 145 res->virt_addr = pci_zalloc_consistent(mhba->pdev, size,
146 &res->bus_addr); 146 &res->bus_addr);
147 if (!res->virt_addr) { 147 if (!res->virt_addr) {
148 dev_err(&mhba->pdev->dev, 148 dev_err(&mhba->pdev->dev,
149 "unable to allocate consistent mem," 149 "unable to allocate consistent mem,"
@@ -151,7 +151,6 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
151 kfree(res); 151 kfree(res);
152 return NULL; 152 return NULL;
153 } 153 }
154 memset(res->virt_addr, 0, size);
155 break; 154 break;
156 155
157 default: 156 default:
@@ -258,12 +257,10 @@ static int mvumi_internal_cmd_sgl(struct mvumi_hba *mhba, struct mvumi_cmd *cmd,
258 if (size == 0) 257 if (size == 0)
259 return 0; 258 return 0;
260 259
261 virt_addr = pci_alloc_consistent(mhba->pdev, size, &phy_addr); 260 virt_addr = pci_zalloc_consistent(mhba->pdev, size, &phy_addr);
262 if (!virt_addr) 261 if (!virt_addr)
263 return -1; 262 return -1;
264 263
265 memset(virt_addr, 0, size);
266
267 m_sg = (struct mvumi_sgl *) &cmd->frame->payload[0]; 264 m_sg = (struct mvumi_sgl *) &cmd->frame->payload[0];
268 cmd->frame->sg_counts = 1; 265 cmd->frame->sg_counts = 1;
269 cmd->data_buf = virt_addr; 266 cmd->data_buf = virt_addr;
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 34cea8291772..76570e6a547d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -116,13 +116,12 @@ int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr,
116 u64 align_offset = 0; 116 u64 align_offset = 0;
117 if (align) 117 if (align)
118 align_offset = (dma_addr_t)align - 1; 118 align_offset = (dma_addr_t)align - 1;
119 mem_virt_alloc = 119 mem_virt_alloc = pci_zalloc_consistent(pdev, mem_size + align,
120 pci_alloc_consistent(pdev, mem_size + align, &mem_dma_handle); 120 &mem_dma_handle);
121 if (!mem_virt_alloc) { 121 if (!mem_virt_alloc) {
122 pm8001_printk("memory allocation error\n"); 122 pm8001_printk("memory allocation error\n");
123 return -1; 123 return -1;
124 } 124 }
125 memset((void *)mem_virt_alloc, 0, mem_size+align);
126 *pphys_addr = mem_dma_handle; 125 *pphys_addr = mem_dma_handle;
127 phys_align = (*pphys_addr + align_offset) & ~align_offset; 126 phys_align = (*pphys_addr + align_offset) & ~align_offset;
128 *virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr; 127 *virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 017f8b9554e5..6f3275d020a0 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -4213,9 +4213,9 @@ static ssize_t pmcraid_store_log_level(
4213{ 4213{
4214 struct Scsi_Host *shost; 4214 struct Scsi_Host *shost;
4215 struct pmcraid_instance *pinstance; 4215 struct pmcraid_instance *pinstance;
4216 unsigned long val; 4216 u8 val;
4217 4217
4218 if (strict_strtoul(buf, 10, &val)) 4218 if (kstrtou8(buf, 10, &val))
4219 return -EINVAL; 4219 return -EINVAL;
4220 /* log-level should be from 0 to 2 */ 4220 /* log-level should be from 0 to 2 */
4221 if (val > 2) 4221 if (val > 2)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 406b3038bbad..8b4105a22ac2 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -910,9 +910,9 @@ sdev_store_queue_ramp_up_period(struct device *dev,
910 const char *buf, size_t count) 910 const char *buf, size_t count)
911{ 911{
912 struct scsi_device *sdev = to_scsi_device(dev); 912 struct scsi_device *sdev = to_scsi_device(dev);
913 unsigned long period; 913 unsigned int period;
914 914
915 if (strict_strtoul(buf, 10, &period)) 915 if (kstrtouint(buf, 10, &period))
916 return -EINVAL; 916 return -EINVAL;
917 917
918 sdev->queue_ramp_up_period = msecs_to_jiffies(period); 918 sdev->queue_ramp_up_period = msecs_to_jiffies(period);
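
Both sysfs store conversions above drop the deprecated strict_strtoul()-into-unsigned-long pattern for the type-checked kstrto*() helpers, which reject non-numeric input and any value that overflows the destination type. A sketch of the pattern (parse_log_level() is an illustrative name, not part of the patch):

#include <linux/kernel.h>

/* Sketch: parse a base-10 sysfs write into a bounded u8 log level. */
static int parse_log_level(const char *buf, u8 *level)
{
	u8 val;

	if (kstrtou8(buf, 10, &val))	/* fails on junk or overflow */
		return -EINVAL;
	if (val > 2)			/* valid log levels are 0..2 */
		return -EINVAL;
	*level = val;
	return 0;
}
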
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 2920e406030a..5729cf678765 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -2065,20 +2065,16 @@ static short rtl8192_alloc_rx_desc_ring(struct net_device *dev)
2065 int i, rx_queue_idx; 2065 int i, rx_queue_idx;
2066 2066
2067 for (rx_queue_idx = 0; rx_queue_idx < MAX_RX_QUEUE; rx_queue_idx++) { 2067 for (rx_queue_idx = 0; rx_queue_idx < MAX_RX_QUEUE; rx_queue_idx++) {
2068 priv->rx_ring[rx_queue_idx] = pci_alloc_consistent(priv->pdev, 2068 priv->rx_ring[rx_queue_idx] =
2069 sizeof(*priv->rx_ring[rx_queue_idx]) * 2069 pci_zalloc_consistent(priv->pdev,
2070 priv->rxringcount, 2070 sizeof(*priv->rx_ring[rx_queue_idx]) * priv->rxringcount,
2071 &priv->rx_ring_dma[rx_queue_idx]); 2071 &priv->rx_ring_dma[rx_queue_idx]);
2072
2073 if (!priv->rx_ring[rx_queue_idx] || 2072 if (!priv->rx_ring[rx_queue_idx] ||
2074 (unsigned long)priv->rx_ring[rx_queue_idx] & 0xFF) { 2073 (unsigned long)priv->rx_ring[rx_queue_idx] & 0xFF) {
2075 RT_TRACE(COMP_ERR, "Cannot allocate RX ring\n"); 2074 RT_TRACE(COMP_ERR, "Cannot allocate RX ring\n");
2076 return -ENOMEM; 2075 return -ENOMEM;
2077 } 2076 }
2078 2077
2079 memset(priv->rx_ring[rx_queue_idx], 0,
2080 sizeof(*priv->rx_ring[rx_queue_idx]) *
2081 priv->rxringcount);
2082 priv->rx_idx[rx_queue_idx] = 0; 2078 priv->rx_idx[rx_queue_idx] = 0;
2083 2079
2084 for (i = 0; i < priv->rxringcount; i++) { 2080 for (i = 0; i < priv->rxringcount; i++) {
@@ -2118,14 +2114,13 @@ static int rtl8192_alloc_tx_desc_ring(struct net_device *dev,
2118 dma_addr_t dma; 2114 dma_addr_t dma;
2119 int i; 2115 int i;
2120 2116
2121 ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma); 2117 ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
2122 if (!ring || (unsigned long)ring & 0xFF) { 2118 if (!ring || (unsigned long)ring & 0xFF) {
2123 RT_TRACE(COMP_ERR, "Cannot allocate TX ring (prio = %d)\n", 2119 RT_TRACE(COMP_ERR, "Cannot allocate TX ring (prio = %d)\n",
2124 prio); 2120 prio);
2125 return -ENOMEM; 2121 return -ENOMEM;
2126 } 2122 }
2127 2123
2128 memset(ring, 0, sizeof(*ring)*entries);
2129 priv->tx_ring[prio].desc = ring; 2124 priv->tx_ring[prio].desc = ring;
2130 priv->tx_ring[prio].dma = dma; 2125 priv->tx_ring[prio].dma = dma;
2131 priv->tx_ring[prio].idx = 0; 2126 priv->tx_ring[prio].idx = 0;
diff --git a/drivers/staging/rtl8192ee/pci.c b/drivers/staging/rtl8192ee/pci.c
index f3abbcc9f3ba..0215aef1eacc 100644
--- a/drivers/staging/rtl8192ee/pci.c
+++ b/drivers/staging/rtl8192ee/pci.c
@@ -1224,10 +1224,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1224 1224
1225 /* alloc tx buffer desc for new trx flow*/ 1225 /* alloc tx buffer desc for new trx flow*/
1226 if (rtlpriv->use_new_trx_flow) { 1226 if (rtlpriv->use_new_trx_flow) {
1227 buffer_desc = pci_alloc_consistent(rtlpci->pdev, 1227 buffer_desc =
1228 sizeof(*buffer_desc) * entries, 1228 pci_zalloc_consistent(rtlpci->pdev,
1229 &buffer_desc_dma); 1229 sizeof(*buffer_desc) * entries,
1230 1230 &buffer_desc_dma);
1231 if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) { 1231 if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
1232 RT_TRACE(COMP_ERR, DBG_EMERG, 1232 RT_TRACE(COMP_ERR, DBG_EMERG,
1233 ("Cannot allocate TX ring (prio = %d)\n", 1233 ("Cannot allocate TX ring (prio = %d)\n",
@@ -1235,7 +1235,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1235 return -ENOMEM; 1235 return -ENOMEM;
1236 } 1236 }
1237 1237
1238 memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
1239 rtlpci->tx_ring[prio].buffer_desc = buffer_desc; 1238 rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
1240 rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma; 1239 rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
1241 1240
@@ -1245,16 +1244,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1245 } 1244 }
1246 1245
1247 /* alloc dma for this ring */ 1246 /* alloc dma for this ring */
1248 desc = pci_alloc_consistent(rtlpci->pdev, 1247 desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
1249 sizeof(*desc) * entries, &desc_dma); 1248 &desc_dma);
1250
1251 if (!desc || (unsigned long)desc & 0xFF) { 1249 if (!desc || (unsigned long)desc & 0xFF) {
1252 RT_TRACE(COMP_ERR, DBG_EMERG, 1250 RT_TRACE(COMP_ERR, DBG_EMERG,
1253 ("Cannot allocate TX ring (prio = %d)\n", prio)); 1251 ("Cannot allocate TX ring (prio = %d)\n", prio));
1254 return -ENOMEM; 1252 return -ENOMEM;
1255 } 1253 }
1256 1254
1257 memset(desc, 0, sizeof(*desc) * entries);
1258 rtlpci->tx_ring[prio].desc = desc; 1255 rtlpci->tx_ring[prio].desc = desc;
1259 rtlpci->tx_ring[prio].dma = desc_dma; 1256 rtlpci->tx_ring[prio].dma = desc_dma;
1260 1257
@@ -1290,11 +1287,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1290 struct rtl_rx_buffer_desc *entry = NULL; 1287 struct rtl_rx_buffer_desc *entry = NULL;
1291 /* alloc dma for this ring */ 1288 /* alloc dma for this ring */
1292 rtlpci->rx_ring[rxring_idx].buffer_desc = 1289 rtlpci->rx_ring[rxring_idx].buffer_desc =
1293 pci_alloc_consistent(rtlpci->pdev, 1290 pci_zalloc_consistent(rtlpci->pdev,
1294 sizeof(*rtlpci->rx_ring[rxring_idx]. 1291 sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
1295 buffer_desc) * 1292 &rtlpci->rx_ring[rxring_idx].dma);
1296 rtlpci->rxringcount,
1297 &rtlpci->rx_ring[rxring_idx].dma);
1298 if (!rtlpci->rx_ring[rxring_idx].buffer_desc || 1293 if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
1299 (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) { 1294 (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
1300 RT_TRACE(COMP_ERR, DBG_EMERG, 1295 RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1302,10 +1297,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1302 return -ENOMEM; 1297 return -ENOMEM;
1303 } 1298 }
1304 1299
1305 memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
1306 sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
1307 rtlpci->rxringcount);
1308
1309 /* init every desc in this ring */ 1300 /* init every desc in this ring */
1310 rtlpci->rx_ring[rxring_idx].idx = 0; 1301 rtlpci->rx_ring[rxring_idx].idx = 0;
1311 1302
@@ -1320,19 +1311,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1320 u8 tmp_one = 1; 1311 u8 tmp_one = 1;
1321 /* alloc dma for this ring */ 1312 /* alloc dma for this ring */
1322 rtlpci->rx_ring[rxring_idx].desc = 1313 rtlpci->rx_ring[rxring_idx].desc =
1323 pci_alloc_consistent(rtlpci->pdev, 1314 pci_zalloc_consistent(rtlpci->pdev,
1324 sizeof(*rtlpci->rx_ring[rxring_idx]. 1315 sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
1325 desc) * rtlpci->rxringcount, 1316 &rtlpci->rx_ring[rxring_idx].dma);
1326 &rtlpci->rx_ring[rxring_idx].dma);
1327 if (!rtlpci->rx_ring[rxring_idx].desc || 1317 if (!rtlpci->rx_ring[rxring_idx].desc ||
1328 (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) { 1318 (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
1329 RT_TRACE(COMP_ERR, DBG_EMERG, 1319 RT_TRACE(COMP_ERR, DBG_EMERG,
1330 ("Cannot allocate RX ring\n")); 1320 ("Cannot allocate RX ring\n"));
1331 return -ENOMEM; 1321 return -ENOMEM;
1332 } 1322 }
1333 memset(rtlpci->rx_ring[rxring_idx].desc, 0,
1334 sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
1335 rtlpci->rxringcount);
1336 1323
1337 /* init every desc in this ring */ 1324 /* init every desc in this ring */
1338 rtlpci->rx_ring[rxring_idx].idx = 0; 1325 rtlpci->rx_ring[rxring_idx].idx = 0;
diff --git a/drivers/staging/rtl8821ae/pci.c b/drivers/staging/rtl8821ae/pci.c
index f9847d1fbdeb..26d7b2fc852a 100644
--- a/drivers/staging/rtl8821ae/pci.c
+++ b/drivers/staging/rtl8821ae/pci.c
@@ -1248,9 +1248,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1248 1248
1249 /* alloc tx buffer desc for new trx flow*/ 1249 /* alloc tx buffer desc for new trx flow*/
1250 if (rtlpriv->use_new_trx_flow) { 1250 if (rtlpriv->use_new_trx_flow) {
1251 buffer_desc = pci_alloc_consistent(rtlpci->pdev, 1251 buffer_desc =
1252 sizeof(*buffer_desc) * entries, 1252 pci_zalloc_consistent(rtlpci->pdev,
1253 &buffer_desc_dma); 1253 sizeof(*buffer_desc) * entries,
1254 &buffer_desc_dma);
1254 1255
1255 if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) { 1256 if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
1256 RT_TRACE(COMP_ERR, DBG_EMERG, 1257 RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1259,7 +1260,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1259 return -ENOMEM; 1260 return -ENOMEM;
1260 } 1261 }
1261 1262
1262 memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
1263 rtlpci->tx_ring[prio].buffer_desc = buffer_desc; 1263 rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
1264 rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma; 1264 rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
1265 1265
@@ -1270,8 +1270,8 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1270 } 1270 }
1271 1271
1272 /* alloc dma for this ring */ 1272 /* alloc dma for this ring */
1273 desc = pci_alloc_consistent(rtlpci->pdev, 1273 desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
1274 sizeof(*desc) * entries, &desc_dma); 1274 &desc_dma);
1275 1275
1276 if (!desc || (unsigned long)desc & 0xFF) { 1276 if (!desc || (unsigned long)desc & 0xFF) {
1277 RT_TRACE(COMP_ERR, DBG_EMERG, 1277 RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1279,7 +1279,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
1279 return -ENOMEM; 1279 return -ENOMEM;
1280 } 1280 }
1281 1281
1282 memset(desc, 0, sizeof(*desc) * entries);
1283 rtlpci->tx_ring[prio].desc = desc; 1282 rtlpci->tx_ring[prio].desc = desc;
1284 rtlpci->tx_ring[prio].dma = desc_dma; 1283 rtlpci->tx_ring[prio].dma = desc_dma;
1285 1284
@@ -1316,21 +1315,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1316 struct rtl_rx_buffer_desc *entry = NULL; 1315 struct rtl_rx_buffer_desc *entry = NULL;
1317 /* alloc dma for this ring */ 1316 /* alloc dma for this ring */
1318 rtlpci->rx_ring[rxring_idx].buffer_desc = 1317 rtlpci->rx_ring[rxring_idx].buffer_desc =
1319 pci_alloc_consistent(rtlpci->pdev, 1318 pci_zalloc_consistent(rtlpci->pdev,
1320 sizeof(*rtlpci->rx_ring[rxring_idx]. 1319 sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
1321 buffer_desc) * 1320 &rtlpci->rx_ring[rxring_idx].dma);
1322 rtlpci->rxringcount,
1323 &rtlpci->rx_ring[rxring_idx].dma);
1324 if (!rtlpci->rx_ring[rxring_idx].buffer_desc || 1321 if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
1325 (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) { 1322 (unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
1326 RT_TRACE(COMP_ERR, DBG_EMERG, ("Cannot allocate RX ring\n")); 1323 RT_TRACE(COMP_ERR, DBG_EMERG, ("Cannot allocate RX ring\n"));
1327 return -ENOMEM; 1324 return -ENOMEM;
1328 } 1325 }
1329 1326
1330 memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
1331 sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
1332 rtlpci->rxringcount);
1333
1334 /* init every desc in this ring */ 1327 /* init every desc in this ring */
1335 rtlpci->rx_ring[rxring_idx].idx = 0; 1328 rtlpci->rx_ring[rxring_idx].idx = 0;
1336 for (i = 0; i < rtlpci->rxringcount; i++) { 1329 for (i = 0; i < rtlpci->rxringcount; i++) {
@@ -1344,10 +1337,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1344 u8 tmp_one = 1; 1337 u8 tmp_one = 1;
1345 /* alloc dma for this ring */ 1338 /* alloc dma for this ring */
1346 rtlpci->rx_ring[rxring_idx].desc = 1339 rtlpci->rx_ring[rxring_idx].desc =
1347 pci_alloc_consistent(rtlpci->pdev, 1340 pci_zalloc_consistent(rtlpci->pdev,
1348 sizeof(*rtlpci->rx_ring[rxring_idx]. 1341 sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
1349 desc) * rtlpci->rxringcount, 1342 &rtlpci->rx_ring[rxring_idx].dma);
1350 &rtlpci->rx_ring[rxring_idx].dma);
1351 if (!rtlpci->rx_ring[rxring_idx].desc || 1343 if (!rtlpci->rx_ring[rxring_idx].desc ||
1352 (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) { 1344 (unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
1353 RT_TRACE(COMP_ERR, DBG_EMERG, 1345 RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1355,10 +1347,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
1355 return -ENOMEM; 1347 return -ENOMEM;
1356 } 1348 }
1357 1349
1358 memset(rtlpci->rx_ring[rxring_idx].desc, 0,
1359 sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
1360 rtlpci->rxringcount);
1361
1362 /* init every desc in this ring */ 1350 /* init every desc in this ring */
1363 rtlpci->rx_ring[rxring_idx].idx = 0; 1351 rtlpci->rx_ring[rxring_idx].idx = 0;
1364 for (i = 0; i < rtlpci->rxringcount; i++) { 1352 for (i = 0; i < rtlpci->rxringcount; i++) {
diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c
index 50ece291fc6a..f35fa3dfe22c 100644
--- a/drivers/staging/slicoss/slicoss.c
+++ b/drivers/staging/slicoss/slicoss.c
@@ -1191,18 +1191,15 @@ static int slic_rspqueue_init(struct adapter *adapter)
1191 rspq->num_pages = SLIC_RSPQ_PAGES_GB; 1191 rspq->num_pages = SLIC_RSPQ_PAGES_GB;
1192 1192
1193 for (i = 0; i < rspq->num_pages; i++) { 1193 for (i = 0; i < rspq->num_pages; i++) {
1194 rspq->vaddr[i] = pci_alloc_consistent(adapter->pcidev, 1194 rspq->vaddr[i] = pci_zalloc_consistent(adapter->pcidev,
1195 PAGE_SIZE, 1195 PAGE_SIZE,
1196 &rspq->paddr[i]); 1196 &rspq->paddr[i]);
1197 if (!rspq->vaddr[i]) { 1197 if (!rspq->vaddr[i]) {
1198 dev_err(&adapter->pcidev->dev, 1198 dev_err(&adapter->pcidev->dev,
1199 "pci_alloc_consistent failed\n"); 1199 "pci_alloc_consistent failed\n");
1200 slic_rspqueue_free(adapter); 1200 slic_rspqueue_free(adapter);
1201 return -ENOMEM; 1201 return -ENOMEM;
1202 } 1202 }
1203 /* FIXME:
1204 * do we really need this assertions (4K PAGE_SIZE aligned addr)? */
1205 memset(rspq->vaddr[i], 0, PAGE_SIZE);
1206 1203
1207 if (paddrh == 0) { 1204 if (paddrh == 0) {
1208 slic_reg32_write(&slic_regs->slic_rbar, 1205 slic_reg32_write(&slic_regs->slic_rbar,
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index c78d06eff7ea..0b583a37f5b3 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1111,25 +1111,17 @@ static bool device_init_rings(PSDevice pDevice)
1111 void *vir_pool; 1111 void *vir_pool;
1112 1112
1113 /*allocate all RD/TD rings a single pool*/ 1113 /*allocate all RD/TD rings a single pool*/
1114 vir_pool = pci_alloc_consistent(pDevice->pcid, 1114 vir_pool = pci_zalloc_consistent(pDevice->pcid,
1115 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) + 1115 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
1116 pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) + 1116 pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
1117 pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) + 1117 pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
1118 pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc), 1118 pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc),
1119 &pDevice->pool_dma); 1119 &pDevice->pool_dma);
1120
1121 if (vir_pool == NULL) { 1120 if (vir_pool == NULL) {
1122 DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s : allocate desc dma memory failed\n", pDevice->dev->name); 1121 DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s : allocate desc dma memory failed\n", pDevice->dev->name);
1123 return false; 1122 return false;
1124 } 1123 }
1125 1124
1126 memset(vir_pool, 0,
1127 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
1128 pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
1129 pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
1130 pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc)
1131 );
1132
1133 pDevice->aRD0Ring = vir_pool; 1125 pDevice->aRD0Ring = vir_pool;
1134 pDevice->aRD1Ring = vir_pool + 1126 pDevice->aRD1Ring = vir_pool +
1135 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc); 1127 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
@@ -1138,13 +1130,12 @@ static bool device_init_rings(PSDevice pDevice)
1138 pDevice->rd1_pool_dma = pDevice->rd0_pool_dma + 1130 pDevice->rd1_pool_dma = pDevice->rd0_pool_dma +
1139 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc); 1131 pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
1140 1132
1141 pDevice->tx0_bufs = pci_alloc_consistent(pDevice->pcid, 1133 pDevice->tx0_bufs = pci_zalloc_consistent(pDevice->pcid,
1142 pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ + 1134 pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
1143 pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ + 1135 pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
1144 CB_BEACON_BUF_SIZE + 1136 CB_BEACON_BUF_SIZE +
1145 CB_MAX_BUF_SIZE, 1137 CB_MAX_BUF_SIZE,
1146 &pDevice->tx_bufs_dma0); 1138 &pDevice->tx_bufs_dma0);
1147
1148 if (pDevice->tx0_bufs == NULL) { 1139 if (pDevice->tx0_bufs == NULL) {
1149 DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s: allocate buf dma memory failed\n", pDevice->dev->name); 1140 DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s: allocate buf dma memory failed\n", pDevice->dev->name);
1150 pci_free_consistent(pDevice->pcid, 1141 pci_free_consistent(pDevice->pcid,
@@ -1157,13 +1148,6 @@ static bool device_init_rings(PSDevice pDevice)
1157 return false; 1148 return false;
1158 } 1149 }
1159 1150
1160 memset(pDevice->tx0_bufs, 0,
1161 pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
1162 pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
1163 CB_BEACON_BUF_SIZE +
1164 CB_MAX_BUF_SIZE
1165 );
1166
1167 pDevice->td0_pool_dma = pDevice->rd1_pool_dma + 1151 pDevice->td0_pool_dma = pDevice->rd1_pool_dma +
1168 pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc); 1152 pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc);
1169 1153
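
device_init_rings() above allocates one coherent pool and carves it into the RD0/RD1/TD0/TD1 rings by byte offset, advancing the CPU pointer and the bus address in lockstep. A hedged sketch of that idiom; identifiers such as rx0_n, rx1_n and desc_sz are illustrative, not the driver's:

    static int example_init_rings(struct pci_dev *pdev)
    {
            const size_t rx0_n = 64, rx1_n = 64;  /* illustrative counts */
            const size_t desc_sz = 32;            /* illustrative size   */
            size_t rx0_bytes = rx0_n * desc_sz;
            size_t rx1_bytes = rx1_n * desc_sz;
            dma_addr_t pool_dma, rx0_dma, rx1_dma;
            void *pool, *rx0_ring, *rx1_ring;

            pool = pci_zalloc_consistent(pdev, rx0_bytes + rx1_bytes,
                                         &pool_dma);
            if (!pool)
                    return -ENOMEM;

            rx0_ring = pool;                 /* kernel-virtual addresses */
            rx1_ring = pool + rx0_bytes;
            rx0_dma  = pool_dma;             /* matching bus addresses   */
            rx1_dma  = pool_dma + rx0_bytes;
            /* ... hand the pieces to the rings; teardown is a single
             * pci_free_consistent() of the full pool size ... */
            return 0;
    }
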
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index ba1dbcdf4609..0e8c39b6ccd4 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -3383,12 +3383,11 @@ static int alloc_desc(struct slgt_info *info)
3383 unsigned int pbufs; 3383 unsigned int pbufs;
3384 3384
3385 /* allocate memory to hold descriptor lists */ 3385 /* allocate memory to hold descriptor lists */
3386 info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr); 3386 info->bufs = pci_zalloc_consistent(info->pdev, DESC_LIST_SIZE,
3387 &info->bufs_dma_addr);
3387 if (info->bufs == NULL) 3388 if (info->bufs == NULL)
3388 return -ENOMEM; 3389 return -ENOMEM;
3389 3390
3390 memset(info->bufs, 0, DESC_LIST_SIZE);
3391
3392 info->rbufs = (struct slgt_desc*)info->bufs; 3391 info->rbufs = (struct slgt_desc*)info->bufs;
3393 info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count; 3392 info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
3394 3393
diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index bfb2d3f06738..18078ecbfcc6 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -1555,16 +1555,14 @@ static int ca91cx42_crcsr_init(struct vme_bridge *ca91cx42_bridge,
1555 } 1555 }
1556 1556
1557 /* Allocate mem for CR/CSR image */ 1557 /* Allocate mem for CR/CSR image */
1558 bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE, 1558 bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
1559 &bridge->crcsr_bus); 1559 &bridge->crcsr_bus);
1560 if (bridge->crcsr_kernel == NULL) { 1560 if (bridge->crcsr_kernel == NULL) {
1561 dev_err(&pdev->dev, "Failed to allocate memory for CR/CSR " 1561 dev_err(&pdev->dev, "Failed to allocate memory for CR/CSR "
1562 "image\n"); 1562 "image\n");
1563 return -ENOMEM; 1563 return -ENOMEM;
1564 } 1564 }
1565 1565
1566 memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
1567
1568 crcsr_addr = slot * (512 * 1024); 1566 crcsr_addr = slot * (512 * 1024);
1569 iowrite32(bridge->crcsr_bus - crcsr_addr, bridge->base + VCSR_TO); 1567 iowrite32(bridge->crcsr_bus - crcsr_addr, bridge->base + VCSR_TO);
1570 1568
diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c
index 61e706c0e00c..e07cfa8001bb 100644
--- a/drivers/vme/bridges/vme_tsi148.c
+++ b/drivers/vme/bridges/vme_tsi148.c
@@ -2275,16 +2275,14 @@ static int tsi148_crcsr_init(struct vme_bridge *tsi148_bridge,
2275 bridge = tsi148_bridge->driver_priv; 2275 bridge = tsi148_bridge->driver_priv;
2276 2276
2277 /* Allocate mem for CR/CSR image */ 2277 /* Allocate mem for CR/CSR image */
2278 bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE, 2278 bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
2279 &bridge->crcsr_bus); 2279 &bridge->crcsr_bus);
2280 if (bridge->crcsr_kernel == NULL) { 2280 if (bridge->crcsr_kernel == NULL) {
2281 dev_err(tsi148_bridge->parent, "Failed to allocate memory for " 2281 dev_err(tsi148_bridge->parent, "Failed to allocate memory for "
2282 "CR/CSR image\n"); 2282 "CR/CSR image\n");
2283 return -ENOMEM; 2283 return -ENOMEM;
2284 } 2284 }
2285 2285
2286 memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
2287
2288 reg_split(bridge->crcsr_bus, &crcsr_bus_high, &crcsr_bus_low); 2286 reg_split(bridge->crcsr_bus, &crcsr_bus_high, &crcsr_bus_low);
2289 2287
2290 iowrite32be(crcsr_bus_high, bridge->base + TSI148_LCSR_CROU); 2288 iowrite32be(crcsr_bus_high, bridge->base + TSI148_LCSR_CROU);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337c4b45..24575d9d882d 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
153extern unsigned int adfs_map_free(struct super_block *sb); 153extern unsigned int adfs_map_free(struct super_block *sb);
154 154
155/* Misc */ 155/* Misc */
156__printf(3, 4)
156void __adfs_error(struct super_block *sb, const char *function, 157void __adfs_error(struct super_block *sb, const char *function,
157 const char *fmt, ...); 158 const char *fmt, ...);
158#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt) 159#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
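
The one-line addition to adfs.h is the point of the next two hunks: __printf(3, 4) marks __adfs_error()'s third parameter as a printf-style format consumed by the arguments from position four on, so gcc's -Wformat now audits every adfs_error() call site. That is what flushed out the %lx/%x and %X/%lX mismatches fixed in dir.c and dir_fplus.c below. The macro is the standard compiler.h definition:

    /* include/linux/compiler.h */
    #define __printf(a, b)  __attribute__((format(printf, a, b)))

    __printf(3, 4)
    void __adfs_error(struct super_block *sb, const char *function,
                      const char *fmt, ...);

    /* now warns: %X expects unsigned int, parent_id is unsigned long */
    __adfs_error(sb, __func__, "dir %X", ADFS_I(inode)->parent_id);
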
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0d138c0de293..51c279a29845 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
138 goto out; 138 goto out;
139 139
140 if (ADFS_I(inode)->parent_id != dir.parent_id) { 140 if (ADFS_I(inode)->parent_id != dir.parent_id) {
141 adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n", 141 adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
142 ADFS_I(inode)->parent_id, dir.parent_id); 142 ADFS_I(inode)->parent_id, dir.parent_id);
143 ret = -EIO; 143 ret = -EIO;
144 goto free_out; 144 goto free_out;
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..f2ba88ab4aed 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
55 } 55 }
56 56
57 size >>= sb->s_blocksize_bits; 57 size >>= sb->s_blocksize_bits;
58 if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) { 58 if (size > ARRAY_SIZE(dir->bh)) {
59 /* this directory is too big for fixed bh set, must allocate */ 59 /* this directory is too big for fixed bh set, must allocate */
60 struct buffer_head **bh_fplus = 60 struct buffer_head **bh_fplus =
61 kzalloc(size * sizeof(struct buffer_head *), 61 kcalloc(size, sizeof(struct buffer_head *),
62 GFP_KERNEL); 62 GFP_KERNEL);
63 if (!bh_fplus) { 63 if (!bh_fplus) {
64 adfs_error(sb, "not enough memory for" 64 adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
79 79
80 dir->bh_fplus[blk] = sb_bread(sb, block); 80 dir->bh_fplus[blk] = sb_bread(sb, block);
81 if (!dir->bh_fplus[blk]) { 81 if (!dir->bh_fplus[blk]) {
82 adfs_error(sb, "dir object %X failed read for" 82 adfs_error(sb, "dir object %x failed read for offset %d, mapped block %lX",
83 " offset %d, mapped block %X", 83 id, blk, block);
84 id, blk, block);
85 goto out; 84 goto out;
86 } 85 }
87 86
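
Two independent fixes share the dir_fplus.c hunk above: ARRAY_SIZE() replaces the open-coded sizeof division, and kcalloc() replaces kzalloc() with a hand-written multiply. kcalloc() matters because it checks the n * size product for overflow and returns NULL rather than a short allocation. Schematically:

    /* overflow-checked: NULL if size * sizeof(...) would wrap */
    bh_fplus = kcalloc(size, sizeof(struct buffer_head *), GFP_KERNEL);

    /* unchecked: a huge 'size' can wrap the multiply and under-allocate */
    bh_fplus = kzalloc(size * sizeof(struct buffer_head *), GFP_KERNEL);
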
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index acf32054edd8..9e359fb20c0a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -143,20 +143,6 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
143 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; 143 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
144} 144}
145 145
146/* Does a dentry have some pending activity? */
147static inline int autofs4_ispending(struct dentry *dentry)
148{
149 struct autofs_info *inf = autofs4_dentry_ino(dentry);
150
151 if (inf->flags & AUTOFS_INF_PENDING)
152 return 1;
153
154 if (inf->flags & AUTOFS_INF_EXPIRING)
155 return 1;
156
157 return 0;
158}
159
160struct inode *autofs4_get_inode(struct super_block *, umode_t); 146struct inode *autofs4_get_inode(struct super_block *, umode_t);
161void autofs4_free_ino(struct autofs_info *); 147void autofs4_free_ino(struct autofs_info *);
162 148
@@ -191,55 +177,6 @@ extern const struct file_operations autofs4_root_operations;
191extern const struct dentry_operations autofs4_dentry_operations; 177extern const struct dentry_operations autofs4_dentry_operations;
192 178
193/* VFS automount flags management functions */ 179/* VFS automount flags management functions */
194
195static inline void __managed_dentry_set_automount(struct dentry *dentry)
196{
197 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
198}
199
200static inline void managed_dentry_set_automount(struct dentry *dentry)
201{
202 spin_lock(&dentry->d_lock);
203 __managed_dentry_set_automount(dentry);
204 spin_unlock(&dentry->d_lock);
205}
206
207static inline void __managed_dentry_clear_automount(struct dentry *dentry)
208{
209 dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
210}
211
212static inline void managed_dentry_clear_automount(struct dentry *dentry)
213{
214 spin_lock(&dentry->d_lock);
215 __managed_dentry_clear_automount(dentry);
216 spin_unlock(&dentry->d_lock);
217}
218
219static inline void __managed_dentry_set_transit(struct dentry *dentry)
220{
221 dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
222}
223
224static inline void managed_dentry_set_transit(struct dentry *dentry)
225{
226 spin_lock(&dentry->d_lock);
227 __managed_dentry_set_transit(dentry);
228 spin_unlock(&dentry->d_lock);
229}
230
231static inline void __managed_dentry_clear_transit(struct dentry *dentry)
232{
233 dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
234}
235
236static inline void managed_dentry_clear_transit(struct dentry *dentry)
237{
238 spin_lock(&dentry->d_lock);
239 __managed_dentry_clear_transit(dentry);
240 spin_unlock(&dentry->d_lock);
241}
242
243static inline void __managed_dentry_set_managed(struct dentry *dentry) 180static inline void __managed_dentry_set_managed(struct dentry *dentry)
244{ 181{
245 dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); 182 dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 394e90b02c5e..a7be57e39be7 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -333,7 +333,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
333 if (ino->flags & AUTOFS_INF_PENDING) 333 if (ino->flags & AUTOFS_INF_PENDING)
334 goto out; 334 goto out;
335 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 335 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
336 struct autofs_info *ino = autofs4_dentry_ino(root);
337 ino->flags |= AUTOFS_INF_EXPIRING; 336 ino->flags |= AUTOFS_INF_EXPIRING;
338 init_completion(&ino->expire_complete); 337 init_completion(&ino->expire_complete);
339 spin_unlock(&sbi->fs_lock); 338 spin_unlock(&sbi->fs_lock);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index cc87c1abac97..cdb25ebccc4c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -166,8 +166,10 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
166 const unsigned char *str = name->name; 166 const unsigned char *str = name->name;
167 struct list_head *p, *head; 167 struct list_head *p, *head;
168 168
169 spin_lock(&sbi->lookup_lock);
170 head = &sbi->active_list; 169 head = &sbi->active_list;
170 if (list_empty(head))
171 return NULL;
172 spin_lock(&sbi->lookup_lock);
171 list_for_each(p, head) { 173 list_for_each(p, head) {
172 struct autofs_info *ino; 174 struct autofs_info *ino;
173 struct dentry *active; 175 struct dentry *active;
@@ -218,8 +220,10 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
218 const unsigned char *str = name->name; 220 const unsigned char *str = name->name;
219 struct list_head *p, *head; 221 struct list_head *p, *head;
220 222
221 spin_lock(&sbi->lookup_lock);
222 head = &sbi->expiring_list; 223 head = &sbi->expiring_list;
224 if (list_empty(head))
225 return NULL;
226 spin_lock(&sbi->lookup_lock);
223 list_for_each(p, head) { 227 list_for_each(p, head) {
224 struct autofs_info *ino; 228 struct autofs_info *ino;
225 struct dentry *expiring; 229 struct dentry *expiring;
@@ -373,7 +377,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
373 * this because the leaves of the directory tree under the 377 * this because the leaves of the directory tree under the
374 * mount never trigger mounts themselves (they have an autofs 378 * mount never trigger mounts themselves (they have an autofs
375 * trigger mount mounted on them). But v4 pseudo direct mounts 379 * trigger mount mounted on them). But v4 pseudo direct mounts
376 * do need the leaves to to trigger mounts. In this case we 380 * do need the leaves to trigger mounts. In this case we
377 * have no choice but to use the list_empty() check and 381 * have no choice but to use the list_empty() check and
378 * require user space behave. 382 * require user space behave.
379 */ 383 */
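
Both lookup helpers in root.c now test list_empty() before taking lookup_lock rather than after. The unlocked peek is a common kernel fast-path idiom; it is only valid when missing a racing insertion is harmless, which holds here because a lookup that finds nothing is already an accepted outcome. A minimal sketch of the pattern, assuming those semantics:

    if (list_empty(&sbi->active_list))      /* unlocked: may race, benign */
            return NULL;                    /* common case: no lock traffic */

    spin_lock(&sbi->lookup_lock);
    list_for_each(p, &sbi->active_list) {
            /* ... the real scan happens under the lock ... */
    }
    spin_unlock(&sbi->lookup_lock);
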
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 0d6c07cc1149..4cf61ec6b7a8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -832,16 +832,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
832 (befs_super_block *) ((void *) bh->b_data + x86_sb_off); 832 (befs_super_block *) ((void *) bh->b_data + x86_sb_off);
833 } 833 }
834 834
835 if (befs_load_sb(sb, disk_sb) != BEFS_OK) 835 if ((befs_load_sb(sb, disk_sb) != BEFS_OK) ||
836 (befs_check_sb(sb) != BEFS_OK))
836 goto unacquire_bh; 837 goto unacquire_bh;
837 838
838 befs_dump_super_block(sb, disk_sb); 839 befs_dump_super_block(sb, disk_sb);
839 840
840 brelse(bh); 841 brelse(bh);
841 842
842 if (befs_check_sb(sb) != BEFS_OK)
843 goto unacquire_priv_sbp;
844
845 if( befs_sb->num_blocks > ~((sector_t)0) ) { 843 if( befs_sb->num_blocks > ~((sector_t)0) ) {
846 befs_error(sb, "blocks count: %llu " 844 befs_error(sb, "blocks count: %llu "
847 "is larger than the host can use", 845 "is larger than the host can use",
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f40006db36df 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
49extern void bfs_dump_imap(const char *, struct super_block *);
49 50
50/* file.c */ 51/* file.c */
51extern const struct inode_operations bfs_file_inops; 52extern const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a399e6d9dc74..08063ae0a17c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
75 .llseek = generic_file_llseek, 75 .llseek = generic_file_llseek,
76}; 76};
77 77
78extern void dump_imap(const char *, struct super_block *);
79
80static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 78static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
81 bool excl) 79 bool excl)
82{ 80{
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
110 BFS_I(inode)->i_eblock = 0; 108 BFS_I(inode)->i_eblock = 0;
111 insert_inode_hash(inode); 109 insert_inode_hash(inode);
112 mark_inode_dirty(inode); 110 mark_inode_dirty(inode);
113 dump_imap("create", s); 111 bfs_dump_imap("create", s);
114 112
115 err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, 113 err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
116 inode->i_ino); 114 inode->i_ino);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 7041ac35ace8..90bc079d9982 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
30#define dprintf(x...) 30#define dprintf(x...)
31#endif 31#endif
32 32
33void dump_imap(const char *prefix, struct super_block *s);
34
35struct inode *bfs_iget(struct super_block *sb, unsigned long ino) 33struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
36{ 34{
37 struct bfs_inode *di; 35 struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
194 info->si_freeb += bi->i_eblock + 1 - bi->i_sblock; 192 info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
195 info->si_freei++; 193 info->si_freei++;
196 clear_bit(ino, info->si_imap); 194 clear_bit(ino, info->si_imap);
197 dump_imap("delete_inode", s); 195 bfs_dump_imap("delete_inode", s);
198 } 196 }
199 197
200 /* 198 /*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
297 .statfs = bfs_statfs, 295 .statfs = bfs_statfs,
298}; 296};
299 297
300void dump_imap(const char *prefix, struct super_block *s) 298void bfs_dump_imap(const char *prefix, struct super_block *s)
301{ 299{
302#ifdef DEBUG 300#ifdef DEBUG
303 int i; 301 int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
443 } 441 }
444 brelse(bh); 442 brelse(bh);
445 brelse(sbh); 443 brelse(sbh);
446 dump_imap("read_super", s); 444 bfs_dump_imap("read_super", s);
447 return 0; 445 return 0;
448 446
449out3: 447out3:
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..278f8fdeb9ef 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -13,7 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <asm/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2849f41e72a2..1326d38960db 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -13,7 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <asm/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/string.h> 17#include <linux/string.h>
18 18
19#include <linux/coda.h> 19#include <linux/coda.h>
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index cd8a63238b11..9c3dedc000d1 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -19,8 +19,7 @@
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22 22#include <linux/uaccess.h>
23#include <asm/uaccess.h>
24 23
25#include <linux/coda.h> 24#include <linux/coda.h>
26#include <linux/coda_psdev.h> 25#include <linux/coda_psdev.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b7790212..d244d743a232 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -18,7 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <asm/uaccess.h> 21#include <linux/uaccess.h>
22 22
23#include <linux/coda.h> 23#include <linux/coda.h>
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index fe3afb2de880..b945410bfcd5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,9 +21,7 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/pid_namespace.h> 23#include <linux/pid_namespace.h>
24 24#include <linux/uaccess.h>
25#include <asm/uaccess.h>
26
27#include <linux/fs.h> 25#include <linux/fs.h>
28#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
29 27
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3f5de96bbb58..4326d172fc27 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -16,7 +16,7 @@
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <asm/uaccess.h> 19#include <linux/uaccess.h>
20 20
21#include <linux/coda.h> 21#include <linux/coda.h>
22#include <linux/coda_psdev.h> 22#include <linux/coda_psdev.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 5c1e4242368b..822629126e89 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -40,7 +40,7 @@
40#include <linux/pid_namespace.h> 40#include <linux/pid_namespace.h>
41#include <asm/io.h> 41#include <asm/io.h>
42#include <asm/poll.h> 42#include <asm/poll.h>
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44 44
45#include <linux/coda.h> 45#include <linux/coda.h>
46#include <linux/coda_psdev.h> 46#include <linux/coda_psdev.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 21fcf8dcb9cd..5bb6e27298a4 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,7 +27,7 @@
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/mutex.h> 29#include <linux/mutex.h>
30#include <asm/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/vmalloc.h> 31#include <linux/vmalloc.h>
32#include <linux/vfs.h> 32#include <linux/vfs.h>
33 33
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index ddcfe590b8a8..355c522f3585 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -11,6 +11,8 @@
11 * The actual compression is based on zlib, see the other files. 11 * The actual compression is based on zlib, see the other files.
12 */ 12 */
13 13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
14#include <linux/module.h> 16#include <linux/module.h>
15#include <linux/fs.h> 17#include <linux/fs.h>
16#include <linux/pagemap.h> 18#include <linux/pagemap.h>
@@ -21,7 +23,7 @@
21#include <linux/vfs.h> 23#include <linux/vfs.h>
22#include <linux/mutex.h> 24#include <linux/mutex.h>
23#include <uapi/linux/cramfs_fs.h> 25#include <uapi/linux/cramfs_fs.h>
24#include <asm/uaccess.h> 26#include <linux/uaccess.h>
25 27
26#include "internal.h" 28#include "internal.h"
27 29
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
153 155
154static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE]; 156static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
155static unsigned buffer_blocknr[READ_BUFFERS]; 157static unsigned buffer_blocknr[READ_BUFFERS];
156static struct super_block * buffer_dev[READ_BUFFERS]; 158static struct super_block *buffer_dev[READ_BUFFERS];
157static int next_buffer; 159static int next_buffer;
158 160
159/* 161/*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
205 207
206 for (i = 0; i < BLKS_PER_BUF; i++) { 208 for (i = 0; i < BLKS_PER_BUF; i++) {
207 struct page *page = pages[i]; 209 struct page *page = pages[i];
210
208 if (page) { 211 if (page) {
209 wait_on_page_locked(page); 212 wait_on_page_locked(page);
210 if (!PageUptodate(page)) { 213 if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
223 data = read_buffers[buffer]; 226 data = read_buffers[buffer];
224 for (i = 0; i < BLKS_PER_BUF; i++) { 227 for (i = 0; i < BLKS_PER_BUF; i++) {
225 struct page *page = pages[i]; 228 struct page *page = pages[i];
229
226 if (page) { 230 if (page) {
227 memcpy(data, kmap(page), PAGE_CACHE_SIZE); 231 memcpy(data, kmap(page), PAGE_CACHE_SIZE);
228 kunmap(page); 232 kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
237static void cramfs_kill_sb(struct super_block *sb) 241static void cramfs_kill_sb(struct super_block *sb)
238{ 242{
239 struct cramfs_sb_info *sbi = CRAMFS_SB(sb); 243 struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
244
240 kill_block_super(sb); 245 kill_block_super(sb);
241 kfree(sbi); 246 kfree(sbi);
242} 247}
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
277 /* check for wrong endianness */ 282 /* check for wrong endianness */
278 if (super.magic == CRAMFS_MAGIC_WEND) { 283 if (super.magic == CRAMFS_MAGIC_WEND) {
279 if (!silent) 284 if (!silent)
280 printk(KERN_ERR "cramfs: wrong endianness\n"); 285 pr_err("wrong endianness\n");
281 return -EINVAL; 286 return -EINVAL;
282 } 287 }
283 288
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
287 mutex_unlock(&read_mutex); 292 mutex_unlock(&read_mutex);
288 if (super.magic != CRAMFS_MAGIC) { 293 if (super.magic != CRAMFS_MAGIC) {
289 if (super.magic == CRAMFS_MAGIC_WEND && !silent) 294 if (super.magic == CRAMFS_MAGIC_WEND && !silent)
290 printk(KERN_ERR "cramfs: wrong endianness\n"); 295 pr_err("wrong endianness\n");
291 else if (!silent) 296 else if (!silent)
292 printk(KERN_ERR "cramfs: wrong magic\n"); 297 pr_err("wrong magic\n");
293 return -EINVAL; 298 return -EINVAL;
294 } 299 }
295 } 300 }
296 301
297 /* get feature flags first */ 302 /* get feature flags first */
298 if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) { 303 if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
299 printk(KERN_ERR "cramfs: unsupported filesystem features\n"); 304 pr_err("unsupported filesystem features\n");
300 return -EINVAL; 305 return -EINVAL;
301 } 306 }
302 307
303 /* Check that the root inode is in a sane state */ 308 /* Check that the root inode is in a sane state */
304 if (!S_ISDIR(super.root.mode)) { 309 if (!S_ISDIR(super.root.mode)) {
305 printk(KERN_ERR "cramfs: root is not a directory\n"); 310 pr_err("root is not a directory\n");
306 return -EINVAL; 311 return -EINVAL;
307 } 312 }
308 /* correct strange, hard-coded permissions of mkcramfs */ 313 /* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
310 315
311 root_offset = super.root.offset << 2; 316 root_offset = super.root.offset << 2;
312 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) { 317 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
313 sbi->size=super.size; 318 sbi->size = super.size;
314 sbi->blocks=super.fsid.blocks; 319 sbi->blocks = super.fsid.blocks;
315 sbi->files=super.fsid.files; 320 sbi->files = super.fsid.files;
316 } else { 321 } else {
317 sbi->size=1<<28; 322 sbi->size = 1<<28;
318 sbi->blocks=0; 323 sbi->blocks = 0;
319 sbi->files=0; 324 sbi->files = 0;
320 } 325 }
321 sbi->magic=super.magic; 326 sbi->magic = super.magic;
322 sbi->flags=super.flags; 327 sbi->flags = super.flags;
323 if (root_offset == 0) 328 if (root_offset == 0)
324 printk(KERN_INFO "cramfs: empty filesystem"); 329 pr_info("empty filesystem");
325 else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && 330 else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
326 ((root_offset != sizeof(struct cramfs_super)) && 331 ((root_offset != sizeof(struct cramfs_super)) &&
327 (root_offset != 512 + sizeof(struct cramfs_super)))) 332 (root_offset != 512 + sizeof(struct cramfs_super))))
328 { 333 {
329 printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset); 334 pr_err("bad root offset %lu\n", root_offset);
330 return -EINVAL; 335 return -EINVAL;
331 } 336 }
332 337
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
425/* 430/*
426 * Lookup and fill in the inode data.. 431 * Lookup and fill in the inode data..
427 */ 432 */
428static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 433static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
429{ 434{
430 unsigned int offset = 0; 435 unsigned int offset = 0;
431 struct inode *inode = NULL; 436 struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
483 return NULL; 488 return NULL;
484} 489}
485 490
486static int cramfs_readpage(struct file *file, struct page * page) 491static int cramfs_readpage(struct file *file, struct page *page)
487{ 492{
488 struct inode *inode = page->mapping->host; 493 struct inode *inode = page->mapping->host;
489 u32 maxblock; 494 u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
511 if (compr_len == 0) 516 if (compr_len == 0)
512 ; /* hole */ 517 ; /* hole */
513 else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) { 518 else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
514 pr_err("cramfs: bad compressed blocksize %u\n", 519 pr_err("bad compressed blocksize %u\n",
515 compr_len); 520 compr_len);
516 goto err; 521 goto err;
517 } else { 522 } else {
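
The cramfs conversion swaps bare printk(KERN_ERR "cramfs: ...") calls for pr_err() and supplies the prefix once per file through pr_fmt. pr_err() expands to printk(KERN_ERR pr_fmt(fmt), ...), so defining pr_fmt before the first #include rewrites every message site:

    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #include <linux/kernel.h>

    pr_err("wrong endianness\n");
    /* compiles as printk(KERN_ERR "cramfs" ": " "wrong endianness\n"),
     * keeping the prefix out of each call site */
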
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 1760c1b84d97..ec4f1d4fdad0 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -15,6 +15,8 @@
15 * then is used by multiple filesystems. 15 * then is used by multiple filesystems.
16 */ 16 */
17 17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
18#include <linux/kernel.h> 20#include <linux/kernel.h>
19#include <linux/errno.h> 21#include <linux/errno.h>
20#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
37 39
38 err = zlib_inflateReset(&stream); 40 err = zlib_inflateReset(&stream);
39 if (err != Z_OK) { 41 if (err != Z_OK) {
40 printk("zlib_inflateReset error %d\n", err); 42 pr_err("zlib_inflateReset error %d\n", err);
41 zlib_inflateEnd(&stream); 43 zlib_inflateEnd(&stream);
42 zlib_inflateInit(&stream); 44 zlib_inflateInit(&stream);
43 } 45 }
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
48 return stream.total_out; 50 return stream.total_out;
49 51
50err: 52err:
51 printk("Error %d while decompressing!\n", err); 53 pr_err("Error %d while decompressing!\n", err);
52 printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen); 54 pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
53 return -EIO; 55 return -EIO;
54} 56}
55 57
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
57{ 59{
58 if (!initialized++) { 60 if (!initialized++) {
59 stream.workspace = vmalloc(zlib_inflate_workspacesize()); 61 stream.workspace = vmalloc(zlib_inflate_workspacesize());
60 if ( !stream.workspace ) { 62 if (!stream.workspace) {
61 initialized = 0; 63 initialized = 0;
62 return -ENOMEM; 64 return -ENOMEM;
63 } 65 }
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8d77ba7b1756..1323c568e362 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
718 718
719void dlm_delete_debug_file(struct dlm_ls *ls) 719void dlm_delete_debug_file(struct dlm_ls *ls)
720{ 720{
721 if (ls->ls_debug_rsb_dentry) 721 debugfs_remove(ls->ls_debug_rsb_dentry);
722 debugfs_remove(ls->ls_debug_rsb_dentry); 722 debugfs_remove(ls->ls_debug_waiters_dentry);
723 if (ls->ls_debug_waiters_dentry) 723 debugfs_remove(ls->ls_debug_locks_dentry);
724 debugfs_remove(ls->ls_debug_waiters_dentry); 724 debugfs_remove(ls->ls_debug_all_dentry);
725 if (ls->ls_debug_locks_dentry) 725 debugfs_remove(ls->ls_debug_toss_dentry);
726 debugfs_remove(ls->ls_debug_locks_dentry);
727 if (ls->ls_debug_all_dentry)
728 debugfs_remove(ls->ls_debug_all_dentry);
729 if (ls->ls_debug_toss_dentry)
730 debugfs_remove(ls->ls_debug_toss_dentry);
731} 726}
732 727
733int dlm_create_debug_file(struct dlm_ls *ls) 728int dlm_create_debug_file(struct dlm_ls *ls)
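
The dlm cleanup works because debugfs_remove() already tolerates NULL (and error) dentries; the guard lives inside the helper, so call sites need none of their own. Paraphrasing the fs/debugfs/inode.c entry point, not quoting it:

    void debugfs_remove(struct dentry *dentry)
    {
            if (IS_ERR_OR_NULL(dentry))
                    return;         /* nothing to do for an unset handle */
            /* ... unhash and drop the dentry ... */
    }
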
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 356c044e2cd3..bbee8f063dfa 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -12,7 +12,8 @@
12#include "efs.h" 12#include "efs.h"
13 13
14 14
15static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { 15static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
16{
16 struct buffer_head *bh; 17 struct buffer_head *bh;
17 18
18 int slot, namelen; 19 int slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
40 if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) { 41 if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
41 pr_err("%s(): invalid directory block\n", __func__); 42 pr_err("%s(): invalid directory block\n", __func__);
42 brelse(bh); 43 brelse(bh);
43 return(0); 44 return 0;
44 } 45 }
45 46
46 for(slot = 0; slot < dirblock->slots; slot++) { 47 for (slot = 0; slot < dirblock->slots; slot++) {
47 dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); 48 dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
48 49
49 namelen = dirslot->namelen; 50 namelen = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
52 if ((namelen == len) && (!memcmp(name, nameptr, len))) { 53 if ((namelen == len) && (!memcmp(name, nameptr, len))) {
53 inodenum = be32_to_cpu(dirslot->inode); 54 inodenum = be32_to_cpu(dirslot->inode);
54 brelse(bh); 55 brelse(bh);
55 return(inodenum); 56 return inodenum;
56 } 57 }
57 } 58 }
58 brelse(bh); 59 brelse(bh);
59 } 60 }
60 return(0); 61 return 0;
61} 62}
62 63
63struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 64struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
diff --git a/fs/exec.c b/fs/exec.c
index ab1f1200ce5d..a2b42a98c743 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
368 if (!mm) 368 if (!mm)
369 goto err; 369 goto err;
370 370
371 err = init_new_context(current, mm);
372 if (err)
373 goto err;
374
375 err = __bprm_mm_init(bprm); 371 err = __bprm_mm_init(bprm);
376 if (err) 372 if (err)
377 goto err; 373 goto err;
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
116 num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, 116 num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
117 pages_in_unit - i); 117 pages_in_unit - i);
118 118
119 __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL); 119 __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
120 if (unlikely(!__a1pa)) { 120 if (unlikely(!__a1pa)) {
121 ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", 121 ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
122 num_a1pa); 122 num_a1pa);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <linux/user_namespace.h> 23#include <linux/user_namespace.h>
24#include <linux/shmem_fs.h>
24 25
25#include <asm/poll.h> 26#include <asm/poll.h>
26#include <asm/siginfo.h> 27#include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
336 case F_GETPIPE_SZ: 337 case F_GETPIPE_SZ:
337 err = pipe_fcntl(filp, cmd, arg); 338 err = pipe_fcntl(filp, cmd, arg);
338 break; 339 break;
340 case F_ADD_SEALS:
341 case F_GET_SEALS:
342 err = shmem_fcntl(filp, cmd, arg);
343 break;
339 default: 344 default:
340 break; 345 break;
341 } 346 }
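
F_ADD_SEALS and F_GET_SEALS are routed to shmem_fcntl() because sealing applies only to shmem-backed files created with memfd_create(), merged in the same series. From userspace the pair looks roughly like this; a sketch assuming the syscall number and the MFD_/F_SEAL_ constants from the 3.17 uapi headers:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/memfd.h>        /* MFD_ALLOW_SEALING */

    int fd = syscall(SYS_memfd_create, "buf", MFD_ALLOW_SEALING);
    ftruncate(fd, 4096);

    /* freeze the size and forbid adding further seals */
    fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);

    int seals = fcntl(fd, F_GET_SEALS);  /* returns the current seal mask */
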
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index f36fc010fccb..2923a7bd82ac 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
545 struct dnode *d1; 545 struct dnode *d1;
546 struct quad_buffer_head qbh1; 546 struct quad_buffer_head qbh1;
547 if (hpfs_sb(i->i_sb)->sb_chk) 547 if (hpfs_sb(i->i_sb)->sb_chk)
548 if (up != i->i_ino) { 548 if (up != i->i_ino) {
549 hpfs_error(i->i_sb, 549 hpfs_error(i->i_sb,
550 "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx", 550 "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
551 dno, up, (unsigned long)i->i_ino); 551 dno, up,
552 return; 552 (unsigned long)i->i_ino);
553 } 553 return;
554 }
554 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 555 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
555 d1->up = cpu_to_le32(up); 556 d1->up = cpu_to_le32(up);
556 d1->root_dnode = 1; 557 d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1061 hpfs_brelse4(qbh); 1062 hpfs_brelse4(qbh);
1062 if (hpfs_sb(s)->sb_chk) 1063 if (hpfs_sb(s)->sb_chk)
1063 if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) { 1064 if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
1064 kfree(name2); 1065 kfree(name2);
1065 return NULL; 1066 return NULL;
1066 } 1067 }
1067 goto go_down; 1068 goto go_down;
1068 } 1069 }
diff --git a/fs/inode.c b/fs/inode.c
index 5938f3928944..26753ba7b6d6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
165 mapping->a_ops = &empty_aops; 165 mapping->a_ops = &empty_aops;
166 mapping->host = inode; 166 mapping->host = inode;
167 mapping->flags = 0; 167 mapping->flags = 0;
168 atomic_set(&mapping->i_mmap_writable, 0);
168 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); 169 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
169 mapping->private_data = NULL; 170 mapping->private_data = NULL;
170 mapping->backing_dev_info = &default_backing_dev_info; 171 mapping->backing_dev_info = &default_backing_dev_info;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..f311bf084015 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
158 "zisofs: zisofs_inflate returned" 158 "zisofs: zisofs_inflate returned"
159 " %d, inode = %lu," 159 " %d, inode = %lu,"
160 " page idx = %d, bh idx = %d," 160 " page idx = %d, bh idx = %d,"
161 " avail_in = %d," 161 " avail_in = %ld,"
162 " avail_out = %d\n", 162 " avail_out = %ld\n",
163 zerr, inode->i_ino, curpage, 163 zerr, inode->i_ino, curpage,
164 curbh, stream.avail_in, 164 curbh, stream.avail_in,
165 stream.avail_out); 165 stream.avail_out);
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
94 94
95 while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) { 95 while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
96 def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE); 96 def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
97 def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out); 97 def_strm.avail_in = min_t(unsigned long,
98 jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n", 98 (*sourcelen-def_strm.total_in), def_strm.avail_out);
99 jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
99 def_strm.avail_in, def_strm.avail_out); 100 def_strm.avail_in, def_strm.avail_out);
100 ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH); 101 ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
101 jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", 102 jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
102 def_strm.avail_in, def_strm.avail_out, 103 def_strm.avail_in, def_strm.avail_out,
103 def_strm.total_in, def_strm.total_out); 104 def_strm.total_in, def_strm.total_out);
104 if (ret != Z_OK) { 105 if (ret != Z_OK) {
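
This hunk and the isofs one above share a motivation: the kernel zlib stream counters are longs, so the format strings move to %ld, and min() here would compare an unsigned int against an unsigned long, which the kernel's type-checking min() rejects at build time. min_t() names the comparison type explicitly:

    #include <linux/kernel.h>       /* min(), min_t() */

    static unsigned long clamp_example(unsigned long want, unsigned int room)
    {
            /* min(want, room) trips the distinct-types warning;
             * min_t() casts both operands to the named type first */
            return min_t(unsigned long, want, room);
    }
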
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4bc50dac8e97..742942a983be 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -96,7 +96,7 @@ int minix_new_block(struct inode * inode)
96unsigned long minix_count_free_blocks(struct super_block *sb) 96unsigned long minix_count_free_blocks(struct super_block *sb)
97{ 97{
98 struct minix_sb_info *sbi = minix_sb(sb); 98 struct minix_sb_info *sbi = minix_sb(sb);
99 u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1); 99 u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1;
100 100
101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits) 101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
102 << sbi->s_log_zone_size); 102 << sbi->s_log_zone_size);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f007a3355570..3f57af196a7d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -267,12 +267,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
267 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize); 267 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
268 if (sbi->s_imap_blocks < block) { 268 if (sbi->s_imap_blocks < block) {
269 printk("MINIX-fs: file system does not have enough " 269 printk("MINIX-fs: file system does not have enough "
270 "imap blocks allocated. Refusing to mount\n"); 270 "imap blocks allocated. Refusing to mount.\n");
271 goto out_no_bitmap; 271 goto out_no_bitmap;
272 } 272 }
273 273
274 block = minix_blocks_needed( 274 block = minix_blocks_needed(
275 (sbi->s_nzones - (sbi->s_firstdatazone + 1)), 275 (sbi->s_nzones - sbi->s_firstdatazone + 1),
276 s->s_blocksize); 276 s->s_blocksize);
277 if (sbi->s_zmap_blocks < block) { 277 if (sbi->s_zmap_blocks < block) {
278 printk("MINIX-fs: file system does not have enough " 278 printk("MINIX-fs: file system does not have enough "
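
Both minix hunks correct the same off-by-two in the zone-bitmap size. Data zones run from s_firstdatazone through s_nzones - 1, and the zone map, by minix convention, keeps bit 0 reserved, so the valid-bit count is s_nzones - s_firstdatazone + 1. Worked through with illustrative numbers:

    s_firstdatazone = 8, s_nzones = 1000
    data zones           : 8 .. 999              -> 992 zones
    zone-map bits in use : bit 0 (reserved) + 992 -> 993
    old expression       : 1000 - (8 + 1) = 991    (two bits short)
    new expression       : 1000 - 8 + 1   = 993
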
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
2nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ 2nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
3 btnode.o bmap.o btree.o direct.o dat.o recovery.o \ 3 btnode.o bmap.o btree.o direct.o dat.o recovery.o \
4 the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ 4 the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
5 ifile.o alloc.o gcinode.o ioctl.o 5 ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
320int nilfs_init_gcinode(struct inode *inode); 320int nilfs_init_gcinode(struct inode *inode);
321void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); 321void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
322 322
323/* sysfs.c */
324int __init nilfs_sysfs_init(void);
325void nilfs_sysfs_exit(void);
326int nilfs_sysfs_create_device_group(struct super_block *);
327void nilfs_sysfs_delete_device_group(struct the_nilfs *);
328int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
329void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
330
323/* 331/*
324 * Inodes and files operations 332 * Inodes and files operations
325 */ 333 */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..c519927b7b5e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
1452 if (err) 1452 if (err)
1453 goto fail; 1453 goto fail;
1454 1454
1455 err = register_filesystem(&nilfs_fs_type); 1455 err = nilfs_sysfs_init();
1456 if (err) 1456 if (err)
1457 goto free_cachep; 1457 goto free_cachep;
1458 1458
1459 err = register_filesystem(&nilfs_fs_type);
1460 if (err)
1461 goto deinit_sysfs_entry;
1462
1459 printk(KERN_INFO "NILFS version 2 loaded\n"); 1463 printk(KERN_INFO "NILFS version 2 loaded\n");
1460 return 0; 1464 return 0;
1461 1465
1466deinit_sysfs_entry:
1467 nilfs_sysfs_exit();
1462free_cachep: 1468free_cachep:
1463 nilfs_destroy_cachep(); 1469 nilfs_destroy_cachep();
1464fail: 1470fail:
@@ -1468,6 +1474,7 @@ fail:
1468static void __exit exit_nilfs_fs(void) 1474static void __exit exit_nilfs_fs(void)
1469{ 1475{
1470 nilfs_destroy_cachep(); 1476 nilfs_destroy_cachep();
1477 nilfs_sysfs_exit();
1471 unregister_filesystem(&nilfs_fs_type); 1478 unregister_filesystem(&nilfs_fs_type);
1472} 1479}
1473 1480
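
The init_nilfs_fs() change slots nilfs_sysfs_init() into the usual goto-unwind ladder: each setup step that can fail gets a label that tears down everything before it, in reverse order, and the module exit path mirrors the same sequence. The shape, as a generic sketch rather than the nilfs2 code itself:

    static int __init example_init(void)
    {
            int err;

            err = setup_cache();
            if (err)
                    goto fail;
            err = setup_sysfs();
            if (err)
                    goto free_cache;
            err = register_fs();
            if (err)
                    goto exit_sysfs;
            return 0;

    exit_sysfs:
            teardown_sysfs();       /* undo in reverse order of setup */
    free_cache:
            destroy_cache();
    fail:
            return err;
    }
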
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bbb0dcc35905
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
1/*
2 * sysfs.c - sysfs support implementation.
3 *
4 * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
5 * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
18 */
19
20#include <linux/kobject.h>
21
22#include "nilfs.h"
23#include "mdt.h"
24#include "sufile.h"
25#include "cpfile.h"
26#include "sysfs.h"
27
28/* /sys/fs/<nilfs>/ */
29static struct kset *nilfs_kset;
30
31#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
32 struct tm res; \
33 int count = 0; \
34 time_to_tm(time_t_val, 0, &res); \
35 res.tm_year += 1900; \
36 res.tm_mon += 1; \
37 count = scnprintf(buf, PAGE_SIZE, \
38 "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
39 res.tm_year, res.tm_mon, res.tm_mday, \
40 res.tm_hour, res.tm_min, res.tm_sec);\
41 count; \
42})
43
44#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
45static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
46 struct attribute *attr, char *buf) \
47{ \
48 struct the_nilfs *nilfs = container_of(kobj->parent, \
49 struct the_nilfs, \
50 ns_##parent_name##_kobj); \
51 struct nilfs_##name##_attr *a = container_of(attr, \
52 struct nilfs_##name##_attr, \
53 attr); \
54 return a->show ? a->show(a, nilfs, buf) : 0; \
55} \
56static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
57 struct attribute *attr, \
58 const char *buf, size_t len) \
59{ \
60 struct the_nilfs *nilfs = container_of(kobj->parent, \
61 struct the_nilfs, \
62 ns_##parent_name##_kobj); \
63 struct nilfs_##name##_attr *a = container_of(attr, \
64 struct nilfs_##name##_attr, \
65 attr); \
66 return a->store ? a->store(a, nilfs, buf, len) : 0; \
67} \
68static const struct sysfs_ops nilfs_##name##_attr_ops = { \
69 .show = nilfs_##name##_attr_show, \
70 .store = nilfs_##name##_attr_store, \
71};
72
73#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
74static void nilfs_##name##_attr_release(struct kobject *kobj) \
75{ \
76 struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
77 struct the_nilfs *nilfs = container_of(kobj->parent, \
78 struct the_nilfs, \
79 ns_##parent_name##_kobj); \
80 subgroups = nilfs->ns_##parent_name##_subgroups; \
81 complete(&subgroups->sg_##name##_kobj_unregister); \
82} \
83static struct kobj_type nilfs_##name##_ktype = { \
84 .default_attrs = nilfs_##name##_attrs, \
85 .sysfs_ops = &nilfs_##name##_attr_ops, \
86 .release = nilfs_##name##_attr_release, \
87};
88
89#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
90static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
91{ \
92 struct kobject *parent; \
93 struct kobject *kobj; \
94 struct completion *kobj_unregister; \
95 struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
96 int err; \
97 subgroups = nilfs->ns_##parent_name##_subgroups; \
98 kobj = &subgroups->sg_##name##_kobj; \
99 kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
100 parent = &nilfs->ns_##parent_name##_kobj; \
101 kobj->kset = nilfs_kset; \
102 init_completion(kobj_unregister); \
103 err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
104 #name); \
105 if (err) \
106 return err; \
107 return 0; \
108} \
109static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
110{ \
111 kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
112}
113
114/************************************************************************
115 * NILFS snapshot attrs *
116 ************************************************************************/
117
118static ssize_t
119nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
120 struct nilfs_root *root, char *buf)
121{
122 return snprintf(buf, PAGE_SIZE, "%llu\n",
123 (unsigned long long)atomic64_read(&root->inodes_count));
124}
125
126static ssize_t
127nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
128 struct nilfs_root *root, char *buf)
129{
130 return snprintf(buf, PAGE_SIZE, "%llu\n",
131 (unsigned long long)atomic64_read(&root->blocks_count));
132}
133
134static const char snapshot_readme_str[] =
135 "The group contains details about mounted snapshot.\n\n"
136 "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
137 "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
138
139static ssize_t
140nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
141 struct nilfs_root *root, char *buf)
142{
143 return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
144}
145
146NILFS_SNAPSHOT_RO_ATTR(inodes_count);
147NILFS_SNAPSHOT_RO_ATTR(blocks_count);
148NILFS_SNAPSHOT_RO_ATTR(README);
149
150static struct attribute *nilfs_snapshot_attrs[] = {
151 NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
152 NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
153 NILFS_SNAPSHOT_ATTR_LIST(README),
154 NULL,
155};
156
157static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
158 struct attribute *attr, char *buf)
159{
160 struct nilfs_root *root =
161 container_of(kobj, struct nilfs_root, snapshot_kobj);
162 struct nilfs_snapshot_attr *a =
163 container_of(attr, struct nilfs_snapshot_attr, attr);
164
165 return a->show ? a->show(a, root, buf) : 0;
166}
167
168static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
169 struct attribute *attr,
170 const char *buf, size_t len)
171{
172 struct nilfs_root *root =
173 container_of(kobj, struct nilfs_root, snapshot_kobj);
174 struct nilfs_snapshot_attr *a =
175 container_of(attr, struct nilfs_snapshot_attr, attr);
176
177 return a->store ? a->store(a, root, buf, len) : 0;
178}
179
180static void nilfs_snapshot_attr_release(struct kobject *kobj)
181{
182 struct nilfs_root *root = container_of(kobj, struct nilfs_root,
183 snapshot_kobj);
184 complete(&root->snapshot_kobj_unregister);
185}
186
187static const struct sysfs_ops nilfs_snapshot_attr_ops = {
188 .show = nilfs_snapshot_attr_show,
189 .store = nilfs_snapshot_attr_store,
190};
191
192static struct kobj_type nilfs_snapshot_ktype = {
193 .default_attrs = nilfs_snapshot_attrs,
194 .sysfs_ops = &nilfs_snapshot_attr_ops,
195 .release = nilfs_snapshot_attr_release,
196};
197
198int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
199{
200 struct the_nilfs *nilfs;
201 struct kobject *parent;
202 int err;
203
204 nilfs = root->nilfs;
205 parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
206 root->snapshot_kobj.kset = nilfs_kset;
207 init_completion(&root->snapshot_kobj_unregister);
208
209 if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
210 err = kobject_init_and_add(&root->snapshot_kobj,
211 &nilfs_snapshot_ktype,
212 &nilfs->ns_dev_kobj,
213 "current_checkpoint");
214 } else {
215 err = kobject_init_and_add(&root->snapshot_kobj,
216 &nilfs_snapshot_ktype,
217 parent,
218 "%llu", root->cno);
219 }
220
221 if (err)
222 return err;
223
224 return 0;
225}
226
227void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
228{
229 kobject_del(&root->snapshot_kobj);
230}
231
232/************************************************************************
233 * NILFS mounted snapshots attrs *
234 ************************************************************************/
235
236static const char mounted_snapshots_readme_str[] =
237 "The mounted_snapshots group contains group for\n"
238 "every mounted snapshot.\n";
239
240static ssize_t
241nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
242 struct the_nilfs *nilfs, char *buf)
243{
244 return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
245}
246
247NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
248
249static struct attribute *nilfs_mounted_snapshots_attrs[] = {
250 NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
251 NULL,
252};
253
254NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
255NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
256NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
257
258/************************************************************************
259 * NILFS checkpoints attrs *
260 ************************************************************************/
261
262static ssize_t
263nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
264 struct the_nilfs *nilfs,
265 char *buf)
266{
267 __u64 ncheckpoints;
268 struct nilfs_cpstat cpstat;
269 int err;
270
271 down_read(&nilfs->ns_segctor_sem);
272 err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
273 up_read(&nilfs->ns_segctor_sem);
274 if (err < 0) {
275 printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
276 err);
277 return err;
278 }
279
280 ncheckpoints = cpstat.cs_ncps;
281
282 return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
283}
284
285static ssize_t
286nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
287 struct the_nilfs *nilfs,
288 char *buf)
289{
290 __u64 nsnapshots;
291 struct nilfs_cpstat cpstat;
292 int err;
293
294 down_read(&nilfs->ns_segctor_sem);
295 err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
296 up_read(&nilfs->ns_segctor_sem);
297 if (err < 0) {
298 printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
299 err);
300 return err;
301 }
302
303 nsnapshots = cpstat.cs_nsss;
304
305 return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
306}
307
308static ssize_t
309nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
310 struct the_nilfs *nilfs,
311 char *buf)
312{
313 __u64 last_cno;
314
315 spin_lock(&nilfs->ns_last_segment_lock);
316 last_cno = nilfs->ns_last_cno;
317 spin_unlock(&nilfs->ns_last_segment_lock);
318
319 return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
320}
321
322static ssize_t
323nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
324 struct the_nilfs *nilfs,
325 char *buf)
326{
327 __u64 cno;
328
329 down_read(&nilfs->ns_sem);
330 cno = nilfs->ns_cno;
331 up_read(&nilfs->ns_sem);
332
333 return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
334}
335
336static const char checkpoints_readme_str[] =
337 "The checkpoints group contains attributes that describe\n"
338 "details about the volume's checkpoints.\n\n"
339 "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
340 "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
341 "(3) last_seg_checkpoint\n"
342 "\tshow checkpoint number of the latest segment.\n\n"
343 "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
344
345static ssize_t
346nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
347 struct the_nilfs *nilfs, char *buf)
348{
349 return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
350}
351
352NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
353NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
354NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
355NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
356NILFS_CHECKPOINTS_RO_ATTR(README);
357
358static struct attribute *nilfs_checkpoints_attrs[] = {
359 NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
360 NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
361 NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
362 NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
363 NILFS_CHECKPOINTS_ATTR_LIST(README),
364 NULL,
365};
366
367NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
368NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
369NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
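The NILFS_DEV_INT_GROUP_OPS/TYPE/FNS macros are defined earlier in sysfs.c, outside this excerpt. Judging from how they are used, FNS presumably generates the nilfs_sysfs_create_checkpoints_group()/nilfs_sysfs_delete_checkpoints_group() pair called from nilfs_sysfs_create_device_group() later in this file. A rough sketch of what such a generated create function would look like (assumed shape, not the macro's verbatim expansion):

static int nilfs_sysfs_create_checkpoints_group(struct the_nilfs *nilfs)
{
	struct nilfs_sysfs_dev_subgroups *sg = nilfs->ns_dev_subgroups;

	/* register /sys/fs/<nilfs>/<device>/checkpoints */
	sg->sg_checkpoints_kobj.kset = nilfs_kset;
	init_completion(&sg->sg_checkpoints_kobj_unregister);
	return kobject_init_and_add(&sg->sg_checkpoints_kobj,
				    &nilfs_checkpoints_ktype,
				    &nilfs->ns_dev_kobj, "checkpoints");
}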
370
371/************************************************************************
372 * NILFS segments attrs *
373 ************************************************************************/
374
375static ssize_t
376nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
377 struct the_nilfs *nilfs,
378 char *buf)
379{
380 return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
381}
382
383static ssize_t
384nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
385 struct the_nilfs *nilfs,
386 char *buf)
387{
388 return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
389}
390
391static ssize_t
392nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
393 struct the_nilfs *nilfs,
394 char *buf)
395{
396 unsigned long ncleansegs;
397
398 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
399 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
400 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
401
402 return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
403}
404
405static ssize_t
406nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
407 struct the_nilfs *nilfs,
408 char *buf)
409{
410 struct nilfs_sustat sustat;
411 int err;
412
413 down_read(&nilfs->ns_segctor_sem);
414 err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
415 up_read(&nilfs->ns_segctor_sem);
416 if (err < 0) {
417 printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
418 err);
419 return err;
420 }
421
422 return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
423}
424
425static const char segments_readme_str[] =
426 "The segments group contains attributes that describe\n"
427 "details about the volume's segments.\n\n"
428 "(1) segments_number\n\tshow number of segments on volume.\n\n"
429 "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
430 "(3) clean_segments\n\tshow count of clean segments.\n\n"
431 "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
432
433static ssize_t
434nilfs_segments_README_show(struct nilfs_segments_attr *attr,
435 struct the_nilfs *nilfs,
436 char *buf)
437{
438 return snprintf(buf, PAGE_SIZE, segments_readme_str);
439}
440
441NILFS_SEGMENTS_RO_ATTR(segments_number);
442NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
443NILFS_SEGMENTS_RO_ATTR(clean_segments);
444NILFS_SEGMENTS_RO_ATTR(dirty_segments);
445NILFS_SEGMENTS_RO_ATTR(README);
446
447static struct attribute *nilfs_segments_attrs[] = {
448 NILFS_SEGMENTS_ATTR_LIST(segments_number),
449 NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
450 NILFS_SEGMENTS_ATTR_LIST(clean_segments),
451 NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
452 NILFS_SEGMENTS_ATTR_LIST(README),
453 NULL,
454};
455
456NILFS_DEV_INT_GROUP_OPS(segments, dev);
457NILFS_DEV_INT_GROUP_TYPE(segments, dev);
458NILFS_DEV_INT_GROUP_FNS(segments, dev);
459
460/************************************************************************
461 * NILFS segctor attrs *
462 ************************************************************************/
463
464static ssize_t
465nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
466 struct the_nilfs *nilfs,
467 char *buf)
468{
469 sector_t last_pseg;
470
471 spin_lock(&nilfs->ns_last_segment_lock);
472 last_pseg = nilfs->ns_last_pseg;
473 spin_unlock(&nilfs->ns_last_segment_lock);
474
475 return snprintf(buf, PAGE_SIZE, "%llu\n",
476 (unsigned long long)last_pseg);
477}
478
479static ssize_t
480nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
481 struct the_nilfs *nilfs,
482 char *buf)
483{
484 u64 last_seq;
485
486 spin_lock(&nilfs->ns_last_segment_lock);
487 last_seq = nilfs->ns_last_seq;
488 spin_unlock(&nilfs->ns_last_segment_lock);
489
490 return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
491}
492
493static ssize_t
494nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
495 struct the_nilfs *nilfs,
496 char *buf)
497{
498 __u64 last_cno;
499
500 spin_lock(&nilfs->ns_last_segment_lock);
501 last_cno = nilfs->ns_last_cno;
502 spin_unlock(&nilfs->ns_last_segment_lock);
503
504 return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
505}
506
507static ssize_t
508nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
509 struct the_nilfs *nilfs,
510 char *buf)
511{
512 u64 seg_seq;
513
514 down_read(&nilfs->ns_sem);
515 seg_seq = nilfs->ns_seg_seq;
516 up_read(&nilfs->ns_sem);
517
518 return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
519}
520
521static ssize_t
522nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
523 struct the_nilfs *nilfs,
524 char *buf)
525{
526 __u64 segnum;
527
528 down_read(&nilfs->ns_sem);
529 segnum = nilfs->ns_segnum;
530 up_read(&nilfs->ns_sem);
531
532 return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
533}
534
535static ssize_t
536nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
537 struct the_nilfs *nilfs,
538 char *buf)
539{
540 __u64 nextnum;
541
542 down_read(&nilfs->ns_sem);
543 nextnum = nilfs->ns_nextnum;
544 up_read(&nilfs->ns_sem);
545
546 return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
547}
548
549static ssize_t
550nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
551 struct the_nilfs *nilfs,
552 char *buf)
553{
554 unsigned long pseg_offset;
555
556 down_read(&nilfs->ns_sem);
557 pseg_offset = nilfs->ns_pseg_offset;
558 up_read(&nilfs->ns_sem);
559
560 return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
561}
562
563static ssize_t
564nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
565 struct the_nilfs *nilfs,
566 char *buf)
567{
568 __u64 cno;
569
570 down_read(&nilfs->ns_sem);
571 cno = nilfs->ns_cno;
572 up_read(&nilfs->ns_sem);
573
574 return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
575}
576
577static ssize_t
578nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
579 struct the_nilfs *nilfs,
580 char *buf)
581{
582 time_t ctime;
583
584 down_read(&nilfs->ns_sem);
585 ctime = nilfs->ns_ctime;
586 up_read(&nilfs->ns_sem);
587
588 return NILFS_SHOW_TIME(ctime, buf);
589}
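NILFS_SHOW_TIME is defined near the top of sysfs.c, outside this excerpt; it renders a time_t into buf in human-readable form and yields the byte count, which is why the show functions can return it directly. A plausible implementation, assumed here for illustration, breaks the value down with time_to_tm():

#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
	struct tm res; \
	int count; \
	/* convert seconds-since-epoch into calendar fields (UTC) */ \
	time_to_tm(time_t_val, 0, &res); \
	res.tm_year += 1900; \
	res.tm_mon += 1; \
	count = scnprintf(buf, PAGE_SIZE, "%ld-%d-%d %d:%d:%d\n", \
			  res.tm_year, res.tm_mon, res.tm_mday, \
			  res.tm_hour, res.tm_min, res.tm_sec); \
	count; \
})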
590
591static ssize_t
592nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
593 struct the_nilfs *nilfs,
594 char *buf)
595{
596 time_t ctime;
597
598 down_read(&nilfs->ns_sem);
599 ctime = nilfs->ns_ctime;
600 up_read(&nilfs->ns_sem);
601
602 return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
603}
604
605static ssize_t
606nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
607 struct the_nilfs *nilfs,
608 char *buf)
609{
610 time_t nongc_ctime;
611
612 down_read(&nilfs->ns_sem);
613 nongc_ctime = nilfs->ns_nongc_ctime;
614 up_read(&nilfs->ns_sem);
615
616 return NILFS_SHOW_TIME(nongc_ctime, buf);
617}
618
619static ssize_t
620nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
621 struct the_nilfs *nilfs,
622 char *buf)
623{
624 time_t nongc_ctime;
625
626 down_read(&nilfs->ns_sem);
627 nongc_ctime = nilfs->ns_nongc_ctime;
628 up_read(&nilfs->ns_sem);
629
630 return snprintf(buf, PAGE_SIZE, "%llu\n",
631 (unsigned long long)nongc_ctime);
632}
633
634static ssize_t
635nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
636 struct the_nilfs *nilfs,
637 char *buf)
638{
639 u32 ndirtyblks;
640
641 down_read(&nilfs->ns_sem);
642 ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
643 up_read(&nilfs->ns_sem);
644
645 return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
646}
647
648static const char segctor_readme_str[] =
649 "The segctor group contains attributes that describe\n"
650 "segctor thread activity details.\n\n"
651 "(1) last_pseg_block\n"
652 "\tshow start block number of the latest segment.\n\n"
653 "(2) last_seg_sequence\n"
654 "\tshow sequence value of the latest segment.\n\n"
655 "(3) last_seg_checkpoint\n"
656 "\tshow checkpoint number of the latest segment.\n\n"
657 "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
658 "(5) current_last_full_seg\n"
659 "\tshow index number of the latest full segment.\n\n"
660 "(6) next_full_seg\n"
661 "\tshow index number of the full segment index to be used next.\n\n"
662 "(7) next_pseg_offset\n"
663 "\tshow offset of next partial segment in the current full segment.\n\n"
664 "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
665 "(9) last_seg_write_time\n"
666 "\tshow write time of the last segment in human-readable format.\n\n"
667 "(10) last_seg_write_time_secs\n"
668 "\tshow write time of the last segment in seconds.\n\n"
669 "(11) last_nongc_write_time\n"
670 "\tshow write time of the last segment not for cleaner operation "
671 "in human-readable format.\n\n"
672 "(12) last_nongc_write_time_secs\n"
673 "\tshow write time of the last segment not for cleaner operation "
674 "in seconds.\n\n"
675 "(13) dirty_data_blocks_count\n"
676 "\tshow number of dirty data blocks.\n\n";
677
678static ssize_t
679nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
680 struct the_nilfs *nilfs, char *buf)
681{
682 return snprintf(buf, PAGE_SIZE, segctor_readme_str);
683}
684
685NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
686NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
687NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
688NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
689NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
690NILFS_SEGCTOR_RO_ATTR(next_full_seg);
691NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
692NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
693NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
694NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
695NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
696NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
697NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
698NILFS_SEGCTOR_RO_ATTR(README);
699
700static struct attribute *nilfs_segctor_attrs[] = {
701 NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
702 NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
703 NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
704 NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
705 NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
706 NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
707 NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
708 NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
709 NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
710 NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
711 NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
712 NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
713 NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
714 NILFS_SEGCTOR_ATTR_LIST(README),
715 NULL,
716};
717
718NILFS_DEV_INT_GROUP_OPS(segctor, dev);
719NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
720NILFS_DEV_INT_GROUP_FNS(segctor, dev);
721
722/************************************************************************
723 * NILFS superblock attrs *
724 ************************************************************************/
725
726static ssize_t
727nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
728 struct the_nilfs *nilfs,
729 char *buf)
730{
731 time_t sbwtime;
732
733 down_read(&nilfs->ns_sem);
734 sbwtime = nilfs->ns_sbwtime;
735 up_read(&nilfs->ns_sem);
736
737 return NILFS_SHOW_TIME(sbwtime, buf);
738}
739
740static ssize_t
741nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
742 struct the_nilfs *nilfs,
743 char *buf)
744{
745 time_t sbwtime;
746
747 down_read(&nilfs->ns_sem);
748 sbwtime = nilfs->ns_sbwtime;
749 up_read(&nilfs->ns_sem);
750
751 return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
752}
753
754static ssize_t
755nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
756 struct the_nilfs *nilfs,
757 char *buf)
758{
759 unsigned sbwcount;
760
761 down_read(&nilfs->ns_sem);
762 sbwcount = nilfs->ns_sbwcount;
763 up_read(&nilfs->ns_sem);
764
765 return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
766}
767
768static ssize_t
769nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
770 struct the_nilfs *nilfs,
771 char *buf)
772{
773 unsigned sb_update_freq;
774
775 down_read(&nilfs->ns_sem);
776 sb_update_freq = nilfs->ns_sb_update_freq;
777 up_read(&nilfs->ns_sem);
778
779 return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
780}
781
782static ssize_t
783nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
784 struct the_nilfs *nilfs,
785 const char *buf, size_t count)
786{
787 unsigned val;
788 int err;
789
790 err = kstrtouint(skip_spaces(buf), 0, &val);
791 if (err) {
792 printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
793 err);
794 return err;
795 }
796
797 if (val < NILFS_SB_FREQ) {
798 val = NILFS_SB_FREQ;
799 printk(KERN_WARNING "NILFS: superblock update frequency cannot be less than 10 seconds\n");
800 }
801
802 down_write(&nilfs->ns_sem);
803 nilfs->ns_sb_update_freq = val;
804 up_write(&nilfs->ns_sem);
805
806 return count;
807}
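Note that the store handler returns count on success, per the sysfs contract, and silently clamps values below NILFS_SB_FREQ. A userspace sketch that exercises it (the sdb1 path is an assumption; substitute the device a volume is actually mounted from):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/fs/nilfs2/sdb1/superblock/sb_update_frequency",
		      O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "60", 2);	/* values below NILFS_SB_FREQ are clamped */
	close(fd);
	return 0;
}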
808
809static const char sb_readme_str[] =
810 "The superblock group contains attributes that describe\n"
811 "the superblock's details.\n\n"
812 "(1) sb_write_time\n\tshow previous write time of super block "
813 "in human-readable format.\n\n"
814 "(2) sb_write_time_secs\n\tshow previous write time of super block "
815 "in seconds.\n\n"
816 "(3) sb_write_count\n\tshow write count of super block.\n\n"
817 "(4) sb_update_frequency\n"
818 "\tshow/set the interval of periodic superblock updates (in seconds).\n\n"
819 "\tYou can set the preferred update frequency with:\n\n"
820 "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
821
822static ssize_t
823nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
824 struct the_nilfs *nilfs, char *buf)
825{
826 return snprintf(buf, PAGE_SIZE, sb_readme_str);
827}
828
829NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
830NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
831NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
832NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
833NILFS_SUPERBLOCK_RO_ATTR(README);
834
835static struct attribute *nilfs_superblock_attrs[] = {
836 NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
837 NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
838 NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
839 NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
840 NILFS_SUPERBLOCK_ATTR_LIST(README),
841 NULL,
842};
843
844NILFS_DEV_INT_GROUP_OPS(superblock, dev);
845NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
846NILFS_DEV_INT_GROUP_FNS(superblock, dev);
847
848/************************************************************************
849 * NILFS device attrs *
850 ************************************************************************/
851
852static
853ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
854 struct the_nilfs *nilfs,
855 char *buf)
856{
857 struct nilfs_super_block **sbp = nilfs->ns_sbp;
858 u32 major = le32_to_cpu(sbp[0]->s_rev_level);
859 u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
860
861 return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
862}
863
864static
865ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
866 struct the_nilfs *nilfs,
867 char *buf)
868{
869 return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
870}
871
872static
873ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
874 struct the_nilfs *nilfs,
875 char *buf)
876{
877 struct nilfs_super_block **sbp = nilfs->ns_sbp;
878 u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
879
880 return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
881}
882
883static
884ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
885 struct the_nilfs *nilfs,
886 char *buf)
887{
888 sector_t free_blocks = 0;
889
890 nilfs_count_free_blocks(nilfs, &free_blocks);
891 return snprintf(buf, PAGE_SIZE, "%llu\n",
892 (unsigned long long)free_blocks);
893}
894
895static
896ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
897 struct the_nilfs *nilfs,
898 char *buf)
899{
900 struct nilfs_super_block **sbp = nilfs->ns_sbp;
901
902 return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
903}
904
905static
906ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
907 struct the_nilfs *nilfs,
908 char *buf)
909{
910 struct nilfs_super_block **sbp = nilfs->ns_sbp;
911
912 return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
913 sbp[0]->s_volume_name);
914}
915
916static const char dev_readme_str[] =
917 "The <device> group contains attributes that describe the file system\n"
918 "partition's details.\n\n"
919 "(1) revision\n\tshow NILFS file system revision.\n\n"
920 "(2) blocksize\n\tshow volume block size in bytes.\n\n"
921 "(3) device_size\n\tshow volume size in bytes.\n\n"
922 "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
923 "(5) uuid\n\tshow volume's UUID.\n\n"
924 "(6) volume_name\n\tshow volume's name.\n\n";
925
926static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
927 struct the_nilfs *nilfs,
928 char *buf)
929{
930 return snprintf(buf, PAGE_SIZE, dev_readme_str);
931}
932
933NILFS_DEV_RO_ATTR(revision);
934NILFS_DEV_RO_ATTR(blocksize);
935NILFS_DEV_RO_ATTR(device_size);
936NILFS_DEV_RO_ATTR(free_blocks);
937NILFS_DEV_RO_ATTR(uuid);
938NILFS_DEV_RO_ATTR(volume_name);
939NILFS_DEV_RO_ATTR(README);
940
941static struct attribute *nilfs_dev_attrs[] = {
942 NILFS_DEV_ATTR_LIST(revision),
943 NILFS_DEV_ATTR_LIST(blocksize),
944 NILFS_DEV_ATTR_LIST(device_size),
945 NILFS_DEV_ATTR_LIST(free_blocks),
946 NILFS_DEV_ATTR_LIST(uuid),
947 NILFS_DEV_ATTR_LIST(volume_name),
948 NILFS_DEV_ATTR_LIST(README),
949 NULL,
950};
951
952static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
953 struct attribute *attr, char *buf)
954{
955 struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
956 ns_dev_kobj);
957 struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
958 attr);
959
960 return a->show ? a->show(a, nilfs, buf) : 0;
961}
962
963static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
964 struct attribute *attr,
965 const char *buf, size_t len)
966{
967 struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
968 ns_dev_kobj);
969 struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
970 attr);
971
972 return a->store ? a->store(a, nilfs, buf, len) : 0;
973}
974
975static void nilfs_dev_attr_release(struct kobject *kobj)
976{
977 struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
978 ns_dev_kobj);
979 complete(&nilfs->ns_dev_kobj_unregister);
980}
981
982static const struct sysfs_ops nilfs_dev_attr_ops = {
983 .show = nilfs_dev_attr_show,
984 .store = nilfs_dev_attr_store,
985};
986
987static struct kobj_type nilfs_dev_ktype = {
988 .default_attrs = nilfs_dev_attrs,
989 .sysfs_ops = &nilfs_dev_attr_ops,
990 .release = nilfs_dev_attr_release,
991};
992
993int nilfs_sysfs_create_device_group(struct super_block *sb)
994{
995 struct the_nilfs *nilfs = sb->s_fs_info;
996 size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
997 int err;
998
999 nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
1000 if (unlikely(!nilfs->ns_dev_subgroups)) {
1001 err = -ENOMEM;
1002 printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
1003 goto failed_create_device_group;
1004 }
1005
1006 nilfs->ns_dev_kobj.kset = nilfs_kset;
1007 init_completion(&nilfs->ns_dev_kobj_unregister);
1008 err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
1009 "%s", sb->s_id);
1010 if (err)
1011 goto free_dev_subgroups;
1012
1013 err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
1014 if (err)
1015 goto cleanup_dev_kobject;
1016
1017 err = nilfs_sysfs_create_checkpoints_group(nilfs);
1018 if (err)
1019 goto delete_mounted_snapshots_group;
1020
1021 err = nilfs_sysfs_create_segments_group(nilfs);
1022 if (err)
1023 goto delete_checkpoints_group;
1024
1025 err = nilfs_sysfs_create_superblock_group(nilfs);
1026 if (err)
1027 goto delete_segments_group;
1028
1029 err = nilfs_sysfs_create_segctor_group(nilfs);
1030 if (err)
1031 goto delete_superblock_group;
1032
1033 return 0;
1034
1035delete_superblock_group:
1036 nilfs_sysfs_delete_superblock_group(nilfs);
1037
1038delete_segments_group:
1039 nilfs_sysfs_delete_segments_group(nilfs);
1040
1041delete_checkpoints_group:
1042 nilfs_sysfs_delete_checkpoints_group(nilfs);
1043
1044delete_mounted_snapshots_group:
1045 nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
1046
1047cleanup_dev_kobject:
1048 kobject_del(&nilfs->ns_dev_kobj);
1049
1050free_dev_subgroups:
1051 kfree(nilfs->ns_dev_subgroups);
1052
1053failed_create_device_group:
1054 return err;
1055}
1056
1057void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
1058{
1059 nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
1060 nilfs_sysfs_delete_checkpoints_group(nilfs);
1061 nilfs_sysfs_delete_segments_group(nilfs);
1062 nilfs_sysfs_delete_superblock_group(nilfs);
1063 nilfs_sysfs_delete_segctor_group(nilfs);
1064 kobject_del(&nilfs->ns_dev_kobj);
1065 kfree(nilfs->ns_dev_subgroups);
1066}
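Taken together, the create/delete device group functions give each mounted volume the following sysfs layout (paths as documented in the sysfs.h comments below):

/sys/fs/nilfs2/<device>/
	superblock/
	segctor/
	segments/
	checkpoints/
	mounted_snapshots/<snapshot checkpoint number>/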
1067
1068/************************************************************************
1069 * NILFS feature attrs *
1070 ************************************************************************/
1071
1072static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
1073 struct attribute *attr, char *buf)
1074{
1075 return snprintf(buf, PAGE_SIZE, "%d.%d\n",
1076 NILFS_CURRENT_REV, NILFS_MINOR_REV);
1077}
1078
1079static const char features_readme_str[] =
1080 "The features group contains attributes that describe NILFS file\n"
1081 "system driver features.\n\n"
1082 "(1) revision\n\tshow current revision of NILFS file system driver.\n";
1083
1084static ssize_t nilfs_feature_README_show(struct kobject *kobj,
1085 struct attribute *attr,
1086 char *buf)
1087{
1088 return snprintf(buf, PAGE_SIZE, features_readme_str);
1089}
1090
1091NILFS_FEATURE_RO_ATTR(revision);
1092NILFS_FEATURE_RO_ATTR(README);
1093
1094static struct attribute *nilfs_feature_attrs[] = {
1095 NILFS_FEATURE_ATTR_LIST(revision),
1096 NILFS_FEATURE_ATTR_LIST(README),
1097 NULL,
1098};
1099
1100static const struct attribute_group nilfs_feature_attr_group = {
1101 .name = "features",
1102 .attrs = nilfs_feature_attrs,
1103};
1104
1105int __init nilfs_sysfs_init(void)
1106{
1107 int err;
1108
1109 nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
1110 if (!nilfs_kset) {
1111 err = -ENOMEM;
1112 printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
1113 err);
1114 goto failed_sysfs_init;
1115 }
1116
1117 err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
1118 if (unlikely(err)) {
1119 printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
1120 err);
1121 goto cleanup_sysfs_init;
1122 }
1123
1124 return 0;
1125
1126cleanup_sysfs_init:
1127 kset_unregister(nilfs_kset);
1128
1129failed_sysfs_init:
1130 return err;
1131}
1132
1133void nilfs_sysfs_exit(void)
1134{
1135 sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
1136 kset_unregister(nilfs_kset);
1137}
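Once nilfs_sysfs_init() has run, the driver-wide feature attributes are visible even with no volume mounted. A userspace sketch, assuming the module is loaded:

#include <stdio.h>

int main(void)
{
	char rev[16];
	FILE *f = fopen("/sys/fs/nilfs2/features/revision", "r");

	if (!f)
		return 1;
	if (fgets(rev, sizeof(rev), f))
		printf("NILFS driver revision: %s", rev);
	fclose(f);
	return 0;
}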
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
1/*
2 * sysfs.h - sysfs support declarations.
3 *
4 * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
5 * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
18 */
19
20#ifndef _NILFS_SYSFS_H
21#define _NILFS_SYSFS_H
22
23#include <linux/sysfs.h>
24
25#define NILFS_ROOT_GROUP_NAME "nilfs2"
26
27/*
28 * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
29 * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
30 * @sg_superblock_kobj_unregister: completion state
31 * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
32 * @sg_segctor_kobj_unregister: completion state
33 * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
34 * @sg_mounted_snapshots_kobj_unregister: completion state
35 * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
36 * @sg_checkpoints_kobj_unregister: completion state
37 * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
38 * @sg_segments_kobj_unregister: completion state
39 */
40struct nilfs_sysfs_dev_subgroups {
41 /* /sys/fs/<nilfs>/<device>/superblock */
42 struct kobject sg_superblock_kobj;
43 struct completion sg_superblock_kobj_unregister;
44
45 /* /sys/fs/<nilfs>/<device>/segctor */
46 struct kobject sg_segctor_kobj;
47 struct completion sg_segctor_kobj_unregister;
48
49 /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
50 struct kobject sg_mounted_snapshots_kobj;
51 struct completion sg_mounted_snapshots_kobj_unregister;
52
53 /* /sys/fs/<nilfs>/<device>/checkpoints */
54 struct kobject sg_checkpoints_kobj;
55 struct completion sg_checkpoints_kobj_unregister;
56
57 /* /sys/fs/<nilfs>/<device>/segments */
58 struct kobject sg_segments_kobj;
59 struct completion sg_segments_kobj_unregister;
60};
61
62#define NILFS_COMMON_ATTR_STRUCT(name) \
63struct nilfs_##name##_attr { \
64 struct attribute attr; \
65 ssize_t (*show)(struct kobject *, struct attribute *, \
66 char *); \
67 ssize_t (*store)(struct kobject *, struct attribute *, \
68 const char *, size_t); \
69};
70
71NILFS_COMMON_ATTR_STRUCT(feature);
72
73#define NILFS_DEV_ATTR_STRUCT(name) \
74struct nilfs_##name##_attr { \
75 struct attribute attr; \
76 ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
77 char *); \
78 ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
79 const char *, size_t); \
80};
81
82NILFS_DEV_ATTR_STRUCT(dev);
83NILFS_DEV_ATTR_STRUCT(segments);
84NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
85NILFS_DEV_ATTR_STRUCT(checkpoints);
86NILFS_DEV_ATTR_STRUCT(superblock);
87NILFS_DEV_ATTR_STRUCT(segctor);
88
89#define NILFS_CP_ATTR_STRUCT(name) \
90struct nilfs_##name##_attr { \
91 struct attribute attr; \
92 ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
93 char *); \
94 ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
95 const char *, size_t); \
96};
97
98NILFS_CP_ATTR_STRUCT(snapshot);
99
100#define NILFS_ATTR(type, name, mode, show, store) \
101 static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
102 __ATTR(name, mode, show, store)
103
104#define NILFS_INFO_ATTR(type, name) \
105 NILFS_ATTR(type, name, 0444, NULL, NULL)
106#define NILFS_RO_ATTR(type, name) \
107 NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
108#define NILFS_RW_ATTR(type, name) \
109 NILFS_ATTR(type, name, 0644, \
110 nilfs_##type##_##name##_show, \
111 nilfs_##type##_##name##_store)
112
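For example, NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency) below expands through NILFS_RW_ATTR() and NILFS_ATTR() to:

static struct nilfs_superblock_attr nilfs_superblock_attr_sb_update_frequency =
	__ATTR(sb_update_frequency, 0644,
	       nilfs_superblock_sb_update_frequency_show,
	       nilfs_superblock_sb_update_frequency_store);

which wires the attribute to exactly the pair of handlers defined in sysfs.c above.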
113#define NILFS_FEATURE_INFO_ATTR(name) \
114 NILFS_INFO_ATTR(feature, name)
115#define NILFS_FEATURE_RO_ATTR(name) \
116 NILFS_RO_ATTR(feature, name)
117#define NILFS_FEATURE_RW_ATTR(name) \
118 NILFS_RW_ATTR(feature, name)
119
120#define NILFS_DEV_INFO_ATTR(name) \
121 NILFS_INFO_ATTR(dev, name)
122#define NILFS_DEV_RO_ATTR(name) \
123 NILFS_RO_ATTR(dev, name)
124#define NILFS_DEV_RW_ATTR(name) \
125 NILFS_RW_ATTR(dev, name)
126
127#define NILFS_SEGMENTS_RO_ATTR(name) \
128 NILFS_RO_ATTR(segments, name)
129#define NILFS_SEGMENTS_RW_ATTR(name) \
130 NILFS_RW_ATTR(segs_info, name)
131
132#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
133 NILFS_RO_ATTR(mounted_snapshots, name)
134
135#define NILFS_CHECKPOINTS_RO_ATTR(name) \
136 NILFS_RO_ATTR(checkpoints, name)
137#define NILFS_CHECKPOINTS_RW_ATTR(name) \
138 NILFS_RW_ATTR(checkpoints, name)
139
140#define NILFS_SNAPSHOT_INFO_ATTR(name) \
141 NILFS_INFO_ATTR(snapshot, name)
142#define NILFS_SNAPSHOT_RO_ATTR(name) \
143 NILFS_RO_ATTR(snapshot, name)
144#define NILFS_SNAPSHOT_RW_ATTR(name) \
145 NILFS_RW_ATTR(snapshot, name)
146
147#define NILFS_SUPERBLOCK_RO_ATTR(name) \
148 NILFS_RO_ATTR(superblock, name)
149#define NILFS_SUPERBLOCK_RW_ATTR(name) \
150 NILFS_RW_ATTR(superblock, name)
151
152#define NILFS_SEGCTOR_INFO_ATTR(name) \
153 NILFS_INFO_ATTR(segctor, name)
154#define NILFS_SEGCTOR_RO_ATTR(name) \
155 NILFS_RO_ATTR(segctor, name)
156#define NILFS_SEGCTOR_RW_ATTR(name) \
157 NILFS_RW_ATTR(segctor, name)
158
159#define NILFS_FEATURE_ATTR_LIST(name) \
160 (&nilfs_feature_attr_##name.attr)
161#define NILFS_DEV_ATTR_LIST(name) \
162 (&nilfs_dev_attr_##name.attr)
163#define NILFS_SEGMENTS_ATTR_LIST(name) \
164 (&nilfs_segments_attr_##name.attr)
165#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
166 (&nilfs_mounted_snapshots_attr_##name.attr)
167#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
168 (&nilfs_checkpoints_attr_##name.attr)
169#define NILFS_SNAPSHOT_ATTR_LIST(name) \
170 (&nilfs_snapshot_attr_##name.attr)
171#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
172 (&nilfs_superblock_attr_##name.attr)
173#define NILFS_SEGCTOR_ATTR_LIST(name) \
174 (&nilfs_segctor_attr_##name.attr)
175
176#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	nilfs->ns_cptree = RB_ROOT;
 	spin_lock_init(&nilfs->ns_cptree_lock);
 	init_rwsem(&nilfs->ns_segctor_sem);
+	nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
 
 	return nilfs;
 }
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
 {
 	might_sleep();
 	if (nilfs_init(nilfs)) {
+		nilfs_sysfs_delete_device_group(nilfs);
 		brelse(nilfs->ns_sbh[0]);
 		brelse(nilfs->ns_sbh[1]);
 	}
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
 	if (err)
 		goto failed_sbh;
 
+	err = nilfs_sysfs_create_device_group(sb);
+	if (err)
+		goto failed_sbh;
+
 	set_nilfs_init(nilfs);
 	err = 0;
  out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 {
 	struct rb_node **p, *parent;
 	struct nilfs_root *root, *new;
+	int err;
 
 	root = nilfs_lookup_root(nilfs, cno);
 	if (root)
 		return root;
 
-	new = kmalloc(sizeof(*root), GFP_KERNEL);
+	new = kzalloc(sizeof(*root), GFP_KERNEL);
 	if (!new)
 		return NULL;
 
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 
 	spin_unlock(&nilfs->ns_cptree_lock);
 
+	err = nilfs_sysfs_create_snapshot_group(new);
+	if (err) {
+		kfree(new);
+		new = NULL;
+	}
+
 	return new;
 }
 
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
 	if (atomic_dec_and_test(&root->count)) {
 		struct the_nilfs *nilfs = root->nilfs;
 
+		nilfs_sysfs_delete_snapshot_group(root);
+
 		spin_lock(&nilfs->ns_cptree_lock);
 		rb_erase(&root->rb_node, &nilfs->ns_cptree);
 		spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 
 struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
 
 /* the_nilfs struct */
 enum {
@@ -54,6 +55,7 @@ enum {
  * @ns_sbwcount: write count of super block
  * @ns_sbsize: size of valid data in super block
  * @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
  * @ns_seg_seq: segment sequence counter
  * @ns_segnum: index number of the latest full segment.
  * @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
  * @ns_inode_size: size of on-disk inode
  * @ns_first_ino: first not-special inode number
  * @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
  */
 struct the_nilfs {
 	unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
 	unsigned ns_sbwcount;
 	unsigned ns_sbsize;
 	unsigned ns_mount_state;
+	unsigned ns_sb_update_freq;
 
 	/*
 	 * Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
 	int ns_inode_size;
 	int ns_first_ino;
 	u32 ns_crc_seed;
+
+	/* /sys/fs/<nilfs>/<device> */
+	struct kobject ns_dev_kobj;
+	struct completion ns_dev_kobj_unregister;
+	struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
 };
 
 #define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
  * @ifile: inode file
  * @inodes_count: number of inodes
  * @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
  */
 struct nilfs_root {
 	__u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
 
 	atomic64_t inodes_count;
 	atomic64_t blocks_count;
+
+	/* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+	struct kobject snapshot_kobj;
+	struct completion snapshot_kobj_unregister;
 };
 
 /* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
 {
 	u64 t = get_seconds();
-	return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+	return t < nilfs->ns_sbwtime ||
+		t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
 }
 
 static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
 		goto out;
 
 	sbi->s_imap_size = array_size;
-	sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+	sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
 	if (!sbi->s_imap)
 		goto nomem;
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2d696b0c93bf..043c83cb51f9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -105,7 +105,7 @@
  */
 
 struct pid_entry {
-	char *name;
+	const char *name;
 	int len;
 	umode_t mode;
 	const struct inode_operations *iop;
@@ -130,10 +130,6 @@ struct pid_entry {
 		{ .proc_get_link = get_link } )
 #define REG(NAME, MODE, fops) \
 	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
-#define INF(NAME, MODE, read) \
-	NOD(NAME, (S_IFREG|(MODE)), \
-		NULL, &proc_info_file_operations, \
-		{ .proc_read = read } )
 #define ONE(NAME, MODE, show) \
 	NOD(NAME, (S_IFREG|(MODE)), \
 		NULL, &proc_single_file_operations, \
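The deleted INF() macro wired a file to proc_info_file_operations, whose read handler (removed later in this patch) copied the handler's output out of a single fixed page. ONE() routes the same files through proc_single_file_operations and seq_file, so each converted handler now takes a seq_file and returns 0 on success. A minimal sketch of the new handler shape (illustrative only; proc_pid_example is hypothetical):

static int proc_pid_example(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	seq_printf(m, "%d\n", task->pid);	/* emit via seq_file, no fixed buffer */
	return 0;
}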
@@ -200,27 +196,32 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static int proc_pid_cmdline(struct task_struct *task, char *buffer)
+static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
+			    struct pid *pid, struct task_struct *task)
 {
-	return get_cmdline(task, buffer, PAGE_SIZE);
+	/*
+	 * Rely on struct seq_operations::show() being called once
+	 * per internal buffer allocation. See single_open(), traverse().
+	 */
+	BUG_ON(m->size < PAGE_SIZE);
+	m->count += get_cmdline(task, m->buf, PAGE_SIZE);
+	return 0;
 }
 
-static int proc_pid_auxv(struct task_struct *task, char *buffer)
+static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
+			 struct pid *pid, struct task_struct *task)
 {
 	struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
-	int res = PTR_ERR(mm);
 	if (mm && !IS_ERR(mm)) {
 		unsigned int nwords = 0;
 		do {
 			nwords += 2;
 		} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
-		res = nwords * sizeof(mm->saved_auxv[0]);
-		if (res > PAGE_SIZE)
-			res = PAGE_SIZE;
-		memcpy(buffer, mm->saved_auxv, res);
+		seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
 		mmput(mm);
-	}
-	return res;
+		return 0;
+	} else
+		return PTR_ERR(mm);
 }
 
 
@@ -229,7 +230,8 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
  * Returns the resolved symbol. If that fails, simply return the address.
  */
-static int proc_pid_wchan(struct task_struct *task, char *buffer)
+static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
+			  struct pid *pid, struct task_struct *task)
 {
 	unsigned long wchan;
 	char symname[KSYM_NAME_LEN];
@@ -240,9 +242,9 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
 		return 0;
 	else
-		return sprintf(buffer, "%lu", wchan);
+		return seq_printf(m, "%lu", wchan);
 	else
-		return sprintf(buffer, "%s", symname);
+		return seq_printf(m, "%s", symname);
 }
 #endif /* CONFIG_KALLSYMS */
 
@@ -304,9 +306,10 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 /*
  * Provides /proc/PID/schedstat
  */
-static int proc_pid_schedstat(struct task_struct *task, char *buffer)
+static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
+			      struct pid *pid, struct task_struct *task)
 {
-	return sprintf(buffer, "%llu %llu %lu\n",
+	return seq_printf(m, "%llu %llu %lu\n",
 			(unsigned long long)task->se.sum_exec_runtime,
 			(unsigned long long)task->sched_info.run_delay,
 			task->sched_info.pcount);
@@ -404,7 +407,8 @@ static const struct file_operations proc_cpuset_operations = {
 };
 #endif
 
-static int proc_oom_score(struct task_struct *task, char *buffer)
+static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
+			  struct pid *pid, struct task_struct *task)
 {
 	unsigned long totalpages = totalram_pages + total_swap_pages;
 	unsigned long points = 0;
@@ -414,12 +418,12 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 	points = oom_badness(task, NULL, NULL, totalpages) *
 					1000 / totalpages;
 	read_unlock(&tasklist_lock);
-	return sprintf(buffer, "%lu\n", points);
+	return seq_printf(m, "%lu\n", points);
 }
 
 struct limit_names {
-	char *name;
-	char *unit;
+	const char *name;
+	const char *unit;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
@@ -442,12 +446,11 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
+			   struct pid *pid, struct task_struct *task)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
@@ -459,35 +462,34 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
 		"Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
+			seq_printf(m, "%-25s %-20s ",
 				lnames[i].name, "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+			seq_printf(m, "%-25s %-20lu ",
 				lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			seq_printf(m, "%-20s ", "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
-				rlim[i].rlim_max);
+			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
-				lnames[i].unit);
+			seq_printf(m, "%-10s\n", lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			seq_putc(m, '\n');
 	}
 
-	return count;
+	return 0;
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-static int proc_pid_syscall(struct task_struct *task, char *buffer)
+static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
+			    struct pid *pid, struct task_struct *task)
 {
 	long nr;
 	unsigned long args[6], sp, pc;
@@ -496,11 +498,11 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
 		return res;
 
 	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
-		res = sprintf(buffer, "running\n");
+		seq_puts(m, "running\n");
 	else if (nr < 0)
-		res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
 	else
-		res = sprintf(buffer,
+		seq_printf(m,
 		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 		       nr,
 		       args[0], args[1], args[2], args[3], args[4], args[5],
@@ -598,43 +600,6 @@ static const struct inode_operations proc_def_inode_operations = {
 	.setattr = proc_setattr,
 };
 
-#define PROC_BLOCK_SIZE	(3*1024)  /* 4K page size but our output routines use some slack for overruns */
-
-static ssize_t proc_info_read(struct file * file, char __user * buf,
-			  size_t count, loff_t *ppos)
-{
-	struct inode * inode = file_inode(file);
-	unsigned long page;
-	ssize_t length;
-	struct task_struct *task = get_proc_task(inode);
-
-	length = -ESRCH;
-	if (!task)
-		goto out_no_task;
-
-	if (count > PROC_BLOCK_SIZE)
-		count = PROC_BLOCK_SIZE;
-
-	length = -ENOMEM;
-	if (!(page = __get_free_page(GFP_TEMPORARY)))
-		goto out;
-
-	length = PROC_I(inode)->op.proc_read(task, (char*)page);
-
-	if (length >= 0)
-		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
-	free_page(page);
-out:
-	put_task_struct(task);
-out_no_task:
-	return length;
-}
-
-static const struct file_operations proc_info_file_operations = {
-	.read		= proc_info_read,
-	.llseek		= generic_file_llseek,
-};
-
 static int proc_single_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
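With proc_info_read() gone, none of these /proc/PID files is bound by the single-page PROC_BLOCK_SIZE limit any more: seq_file reallocates a larger buffer and retries when a show handler overflows it. For reference, the single_open() wiring behind proc_single_file_operations looks like this (sketch of the standard seq_file pattern; the function name is illustrative):

static int proc_single_open_sketch(struct inode *inode, struct file *filp)
{
	/* proc_single_show() re-derives the task from the inode on each show */
	return single_open(filp, proc_single_show, inode);
}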
@@ -2056,7 +2021,7 @@ static int show_timer(struct seq_file *m, void *v)
 	struct k_itimer *timer;
 	struct timers_private *tp = m->private;
 	int notify;
-	static char *nstr[] = {
+	static const char * const nstr[] = {
 		[SIGEV_SIGNAL] = "signal",
 		[SIGEV_NONE] = "none",
 		[SIGEV_THREAD] = "thread",
@@ -2392,7 +2357,7 @@ static const struct file_operations proc_coredump_filter_operations = {
 #endif
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
 {
 	struct task_io_accounting acct = task->ioac;
 	unsigned long flags;
@@ -2416,7 +2381,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 
 		unlock_task_sighand(task, &flags);
 	}
-	result = sprintf(buffer,
+	result = seq_printf(m,
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
@@ -2436,20 +2401,22 @@ out_unlock:
 	return result;
 }
 
-static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+				  struct pid *pid, struct task_struct *task)
 {
-	return do_io_accounting(task, buffer, 0);
+	return do_io_accounting(task, m, 0);
 }
 
-static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+				   struct pid *pid, struct task_struct *task)
 {
-	return do_io_accounting(task, buffer, 1);
+	return do_io_accounting(task, m, 1);
 }
 #endif /* CONFIG_TASK_IO_ACCOUNTING */
 
 #ifdef CONFIG_USER_NS
 static int proc_id_map_open(struct inode *inode, struct file *file,
-		struct seq_operations *seq_ops)
+		const struct seq_operations *seq_ops)
 {
 	struct user_namespace *ns = NULL;
 	struct task_struct *task;
@@ -2557,10 +2524,10 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
 #endif
 	REG("environ", S_IRUSR, proc_environ_operations),
-	INF("auxv", S_IRUSR, proc_pid_auxv),
+	ONE("auxv", S_IRUSR, proc_pid_auxv),
 	ONE("status", S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits", S_IRUGO, proc_pid_limits),
+	ONE("limits", S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2569,9 +2536,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #endif
 	REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-	INF("syscall", S_IRUSR, proc_pid_syscall),
+	ONE("syscall", S_IRUSR, proc_pid_syscall),
 #endif
-	INF("cmdline", S_IRUGO, proc_pid_cmdline),
+	ONE("cmdline", S_IRUGO, proc_pid_cmdline),
 	ONE("stat", S_IRUGO, proc_tgid_stat),
 	ONE("statm", S_IRUGO, proc_pid_statm),
 	REG("maps", S_IRUGO, proc_pid_maps_operations),
@@ -2594,13 +2561,13 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
 #endif
 #ifdef CONFIG_KALLSYMS
-	INF("wchan", S_IRUGO, proc_pid_wchan),
+	ONE("wchan", S_IRUGO, proc_pid_wchan),
 #endif
 #ifdef CONFIG_STACKTRACE
 	ONE("stack", S_IRUSR, proc_pid_stack),
 #endif
 #ifdef CONFIG_SCHEDSTATS
-	INF("schedstat", S_IRUGO, proc_pid_schedstat),
+	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
 #endif
 #ifdef CONFIG_LATENCYTOP
 	REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2611,7 +2578,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_CGROUPS
 	REG("cgroup", S_IRUGO, proc_cgroup_operations),
 #endif
-	INF("oom_score", S_IRUGO, proc_oom_score),
+	ONE("oom_score", S_IRUGO, proc_oom_score),
 	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2625,10 +2592,10 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-	INF("io", S_IRUSR, proc_tgid_io_accounting),
+	ONE("io", S_IRUSR, proc_tgid_io_accounting),
 #endif
 #ifdef CONFIG_HARDWALL
-	INF("hardwall", S_IRUGO, proc_pid_hardwall),
+	ONE("hardwall", S_IRUGO, proc_pid_hardwall),
 #endif
 #ifdef CONFIG_USER_NS
 	REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -2780,12 +2747,12 @@ out:
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
-	int result = 0;
+	int result = -ENOENT;
 	struct task_struct *task;
 	unsigned tgid;
 	struct pid_namespace *ns;
 
-	tgid = name_to_int(dentry);
+	tgid = name_to_int(&dentry->d_name);
 	if (tgid == ~0U)
 		goto out;
 
@@ -2896,18 +2863,18 @@ static const struct pid_entry tid_base_stuff[] = {
 	DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 	REG("environ", S_IRUSR, proc_environ_operations),
-	INF("auxv", S_IRUSR, proc_pid_auxv),
+	ONE("auxv", S_IRUSR, proc_pid_auxv),
 	ONE("status", S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits", S_IRUGO, proc_pid_limits),
+	ONE("limits", S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
 	REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-	INF("syscall", S_IRUSR, proc_pid_syscall),
+	ONE("syscall", S_IRUSR, proc_pid_syscall),
 #endif
-	INF("cmdline", S_IRUGO, proc_pid_cmdline),
+	ONE("cmdline", S_IRUGO, proc_pid_cmdline),
 	ONE("stat", S_IRUGO, proc_tid_stat),
 	ONE("statm", S_IRUGO, proc_pid_statm),
 	REG("maps", S_IRUGO, proc_tid_maps_operations),
@@ -2932,13 +2899,13 @@ static const struct pid_entry tid_base_stuff[] = {
2932 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2899 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2933#endif 2900#endif
2934#ifdef CONFIG_KALLSYMS 2901#ifdef CONFIG_KALLSYMS
2935 INF("wchan", S_IRUGO, proc_pid_wchan), 2902 ONE("wchan", S_IRUGO, proc_pid_wchan),
2936#endif 2903#endif
2937#ifdef CONFIG_STACKTRACE 2904#ifdef CONFIG_STACKTRACE
2938 ONE("stack", S_IRUSR, proc_pid_stack), 2905 ONE("stack", S_IRUSR, proc_pid_stack),
2939#endif 2906#endif
2940#ifdef CONFIG_SCHEDSTATS 2907#ifdef CONFIG_SCHEDSTATS
2941 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2908 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2942#endif 2909#endif
2943#ifdef CONFIG_LATENCYTOP 2910#ifdef CONFIG_LATENCYTOP
2944 REG("latency", S_IRUGO, proc_lstats_operations), 2911 REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2949,7 +2916,7 @@ static const struct pid_entry tid_base_stuff[] = {
2949#ifdef CONFIG_CGROUPS 2916#ifdef CONFIG_CGROUPS
2950 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2917 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2951#endif 2918#endif
2952 INF("oom_score", S_IRUGO, proc_oom_score), 2919 ONE("oom_score", S_IRUGO, proc_oom_score),
2953 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2920 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2954 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2921 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2955#ifdef CONFIG_AUDITSYSCALL 2922#ifdef CONFIG_AUDITSYSCALL
@@ -2960,10 +2927,10 @@ static const struct pid_entry tid_base_stuff[] = {
2960 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2927 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2961#endif 2928#endif
2962#ifdef CONFIG_TASK_IO_ACCOUNTING 2929#ifdef CONFIG_TASK_IO_ACCOUNTING
2963 INF("io", S_IRUSR, proc_tid_io_accounting), 2930 ONE("io", S_IRUSR, proc_tid_io_accounting),
2964#endif 2931#endif
2965#ifdef CONFIG_HARDWALL 2932#ifdef CONFIG_HARDWALL
2966 INF("hardwall", S_IRUGO, proc_pid_hardwall), 2933 ONE("hardwall", S_IRUGO, proc_pid_hardwall),
2967#endif 2934#endif
2968#ifdef CONFIG_USER_NS 2935#ifdef CONFIG_USER_NS
2969 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2936 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -3033,7 +3000,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
3033 if (!leader) 3000 if (!leader)
3034 goto out_no_task; 3001 goto out_no_task;
3035 3002
3036 tid = name_to_int(dentry); 3003 tid = name_to_int(&dentry->d_name);
3037 if (tid == ~0U) 3004 if (tid == ~0U)
3038 goto out; 3005 goto out;
3039 3006
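A note on the INF() to ONE() churn above: both tgid_base_stuff[] and tid_base_stuff[] are being moved off the legacy ->proc_read callback, which formatted into a single raw page, and onto the seq_file based ->proc_show callback; the union proc_op hunk in fs/proc/internal.h further down drops proc_read entirely. A minimal sketch of what a converted handler looks like (the name and body here are illustrative, not taken from this series):

static int proc_pid_example(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	/* seq_printf() handles the buffering; no PAGE_SIZE bookkeeping */
	seq_printf(m, "%lu\n", task->nvcsw);
	return 0;
}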
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 0788d093f5d8..955bb55fab8c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -206,7 +206,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
206{ 206{
207 struct task_struct *task = get_proc_task(dir); 207 struct task_struct *task = get_proc_task(dir);
208 int result = -ENOENT; 208 int result = -ENOENT;
209 unsigned fd = name_to_int(dentry); 209 unsigned fd = name_to_int(&dentry->d_name);
210 210
211 if (!task) 211 if (!task)
212 goto out_no_task; 212 goto out_no_task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..317b72641ebf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -27,7 +27,7 @@
27 27
28#include "internal.h" 28#include "internal.h"
29 29
30DEFINE_SPINLOCK(proc_subdir_lock); 30static DEFINE_SPINLOCK(proc_subdir_lock);
31 31
32static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) 32static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
33{ 33{
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
330 nlink_t nlink) 330 nlink_t nlink)
331{ 331{
332 struct proc_dir_entry *ent = NULL; 332 struct proc_dir_entry *ent = NULL;
333 const char *fn = name; 333 const char *fn;
334 unsigned int len; 334 struct qstr qstr;
335
336 /* make sure name is valid */
337 if (!name || !strlen(name))
338 goto out;
339 335
340 if (xlate_proc_name(name, parent, &fn) != 0) 336 if (xlate_proc_name(name, parent, &fn) != 0)
341 goto out; 337 goto out;
338 qstr.name = fn;
339 qstr.len = strlen(fn);
340 if (qstr.len == 0 || qstr.len >= 256) {
341 WARN(1, "name len %u\n", qstr.len);
342 return NULL;
343 }
344 if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
345 WARN(1, "create '/proc/%s' by hand\n", qstr.name);
346 return NULL;
347 }
342 348
343 /* At this point there must not be any '/' characters beyond *fn */ 349 ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
344 if (strchr(fn, '/'))
345 goto out;
346
347 len = strlen(fn);
348
349 ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
350 if (!ent) 350 if (!ent)
351 goto out; 351 goto out;
352 352
353 memcpy(ent->name, fn, len + 1); 353 memcpy(ent->name, fn, qstr.len + 1);
354 ent->namelen = len; 354 ent->namelen = qstr.len;
355 ent->mode = mode; 355 ent->mode = mode;
356 ent->nlink = nlink; 356 ent->nlink = nlink;
357 atomic_set(&ent->count, 1); 357 atomic_set(&ent->count, 1);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14e71c5..a024cf7b260f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,7 +52,6 @@ struct proc_dir_entry {
52 52
53union proc_op { 53union proc_op {
54 int (*proc_get_link)(struct dentry *, struct path *); 54 int (*proc_get_link)(struct dentry *, struct path *);
55 int (*proc_read)(struct task_struct *task, char *page);
56 int (*proc_show)(struct seq_file *m, 55 int (*proc_show)(struct seq_file *m,
57 struct pid_namespace *ns, struct pid *pid, 56 struct pid_namespace *ns, struct pid *pid,
58 struct task_struct *task); 57 struct task_struct *task);
@@ -112,10 +111,10 @@ static inline int task_dumpable(struct task_struct *task)
112 return 0; 111 return 0;
113} 112}
114 113
115static inline unsigned name_to_int(struct dentry *dentry) 114static inline unsigned name_to_int(const struct qstr *qstr)
116{ 115{
117 const char *name = dentry->d_name.name; 116 const char *name = qstr->name;
118 int len = dentry->d_name.len; 117 int len = qstr->len;
119 unsigned n = 0; 118 unsigned n = 0;
120 119
121 if (len > 1 && *name == '0') 120 if (len > 1 && *name == '0')
@@ -178,8 +177,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
178/* 177/*
179 * generic.c 178 * generic.c
180 */ 179 */
181extern spinlock_t proc_subdir_lock;
182
183extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); 180extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
184extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, 181extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
185 struct dentry *); 182 struct dentry *);
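The internal.h hunk shows only the head of the retyped name_to_int(), so for context, a sketch of the complete helper (the parse loop is reconstructed, not quoted from this diff): it accepts only canonical decimal strings, with no leading zeroes and no overflow, and returns ~0U for anything else. The new guard in the fs/proc/generic.c hunk above relies on exactly that: __proc_create() now refuses purely numeric names directly under /proc, where they would shadow PID directories.

static inline unsigned name_to_int(const struct qstr *qstr)
{
	const char *name = qstr->name;
	int len = qstr->len;
	unsigned n = 0;

	if (len > 1 && *name == '0')
		goto out;			/* no leading zeroes */
	while (len-- > 0) {
		unsigned c = *name++ - '0';

		if (c > 9)
			goto out;		/* non-digit */
		if (n >= (~0U - 9) / 10)
			goto out;		/* would overflow */
		n *= 10;
		n += c;
	}
	return n;
out:
	return ~0U;
}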
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
172 172
173 start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; 173 start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
174 end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; 174 end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
175 end = ALIGN(end, PAGE_SIZE); 175 end = PAGE_ALIGN(end);
176 /* overlap check (because we have to align page */ 176 /* overlap check (because we have to align page */
177 list_for_each_entry(tmp, head, list) { 177 list_for_each_entry(tmp, head, list) {
178 if (tmp->type != KCORE_VMEMMAP) 178 if (tmp->type != KCORE_VMEMMAP)
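The kcore.c change is cosmetic: PAGE_ALIGN(addr) is the dedicated spelling of ALIGN(addr, PAGE_SIZE). Roughly (simplified; the real macros add typeof casts):

#define ALIGN(x, a)		(((x) + ((a) - 1)) & ~((a) - 1))
#define PAGE_ALIGN(addr)	ALIGN(addr, PAGE_SIZE)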
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
632 return ret; 632 return ret;
633} 633}
634 634
635static int scan(struct ctl_table_header *head, ctl_table *table, 635static int scan(struct ctl_table_header *head, struct ctl_table *table,
636 unsigned long *pos, struct file *file, 636 unsigned long *pos, struct file *file,
637 struct dir_context *ctx) 637 struct dir_context *ctx)
638{ 638{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index cb761f010300..15f327bed8c6 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -18,7 +18,7 @@
18/* 18/*
19 * The /proc/tty directory inodes... 19 * The /proc/tty directory inodes...
20 */ 20 */
21static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver; 21static struct proc_dir_entry *proc_tty_driver;
22 22
23/* 23/*
24 * This is the handler for /proc/tty/drivers 24 * This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
176{ 176{
177 if (!proc_mkdir("tty", NULL)) 177 if (!proc_mkdir("tty", NULL))
178 return; 178 return;
179 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 179 proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
180 /* 180 /*
181 * /proc/tty/driver/serial reveals the exact character counts for 181 * /proc/tty/driver/serial reveals the exact character counts for
182 * serial links which is just too easy to abuse for inferring 182 * serial links which is just too easy to abuse for inferring
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5dbadecb234d..574bafc41f0b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -199,10 +199,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
199 199
200static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) 200static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
201{ 201{
202 if (!proc_lookup(dir, dentry, flags)) 202 if (!proc_pid_lookup(dir, dentry, flags))
203 return NULL; 203 return NULL;
204 204
205 return proc_pid_lookup(dir, dentry, flags); 205 return proc_lookup(dir, dentry, flags);
206} 206}
207 207
208static int proc_root_readdir(struct file *file, struct dir_context *ctx) 208static int proc_root_readdir(struct file *file, struct dir_context *ctx)
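The proc_root_lookup() swap gives PID directories priority over registered /proc entries, complementing the __proc_create() guard above: even if a numeric name did make it into the root table, /proc/<pid> now wins. The resulting function, with the ordering spelled out in comments:

static struct dentry *proc_root_lookup(struct inode *dir, struct dentry *dentry,
				       unsigned int flags)
{
	/* try the per-process (PID) lookup first... */
	if (!proc_pid_lookup(dir, dentry, flags))
		return NULL;

	/* ...and only then consult the table of registered entries */
	return proc_lookup(dir, dentry, flags);
}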
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 382aa890e228..a90d6d354199 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
328 * virtually contiguous user-space in ELF layout. 328 * virtually contiguous user-space in ELF layout.
329 */ 329 */
330#ifdef CONFIG_MMU 330#ifdef CONFIG_MMU
331/*
332 * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
333 * reported as not being ram with the zero page.
334 *
335 * @vma: vm_area_struct describing requested mapping
336 * @from: start remapping from
337 * @pfn: page frame number to start remapping to
338 * @size: remapping size
339 * @prot: protection bits
340 *
341 * Returns zero on success, -EAGAIN on failure.
342 */
343static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
344 unsigned long from, unsigned long pfn,
345 unsigned long size, pgprot_t prot)
346{
347 unsigned long map_size;
348 unsigned long pos_start, pos_end, pos;
349 unsigned long zeropage_pfn = my_zero_pfn(0);
350 size_t len = 0;
351
352 pos_start = pfn;
353 pos_end = pfn + (size >> PAGE_SHIFT);
354
355 for (pos = pos_start; pos < pos_end; ++pos) {
356 if (!pfn_is_ram(pos)) {
357 /*
358 * We hit a page which is not ram. Remap the continuous
359 * region between pos_start and pos-1 and replace
360 * the non-ram page at pos with the zero page.
361 */
362 if (pos > pos_start) {
363 /* Remap continuous region */
364 map_size = (pos - pos_start) << PAGE_SHIFT;
365 if (remap_oldmem_pfn_range(vma, from + len,
366 pos_start, map_size,
367 prot))
368 goto fail;
369 len += map_size;
370 }
371 /* Remap the zero page */
372 if (remap_oldmem_pfn_range(vma, from + len,
373 zeropage_pfn,
374 PAGE_SIZE, prot))
375 goto fail;
376 len += PAGE_SIZE;
377 pos_start = pos + 1;
378 }
379 }
380 if (pos > pos_start) {
381 /* Remap the rest */
382 map_size = (pos - pos_start) << PAGE_SHIFT;
383 if (remap_oldmem_pfn_range(vma, from + len, pos_start,
384 map_size, prot))
385 goto fail;
386 }
387 return 0;
388fail:
389 do_munmap(vma->vm_mm, from, len);
390 return -EAGAIN;
391}
392
393static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
394 unsigned long from, unsigned long pfn,
395 unsigned long size, pgprot_t prot)
396{
397 /*
398 * Check if oldmem_pfn_is_ram was registered to avoid
399 * looping over all pages without a reason.
400 */
401 if (oldmem_pfn_is_ram)
402 return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
403 else
404 return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
405}
406
331static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) 407static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
332{ 408{
333 size_t size = vma->vm_end - vma->vm_start; 409 size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
387 463
388 tsz = min_t(size_t, m->offset + m->size - start, size); 464 tsz = min_t(size_t, m->offset + m->size - start, size);
389 paddr = m->paddr + start - m->offset; 465 paddr = m->paddr + start - m->offset;
390 if (remap_oldmem_pfn_range(vma, vma->vm_start + len, 466 if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
391 paddr >> PAGE_SHIFT, tsz, 467 paddr >> PAGE_SHIFT, tsz,
392 vma->vm_page_prot)) 468 vma->vm_page_prot))
393 goto fail; 469 goto fail;
394 size -= tsz; 470 size -= tsz;
395 start += tsz; 471 start += tsz;
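With remap_oldmem_pfn_checked() in place, an mmap() of /proc/vmcore no longer fails outright when a dump range contains pages the registered oldmem_pfn_is_ram hook rejects; those pages are quietly backed by the zero page instead. A hypothetical capture-kernel consumer (illustrative userspace code, not part of this series):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	void *dump;
	int fd = open("/proc/vmcore", O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror("/proc/vmcore");
		return 1;
	}
	/* non-RAM holes in the old kernel's memory now read back as zeroes */
	dump = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (dump == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* ... parse the ELF core headers found in dump ... */
	munmap(dump, st.st_size);
	close(fd);
	return 0;
}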
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
394 394
395 prot = pgprot_noncached(PAGE_KERNEL); 395 prot = pgprot_noncached(PAGE_KERNEL);
396 396
397 pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); 397 pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
398 if (!pages) { 398 if (!pages) {
399 pr_err("%s: Failed to allocate array for %u pages\n", 399 pr_err("%s: Failed to allocate array for %u pages\n",
400 __func__, page_count); 400 __func__, page_count);
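kmalloc_array(n, size, flags) is the overflow-checked spelling of kmalloc(n * size, flags): if the multiplication would wrap, it returns NULL instead of handing back a too-small buffer. In essence (simplified from slab.h of this era):

static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return kmalloc(n * size, flags);
}

The kcalloc() conversion in the fs/ramfs/file-nommu.c hunk further down is the same fix plus zeroing (kcalloc is kmalloc_array with __GFP_ZERO).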
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
5obj-$(CONFIG_QNX6FS_FS) += qnx6.o 5obj-$(CONFIG_QNX6FS_FS) += qnx6.o
6 6
7qnx6-objs := inode.o dir.o namei.o super_mmi.o 7qnx6-objs := inode.o dir.o namei.o super_mmi.o
8ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
77 if (de->de_size != 0xff) { 77 if (de->de_size != 0xff) {
78 /* error - long filename entries always have size 0xff 78 /* error - long filename entries always have size 0xff
79 in direntry */ 79 in direntry */
80 printk(KERN_ERR "qnx6: invalid direntry size (%i).\n", 80 pr_err("invalid direntry size (%i).\n", de->de_size);
81 de->de_size);
82 return 0; 81 return 0;
83 } 82 }
84 lf = qnx6_longname(s, de, &page); 83 lf = qnx6_longname(s, de, &page);
85 if (IS_ERR(lf)) { 84 if (IS_ERR(lf)) {
86 printk(KERN_ERR "qnx6:Error reading longname\n"); 85 pr_err("Error reading longname\n");
87 return 0; 86 return 0;
88 } 87 }
89 88
90 lf_size = fs16_to_cpu(sbi, lf->lf_size); 89 lf_size = fs16_to_cpu(sbi, lf->lf_size);
91 90
92 if (lf_size > QNX6_LONG_NAME_MAX) { 91 if (lf_size > QNX6_LONG_NAME_MAX) {
93 QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname)); 92 pr_debug("file %s\n", lf->lf_fname);
94 printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size); 93 pr_err("Filename too long (%i)\n", lf_size);
95 qnx6_put_page(page); 94 qnx6_put_page(page);
96 return 0; 95 return 0;
97 } 96 }
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
100 mmi 3g filesystem does not have that checksum */ 99 mmi 3g filesystem does not have that checksum */
101 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) != 100 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
102 qnx6_lfile_checksum(lf->lf_fname, lf_size)) 101 qnx6_lfile_checksum(lf->lf_fname, lf_size))
103 printk(KERN_INFO "qnx6: long filename checksum error.\n"); 102 pr_info("long filename checksum error.\n");
104 103
105 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", 104 pr_debug("qnx6_readdir:%.*s inode:%u\n",
106 lf_size, lf->lf_fname, de_inode)); 105 lf_size, lf->lf_fname, de_inode);
107 if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) { 106 if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
108 qnx6_put_page(page); 107 qnx6_put_page(page);
109 return 0; 108 return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
136 int i = start; 135 int i = start;
137 136
138 if (IS_ERR(page)) { 137 if (IS_ERR(page)) {
139 printk(KERN_ERR "qnx6_readdir: read failed\n"); 138 pr_err("%s(): read failed\n", __func__);
140 ctx->pos = (n + 1) << PAGE_CACHE_SHIFT; 139 ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
141 return PTR_ERR(page); 140 return PTR_ERR(page);
142 } 141 }
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
159 break; 158 break;
160 } 159 }
161 } else { 160 } else {
162 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" 161 pr_debug("%s():%.*s inode:%u\n",
163 " inode:%u\n", size, de->de_fname, 162 __func__, size, de->de_fname,
164 no_inode)); 163 no_inode);
165 if (!dir_emit(ctx, de->de_fname, size, 164 if (!dir_emit(ctx, de->de_fname, size,
166 no_inode, DT_UNKNOWN)) { 165 no_inode, DT_UNKNOWN)) {
167 done = true; 166 done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
259 if (ino) 258 if (ino)
260 goto found; 259 goto found;
261 } else 260 } else
262 printk(KERN_ERR "qnx6: undefined " 261 pr_err("undefined filename size in inode.\n");
263 "filename size in inode.\n");
264 } 262 }
265 qnx6_put_page(page); 263 qnx6_put_page(page);
266 } 264 }
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
73{ 73{
74 unsigned phys; 74 unsigned phys;
75 75
76 QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n", 76 pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
77 inode->i_ino, (unsigned long)iblock)); 77 inode->i_ino, (unsigned long)iblock);
78 78
79 phys = qnx6_block_map(inode, iblock); 79 phys = qnx6_block_map(inode, iblock);
80 if (phys) { 80 if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
87static int qnx6_check_blockptr(__fs32 ptr) 87static int qnx6_check_blockptr(__fs32 ptr)
88{ 88{
89 if (ptr == ~(__fs32)0) { 89 if (ptr == ~(__fs32)0) {
90 printk(KERN_ERR "qnx6: hit unused blockpointer.\n"); 90 pr_err("hit unused blockpointer.\n");
91 return 0; 91 return 0;
92 } 92 }
93 return 1; 93 return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
127 levelptr = no >> bitdelta; 127 levelptr = no >> bitdelta;
128 128
129 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) { 129 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
130 printk(KERN_ERR "qnx6:Requested file block number (%u) too big.", 130 pr_err("Requested file block number (%u) too big.", no);
131 no);
132 return 0; 131 return 0;
133 } 132 }
134 133
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
137 for (i = 0; i < depth; i++) { 136 for (i = 0; i < depth; i++) {
138 bh = sb_bread(s, block); 137 bh = sb_bread(s, block);
139 if (!bh) { 138 if (!bh) {
140 printk(KERN_ERR "qnx6:Error reading block (%u)\n", 139 pr_err("Error reading block (%u)\n", block);
141 block);
142 return 0; 140 return 0;
143 } 141 }
144 bitdelta -= ptrbits; 142 bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
207{ 205{
208 struct qnx6_sb_info *sbi = QNX6_SB(s); 206 struct qnx6_sb_info *sbi = QNX6_SB(s);
209 207
210 QNX6DEBUG((KERN_INFO "magic: %08x\n", 208 pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
211 fs32_to_cpu(sbi, sb->sb_magic))); 209 pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
212 QNX6DEBUG((KERN_INFO "checksum: %08x\n", 210 pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
213 fs32_to_cpu(sbi, sb->sb_checksum))); 211 pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
214 QNX6DEBUG((KERN_INFO "serial: %llx\n", 212 pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
215 fs64_to_cpu(sbi, sb->sb_serial))); 213 pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
216 QNX6DEBUG((KERN_INFO "flags: %08x\n", 214 pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
217 fs32_to_cpu(sbi, sb->sb_flags))); 215 pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
218 QNX6DEBUG((KERN_INFO "blocksize: %08x\n", 216 pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
219 fs32_to_cpu(sbi, sb->sb_blocksize))); 217 pr_debug("inode_levels: %02x\n", sb->Inode.levels);
220 QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
221 fs32_to_cpu(sbi, sb->sb_num_inodes)));
222 QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
223 fs32_to_cpu(sbi, sb->sb_free_inodes)));
224 QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
225 fs32_to_cpu(sbi, sb->sb_num_blocks)));
226 QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
227 fs32_to_cpu(sbi, sb->sb_free_blocks)));
228 QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
229 sb->Inode.levels));
230} 218}
231#endif 219#endif
232 220
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
277 start with the first superblock */ 265 start with the first superblock */
278 bh = sb_bread(s, offset); 266 bh = sb_bread(s, offset);
279 if (!bh) { 267 if (!bh) {
280 printk(KERN_ERR "qnx6: unable to read the first superblock\n"); 268 pr_err("unable to read the first superblock\n");
281 return NULL; 269 return NULL;
282 } 270 }
283 sb = (struct qnx6_super_block *)bh->b_data; 271 sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 sbi->s_bytesex = BYTESEX_BE; 273 sbi->s_bytesex = BYTESEX_BE;
286 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 274 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
287 /* we got a big endian fs */ 275 /* we got a big endian fs */
288 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 276 pr_debug("fs got different endianness.\n");
289 " endianness.\n"));
290 return bh; 277 return bh;
291 } else 278 } else
292 sbi->s_bytesex = BYTESEX_LE; 279 sbi->s_bytesex = BYTESEX_LE;
293 if (!silent) { 280 if (!silent) {
294 if (offset == 0) { 281 if (offset == 0) {
295 printk(KERN_ERR "qnx6: wrong signature (magic)" 282 pr_err("wrong signature (magic) in superblock #1.\n");
296 " in superblock #1.\n");
297 } else { 283 } else {
298 printk(KERN_INFO "qnx6: wrong signature (magic)" 284 pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
299 " at position (0x%lx) - will try" 285 offset * s->s_blocksize);
300 " alternative position (0x0000).\n",
301 offset * s->s_blocksize);
302 } 286 }
303 } 287 }
304 brelse(bh); 288 brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
329 313
330 /* Superblock always is 512 Byte long */ 314 /* Superblock always is 512 Byte long */
331 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) { 315 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
332 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 316 pr_err("unable to set blocksize\n");
333 goto outnobh; 317 goto outnobh;
334 } 318 }
335 319
336 /* parse the mount-options */ 320 /* parse the mount-options */
337 if (!qnx6_parse_options((char *) data, s)) { 321 if (!qnx6_parse_options((char *) data, s)) {
338 printk(KERN_ERR "qnx6: invalid mount options.\n"); 322 pr_err("invalid mount options.\n");
339 goto outnobh; 323 goto outnobh;
340 } 324 }
341 if (test_opt(s, MMI_FS)) { 325 if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
355 /* try again without bootblock offset */ 339 /* try again without bootblock offset */
356 bh1 = qnx6_check_first_superblock(s, 0, silent); 340 bh1 = qnx6_check_first_superblock(s, 0, silent);
357 if (!bh1) { 341 if (!bh1) {
358 printk(KERN_ERR "qnx6: unable to read the first superblock\n"); 342 pr_err("unable to read the first superblock\n");
359 goto outnobh; 343 goto outnobh;
360 } 344 }
361 /* seems that no bootblock at partition start */ 345 /* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
370 /* checksum check - start at byte 8 and end at byte 512 */ 354 /* checksum check - start at byte 8 and end at byte 512 */
371 if (fs32_to_cpu(sbi, sb1->sb_checksum) != 355 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
372 crc32_be(0, (char *)(bh1->b_data + 8), 504)) { 356 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
373 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 357 pr_err("superblock #1 checksum error\n");
374 goto out; 358 goto out;
375 } 359 }
376 360
377 /* set new blocksize */ 361 /* set new blocksize */
378 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { 362 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
379 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 363 pr_err("unable to set blocksize\n");
380 goto out; 364 goto out;
381 } 365 }
382 /* blocksize invalidates bh - pull it back in */ 366 /* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
398 /* next the second superblock */ 382 /* next the second superblock */
399 bh2 = sb_bread(s, offset); 383 bh2 = sb_bread(s, offset);
400 if (!bh2) { 384 if (!bh2) {
401 printk(KERN_ERR "qnx6: unable to read the second superblock\n"); 385 pr_err("unable to read the second superblock\n");
402 goto out; 386 goto out;
403 } 387 }
404 sb2 = (struct qnx6_super_block *)bh2->b_data; 388 sb2 = (struct qnx6_super_block *)bh2->b_data;
405 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { 389 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
406 if (!silent) 390 if (!silent)
407 printk(KERN_ERR "qnx6: wrong signature (magic)" 391 pr_err("wrong signature (magic) in superblock #2.\n");
408 " in superblock #2.\n");
409 goto out; 392 goto out;
410 } 393 }
411 394
412 /* checksum check - start at byte 8 and end at byte 512 */ 395 /* checksum check - start at byte 8 and end at byte 512 */
413 if (fs32_to_cpu(sbi, sb2->sb_checksum) != 396 if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
414 crc32_be(0, (char *)(bh2->b_data + 8), 504)) { 397 crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
415 printk(KERN_ERR "qnx6: superblock #2 checksum error\n"); 398 pr_err("superblock #2 checksum error\n");
416 goto out; 399 goto out;
417 } 400 }
418 401
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
422 sbi->sb_buf = bh1; 405 sbi->sb_buf = bh1;
423 sbi->sb = (struct qnx6_super_block *)bh1->b_data; 406 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
424 brelse(bh2); 407 brelse(bh2);
425 printk(KERN_INFO "qnx6: superblock #1 active\n"); 408 pr_info("superblock #1 active\n");
426 } else { 409 } else {
427 /* superblock #2 active */ 410 /* superblock #2 active */
428 sbi->sb_buf = bh2; 411 sbi->sb_buf = bh2;
429 sbi->sb = (struct qnx6_super_block *)bh2->b_data; 412 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
430 brelse(bh1); 413 brelse(bh1);
431 printk(KERN_INFO "qnx6: superblock #2 active\n"); 414 pr_info("superblock #2 active\n");
432 } 415 }
433mmi_success: 416mmi_success:
434 /* sanity check - limit maximum indirect pointer levels */ 417 /* sanity check - limit maximum indirect pointer levels */
435 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) { 418 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
436 printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n", 419 pr_err("too many inode levels (max %i, sb %i)\n",
437 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels); 420 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
438 goto out; 421 goto out;
439 } 422 }
440 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) { 423 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
441 printk(KERN_ERR "qnx6: too many longfilename levels" 424 pr_err("too many longfilename levels (max %i, sb %i)\n",
442 " (max %i, sb %i)\n", 425 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
443 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
444 goto out; 426 goto out;
445 } 427 }
446 s->s_op = &qnx6_sops; 428 s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
460 /* prefetch root inode */ 442 /* prefetch root inode */
461 root = qnx6_iget(s, QNX6_ROOT_INO); 443 root = qnx6_iget(s, QNX6_ROOT_INO);
462 if (IS_ERR(root)) { 444 if (IS_ERR(root)) {
463 printk(KERN_ERR "qnx6: get inode failed\n"); 445 pr_err("get inode failed\n");
464 ret = PTR_ERR(root); 446 ret = PTR_ERR(root);
465 goto out2; 447 goto out2;
466 } 448 }
@@ -474,7 +456,7 @@ mmi_success:
474 errmsg = qnx6_checkroot(s); 456 errmsg = qnx6_checkroot(s);
475 if (errmsg != NULL) { 457 if (errmsg != NULL) {
476 if (!silent) 458 if (!silent)
477 printk(KERN_ERR "qnx6: %s\n", errmsg); 459 pr_err("%s\n", errmsg);
478 goto out3; 460 goto out3;
479 } 461 }
480 return 0; 462 return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
555 inode->i_mode = 0; 537 inode->i_mode = 0;
556 538
557 if (ino == 0) { 539 if (ino == 0) {
558 printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is " 540 pr_err("bad inode number on dev %s: %u is out of range\n",
559 "out of range\n",
560 sb->s_id, ino); 541 sb->s_id, ino);
561 iget_failed(inode); 542 iget_failed(inode);
562 return ERR_PTR(-EIO); 543 return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
566 mapping = sbi->inodes->i_mapping; 547 mapping = sbi->inodes->i_mapping;
567 page = read_mapping_page(mapping, n, NULL); 548 page = read_mapping_page(mapping, n, NULL);
568 if (IS_ERR(page)) { 549 if (IS_ERR(page)) {
569 printk(KERN_ERR "qnx6: major problem: unable to read inode from " 550 pr_err("major problem: unable to read inode from dev %s\n",
570 "dev %s\n", sb->s_id); 551 sb->s_id);
571 iget_failed(inode); 552 iget_failed(inode);
572 return ERR_CAST(page); 553 return ERR_CAST(page);
573 } 554 }
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
689 return err; 670 return err;
690 } 671 }
691 672
692 printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n"); 673 pr_info("QNX6 filesystem 1.0.0 registered.\n");
693 return 0; 674 return 0;
694} 675}
695 676
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
29 foundinode = qnx6_iget(dir->i_sb, ino); 29 foundinode = qnx6_iget(dir->i_sb, ino);
30 qnx6_put_page(page); 30 qnx6_put_page(page);
31 if (IS_ERR(foundinode)) { 31 if (IS_ERR(foundinode)) {
32 QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> " 32 pr_debug("lookup->iget -> error %ld\n",
33 " error %ld\n", PTR_ERR(foundinode))); 33 PTR_ERR(foundinode));
34 return ERR_CAST(foundinode); 34 return ERR_CAST(foundinode);
35 } 35 }
36 } else { 36 } else {
37 QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name)); 37 pr_debug("%s(): not found %s\n", __func__, name);
38 return NULL; 38 return NULL;
39 } 39 }
40 d_add(dentry, foundinode); 40 d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
10 * 10 *
11 */ 11 */
12 12
13#ifdef pr_fmt
14#undef pr_fmt
15#endif
16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
13#include <linux/fs.h> 19#include <linux/fs.h>
14#include <linux/pagemap.h> 20#include <linux/pagemap.h>
15 21
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
19 25
20#include <linux/qnx6_fs.h> 26#include <linux/qnx6_fs.h>
21 27
22#ifdef CONFIG_QNX6FS_DEBUG
23#define QNX6DEBUG(X) printk X
24#else
25#define QNX6DEBUG(X) (void) 0
26#endif
27
28struct qnx6_sb_info { 28struct qnx6_sb_info {
29 struct buffer_head *sb_buf; /* superblock buffer */ 29 struct buffer_head *sb_buf; /* superblock buffer */
30 struct qnx6_super_block *sb; /* our superblock */ 30 struct qnx6_super_block *sb; /* our superblock */
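With the QNX6DEBUG wrapper gone, the qnx6 conversion rests on two stock printk facilities. The pr_fmt() define at the top of this header prepends "qnx6: " (KBUILD_MODNAME) to every pr_* call site, which is why the message strings in the hunks above could drop their hand-written "qnx6: " prefixes; and pr_debug() only emits when DEBUG is defined, which is exactly what the new ccflags line in fs/qnx6/Makefile arranges for CONFIG_QNX6FS_DEBUG builds. In outline (simplified from printk.h, ignoring CONFIG_DYNAMIC_DEBUG):

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* overridable per subsystem */

#define pr_err(fmt, ...) \
	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)

#ifdef DEBUG
#define pr_debug(fmt, ...) \
	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) \
	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif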
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
44 start with the first superblock */ 44 start with the first superblock */
45 bh1 = sb_bread(s, 0); 45 bh1 = sb_bread(s, 0);
46 if (!bh1) { 46 if (!bh1) {
47 printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n"); 47 pr_err("Unable to read first mmi superblock\n");
48 return NULL; 48 return NULL;
49 } 49 }
50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data; 50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
51 sbi = QNX6_SB(s); 51 sbi = QNX6_SB(s);
52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) { 52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
53 if (!silent) { 53 if (!silent) {
54 printk(KERN_ERR "qnx6: wrong signature (magic) in" 54 pr_err("wrong signature (magic) in superblock #1.\n");
55 " superblock #1.\n");
56 goto out; 55 goto out;
57 } 56 }
58 } 57 }
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
60 /* checksum check - start at byte 8 and end at byte 512 */ 59 /* checksum check - start at byte 8 and end at byte 512 */
61 if (fs32_to_cpu(sbi, sb1->sb_checksum) != 60 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
62 crc32_be(0, (char *)(bh1->b_data + 8), 504)) { 61 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
63 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 62 pr_err("superblock #1 checksum error\n");
64 goto out; 63 goto out;
65 } 64 }
66 65
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
70 69
71 /* set new blocksize */ 70 /* set new blocksize */
72 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { 71 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
73 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 72 pr_err("unable to set blocksize\n");
74 goto out; 73 goto out;
75 } 74 }
76 /* blocksize invalidates bh - pull it back in */ 75 /* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
83 /* read second superblock */ 82 /* read second superblock */
84 bh2 = sb_bread(s, offset); 83 bh2 = sb_bread(s, offset);
85 if (!bh2) { 84 if (!bh2) {
86 printk(KERN_ERR "qnx6: unable to read the second superblock\n"); 85 pr_err("unable to read the second superblock\n");
87 goto out; 86 goto out;
88 } 87 }
89 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data; 88 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
90 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { 89 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
91 if (!silent) 90 if (!silent)
92 printk(KERN_ERR "qnx6: wrong signature (magic) in" 91 pr_err("wrong signature (magic) in superblock #2.\n");
93 " superblock #2.\n");
94 goto out; 92 goto out;
95 } 93 }
96 94
97 /* checksum check - start at byte 8 and end at byte 512 */ 95 /* checksum check - start at byte 8 and end at byte 512 */
98 if (fs32_to_cpu(sbi, sb2->sb_checksum) 96 if (fs32_to_cpu(sbi, sb2->sb_checksum)
99 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) { 97 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
100 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 98 pr_err("superblock #1 checksum error\n");
101 goto out; 99 goto out;
102 } 100 }
103 101
104 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL); 102 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
105 if (!qsb) { 103 if (!qsb) {
106 printk(KERN_ERR "qnx6: unable to allocate memory.\n"); 104 pr_err("unable to allocate memory.\n");
107 goto out; 105 goto out;
108 } 106 }
109 107
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
119 sbi->sb_buf = bh1; 117 sbi->sb_buf = bh1;
120 sbi->sb = (struct qnx6_super_block *)bh1->b_data; 118 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
121 brelse(bh2); 119 brelse(bh2);
122 printk(KERN_INFO "qnx6: superblock #1 active\n"); 120 pr_info("superblock #1 active\n");
123 } else { 121 } else {
124 /* superblock #2 active */ 122 /* superblock #2 active */
125 qnx6_mmi_copy_sb(qsb, sb2); 123 qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
131 sbi->sb_buf = bh2; 129 sbi->sb_buf = bh2;
132 sbi->sb = (struct qnx6_super_block *)bh2->b_data; 130 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
133 brelse(bh1); 131 brelse(bh1);
134 printk(KERN_INFO "qnx6: superblock #2 active\n"); 132 pr_info("superblock #2 active\n");
135 } 133 }
136 kfree(qsb); 134 kfree(qsb);
137 135
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
222 222
223 /* gang-find the pages */ 223 /* gang-find the pages */
224 ret = -ENOMEM; 224 ret = -ENOMEM;
225 pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); 225 pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
226 if (!pages) 226 if (!pages)
227 goto out_free; 227 goto out_free;
228 228
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <asm/uaccess.h> 12#include <linux/uaccess.h>
13 13
14extern const struct reiserfs_key MIN_KEY; 14extern const struct reiserfs_key MIN_KEY;
15 15
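The <asm/uaccess.h> to <linux/uaccess.h> swap, repeated across the reiserfs files below, follows the include-the-wrapper rule: the linux/ header pulls in the arch header itself and layers generic helpers on top, so code should not include the asm/ one directly. Schematically (abridged, from memory of this era's header):

/* linux/uaccess.h, abridged */
#include <linux/preempt.h>
#include <asm/uaccess.h>	/* the arch-specific primitives */

static inline void pagefault_disable(void)
{
	preempt_count_inc();
	barrier();	/* keep fault-sensitive code inside the window */
}
/* ... plus probe_kernel_read()/probe_kernel_write() declarations ... */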
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 54fdf196bfb2..5739cb99de7b 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
10 * and using buffers obtained after all above. 10 * and using buffers obtained after all above.
11 */ 11 */
12 12
13#include <asm/uaccess.h> 13#include <linux/uaccess.h>
14#include <linux/time.h> 14#include <linux/time.h>
15#include "reiserfs.h" 15#include "reiserfs.h"
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
6#include "reiserfs.h" 6#include "reiserfs.h"
7#include "acl.h" 7#include "acl.h"
8#include "xattr.h" 8#include "xattr.h"
9#include <asm/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/swap.h> 11#include <linux/swap.h>
12#include <linux/writeback.h> 12#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <asm/uaccess.h> 5#include <linux/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/highmem.h> 12#include <linux/highmem.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <asm/uaccess.h> 14#include <linux/uaccess.h>
15#include <asm/unaligned.h> 15#include <asm/unaligned.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/mpage.h> 17#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/compat.h> 12#include <linux/compat.h>
13 13
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
54 } else { 54 } else {
55 struct stat_data *sd = (struct stat_data *)item; 55 struct stat_data *sd = (struct stat_data *)item;
56 56
57 printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), 57 printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), 58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); 59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
60 } 60 }
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
408 namebuf[namelen + 2] = 0; 408 namebuf[namelen + 2] = 0;
409 } 409 }
410 410
411 printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", 411 printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
412 i, namebuf, 412 i, namebuf,
413 deh_dir_id(deh), deh_objectid(deh), 413 deh_dir_id(deh), deh_objectid(deh),
414 GET_HASH_VALUE(deh_offset(deh)), 414 GET_HASH_VALUE(deh_offset(deh)),
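The format-string churn here and in the reiserfs hunks that follow is one recurring fix: %Lu/%Ld use the L length modifier on integers, which C99 reserves for long double, so checkpatch flags them; %llu/%lld is the standard spelling for long long. The pattern, in isolation:

	unsigned long long size = 12345;

	printk("size %Lu\n", size);	/* old: non-standard, checkpatch warns */
	printk("size %llu\n", size);	/* new: C99 length modifier */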
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index d6744c8b24e1..814dda3ec998 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <asm/uaccess.h> 5#include <linux/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
17static char *reiserfs_cpu_offset(struct cpu_key *key) 17static char *reiserfs_cpu_offset(struct cpu_key *key)
18{ 18{
19 if (cpu_key_k_type(key) == TYPE_DIRENTRY) 19 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
20 sprintf(off_buf, "%Lu(%Lu)", 20 sprintf(off_buf, "%llu(%llu)",
21 (unsigned long long) 21 (unsigned long long)
22 GET_HASH_VALUE(cpu_key_k_offset(key)), 22 GET_HASH_VALUE(cpu_key_k_offset(key)),
23 (unsigned long long) 23 (unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
34 34
35 version = le_key_version(key); 35 version = le_key_version(key);
36 if (le_key_k_type(version, key) == TYPE_DIRENTRY) 36 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
37 sprintf(off_buf, "%Lu(%Lu)", 37 sprintf(off_buf, "%llu(%llu)",
38 (unsigned long long) 38 (unsigned long long)
39 GET_HASH_VALUE(le_key_k_offset(version, key)), 39 GET_HASH_VALUE(le_key_k_offset(version, key)),
40 (unsigned long long) 40 (unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <asm/uaccess.h> 14#include <linux/uaccess.h>
15#include "reiserfs.h" 15#include "reiserfs.h"
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
2006 &s_search_path) == POSITION_FOUND); 2006 &s_search_path) == POSITION_FOUND);
2007 2007
2008 RFALSE(file_size > ROUND_UP(new_file_size), 2008 RFALSE(file_size > ROUND_UP(new_file_size),
2009 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", 2009 "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid); 2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2011 2011
2012update_and_out: 2012update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..709ea92d716f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/time.h> 17#include <linux/time.h>
18#include <asm/uaccess.h> 18#include <linux/uaccess.h>
19#include "reiserfs.h" 19#include "reiserfs.h"
20#include "acl.h" 20#include "acl.h"
21#include "xattr.h" 21#include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
331 * not completed truncate found. New size was 331 * not completed truncate found. New size was
332 * committed together with "save" link 332 * committed together with "save" link
333 */ 333 */
334 reiserfs_info(s, "Truncating %k to %Ld ..", 334 reiserfs_info(s, "Truncating %k to %lld ..",
335 INODE_PKEY(inode), inode->i_size); 335 INODE_PKEY(inode), inode->i_size);
336 336
337 /* don't update modification time */ 337 /* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
1577 rs = (struct reiserfs_super_block *)bh->b_data; 1577 rs = (struct reiserfs_super_block *)bh->b_data;
1578 if (sb_blocksize(rs) != s->s_blocksize) { 1578 if (sb_blocksize(rs) != s->s_blocksize) {
1579 reiserfs_warning(s, "sh-2011", "can't find a reiserfs " 1579 reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
1580 "filesystem on (dev %s, block %Lu, size %lu)", 1580 "filesystem on (dev %s, block %llu, size %lu)",
1581 s->s_id, 1581 s->s_id,
1582 (unsigned long long)bh->b_blocknr, 1582 (unsigned long long)bh->b_blocknr,
1583 s->s_blocksize); 1583 s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2441 struct buffer_head tmp_bh, *bh; 2441 struct buffer_head tmp_bh, *bh;
2442 2442
2443 if (!current->journal_info) { 2443 if (!current->journal_info) {
2444 printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)" 2444 printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
2445 " cancelled because transaction is not started.\n",
2446 (unsigned long long)off, (unsigned long long)len); 2445 (unsigned long long)off, (unsigned long long)len);
2447 return -EIO; 2446 return -EIO;
2448 } 2447 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
45#include <linux/xattr.h> 45#include <linux/xattr.h>
46#include "xattr.h" 46#include "xattr.h"
47#include "acl.h" 47#include "acl.h"
48#include <asm/uaccess.h> 48#include <linux/uaccess.h>
49#include <net/checksum.h> 49#include <net/checksum.h>
50#include <linux/stat.h> 50#include <linux/stat.h>
51#include <linux/quotaops.h> 51#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
84static int xattr_unlink(struct inode *dir, struct dentry *dentry) 84static int xattr_unlink(struct inode *dir, struct dentry *dentry)
85{ 85{
86 int error; 86 int error;
87
87 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 88 BUG_ON(!mutex_is_locked(&dir->i_mutex));
88 89
89 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 90 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
98static int xattr_rmdir(struct inode *dir, struct dentry *dentry) 99static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
99{ 100{
100 int error; 101 int error;
102
101 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 103 BUG_ON(!mutex_is_locked(&dir->i_mutex));
102 104
103 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 105 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
117{ 119{
118 struct dentry *privroot = REISERFS_SB(sb)->priv_root; 120 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
119 struct dentry *xaroot; 121 struct dentry *xaroot;
122
120 if (!privroot->d_inode) 123 if (!privroot->d_inode)
121 return ERR_PTR(-ENODATA); 124 return ERR_PTR(-ENODATA);
122 125
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
127 xaroot = ERR_PTR(-ENODATA); 130 xaroot = ERR_PTR(-ENODATA);
128 else if (!xaroot->d_inode) { 131 else if (!xaroot->d_inode) {
129 int err = -ENODATA; 132 int err = -ENODATA;
133
130 if (xattr_may_create(flags)) 134 if (xattr_may_create(flags))
131 err = xattr_mkdir(privroot->d_inode, xaroot, 0700); 135 err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
132 if (err) { 136 if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
157 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); 161 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
158 if (!IS_ERR(xadir) && !xadir->d_inode) { 162 if (!IS_ERR(xadir) && !xadir->d_inode) {
159 int err = -ENODATA; 163 int err = -ENODATA;
164
160 if (xattr_may_create(flags)) 165 if (xattr_may_create(flags))
161 err = xattr_mkdir(xaroot->d_inode, xadir, 0700); 166 err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
162 if (err) { 167 if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
188{ 193{
189 struct reiserfs_dentry_buf *dbuf = buf; 194 struct reiserfs_dentry_buf *dbuf = buf;
190 struct dentry *dentry; 195 struct dentry *dentry;
196
191 WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); 197 WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
192 198
193 if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) 199 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
218cleanup_dentry_buf(struct reiserfs_dentry_buf *buf) 224cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
219{ 225{
220 int i; 226 int i;
227
221 for (i = 0; i < buf->count; i++) 228 for (i = 0; i < buf->count; i++)
222 if (buf->dentries[i]) 229 if (buf->dentries[i])
223 dput(buf->dentries[i]); 230 dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
283 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 290 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
284 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 291 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
285 struct reiserfs_transaction_handle th; 292 struct reiserfs_transaction_handle th;
293
286 reiserfs_write_lock(inode->i_sb); 294 reiserfs_write_lock(inode->i_sb);
287 err = journal_begin(&th, inode->i_sb, blocks); 295 err = journal_begin(&th, inode->i_sb, blocks);
288 reiserfs_write_unlock(inode->i_sb); 296 reiserfs_write_unlock(inode->i_sb);
289 if (!err) { 297 if (!err) {
290 int jerror; 298 int jerror;
299
291 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 300 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
292 I_MUTEX_XATTR); 301 I_MUTEX_XATTR);
293 err = action(dir, data); 302 err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
340int reiserfs_delete_xattrs(struct inode *inode) 349int reiserfs_delete_xattrs(struct inode *inode)
341{ 350{
342 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL); 351 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
352
343 if (err) 353 if (err)
344 reiserfs_warning(inode->i_sb, "jdm-20004", 354 reiserfs_warning(inode->i_sb, "jdm-20004",
345 "Couldn't delete all xattrs (%d)\n", err); 355 "Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
350int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) 360int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
351{ 361{
352 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs); 362 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
363
353 if (err) 364 if (err)
354 reiserfs_warning(inode->i_sb, "jdm-20007", 365 reiserfs_warning(inode->i_sb, "jdm-20007",
355 "Couldn't chown all xattrs (%d)\n", err); 366 "Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
439static void update_ctime(struct inode *inode) 450static void update_ctime(struct inode *inode)
440{ 451{
441 struct timespec now = current_fs_time(inode->i_sb); 452 struct timespec now = current_fs_time(inode->i_sb);
453
442 if (inode_unhashed(inode) || !inode->i_nlink || 454 if (inode_unhashed(inode) || !inode->i_nlink ||
443 timespec_equal(&inode->i_ctime, &now)) 455 timespec_equal(&inode->i_ctime, &now))
444 return; 456 return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
514 size_t chunk; 526 size_t chunk;
515 size_t skip = 0; 527 size_t skip = 0;
516 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); 528 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
529
517 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) 530 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
518 chunk = PAGE_CACHE_SIZE; 531 chunk = PAGE_CACHE_SIZE;
519 else 532 else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
530 543
531 if (file_pos == 0) { 544 if (file_pos == 0) {
532 struct reiserfs_xattr_header *rxh; 545 struct reiserfs_xattr_header *rxh;
546
533 skip = file_pos = sizeof(struct reiserfs_xattr_header); 547 skip = file_pos = sizeof(struct reiserfs_xattr_header);
534 if (chunk + skip > PAGE_CACHE_SIZE) 548 if (chunk + skip > PAGE_CACHE_SIZE)
535 chunk = PAGE_CACHE_SIZE - skip; 549 chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
659 size_t chunk; 673 size_t chunk;
660 char *data; 674 char *data;
661 size_t skip = 0; 675 size_t skip = 0;
676
662 if (isize - file_pos > PAGE_CACHE_SIZE) 677 if (isize - file_pos > PAGE_CACHE_SIZE)
663 chunk = PAGE_CACHE_SIZE; 678 chunk = PAGE_CACHE_SIZE;
664 else 679 else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
792int reiserfs_removexattr(struct dentry *dentry, const char *name) 807int reiserfs_removexattr(struct dentry *dentry, const char *name)
793{ 808{
794 const struct xattr_handler *handler; 809 const struct xattr_handler *handler;
810
795 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); 811 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
796 812
797 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 813 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
813{ 829{
814 struct listxattr_buf *b = (struct listxattr_buf *)buf; 830 struct listxattr_buf *b = (struct listxattr_buf *)buf;
815 size_t size; 831 size_t size;
832
816 if (name[0] != '.' || 833 if (name[0] != '.' ||
817 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 834 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
818 const struct xattr_handler *handler; 835 const struct xattr_handler *handler;
836
819 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, 837 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
820 name); 838 name);
821 if (!handler) /* Unsupported xattr name */ 839 if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
885{ 903{
886 int err; 904 int err;
887 struct inode *inode = dentry->d_parent->d_inode; 905 struct inode *inode = dentry->d_parent->d_inode;
906
888 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); 907 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
889 908
890 err = xattr_mkdir(inode, dentry, 0700); 909 err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1015 mutex_lock(&privroot->d_inode->i_mutex); 1034 mutex_lock(&privroot->d_inode->i_mutex);
1016 if (!REISERFS_SB(s)->xattr_root) { 1035 if (!REISERFS_SB(s)->xattr_root) {
1017 struct dentry *dentry; 1036 struct dentry *dentry;
1037
1018 dentry = lookup_one_len(XAROOT_NAME, privroot, 1038 dentry = lookup_one_len(XAROOT_NAME, privroot,
1019 strlen(XAROOT_NAME)); 1039 strlen(XAROOT_NAME));
1020 if (!IS_ERR(dentry)) 1040 if (!IS_ERR(dentry))
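
The reiserfs/xattr.c hunks above are pure CodingStyle churn: checkpatch.pl asks for a blank line between a function's local variable declarations and its first statement. A minimal sketch of the rule (the function and names here are illustrative, not from the patch):

#include <linux/fs.h>

/* checkpatch: "Missing a blank line after declarations" wants this shape */
static int example_count_refs(struct inode *inode)
{
	int refs = atomic_read(&inode->i_count);

	return refs > 0 ? refs : 0;
}
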
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
9#include <linux/posix_acl_xattr.h> 9#include <linux/posix_acl_xattr.h>
10#include "xattr.h" 10#include "xattr.h"
11#include "acl.h" 11#include "acl.h"
12#include <asm/uaccess.h> 12#include <linux/uaccess.h>
13 13
14static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th, 14static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct inode *inode, int type, 15 struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include "xattr.h" 7#include "xattr.h"
8#include <linux/security.h> 8#include <linux/security.h>
9#include <asm/uaccess.h> 9#include <linux/uaccess.h>
10 10
11static int 11static int
12security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 12security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include "xattr.h" 7#include "xattr.h"
8#include <asm/uaccess.h> 8#include <linux/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include "xattr.h" 6#include "xattr.h"
7#include <asm/uaccess.h> 7#include <linux/uaccess.h>
8 8
9static int 9static int
10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
56 * 2 of the Licence, or (at your option) any later version. 56 * 2 of the Licence, or (at your option) any later version.
57 */ 57 */
58 58
59#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
60
59#include <linux/module.h> 61#include <linux/module.h>
60#include <linux/string.h> 62#include <linux/string.h>
61#include <linux/fs.h> 63#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
380eio: 382eio:
381 ret = -EIO; 383 ret = -EIO;
382error: 384error:
383 printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos); 385 pr_err("read error for inode 0x%lx\n", pos);
384 return ERR_PTR(ret); 386 return ERR_PTR(ret);
385} 387}
386 388
@@ -390,6 +392,7 @@ error:
390static struct inode *romfs_alloc_inode(struct super_block *sb) 392static struct inode *romfs_alloc_inode(struct super_block *sb)
391{ 393{
392 struct romfs_inode_info *inode; 394 struct romfs_inode_info *inode;
395
393 inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); 396 inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
394 return inode ? &inode->vfs_inode : NULL; 397 return inode ? &inode->vfs_inode : NULL;
395} 398}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
400static void romfs_i_callback(struct rcu_head *head) 403static void romfs_i_callback(struct rcu_head *head)
401{ 404{
402 struct inode *inode = container_of(head, struct inode, i_rcu); 405 struct inode *inode = container_of(head, struct inode, i_rcu);
406
403 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
404} 408}
405 409
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
507 if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 || 511 if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
508 img_size < ROMFH_SIZE) { 512 img_size < ROMFH_SIZE) {
509 if (!silent) 513 if (!silent)
510 printk(KERN_WARNING "VFS:" 514 pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
511 " Can't find a romfs filesystem on dev %s.\n",
512 sb->s_id); 515 sb->s_id);
513 goto error_rsb_inval; 516 goto error_rsb_inval;
514 } 517 }
515 518
516 if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) { 519 if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
517 printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n", 520 pr_err("bad initial checksum on dev %s.\n", sb->s_id);
518 sb->s_id);
519 goto error_rsb_inval; 521 goto error_rsb_inval;
520 } 522 }
521 523
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
523 525
524 len = strnlen(rsb->name, ROMFS_MAXFN); 526 len = strnlen(rsb->name, ROMFS_MAXFN);
525 if (!silent) 527 if (!silent)
526 printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n", 528 pr_notice("Mounting image '%*.*s' through %s\n",
527 (unsigned) len, (unsigned) len, rsb->name, storage); 529 (unsigned) len, (unsigned) len, rsb->name, storage);
528 530
529 kfree(rsb); 531 kfree(rsb);
530 rsb = NULL; 532 rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
614{ 616{
615 int ret; 617 int ret;
616 618
617 printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n"); 619 pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
618 620
619 romfs_inode_cachep = 621 romfs_inode_cachep =
620 kmem_cache_create("romfs_i", 622 kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
623 romfs_i_init_once); 625 romfs_i_init_once);
624 626
625 if (!romfs_inode_cachep) { 627 if (!romfs_inode_cachep) {
626 printk(KERN_ERR 628 pr_err("Failed to initialise inode cache\n");
627 "ROMFS error: Failed to initialise inode cache\n");
628 return -ENOMEM; 629 return -ENOMEM;
629 } 630 }
630 ret = register_filesystem(&romfs_fs_type); 631 ret = register_filesystem(&romfs_fs_type);
631 if (ret) { 632 if (ret) {
632 printk(KERN_ERR "ROMFS error: Failed to register filesystem\n"); 633 pr_err("Failed to register filesystem\n");
633 goto error_register; 634 goto error_register;
634 } 635 }
635 return 0; 636 return 0;
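
For reference on the romfs conversion above: defining pr_fmt() before the first #include makes every subsequent pr_*() call prepend one common prefix, which is why the hand-written "ROMFS: " strings could be dropped from the messages themselves. A small sketch, assuming the object is built as "romfs" so KBUILD_MODNAME expands to that:

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/printk.h>

static void example(void)
{
	/* with the pr_fmt above this logs: "romfs: read error for inode 0x10" */
	pr_err("read error for inode 0x%lx\n", 0x10UL);
}
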
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
6 6
7ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \ 7ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
8 namei.o super.o symlink.o truncate.o util.o 8 namei.o super.o symlink.o truncate.o util.o
9ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
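
The new ccflags line is what keeps the converted messages visible: pr_debug() compiles to a real printk only when DEBUG is defined (or CONFIG_DYNAMIC_DEBUG is enabled), so CONFIG_UFS_DEBUG builds now pass -DDEBUG. Simplified view of the include/linux/printk.h behaviour this relies on (dynamic-debug branch omitted):

#ifdef DEBUG
#define pr_debug(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
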
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 61e8a9b021dd..7c580c97990e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -158,16 +158,16 @@ out:
158 158
159/** 159/**
160 * ufs_inode_getfrag() - allocate new fragment(s) 160 * ufs_inode_getfrag() - allocate new fragment(s)
161 * @inode - pointer to inode 161 * @inode: pointer to inode
162 * @fragment - number of `fragment' which hold pointer 162 * @fragment: number of `fragment' which hold pointer
163 * to new allocated fragment(s) 163 * to new allocated fragment(s)
164 * @new_fragment - number of new allocated fragment(s) 164 * @new_fragment: number of new allocated fragment(s)
165 * @required - how many fragment(s) we require 165 * @required: how many fragment(s) we require
166 * @err - we set it if something wrong 166 * @err: we set it if something wrong
167 * @phys - pointer to where we save physical number of new allocated fragments, 167 * @phys: pointer to where we save physical number of new allocated fragments,
168 * NULL if we allocate not data(indirect blocks for example). 168 * NULL if we allocate not data(indirect blocks for example).
169 * @new - we set it if we allocate new block 169 * @new: we set it if we allocate new block
170 * @locked_page - for ufs_new_fragments() 170 * @locked_page: for ufs_new_fragments()
171 */ 171 */
172static struct buffer_head * 172static struct buffer_head *
173ufs_inode_getfrag(struct inode *inode, u64 fragment, 173ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
315 315
316/** 316/**
317 * ufs_inode_getblock() - allocate new block 317 * ufs_inode_getblock() - allocate new block
318 * @inode - pointer to inode 318 * @inode: pointer to inode
319 * @bh - pointer to block which hold "pointer" to new allocated block 319 * @bh: pointer to block which hold "pointer" to new allocated block
320 * @fragment - number of `fragment' which hold pointer 320 * @fragment: number of `fragment' which hold pointer
321 * to new allocated block 321 * to new allocated block
322 * @new_fragment - number of new allocated fragment 322 * @new_fragment: number of new allocated fragment
323 * (block will hold this fragment and also uspi->s_fpb-1) 323 * (block will hold this fragment and also uspi->s_fpb-1)
324 * @err - see ufs_inode_getfrag() 324 * @err: see ufs_inode_getfrag()
325 * @phys - see ufs_inode_getfrag() 325 * @phys: see ufs_inode_getfrag()
326 * @new - see ufs_inode_getfrag() 326 * @new: see ufs_inode_getfrag()
327 * @locked_page - see ufs_inode_getfrag() 327 * @locked_page: see ufs_inode_getfrag()
328 */ 328 */
329static struct buffer_head * 329static struct buffer_head *
330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, 330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
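
The two ufs_inode_getfrag()/ufs_inode_getblock() hunks only normalise the comments to kernel-doc syntax: scripts/kernel-doc recognises '@name:' but does not treat '@name -' as a parameter description. The canonical shape, shown on a hypothetical declaration:

/**
 * example_getfrag() - allocate new fragment(s)
 * @inode: pointer to inode
 * @required: how many fragment(s) we require
 *
 * Note the '@name:' form; '@name - ...' is not parsed as a parameter.
 */
static struct buffer_head *example_getfrag(struct inode *inode,
					   unsigned int required);
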
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
65 * Evgeniy Dushistov <dushistov@mail.ru>, 2007 65 * Evgeniy Dushistov <dushistov@mail.ru>, 2007
66 */ 66 */
67 67
68
69#include <linux/exportfs.h> 68#include <linux/exportfs.h>
70#include <linux/module.h> 69#include <linux/module.h>
71#include <linux/bitops.h> 70#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
172{ 171{
173 u32 magic = fs32_to_cpu(sb, usb3->fs_magic); 172 u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
174 173
175 printk("ufs_print_super_stuff\n"); 174 pr_debug("ufs_print_super_stuff\n");
176 printk(" magic: 0x%x\n", magic); 175 pr_debug(" magic: 0x%x\n", magic);
177 if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) { 176 if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
178 printk(" fs_size: %llu\n", (unsigned long long) 177 pr_debug(" fs_size: %llu\n", (unsigned long long)
179 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size)); 178 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
180 printk(" fs_dsize: %llu\n", (unsigned long long) 179 pr_debug(" fs_dsize: %llu\n", (unsigned long long)
181 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize)); 180 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
182 printk(" bsize: %u\n", 181 pr_debug(" bsize: %u\n",
183 fs32_to_cpu(sb, usb1->fs_bsize)); 182 fs32_to_cpu(sb, usb1->fs_bsize));
184 printk(" fsize: %u\n", 183 pr_debug(" fsize: %u\n",
185 fs32_to_cpu(sb, usb1->fs_fsize)); 184 fs32_to_cpu(sb, usb1->fs_fsize));
186 printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname); 185 pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
187 printk(" fs_sblockloc: %llu\n", (unsigned long long) 186 pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
188 fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc)); 187 fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
189 printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long) 188 pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
190 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir)); 189 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
191 printk(" cs_nbfree(No of free blocks): %llu\n", 190 pr_debug(" cs_nbfree(No of free blocks): %llu\n",
192 (unsigned long long) 191 (unsigned long long)
193 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree)); 192 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
194 printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n", 193 pr_info(" cs_nifree(Num of free inodes): %llu\n",
195 (unsigned long long) 194 (unsigned long long)
196 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree)); 195 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
197 printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n", 196 pr_info(" cs_nffree(Num of free frags): %llu\n",
198 (unsigned long long) 197 (unsigned long long)
199 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree)); 198 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
200 printk(KERN_INFO" fs_maxsymlinklen: %u\n", 199 pr_info(" fs_maxsymlinklen: %u\n",
201 fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen)); 200 fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
202 } else { 201 } else {
203 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); 202 pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
204 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); 203 pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
205 printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno)); 204 pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
206 printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno)); 205 pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
207 printk(" cgoffset: %u\n", 206 pr_debug(" cgoffset: %u\n",
208 fs32_to_cpu(sb, usb1->fs_cgoffset)); 207 fs32_to_cpu(sb, usb1->fs_cgoffset));
209 printk(" ~cgmask: 0x%x\n", 208 pr_debug(" ~cgmask: 0x%x\n",
210 ~fs32_to_cpu(sb, usb1->fs_cgmask)); 209 ~fs32_to_cpu(sb, usb1->fs_cgmask));
211 printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size)); 210 pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
212 printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize)); 211 pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
213 printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg)); 212 pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
214 printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize)); 213 pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
215 printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize)); 214 pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
216 printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag)); 215 pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
217 printk(" fragshift: %u\n", 216 pr_debug(" fragshift: %u\n",
218 fs32_to_cpu(sb, usb1->fs_fragshift)); 217 fs32_to_cpu(sb, usb1->fs_fragshift));
219 printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask)); 218 pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
220 printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift)); 219 pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
221 printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize)); 220 pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
222 printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc)); 221 pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
223 printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg)); 222 pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
224 printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg)); 223 pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
225 printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg)); 224 pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
226 printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr)); 225 pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
227 printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize)); 226 pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
228 printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize)); 227 pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
229 printk(" fstodb: %u\n", 228 pr_debug(" fstodb: %u\n",
230 fs32_to_cpu(sb, usb1->fs_fsbtodb)); 229 fs32_to_cpu(sb, usb1->fs_fsbtodb));
231 printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos)); 230 pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
232 printk(" ndir %u\n", 231 pr_debug(" ndir %u\n",
233 fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir)); 232 fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
234 printk(" nifree %u\n", 233 pr_debug(" nifree %u\n",
235 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree)); 234 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
236 printk(" nbfree %u\n", 235 pr_debug(" nbfree %u\n",
237 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)); 236 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
238 printk(" nffree %u\n", 237 pr_debug(" nffree %u\n",
239 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree)); 238 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
240 } 239 }
241 printk("\n"); 240 pr_debug("\n");
242} 241}
243 242
244/* 243/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
247static void ufs_print_cylinder_stuff(struct super_block *sb, 246static void ufs_print_cylinder_stuff(struct super_block *sb,
248 struct ufs_cylinder_group *cg) 247 struct ufs_cylinder_group *cg)
249{ 248{
250 printk("\nufs_print_cylinder_stuff\n"); 249 pr_debug("\nufs_print_cylinder_stuff\n");
251 printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group)); 250 pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
252 printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic)); 251 pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
253 printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time)); 252 pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
254 printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx)); 253 pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
255 printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl)); 254 pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
256 printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk)); 255 pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
257 printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk)); 256 pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
258 printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir)); 257 pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
259 printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree)); 258 pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
260 printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree)); 259 pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
261 printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree)); 260 pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
262 printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor)); 261 pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
263 printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor)); 262 pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
264 printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor)); 263 pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
265 printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n", 264 pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
266 fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]), 265 fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
267 fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]), 266 fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
268 fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]), 267 fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
269 fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7])); 268 fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
270 printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff)); 269 pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
271 printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff)); 270 pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
272 printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff)); 271 pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
273 printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff)); 272 pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
274 printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff)); 273 pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
275 printk(" clustersumoff %u\n", 274 pr_debug(" clustersumoff %u\n",
276 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff)); 275 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
277 printk(" clusteroff %u\n", 276 pr_debug(" clusteroff %u\n",
278 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff)); 277 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
279 printk(" nclusterblks %u\n", 278 pr_debug(" nclusterblks %u\n",
280 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks)); 279 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
281 printk("\n"); 280 pr_debug("\n");
282} 281}
283#else 282#else
284# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/ 283# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
287 286
288static const struct super_operations ufs_super_ops; 287static const struct super_operations ufs_super_ops;
289 288
290static char error_buf[1024];
291
292void ufs_error (struct super_block * sb, const char * function, 289void ufs_error (struct super_block * sb, const char * function,
293 const char * fmt, ...) 290 const char * fmt, ...)
294{ 291{
295 struct ufs_sb_private_info * uspi; 292 struct ufs_sb_private_info * uspi;
296 struct ufs_super_block_first * usb1; 293 struct ufs_super_block_first * usb1;
294 struct va_format vaf;
297 va_list args; 295 va_list args;
298 296
299 uspi = UFS_SB(sb)->s_uspi; 297 uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
305 ufs_mark_sb_dirty(sb); 303 ufs_mark_sb_dirty(sb);
306 sb->s_flags |= MS_RDONLY; 304 sb->s_flags |= MS_RDONLY;
307 } 305 }
308 va_start (args, fmt); 306 va_start(args, fmt);
309 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 307 vaf.fmt = fmt;
310 va_end (args); 308 vaf.va = &args;
311 switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) { 309 switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
312 case UFS_MOUNT_ONERROR_PANIC: 310 case UFS_MOUNT_ONERROR_PANIC:
313 panic ("UFS-fs panic (device %s): %s: %s\n", 311 panic("panic (device %s): %s: %pV\n",
314 sb->s_id, function, error_buf); 312 sb->s_id, function, &vaf);
315 313
316 case UFS_MOUNT_ONERROR_LOCK: 314 case UFS_MOUNT_ONERROR_LOCK:
317 case UFS_MOUNT_ONERROR_UMOUNT: 315 case UFS_MOUNT_ONERROR_UMOUNT:
318 case UFS_MOUNT_ONERROR_REPAIR: 316 case UFS_MOUNT_ONERROR_REPAIR:
319 printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n", 317 pr_crit("error (device %s): %s: %pV\n",
320 sb->s_id, function, error_buf); 318 sb->s_id, function, &vaf);
321 } 319 }
320 va_end(args);
322} 321}
323 322
324void ufs_panic (struct super_block * sb, const char * function, 323void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
326{ 325{
327 struct ufs_sb_private_info * uspi; 326 struct ufs_sb_private_info * uspi;
328 struct ufs_super_block_first * usb1; 327 struct ufs_super_block_first * usb1;
328 struct va_format vaf;
329 va_list args; 329 va_list args;
330 330
331 uspi = UFS_SB(sb)->s_uspi; 331 uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
336 ubh_mark_buffer_dirty(USPI_UBH(uspi)); 336 ubh_mark_buffer_dirty(USPI_UBH(uspi));
337 ufs_mark_sb_dirty(sb); 337 ufs_mark_sb_dirty(sb);
338 } 338 }
339 va_start (args, fmt); 339 va_start(args, fmt);
340 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 340 vaf.fmt = fmt;
341 va_end (args); 341 vaf.va = &args;
342 sb->s_flags |= MS_RDONLY; 342 sb->s_flags |= MS_RDONLY;
343 printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n", 343 pr_crit("panic (device %s): %s: %pV\n",
344 sb->s_id, function, error_buf); 344 sb->s_id, function, &vaf);
345 va_end(args);
345} 346}
346 347
347void ufs_warning (struct super_block * sb, const char * function, 348void ufs_warning (struct super_block * sb, const char * function,
348 const char * fmt, ...) 349 const char * fmt, ...)
349{ 350{
351 struct va_format vaf;
350 va_list args; 352 va_list args;
351 353
352 va_start (args, fmt); 354 va_start(args, fmt);
353 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 355 vaf.fmt = fmt;
354 va_end (args); 356 vaf.va = &args;
355 printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n", 357 pr_warn("(device %s): %s: %pV\n",
356 sb->s_id, function, error_buf); 358 sb->s_id, function, &vaf);
359 va_end(args);
357} 360}
358 361
359enum { 362enum {
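
Dropping the static error_buf works because %pV defers the formatting to printk itself: a struct va_format carries the format string and va_list down intact, so the shared 1024-byte buffer, and the race between concurrent callers writing into it, disappears. A minimal sketch of the pattern with an illustrative helper name (note va_end() must come after the last use of &vaf):

#include <linux/fs.h>
#include <linux/kernel.h>

static void example_warn(struct super_block *sb, const char *function,
			 const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_warn("(device %s): %s: %pV\n", sb->s_id, function, &vaf);
	va_end(args);
}
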
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
464 ufs_set_opt (*mount_options, ONERROR_UMOUNT); 467 ufs_set_opt (*mount_options, ONERROR_UMOUNT);
465 break; 468 break;
466 case Opt_onerror_repair: 469 case Opt_onerror_repair:
467 printk("UFS-fs: Unable to do repair on error, " 470 pr_err("Unable to do repair on error, will lock lock instead\n");
468 "will lock lock instead\n");
469 ufs_clear_opt (*mount_options, ONERROR); 471 ufs_clear_opt (*mount_options, ONERROR);
470 ufs_set_opt (*mount_options, ONERROR_REPAIR); 472 ufs_set_opt (*mount_options, ONERROR_REPAIR);
471 break; 473 break;
472 default: 474 default:
473 printk("UFS-fs: Invalid option: \"%s\" " 475 pr_err("Invalid option: \"%s\" or missing value\n", p);
474 "or missing value\n", p);
475 return 0; 476 return 0;
476 } 477 }
477 } 478 }
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
788 789
789#ifndef CONFIG_UFS_FS_WRITE 790#ifndef CONFIG_UFS_FS_WRITE
790 if (!(sb->s_flags & MS_RDONLY)) { 791 if (!(sb->s_flags & MS_RDONLY)) {
791 printk("ufs was compiled with read-only support, " 792 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
792 "can't be mounted as read-write\n");
793 return -EROFS; 793 return -EROFS;
794 } 794 }
795#endif 795#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
812 sbi->s_mount_opt = 0; 812 sbi->s_mount_opt = 0;
813 ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK); 813 ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
814 if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) { 814 if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
815 printk("wrong mount options\n"); 815 pr_err("wrong mount options\n");
816 goto failed; 816 goto failed;
817 } 817 }
818 if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) { 818 if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
819 if (!silent) 819 if (!silent)
820 printk("You didn't specify the type of your ufs filesystem\n\n" 820 pr_err("You didn't specify the type of your ufs filesystem\n\n"
821 "mount -t ufs -o ufstype=" 821 "mount -t ufs -o ufstype="
822 "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n" 822 "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
823 ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, " 823 ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
868 break; 868 break;
869 869
870 case UFS_MOUNT_UFSTYPE_SUNOS: 870 case UFS_MOUNT_UFSTYPE_SUNOS:
871 UFSD(("ufstype=sunos\n")) 871 UFSD("ufstype=sunos\n");
872 uspi->s_fsize = block_size = 1024; 872 uspi->s_fsize = block_size = 1024;
873 uspi->s_fmask = ~(1024 - 1); 873 uspi->s_fmask = ~(1024 - 1);
874 uspi->s_fshift = 10; 874 uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
900 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 900 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
901 if (!(sb->s_flags & MS_RDONLY)) { 901 if (!(sb->s_flags & MS_RDONLY)) {
902 if (!silent) 902 if (!silent)
903 printk(KERN_INFO "ufstype=old is supported read-only\n"); 903 pr_info("ufstype=old is supported read-only\n");
904 sb->s_flags |= MS_RDONLY; 904 sb->s_flags |= MS_RDONLY;
905 } 905 }
906 break; 906 break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
916 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 916 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
917 if (!(sb->s_flags & MS_RDONLY)) { 917 if (!(sb->s_flags & MS_RDONLY)) {
918 if (!silent) 918 if (!silent)
919 printk(KERN_INFO "ufstype=nextstep is supported read-only\n"); 919 pr_info("ufstype=nextstep is supported read-only\n");
920 sb->s_flags |= MS_RDONLY; 920 sb->s_flags |= MS_RDONLY;
921 } 921 }
922 break; 922 break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
932 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 932 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
933 if (!(sb->s_flags & MS_RDONLY)) { 933 if (!(sb->s_flags & MS_RDONLY)) {
934 if (!silent) 934 if (!silent)
935 printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n"); 935 pr_info("ufstype=nextstep-cd is supported read-only\n");
936 sb->s_flags |= MS_RDONLY; 936 sb->s_flags |= MS_RDONLY;
937 } 937 }
938 break; 938 break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
948 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 948 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
949 if (!(sb->s_flags & MS_RDONLY)) { 949 if (!(sb->s_flags & MS_RDONLY)) {
950 if (!silent) 950 if (!silent)
951 printk(KERN_INFO "ufstype=openstep is supported read-only\n"); 951 pr_info("ufstype=openstep is supported read-only\n");
952 sb->s_flags |= MS_RDONLY; 952 sb->s_flags |= MS_RDONLY;
953 } 953 }
954 break; 954 break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
963 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 963 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
964 if (!(sb->s_flags & MS_RDONLY)) { 964 if (!(sb->s_flags & MS_RDONLY)) {
965 if (!silent) 965 if (!silent)
966 printk(KERN_INFO "ufstype=hp is supported read-only\n"); 966 pr_info("ufstype=hp is supported read-only\n");
967 sb->s_flags |= MS_RDONLY; 967 sb->s_flags |= MS_RDONLY;
968 } 968 }
969 break; 969 break;
970 default: 970 default:
971 if (!silent) 971 if (!silent)
972 printk("unknown ufstype\n"); 972 pr_err("unknown ufstype\n");
973 goto failed; 973 goto failed;
974 } 974 }
975 975
976again: 976again:
977 if (!sb_set_blocksize(sb, block_size)) { 977 if (!sb_set_blocksize(sb, block_size)) {
978 printk(KERN_ERR "UFS: failed to set blocksize\n"); 978 pr_err("failed to set blocksize\n");
979 goto failed; 979 goto failed;
980 } 980 }
981 981
@@ -1034,7 +1034,7 @@ again:
1034 goto again; 1034 goto again;
1035 } 1035 }
1036 if (!silent) 1036 if (!silent)
1037 printk("ufs_read_super: bad magic number\n"); 1037 pr_err("%s(): bad magic number\n", __func__);
1038 goto failed; 1038 goto failed;
1039 1039
1040magic_found: 1040magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
1048 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); 1048 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
1049 1049
1050 if (!is_power_of_2(uspi->s_fsize)) { 1050 if (!is_power_of_2(uspi->s_fsize)) {
1051 printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", 1051 pr_err("%s(): fragment size %u is not a power of 2\n",
1052 uspi->s_fsize); 1052 __func__, uspi->s_fsize);
1053 goto failed; 1053 goto failed;
1054 } 1054 }
1055 if (uspi->s_fsize < 512) { 1055 if (uspi->s_fsize < 512) {
1056 printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n", 1056 pr_err("%s(): fragment size %u is too small\n",
1057 uspi->s_fsize); 1057 __func__, uspi->s_fsize);
1058 goto failed; 1058 goto failed;
1059 } 1059 }
1060 if (uspi->s_fsize > 4096) { 1060 if (uspi->s_fsize > 4096) {
1061 printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n", 1061 pr_err("%s(): fragment size %u is too large\n",
1062 uspi->s_fsize); 1062 __func__, uspi->s_fsize);
1063 goto failed; 1063 goto failed;
1064 } 1064 }
1065 if (!is_power_of_2(uspi->s_bsize)) { 1065 if (!is_power_of_2(uspi->s_bsize)) {
1066 printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", 1066 pr_err("%s(): block size %u is not a power of 2\n",
1067 uspi->s_bsize); 1067 __func__, uspi->s_bsize);
1068 goto failed; 1068 goto failed;
1069 } 1069 }
1070 if (uspi->s_bsize < 4096) { 1070 if (uspi->s_bsize < 4096) {
1071 printk(KERN_ERR "ufs_read_super: block size %u is too small\n", 1071 pr_err("%s(): block size %u is too small\n",
1072 uspi->s_bsize); 1072 __func__, uspi->s_bsize);
1073 goto failed; 1073 goto failed;
1074 } 1074 }
1075 if (uspi->s_bsize / uspi->s_fsize > 8) { 1075 if (uspi->s_bsize / uspi->s_fsize > 8) {
1076 printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n", 1076 pr_err("%s(): too many fragments per block (%u)\n",
1077 uspi->s_bsize / uspi->s_fsize); 1077 __func__, uspi->s_bsize / uspi->s_fsize);
1078 goto failed; 1078 goto failed;
1079 } 1079 }
1080 if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) { 1080 if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
1113 UFSD("fs is DEC OSF/1\n"); 1113 UFSD("fs is DEC OSF/1\n");
1114 break; 1114 break;
1115 case UFS_FSACTIVE: 1115 case UFS_FSACTIVE:
1116 printk("ufs_read_super: fs is active\n"); 1116 pr_err("%s(): fs is active\n", __func__);
1117 sb->s_flags |= MS_RDONLY; 1117 sb->s_flags |= MS_RDONLY;
1118 break; 1118 break;
1119 case UFS_FSBAD: 1119 case UFS_FSBAD:
1120 printk("ufs_read_super: fs is bad\n"); 1120 pr_err("%s(): fs is bad\n", __func__);
1121 sb->s_flags |= MS_RDONLY; 1121 sb->s_flags |= MS_RDONLY;
1122 break; 1122 break;
1123 default: 1123 default:
1124 printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean); 1124 pr_err("%s(): can't grok fs_clean 0x%x\n",
1125 __func__, usb1->fs_clean);
1125 sb->s_flags |= MS_RDONLY; 1126 sb->s_flags |= MS_RDONLY;
1126 break; 1127 break;
1127 } 1128 }
1128 } else { 1129 } else {
1129 printk("ufs_read_super: fs needs fsck\n"); 1130 pr_err("%s(): fs needs fsck\n", __func__);
1130 sb->s_flags |= MS_RDONLY; 1131 sb->s_flags |= MS_RDONLY;
1131 } 1132 }
1132 1133
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1299 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { 1300 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
1300 new_mount_opt |= ufstype; 1301 new_mount_opt |= ufstype;
1301 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { 1302 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1302 printk("ufstype can't be changed during remount\n"); 1303 pr_err("ufstype can't be changed during remount\n");
1303 unlock_ufs(sb); 1304 unlock_ufs(sb);
1304 return -EINVAL; 1305 return -EINVAL;
1305 } 1306 }
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1328 * fs was mounted as ro, remounting rw 1329 * fs was mounted as ro, remounting rw
1329 */ 1330 */
1330#ifndef CONFIG_UFS_FS_WRITE 1331#ifndef CONFIG_UFS_FS_WRITE
1331 printk("ufs was compiled with read-only support, " 1332 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
1332 "can't be mounted as read-write\n");
1333 unlock_ufs(sb); 1333 unlock_ufs(sb);
1334 return -EINVAL; 1334 return -EINVAL;
1335#else 1335#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1338 ufstype != UFS_MOUNT_UFSTYPE_44BSD && 1338 ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
1339 ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && 1339 ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
1340 ufstype != UFS_MOUNT_UFSTYPE_UFS2) { 1340 ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
1341 printk("this ufstype is read-only supported\n"); 1341 pr_err("this ufstype is read-only supported\n");
1342 unlock_ufs(sb); 1342 unlock_ufs(sb);
1343 return -EINVAL; 1343 return -EINVAL;
1344 } 1344 }
1345 if (!ufs_read_cylinder_structures(sb)) { 1345 if (!ufs_read_cylinder_structures(sb)) {
1346 printk("failed during remounting\n"); 1346 pr_err("failed during remounting\n");
1347 unlock_ufs(sb); 1347 unlock_ufs(sb);
1348 return -EPERM; 1348 return -EPERM;
1349 } 1349 }
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
1#ifndef _UFS_UFS_H 1#ifndef _UFS_UFS_H
2#define _UFS_UFS_H 1 2#define _UFS_UFS_H 1
3 3
4#ifdef pr_fmt
5#undef pr_fmt
6#endif
7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
4#define UFS_MAX_GROUP_LOADED 8 10#define UFS_MAX_GROUP_LOADED 8
5#define UFS_CGNO_EMPTY ((unsigned)-1) 11#define UFS_CGNO_EMPTY ((unsigned)-1)
6 12
@@ -71,9 +77,9 @@ struct ufs_inode_info {
71 */ 77 */
72#ifdef CONFIG_UFS_DEBUG 78#ifdef CONFIG_UFS_DEBUG
73# define UFSD(f, a...) { \ 79# define UFSD(f, a...) { \
74 printk ("UFSD (%s, %d): %s:", \ 80 pr_debug("UFSD (%s, %d): %s:", \
75 __FILE__, __LINE__, __func__); \ 81 __FILE__, __LINE__, __func__); \
76 printk (f, ## a); \ 82 pr_debug(f, ## a); \
77 } 83 }
78#else 84#else
79# define UFSD(f, a...) /**/ 85# define UFSD(f, a...) /**/
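
The #ifdef/#undef dance at the top of ufs.h is needed because printk.h installs a pass-through default for any file that has not defined its own prefix, and ufs.h is included after printk.h in the ufs objects. Roughly:

/* include/linux/printk.h supplies a fallback when nothing defined it first */
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif

/* ...so a shared private header that wants a prefix must drop that default */
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
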
diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h
index 1437b7da09b2..c110843fc53b 100644
--- a/include/asm-generic/pci-dma-compat.h
+++ b/include/asm-generic/pci-dma-compat.h
@@ -19,6 +19,14 @@ pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
19 return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC); 19 return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC);
20} 20}
21 21
22static inline void *
23pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
24 dma_addr_t *dma_handle)
25{
26 return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev,
27 size, dma_handle, GFP_ATOMIC);
28}
29
22static inline void 30static inline void
23pci_free_consistent(struct pci_dev *hwdev, size_t size, 31pci_free_consistent(struct pci_dev *hwdev, size_t size,
24 void *vaddr, dma_addr_t dma_handle) 32 void *vaddr, dma_addr_t dma_handle)
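
pci_zalloc_consistent() is a thin convenience over dma_zalloc_coherent(): same contract as pci_alloc_consistent(), but the buffer comes back zeroed, so callers can drop an explicit memset(). A hedged usage sketch (the device and ring size are made up):

#include <linux/pci.h>

static int example_alloc_ring(struct pci_dev *pdev, void **ring,
			      dma_addr_t *ring_dma)
{
	*ring = pci_zalloc_consistent(pdev, 4096, ring_dma);
	if (!*ring)
		return -ENOMEM;
	return 0;	/* already zeroed; no memset(*ring, 0, 4096) */
}
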
diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h
index 115272137a9c..4d683df898e6 100644
--- a/include/linux/decompress/bunzip2.h
+++ b/include/linux/decompress/bunzip2.h
@@ -1,10 +1,10 @@
1#ifndef DECOMPRESS_BUNZIP2_H 1#ifndef DECOMPRESS_BUNZIP2_H
2#define DECOMPRESS_BUNZIP2_H 2#define DECOMPRESS_BUNZIP2_H
3 3
4int bunzip2(unsigned char *inbuf, int len, 4int bunzip2(unsigned char *inbuf, long len,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *output, 7 unsigned char *output,
8 int *pos, 8 long *pos,
9 void(*error)(char *x)); 9 void(*error)(char *x));
10#endif 10#endif
diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h
index 0c7111a55a1a..1fcfd64b5076 100644
--- a/include/linux/decompress/generic.h
+++ b/include/linux/decompress/generic.h
@@ -1,11 +1,11 @@
1#ifndef DECOMPRESS_GENERIC_H 1#ifndef DECOMPRESS_GENERIC_H
2#define DECOMPRESS_GENERIC_H 2#define DECOMPRESS_GENERIC_H
3 3
4typedef int (*decompress_fn) (unsigned char *inbuf, int len, 4typedef int (*decompress_fn) (unsigned char *inbuf, long len,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *outbuf, 7 unsigned char *outbuf,
8 int *posp, 8 long *posp,
9 void(*error)(char *x)); 9 void(*error)(char *x));
10 10
11/* inbuf - input buffer 11/* inbuf - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
33 33
34 34
35/* Utility routine to detect the decompression method */ 35/* Utility routine to detect the decompression method */
36decompress_fn decompress_method(const unsigned char *inbuf, int len, 36decompress_fn decompress_method(const unsigned char *inbuf, long len,
37 const char **name); 37 const char **name);
38 38
39#endif 39#endif
diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h
index 1d0aedef9822..e4f411fdbd24 100644
--- a/include/linux/decompress/inflate.h
+++ b/include/linux/decompress/inflate.h
@@ -1,10 +1,10 @@
1#ifndef LINUX_DECOMPRESS_INFLATE_H 1#ifndef LINUX_DECOMPRESS_INFLATE_H
2#define LINUX_DECOMPRESS_INFLATE_H 2#define LINUX_DECOMPRESS_INFLATE_H
3 3
4int gunzip(unsigned char *inbuf, int len, 4int gunzip(unsigned char *inbuf, long len,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *output, 7 unsigned char *output,
8 int *pos, 8 long *pos,
9 void(*error_fn)(char *x)); 9 void(*error_fn)(char *x));
10#endif 10#endif
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
index d5b68bf3ec92..3273c2f36496 100644
--- a/include/linux/decompress/unlz4.h
+++ b/include/linux/decompress/unlz4.h
@@ -1,10 +1,10 @@
1#ifndef DECOMPRESS_UNLZ4_H 1#ifndef DECOMPRESS_UNLZ4_H
2#define DECOMPRESS_UNLZ4_H 2#define DECOMPRESS_UNLZ4_H
3 3
4int unlz4(unsigned char *inbuf, int len, 4int unlz4(unsigned char *inbuf, long len,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *output, 7 unsigned char *output,
8 int *pos, 8 long *pos,
9 void(*error)(char *x)); 9 void(*error)(char *x));
10#endif 10#endif
diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h
index 7796538f1bf4..8a891a193840 100644
--- a/include/linux/decompress/unlzma.h
+++ b/include/linux/decompress/unlzma.h
@@ -1,11 +1,11 @@
1#ifndef DECOMPRESS_UNLZMA_H 1#ifndef DECOMPRESS_UNLZMA_H
2#define DECOMPRESS_UNLZMA_H 2#define DECOMPRESS_UNLZMA_H
3 3
4int unlzma(unsigned char *, int, 4int unlzma(unsigned char *, long,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *output, 7 unsigned char *output,
8 int *posp, 8 long *posp,
9 void(*error)(char *x) 9 void(*error)(char *x)
10 ); 10 );
11 11
diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
index 987229752519..af18f95d6570 100644
--- a/include/linux/decompress/unlzo.h
+++ b/include/linux/decompress/unlzo.h
@@ -1,10 +1,10 @@
1#ifndef DECOMPRESS_UNLZO_H 1#ifndef DECOMPRESS_UNLZO_H
2#define DECOMPRESS_UNLZO_H 2#define DECOMPRESS_UNLZO_H
3 3
4int unlzo(unsigned char *inbuf, int len, 4int unlzo(unsigned char *inbuf, long len,
5 int(*fill)(void*, unsigned int), 5 long (*fill)(void*, unsigned long),
6 int(*flush)(void*, unsigned int), 6 long (*flush)(void*, unsigned long),
7 unsigned char *output, 7 unsigned char *output,
8 int *pos, 8 long *pos,
9 void(*error)(char *x)); 9 void(*error)(char *x));
10#endif 10#endif
diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h
index 41728fc6c8a1..f764e2a7201e 100644
--- a/include/linux/decompress/unxz.h
+++ b/include/linux/decompress/unxz.h
@@ -10,10 +10,10 @@
10#ifndef DECOMPRESS_UNXZ_H 10#ifndef DECOMPRESS_UNXZ_H
11#define DECOMPRESS_UNXZ_H 11#define DECOMPRESS_UNXZ_H
12 12
13int unxz(unsigned char *in, int in_size, 13int unxz(unsigned char *in, long in_size,
14 int (*fill)(void *dest, unsigned int size), 14 long (*fill)(void *dest, unsigned long size),
15 int (*flush)(void *src, unsigned int size), 15 long (*flush)(void *src, unsigned long size),
16 unsigned char *out, int *in_used, 16 unsigned char *out, long *in_used,
17 void (*error)(char *x)); 17 void (*error)(char *x));
18 18
19#endif 19#endif
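
All six decompressor headers change in lockstep because each must stay assignment-compatible with decompress_fn: buffer lengths, the fill/flush callback sizes, and the consumed-bytes out-parameter all widen from int to long so inputs past 2 GB do not overflow. Under the new typedef, a caller's callbacks take this shape (the bodies are stubs):

#include <linux/decompress/generic.h>

static long example_fill(void *buf, unsigned long size)
{
	/* refill buf with up to size compressed bytes; return bytes read */
	return 0;	/* 0 means end of input in this sketch */
}

static long example_flush(void *buf, unsigned long size)
{
	/* consume size decompressed bytes from buf; return bytes taken */
	return size;
}
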
diff --git a/include/linux/efi.h b/include/linux/efi.h
index efc681fd5895..45cb4ffdea62 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void);
1156#ifdef CONFIG_EFI_RUNTIME_MAP 1156#ifdef CONFIG_EFI_RUNTIME_MAP
1157int efi_runtime_map_init(struct kobject *); 1157int efi_runtime_map_init(struct kobject *);
1158void efi_runtime_map_setup(void *, int, u32); 1158void efi_runtime_map_setup(void *, int, u32);
1159int efi_get_runtime_map_size(void);
1160int efi_get_runtime_map_desc_size(void);
1161int efi_runtime_map_copy(void *buf, size_t bufsz);
1159#else 1162#else
1160static inline int efi_runtime_map_init(struct kobject *kobj) 1163static inline int efi_runtime_map_init(struct kobject *kobj)
1161{ 1164{
@@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj)
1164 1167
1165static inline void 1168static inline void
1166efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} 1169efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
1170
1171static inline int efi_get_runtime_map_size(void)
1172{
1173 return 0;
1174}
1175
1176static inline int efi_get_runtime_map_desc_size(void)
1177{
1178 return 0;
1179}
1180
1181static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
1182{
1183 return 0;
1184}
1185
1167#endif 1186#endif
1168 1187
1169/* prototypes shared between arch specific and generic stub code */ 1188/* prototypes shared between arch specific and generic stub code */
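
The three new accessors let code outside the EFI core, the kexec_file_load() path elsewhere in this series, size and snapshot the runtime memory map, while the !CONFIG_EFI_RUNTIME_MAP stubs keep such callers building. A hedged caller sketch (the 0-on-success convention for efi_runtime_map_copy() is assumed, not stated in the header):

#include <linux/efi.h>
#include <linux/slab.h>

static void *example_snapshot_runtime_map(int *nr_entries)
{
	int sz = efi_get_runtime_map_size();
	int desc_sz = efi_get_runtime_map_desc_size();
	void *buf;

	if (!sz || !desc_sz)
		return NULL;	/* support compiled out, or no map */

	buf = kmalloc(sz, GFP_KERNEL);
	if (!buf)
		return NULL;
	if (efi_runtime_map_copy(buf, sz)) {	/* assumed: nonzero = error */
		kfree(buf);
		return NULL;
	}
	*nr_entries = sz / desc_sz;
	return buf;
}
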
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ab6c6913040..f0890e4a7c25 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -387,7 +387,7 @@ struct address_space {
387 struct inode *host; /* owner: inode, block_device */ 387 struct inode *host; /* owner: inode, block_device */
388 struct radix_tree_root page_tree; /* radix tree of all pages */ 388 struct radix_tree_root page_tree; /* radix tree of all pages */
389 spinlock_t tree_lock; /* and lock protecting it */ 389 spinlock_t tree_lock; /* and lock protecting it */
390 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 390 atomic_t i_mmap_writable;/* count VM_SHARED mappings */
391 struct rb_root i_mmap; /* tree of private and shared mappings */ 391 struct rb_root i_mmap; /* tree of private and shared mappings */
392 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 392 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
393 struct mutex i_mmap_mutex; /* protect tree, count, list */ 393 struct mutex i_mmap_mutex; /* protect tree, count, list */
@@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping)
470 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff 470 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
471 * marks vma as VM_SHARED if it is shared, and the file was opened for 471 * marks vma as VM_SHARED if it is shared, and the file was opened for
472 * writing i.e. vma may be mprotected writable even if now readonly. 472 * writing i.e. vma may be mprotected writable even if now readonly.
473 *
474 * If i_mmap_writable is negative, no new writable mappings are allowed. You
475 * can only deny writable mappings, if none exists right now.
473 */ 476 */
474static inline int mapping_writably_mapped(struct address_space *mapping) 477static inline int mapping_writably_mapped(struct address_space *mapping)
475{ 478{
476 return mapping->i_mmap_writable != 0; 479 return atomic_read(&mapping->i_mmap_writable) > 0;
480}
481
482static inline int mapping_map_writable(struct address_space *mapping)
483{
484 return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
485 0 : -EPERM;
486}
487
488static inline void mapping_unmap_writable(struct address_space *mapping)
489{
490 atomic_dec(&mapping->i_mmap_writable);
491}
492
493static inline int mapping_deny_writable(struct address_space *mapping)
494{
495 return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
496 0 : -EBUSY;
497}
498
499static inline void mapping_allow_writable(struct address_space *mapping)
500{
501 atomic_inc(&mapping->i_mmap_writable);
477} 502}
478 503
479/* 504/*
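
Turning i_mmap_writable into an atomic_t makes it a tristate counter: positive while VM_SHARED writable mappings exist, negative once writers are forbidden, with atomic_inc_unless_negative()/atomic_dec_unless_positive() making both transitions race-free without taking i_mmap_mutex. That is exactly the hook memfd-style write sealing needs; the pairing looks like:

#include <linux/fs.h>

static int example_seal_write(struct address_space *mapping)
{
	/* refuse while any shared writable mapping is live */
	if (mapping_deny_writable(mapping))
		return -EBUSY;
	/* ...record the seal; mapping_allow_writable() would undo it... */
	return 0;
}

static int example_mmap_shared_writable(struct address_space *mapping)
{
	/* refuse once writers have been denied */
	if (mapping_map_writable(mapping))
		return -EPERM;
	/* ...set up the vma; mapping_unmap_writable() on teardown... */
	return 0;
}
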
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5e3a906cc089..142ec544167c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr);
237extern int 237extern int
238walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, 238walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
239 void *arg, int (*func)(unsigned long, unsigned long, void *)); 239 void *arg, int (*func)(unsigned long, unsigned long, void *));
240extern int
241walk_system_ram_res(u64 start, u64 end, void *arg,
242 int (*func)(u64, u64, void *));
243extern int
244walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
245 int (*func)(u64, u64, void *));
240 246
241/* True if any part of r1 overlaps r2 */ 247/* True if any part of r1 overlaps r2 */
242static inline bool resource_overlaps(struct resource *r1, struct resource *r2) 248static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
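
walk_system_ram_res() and walk_iomem_res() iterate matching resource ranges by address rather than by page frame, which is what the kexec_file_load() code needs when hunting for System RAM holes to place segments in. Hedged usage (resource ends are inclusive in this sketch):

#include <linux/ioport.h>
#include <linux/kernel.h>

static int example_count_ram(u64 start, u64 end, void *arg)
{
	u64 *total = arg;

	*total += end - start + 1;
	return 0;	/* nonzero would abort the walk */
}

static u64 example_total_ram(void)
{
	u64 total = 0;

	walk_system_ram_res(0, ULLONG_MAX, &total, example_count_ram);
	return total;
}
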
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3dc22abbc68a..31ae66f34235 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -470,6 +470,7 @@ extern enum system_states {
470#define TAINT_FIRMWARE_WORKAROUND 11 470#define TAINT_FIRMWARE_WORKAROUND 11
471#define TAINT_OOT_MODULE 12 471#define TAINT_OOT_MODULE 12
472#define TAINT_UNSIGNED_MODULE 13 472#define TAINT_UNSIGNED_MODULE 13
473#define TAINT_SOFTLOCKUP 14
473 474
474extern const char hex_asc[]; 475extern const char hex_asc[];
475#define hex_asc_lo(x) hex_asc[((x) & 0x0f)] 476#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a75641930049..4b2a0e11cc5b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -10,6 +10,7 @@
10#include <linux/ioport.h> 10#include <linux/ioport.h>
11#include <linux/elfcore.h> 11#include <linux/elfcore.h>
12#include <linux/elf.h> 12#include <linux/elf.h>
13#include <linux/module.h>
13#include <asm/kexec.h> 14#include <asm/kexec.h>
14 15
15/* Verify architecture specific macros are defined */ 16/* Verify architecture specific macros are defined */
@@ -69,7 +70,18 @@ typedef unsigned long kimage_entry_t;
69#define IND_SOURCE 0x8 70#define IND_SOURCE 0x8
70 71
71struct kexec_segment { 72struct kexec_segment {
72 void __user *buf; 73 /*
74 * This pointer can point to user memory if kexec_load() system
75 * call is used or will point to kernel memory if
76 * kexec_file_load() system call is used.
77 *
78 * Use ->buf when expecting to deal with user memory and use ->kbuf
79 * when expecting to deal with kernel memory.
80 */
81 union {
82 void __user *buf;
83 void *kbuf;
84 };
73 size_t bufsz; 85 size_t bufsz;
74 unsigned long mem; 86 unsigned long mem;
75 size_t memsz; 87 size_t memsz;
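
The anonymous union above is the compatibility pivot between the two syscalls: kexec_load() still hands in user pointers (->buf, reached via copy_from_user()), while kexec_file_load() builds segments from kernel memory (->kbuf); bufsz/mem/memsz stay shared. Field usage only, with names as in the patch and an invented helper:

#include <linux/kexec.h>

static void example_fill_segment(struct kexec_segment *seg, bool file_mode,
				 void *kernel_data, void __user *user_data,
				 size_t len)
{
	if (file_mode)
		seg->kbuf = kernel_data;	/* kexec_file_load() path */
	else
		seg->buf = user_data;		/* kexec_load() path */
	seg->bufsz = len;
}
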
@@ -84,6 +96,27 @@ struct compat_kexec_segment {
84}; 96};
85#endif 97#endif
86 98
99struct kexec_sha_region {
100 unsigned long start;
101 unsigned long len;
102};
103
104struct purgatory_info {
105 /* Pointer to elf header of read only purgatory */
106 Elf_Ehdr *ehdr;
107
108 /* Pointer to purgatory sechdrs which are modifiable */
109 Elf_Shdr *sechdrs;
110 /*
111 * Temporary buffer location where purgatory is loaded and relocated
112 * This memory can be freed post image load
113 */
114 void *purgatory_buf;
115
116 /* Address where purgatory is finally loaded and is executed from */
117 unsigned long purgatory_load_addr;
118};
119
87struct kimage { 120struct kimage {
88 kimage_entry_t head; 121 kimage_entry_t head;
89 kimage_entry_t *entry; 122 kimage_entry_t *entry;
@@ -100,7 +133,7 @@ struct kimage {
100 133
101 struct list_head control_pages; 134 struct list_head control_pages;
102 struct list_head dest_pages; 135 struct list_head dest_pages;
103 struct list_head unuseable_pages; 136 struct list_head unusable_pages;
104 137
105 /* Address of next control page to allocate for crash kernels. */ 138 /* Address of next control page to allocate for crash kernels. */
106 unsigned long control_page; 139 unsigned long control_page;
@@ -110,13 +143,63 @@ struct kimage {
110#define KEXEC_TYPE_DEFAULT 0 143#define KEXEC_TYPE_DEFAULT 0
111#define KEXEC_TYPE_CRASH 1 144#define KEXEC_TYPE_CRASH 1
112 unsigned int preserve_context : 1; 145 unsigned int preserve_context : 1;
146 /* If set, we are using file mode kexec syscall */
147 unsigned int file_mode:1;
113 148
114#ifdef ARCH_HAS_KIMAGE_ARCH 149#ifdef ARCH_HAS_KIMAGE_ARCH
115 struct kimage_arch arch; 150 struct kimage_arch arch;
116#endif 151#endif
152
153 /* Additional fields for file based kexec syscall */
154 void *kernel_buf;
155 unsigned long kernel_buf_len;
156
157 void *initrd_buf;
158 unsigned long initrd_buf_len;
159
160 char *cmdline_buf;
161 unsigned long cmdline_buf_len;
162
163 /* File operations provided by image loader */
164 struct kexec_file_ops *fops;
165
166 /* Image loader handling the kernel can store a pointer here */
167 void *image_loader_data;
168
169 /* Information for loading purgatory */
170 struct purgatory_info purgatory_info;
117}; 171};
118 172
173/*
174 * Keeps track of buffer parameters as provided by caller for requesting
175 * memory placement of buffer.
176 */
177struct kexec_buf {
178 struct kimage *image;
179 char *buffer;
180 unsigned long bufsz;
181 unsigned long memsz;
182 unsigned long buf_align;
183 unsigned long buf_min;
184 unsigned long buf_max;
185 bool top_down; /* allocate from top of memory hole */
186};
119 187
188typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
189typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
190 unsigned long kernel_len, char *initrd,
191 unsigned long initrd_len, char *cmdline,
192 unsigned long cmdline_len);
193typedef int (kexec_cleanup_t)(void *loader_data);
194typedef int (kexec_verify_sig_t)(const char *kernel_buf,
195 unsigned long kernel_len);
196
197struct kexec_file_ops {
198 kexec_probe_t *probe;
199 kexec_load_t *load;
200 kexec_cleanup_t *cleanup;
201 kexec_verify_sig_t *verify_sig;
202};
120 203
121/* kexec interface functions */ 204/* kexec interface functions */
122extern void machine_kexec(struct kimage *image); 205extern void machine_kexec(struct kimage *image);
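
An image loader plugs into the file-based syscall by exporting one of these kexec_file_ops tables: probe() claims (or rejects) the blob, and load() parses it into segments, returning loader-private data that ends up in image->image_loader_data. A skeletal sketch; the magic check and names are invented:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/kexec.h>

static int example_probe(const char *kernel_buf, unsigned long kernel_len)
{
	/* 0 means "this loader understands the format" */
	if (kernel_len < 2 || kernel_buf[0] != 'M' || kernel_buf[1] != 'Z')
		return -ENOEXEC;
	return 0;
}

static void *example_load(struct kimage *image, char *kernel,
			  unsigned long kernel_len, char *initrd,
			  unsigned long initrd_len, char *cmdline,
			  unsigned long cmdline_len)
{
	/* parse headers, kexec_add_buffer() each segment, return private data */
	return ERR_PTR(-ENOEXEC);	/* stub */
}

struct kexec_file_ops example_kexec_ops = {
	.probe = example_probe,
	.load = example_load,
};
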
@@ -127,8 +210,21 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
127 struct kexec_segment __user *segments, 210 struct kexec_segment __user *segments,
128 unsigned long flags); 211 unsigned long flags);
129extern int kernel_kexec(void); 212extern int kernel_kexec(void);
213extern int kexec_add_buffer(struct kimage *image, char *buffer,
214 unsigned long bufsz, unsigned long memsz,
215 unsigned long buf_align, unsigned long buf_min,
216 unsigned long buf_max, bool top_down,
217 unsigned long *load_addr);
130extern struct page *kimage_alloc_control_pages(struct kimage *image, 218extern struct page *kimage_alloc_control_pages(struct kimage *image,
131 unsigned int order); 219 unsigned int order);
220extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
221 unsigned long max, int top_down,
222 unsigned long *load_addr);
223extern int kexec_purgatory_get_set_symbol(struct kimage *image,
224 const char *name, void *buf,
225 unsigned int size, bool get_value);
226extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
227 const char *name);
132extern void crash_kexec(struct pt_regs *); 228extern void crash_kexec(struct pt_regs *);
133int kexec_should_crash(struct task_struct *); 229int kexec_should_crash(struct task_struct *);
134void crash_save_cpu(struct pt_regs *regs, int cpu); 230void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -177,6 +273,10 @@ extern int kexec_load_disabled;
177#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) 273#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
178#endif 274#endif
179 275
276/* List of defined/legal kexec file flags */
277#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
278 KEXEC_FILE_NO_INITRAMFS)
279
180#define VMCOREINFO_BYTES (4096) 280#define VMCOREINFO_BYTES (4096)
181#define VMCOREINFO_NOTE_NAME "VMCOREINFO" 281#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
182#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) 282#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
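The kexec_file_ops typedefs above spell out the contract an image loader implements for the new file-based syscall. A minimal sketch of such a loader follows; the my_* names are hypothetical, and only the typedefs, struct kexec_file_ops, and kexec_add_buffer() come from this patch.

static int my_probe(const char *kernel_buf, unsigned long kernel_size)
{
	/* return 0 only if this loader recognizes the image format */
	return -ENOEXEC;
}

static void *my_load(struct kimage *image, char *kernel_buf,
		     unsigned long kernel_len, char *initrd,
		     unsigned long initrd_len, char *cmdline,
		     unsigned long cmdline_len)
{
	/*
	 * Place segments via kexec_add_buffer(); the returned pointer
	 * ends up in image->image_loader_data for later cleanup.
	 */
	return NULL;
}

static int my_cleanup(void *loader_data)
{
	/* free whatever my_load() allocated */
	return 0;
}

static struct kexec_file_ops my_image_ops = {
	.probe		= my_probe,
	.load		= my_load,
	.cleanup	= my_cleanup,
	/* .verify_sig is optional */
};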
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..e0752d204d9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,39 +54,20 @@ struct mem_cgroup_reclaim_cookie {
54}; 54};
55 55
56#ifdef CONFIG_MEMCG 56#ifdef CONFIG_MEMCG
57/* 57int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
58 * All "charge" functions with gfp_mask should use GFP_KERNEL or 58 gfp_t gfp_mask, struct mem_cgroup **memcgp);
59 * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't 59void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
60 * alloc memory but reclaims memory from all available zones. So, "where I want 60 bool lrucare);
61 * memory from" bits of gfp_mask has no meaning. So any bits of that field is 61void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
62 * available but adding a rule is better. charge functions' gfp_mask should 62void mem_cgroup_uncharge(struct page *page);
63 * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous 63void mem_cgroup_uncharge_list(struct list_head *page_list);
64 * codes.
65 * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
66 */
67 64
68extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, 65void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
69 gfp_t gfp_mask); 66 bool lrucare);
70/* for swap handling */
71extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
72 struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
73extern void mem_cgroup_commit_charge_swapin(struct page *page,
74 struct mem_cgroup *memcg);
75extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
76
77extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
78 gfp_t gfp_mask);
79 67
80struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); 68struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
81struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); 69struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
82 70
83/* For coalescing uncharge for reducing memcg' overhead*/
84extern void mem_cgroup_uncharge_start(void);
85extern void mem_cgroup_uncharge_end(void);
86
87extern void mem_cgroup_uncharge_page(struct page *page);
88extern void mem_cgroup_uncharge_cache_page(struct page *page);
89
90bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, 71bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
91 struct mem_cgroup *memcg); 72 struct mem_cgroup *memcg);
92bool task_in_mem_cgroup(struct task_struct *task, 73bool task_in_mem_cgroup(struct task_struct *task,
@@ -113,12 +94,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
113 94
114extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); 95extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
115 96
116extern void
117mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
118 struct mem_cgroup **memcgp);
119extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
120 struct page *oldpage, struct page *newpage, bool migration_ok);
121
122struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, 97struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
123 struct mem_cgroup *, 98 struct mem_cgroup *,
124 struct mem_cgroup_reclaim_cookie *); 99 struct mem_cgroup_reclaim_cookie *);
@@ -133,8 +108,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
133void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); 108void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
134extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 109extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
135 struct task_struct *p); 110 struct task_struct *p);
136extern void mem_cgroup_replace_page_cache(struct page *oldpage,
137 struct page *newpage);
138 111
139static inline void mem_cgroup_oom_enable(void) 112static inline void mem_cgroup_oom_enable(void)
140{ 113{
@@ -233,46 +206,36 @@ void mem_cgroup_print_bad_page(struct page *page);
233#else /* CONFIG_MEMCG */ 206#else /* CONFIG_MEMCG */
234struct mem_cgroup; 207struct mem_cgroup;
235 208
236static inline int mem_cgroup_charge_anon(struct page *page, 209static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
237 struct mm_struct *mm, gfp_t gfp_mask) 210 gfp_t gfp_mask,
238{ 211 struct mem_cgroup **memcgp)
239 return 0;
240}
241
242static inline int mem_cgroup_charge_file(struct page *page,
243 struct mm_struct *mm, gfp_t gfp_mask)
244{
245 return 0;
246}
247
248static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
249 struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
250{ 212{
213 *memcgp = NULL;
251 return 0; 214 return 0;
252} 215}
253 216
254static inline void mem_cgroup_commit_charge_swapin(struct page *page, 217static inline void mem_cgroup_commit_charge(struct page *page,
255 struct mem_cgroup *memcg) 218 struct mem_cgroup *memcg,
256{ 219 bool lrucare)
257}
258
259static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
260{ 220{
261} 221}
262 222
263static inline void mem_cgroup_uncharge_start(void) 223static inline void mem_cgroup_cancel_charge(struct page *page,
224 struct mem_cgroup *memcg)
264{ 225{
265} 226}
266 227
267static inline void mem_cgroup_uncharge_end(void) 228static inline void mem_cgroup_uncharge(struct page *page)
268{ 229{
269} 230}
270 231
271static inline void mem_cgroup_uncharge_page(struct page *page) 232static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
272{ 233{
273} 234}
274 235
275static inline void mem_cgroup_uncharge_cache_page(struct page *page) 236static inline void mem_cgroup_migrate(struct page *oldpage,
237 struct page *newpage,
238 bool lrucare)
276{ 239{
277} 240}
278 241
@@ -311,17 +274,6 @@ static inline struct cgroup_subsys_state
311 return NULL; 274 return NULL;
312} 275}
313 276
314static inline void
315mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
316 struct mem_cgroup **memcgp)
317{
318}
319
320static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
321 struct page *oldpage, struct page *newpage, bool migration_ok)
322{
323}
324
325static inline struct mem_cgroup * 277static inline struct mem_cgroup *
326mem_cgroup_iter(struct mem_cgroup *root, 278mem_cgroup_iter(struct mem_cgroup *root,
327 struct mem_cgroup *prev, 279 struct mem_cgroup *prev,
@@ -417,10 +369,6 @@ static inline
417void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) 369void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
418{ 370{
419} 371}
420static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
421 struct page *newpage)
422{
423}
424#endif /* CONFIG_MEMCG */ 372#endif /* CONFIG_MEMCG */
425 373
426#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) 374#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
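The replacement charge API above is a three-step transaction: reserve with mem_cgroup_try_charge(), make the page visible, then mem_cgroup_commit_charge(); a failure path taken before the commit calls mem_cgroup_cancel_charge() instead. A rough sketch of the calling convention (the kernel/events/uprobes.c hunk at the end of this patch is a real call site):

	struct mem_cgroup *memcg;
	int err;

	err = mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg);
	if (err)
		return err;			/* nothing to undo yet */

	/* ... install the page: rmap, page tables ... */
	mem_cgroup_commit_charge(page, memcg, false);	/* false: not on LRU yet */
	lru_cache_add_active_or_unevictable(page, vma);

	/* on an error path reached before the commit: */
	mem_cgroup_cancel_charge(page, memcg);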
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e03dd29145a0..8981cc882ed2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; }
2014#endif /* CONFIG_HIBERNATION */ 2014#endif /* CONFIG_HIBERNATION */
2015#endif 2015#endif
2016 2016
2017#ifdef __HAVE_ARCH_GATE_AREA
2017extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); 2018extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
2018#ifdef __HAVE_ARCH_GATE_AREA 2019extern int in_gate_area_no_mm(unsigned long addr);
2019int in_gate_area_no_mm(unsigned long addr); 2020extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
2020int in_gate_area(struct mm_struct *mm, unsigned long addr);
2021#else 2021#else
2022int in_gate_area_no_mm(unsigned long addr); 2022static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
2023#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) 2023{
2024 return NULL;
2025}
2026static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
2027static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
2028{
2029 return 0;
2030}
2024#endif /* __HAVE_ARCH_GATE_AREA */ 2031#endif /* __HAVE_ARCH_GATE_AREA */
2025 2032
2026#ifdef CONFIG_SYSCTL 2033#ifdef CONFIG_SYSCTL
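With the stub definitions above, code inspecting the gate area no longer needs its own #ifdef __HAVE_ARCH_GATE_AREA: on architectures without one, get_gate_vma() simply returns NULL and the in_gate_area*() helpers return 0. A sketch (walk_vma() is a hypothetical caller):

	struct vm_area_struct *gate = get_gate_vma(mm);

	if (gate)			/* NULL when there is no gate area */
		walk_vma(gate);
	if (in_gate_area(mm, addr))
		/* addr lies in the gate page */;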
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 796deac19fcf..6e0b286649f1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
461#ifdef CONFIG_CPUMASK_OFFSTACK 461#ifdef CONFIG_CPUMASK_OFFSTACK
462 mm->cpu_vm_mask_var = &mm->cpumask_allocation; 462 mm->cpu_vm_mask_var = &mm->cpumask_allocation;
463#endif 463#endif
464 cpumask_clear(mm->cpu_vm_mask_var);
464} 465}
465 466
466/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ 467/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..5c831f1eca79 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,17 +3,15 @@
3 3
4enum { 4enum {
5 /* flags for mem_cgroup */ 5 /* flags for mem_cgroup */
6 PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ 6 PCG_USED = 0x01, /* This page is charged to a memcg */
7 PCG_USED, /* this object is in use. */ 7 PCG_MEM = 0x02, /* This page holds a memory charge */
8 PCG_MIGRATION, /* under page migration */ 8 PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
9 __NR_PCG_FLAGS,
10}; 9};
11 10
12#ifndef __GENERATING_BOUNDS_H 11struct pglist_data;
13#include <generated/bounds.h>
14 12
15#ifdef CONFIG_MEMCG 13#ifdef CONFIG_MEMCG
16#include <linux/bit_spinlock.h> 14struct mem_cgroup;
17 15
18/* 16/*
19 * Page Cgroup can be considered as an extended mem_map. 17 * Page Cgroup can be considered as an extended mem_map.
@@ -27,65 +25,30 @@ struct page_cgroup {
27 struct mem_cgroup *mem_cgroup; 25 struct mem_cgroup *mem_cgroup;
28}; 26};
29 27
30void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); 28extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
31 29
32#ifdef CONFIG_SPARSEMEM 30#ifdef CONFIG_SPARSEMEM
33static inline void __init page_cgroup_init_flatmem(void) 31static inline void page_cgroup_init_flatmem(void)
34{ 32{
35} 33}
36extern void __init page_cgroup_init(void); 34extern void page_cgroup_init(void);
37#else 35#else
38void __init page_cgroup_init_flatmem(void); 36extern void page_cgroup_init_flatmem(void);
39static inline void __init page_cgroup_init(void) 37static inline void page_cgroup_init(void)
40{ 38{
41} 39}
42#endif 40#endif
43 41
44struct page_cgroup *lookup_page_cgroup(struct page *page); 42struct page_cgroup *lookup_page_cgroup(struct page *page);
45struct page *lookup_cgroup_page(struct page_cgroup *pc);
46
47#define TESTPCGFLAG(uname, lname) \
48static inline int PageCgroup##uname(struct page_cgroup *pc) \
49 { return test_bit(PCG_##lname, &pc->flags); }
50
51#define SETPCGFLAG(uname, lname) \
52static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
53 { set_bit(PCG_##lname, &pc->flags); }
54
55#define CLEARPCGFLAG(uname, lname) \
56static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
57 { clear_bit(PCG_##lname, &pc->flags); }
58
59#define TESTCLEARPCGFLAG(uname, lname) \
60static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
61 { return test_and_clear_bit(PCG_##lname, &pc->flags); }
62
63TESTPCGFLAG(Used, USED)
64CLEARPCGFLAG(Used, USED)
65SETPCGFLAG(Used, USED)
66
67SETPCGFLAG(Migration, MIGRATION)
68CLEARPCGFLAG(Migration, MIGRATION)
69TESTPCGFLAG(Migration, MIGRATION)
70 43
71static inline void lock_page_cgroup(struct page_cgroup *pc) 44static inline int PageCgroupUsed(struct page_cgroup *pc)
72{ 45{
73 /* 46 return !!(pc->flags & PCG_USED);
74 * Don't take this lock in IRQ context.
75 * This lock is for pc->mem_cgroup, USED, MIGRATION
76 */
77 bit_spin_lock(PCG_LOCK, &pc->flags);
78} 47}
79 48#else /* !CONFIG_MEMCG */
80static inline void unlock_page_cgroup(struct page_cgroup *pc)
81{
82 bit_spin_unlock(PCG_LOCK, &pc->flags);
83}
84
85#else /* CONFIG_MEMCG */
86struct page_cgroup; 49struct page_cgroup;
87 50
88static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) 51static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
89{ 52{
90} 53}
91 54
@@ -98,10 +61,9 @@ static inline void page_cgroup_init(void)
98{ 61{
99} 62}
100 63
101static inline void __init page_cgroup_init_flatmem(void) 64static inline void page_cgroup_init_flatmem(void)
102{ 65{
103} 66}
104
105#endif /* CONFIG_MEMCG */ 67#endif /* CONFIG_MEMCG */
106 68
107#include <linux/swap.h> 69#include <linux/swap.h>
@@ -140,6 +102,4 @@ static inline void swap_cgroup_swapoff(int type)
140 102
141#endif /* CONFIG_MEMCG_SWAP */ 103#endif /* CONFIG_MEMCG_SWAP */
142 104
143#endif /* !__GENERATING_BOUNDS_H */
144
145#endif /* __LINUX_PAGE_CGROUP_H */ 105#endif /* __LINUX_PAGE_CGROUP_H */
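The PCG flags change meaning here: they were bit numbers fed to {set,clear,test}_bit() and a bit_spinlock, and are now plain mask values updated under whatever synchronization the charge path provides. Testing and setting them reduces to ordinary integer operations, e.g.:

	pc->flags = PCG_USED | PCG_MEM;	/* charged, holds a memory charge */

	if (pc->flags & PCG_MEMSW)
		/* also holds a memory+swap charge */;

	if (PageCgroupUsed(pc))		/* same as pc->flags & PCG_USED */
		/* page is charged to a memcg */;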
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 5059994fe297..9fc2f213e74f 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *);
384 384
385#ifdef CONFIG_RAPIDIO_DMA_ENGINE 385#ifdef CONFIG_RAPIDIO_DMA_ENGINE
386extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); 386extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
387extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
387extern void rio_release_dma(struct dma_chan *dchan); 388extern void rio_release_dma(struct dma_chan *dchan);
388extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( 389extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
389 struct rio_dev *rdev, struct dma_chan *dchan, 390 struct rio_dev *rdev, struct dma_chan *dchan,
390 struct rio_dma_data *data, 391 struct rio_dma_data *data,
391 enum dma_transfer_direction direction, unsigned long flags); 392 enum dma_transfer_direction direction, unsigned long flags);
393extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
394 struct dma_chan *dchan, u16 destid,
395 struct rio_dma_data *data,
396 enum dma_transfer_direction direction, unsigned long flags);
392#endif 397#endif
393 398
394/** 399/**
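rio_dma_prep_xfer() mirrors rio_dma_prep_slave_sg() but addresses the target by destination ID instead of a struct rio_dev, pairing with the new rio_request_mport_dma() channel request. A hedged call sketch built only from the declarations above; mport, destid and dma_data stand in for caller state:

	struct dma_chan *dchan = rio_request_mport_dma(mport);
	struct dma_async_tx_descriptor *tx;

	if (dchan) {
		tx = rio_dma_prep_xfer(dchan, destid, &dma_data,
				       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
		if (!tx)
			rio_release_dma(dchan);
	}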
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index f4ec8bbcb372..ed8f9e70df9b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
136static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, 136static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
137 struct scatterlist *sgl) 137 struct scatterlist *sgl)
138{ 138{
139#ifndef ARCH_HAS_SG_CHAIN 139#ifndef CONFIG_ARCH_HAS_SG_CHAIN
140 BUG(); 140 BUG();
141#endif 141#endif
142 142
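The guard rename means chaining support is now declared per architecture in Kconfig rather than via an ad-hoc macro; on an architecture that does not select the symbol, sg_chain() still BUG()s at runtime. Usage itself is unchanged:

	struct scatterlist a[4], b[4];

	sg_init_table(a, 4);
	sg_init_table(b, 4);
	sg_chain(a, 4, b);	/* last entry of a becomes a link to b */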
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c19d552dc3f..db2f6474e95e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {
33 33
34#include <linux/smp.h> 34#include <linux/smp.h>
35#include <linux/sem.h> 35#include <linux/sem.h>
36#include <linux/shm.h>
36#include <linux/signal.h> 37#include <linux/signal.h>
37#include <linux/compiler.h> 38#include <linux/compiler.h>
38#include <linux/completion.h> 39#include <linux/completion.h>
@@ -1385,6 +1386,7 @@ struct task_struct {
1385#ifdef CONFIG_SYSVIPC 1386#ifdef CONFIG_SYSVIPC
1386/* ipc stuff */ 1387/* ipc stuff */
1387 struct sysv_sem sysvsem; 1388 struct sysv_sem sysvsem;
1389 struct sysv_shm sysvshm;
1388#endif 1390#endif
1389#ifdef CONFIG_DETECT_HUNG_TASK 1391#ifdef CONFIG_DETECT_HUNG_TASK
1390/* hung task detection */ 1392/* hung task detection */
@@ -1628,12 +1630,6 @@ struct task_struct {
1628 unsigned long trace_recursion; 1630 unsigned long trace_recursion;
1629#endif /* CONFIG_TRACING */ 1631#endif /* CONFIG_TRACING */
1630#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ 1632#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
1631 struct memcg_batch_info {
1632 int do_batch; /* incremented when batch uncharge started */
1633 struct mem_cgroup *memcg; /* target memcg of uncharge */
1634 unsigned long nr_pages; /* uncharged usage */
1635 unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
1636 } memcg_batch;
1637 unsigned int memcg_kmem_skip_account; 1633 unsigned int memcg_kmem_skip_account;
1638 struct memcg_oom_info { 1634 struct memcg_oom_info {
1639 struct mem_cgroup *memcg; 1635 struct mem_cgroup *memcg;
@@ -2967,15 +2963,10 @@ static inline void inc_syscw(struct task_struct *tsk)
2967 2963
2968#ifdef CONFIG_MEMCG 2964#ifdef CONFIG_MEMCG
2969extern void mm_update_next_owner(struct mm_struct *mm); 2965extern void mm_update_next_owner(struct mm_struct *mm);
2970extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
2971#else 2966#else
2972static inline void mm_update_next_owner(struct mm_struct *mm) 2967static inline void mm_update_next_owner(struct mm_struct *mm)
2973{ 2968{
2974} 2969}
2975
2976static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
2977{
2978}
2979#endif /* CONFIG_MEMCG */ 2970#endif /* CONFIG_MEMCG */
2980 2971
2981static inline unsigned long task_rlimit(const struct task_struct *tsk, 2972static inline unsigned long task_rlimit(const struct task_struct *tsk,
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2..6fb801686ad6 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_SHM_H_ 1#ifndef _LINUX_SHM_H_
2#define _LINUX_SHM_H_ 2#define _LINUX_SHM_H_
3 3
4#include <linux/list.h>
4#include <asm/page.h> 5#include <asm/page.h>
5#include <uapi/linux/shm.h> 6#include <uapi/linux/shm.h>
6#include <asm/shmparam.h> 7#include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
20 21
21 /* The task created the shm object. NULL if the task is dead. */ 22 /* The task created the shm object. NULL if the task is dead. */
22 struct task_struct *shm_creator; 23 struct task_struct *shm_creator;
24 struct list_head shm_clist; /* list by creator */
23}; 25};
24 26
25/* shm_mode upper byte flags */ 27/* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
44#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) 46#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
45 47
46#ifdef CONFIG_SYSVIPC 48#ifdef CONFIG_SYSVIPC
49struct sysv_shm {
50 struct list_head shm_clist;
51};
52
47long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, 53long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
48 unsigned long shmlba); 54 unsigned long shmlba);
49extern int is_file_shm_hugepages(struct file *file); 55int is_file_shm_hugepages(struct file *file);
50extern void exit_shm(struct task_struct *task); 56void exit_shm(struct task_struct *task);
57#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
51#else 58#else
59struct sysv_shm {
60 /* empty */
61};
62
52static inline long do_shmat(int shmid, char __user *shmaddr, 63static inline long do_shmat(int shmid, char __user *shmaddr,
53 int shmflg, unsigned long *addr, 64 int shmflg, unsigned long *addr,
54 unsigned long shmlba) 65 unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
62static inline void exit_shm(struct task_struct *task) 73static inline void exit_shm(struct task_struct *task)
63{ 74{
64} 75}
76static inline void shm_init_task(struct task_struct *task)
77{
78}
65#endif 79#endif
66 80
67#endif /* _LINUX_SHM_H_ */ 81#endif /* _LINUX_SHM_H_ */
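struct sysv_shm embeds the per-task list head that ipc/shm.c (further down in this patch) threads through every segment a task creates. The lifecycle, pieced together from the hunks in this patch:

	shm_init_task(tsk);	/* at fork: empty creator list */
	/* newseg():   list_add(&shp->shm_clist, &current->sysvshm.shm_clist) */
	/* shm_rmid(): list_del(&s->shm_clist) */
	/* exit_shm(): walk the list, then list_del() the head itself */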
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f..50777b5b1e4c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -1,6 +1,7 @@
1#ifndef __SHMEM_FS_H 1#ifndef __SHMEM_FS_H
2#define __SHMEM_FS_H 2#define __SHMEM_FS_H
3 3
4#include <linux/file.h>
4#include <linux/swap.h> 5#include <linux/swap.h>
5#include <linux/mempolicy.h> 6#include <linux/mempolicy.h>
6#include <linux/pagemap.h> 7#include <linux/pagemap.h>
@@ -11,6 +12,7 @@
11 12
12struct shmem_inode_info { 13struct shmem_inode_info {
13 spinlock_t lock; 14 spinlock_t lock;
15 unsigned int seals; /* shmem seals */
14 unsigned long flags; 16 unsigned long flags;
15 unsigned long alloced; /* data pages alloced to file */ 17 unsigned long alloced; /* data pages alloced to file */
16 union { 18 union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
65 mapping_gfp_mask(mapping)); 67 mapping_gfp_mask(mapping));
66} 68}
67 69
70#ifdef CONFIG_TMPFS
71
72extern int shmem_add_seals(struct file *file, unsigned int seals);
73extern int shmem_get_seals(struct file *file);
74extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
75
76#else
77
78static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
79{
80 return -EINVAL;
81}
82
83#endif
84
68#endif 85#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1eb64043c076..1b72060f093a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,6 +320,9 @@ extern void swap_setup(void);
320 320
321extern void add_page_to_unevictable_list(struct page *page); 321extern void add_page_to_unevictable_list(struct page *page);
322 322
323extern void lru_cache_add_active_or_unevictable(struct page *page,
324 struct vm_area_struct *vma);
325
323/* linux/mm/vmscan.c */ 326/* linux/mm/vmscan.c */
324extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 327extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
325 gfp_t gfp_mask, nodemask_t *mask); 328 gfp_t gfp_mask, nodemask_t *mask);
@@ -378,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
378} 381}
379#endif 382#endif
380#ifdef CONFIG_MEMCG_SWAP 383#ifdef CONFIG_MEMCG_SWAP
381extern void mem_cgroup_uncharge_swap(swp_entry_t ent); 384extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
385extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
382#else 386#else
383static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) 387static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
388{
389}
390static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
384{ 391{
385} 392}
386#endif 393#endif
@@ -440,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
440extern int swap_duplicate(swp_entry_t); 447extern int swap_duplicate(swp_entry_t);
441extern int swapcache_prepare(swp_entry_t); 448extern int swapcache_prepare(swp_entry_t);
442extern void swap_free(swp_entry_t); 449extern void swap_free(swp_entry_t);
443extern void swapcache_free(swp_entry_t, struct page *page); 450extern void swapcache_free(swp_entry_t);
444extern int free_swap_and_cache(swp_entry_t); 451extern int free_swap_and_cache(swp_entry_t);
445extern int swap_type_of(dev_t, sector_t, struct block_device **); 452extern int swap_type_of(dev_t, sector_t, struct block_device **);
446extern unsigned int count_swap_pages(int, int); 453extern unsigned int count_swap_pages(int, int);
@@ -504,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
504{ 511{
505} 512}
506 513
507static inline void swapcache_free(swp_entry_t swp, struct page *page) 514static inline void swapcache_free(swp_entry_t swp)
508{ 515{
509} 516}
510 517
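swapcache_free() losing its page argument reflects the reworked uncharge path: the memory charge moves from the page to the swap entry via the new mem_cgroup_swapout() before the swap cache slot is dropped, so freeing the slot no longer needs the page. The implied ordering, roughly:

	mem_cgroup_swapout(page, entry);  /* transfer charge to the swap entry */
	/* ... remove the page from the swap cache ... */
	swapcache_free(entry);		  /* the entry alone is enough now */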
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899..0f86d85a9ce4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void);
317asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, 317asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
318 struct kexec_segment __user *segments, 318 struct kexec_segment __user *segments,
319 unsigned long flags); 319 unsigned long flags);
320asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
321 unsigned long cmdline_len,
322 const char __user *cmdline_ptr,
323 unsigned long flags);
320 324
321asmlinkage long sys_exit(int error_code); 325asmlinkage long sys_exit(int error_code);
322asmlinkage long sys_exit_group(int error_code); 326asmlinkage long sys_exit_group(int error_code);
@@ -802,6 +806,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
802asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); 806asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
803asmlinkage long sys_eventfd(unsigned int count); 807asmlinkage long sys_eventfd(unsigned int count);
804asmlinkage long sys_eventfd2(unsigned int count, int flags); 808asmlinkage long sys_eventfd2(unsigned int count, int flags);
809asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
805asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); 810asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
806asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); 811asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
807asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, 812asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
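From userspace the new kexec entry point is reached through syscall(2); a hedged sketch follows (whether __NR_kexec_file_load is defined depends on your headers, and the KEXEC_FILE_* flags appear in the uapi/linux/kexec.h hunk below). Note that cmdline_len counts the terminating NUL.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static long load_kernel(int kernel_fd, int initrd_fd,
			const char *cmdline, unsigned long flags)
{
	return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
		       strlen(cmdline) + 1, cmdline, flags);
}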
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e..b7361f831226 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -34,8 +34,6 @@ struct ctl_table_root;
34struct ctl_table_header; 34struct ctl_table_header;
35struct ctl_dir; 35struct ctl_dir;
36 36
37typedef struct ctl_table ctl_table;
38
39typedef int proc_handler (struct ctl_table *ctl, int write, 37typedef int proc_handler (struct ctl_table *ctl, int write,
40 void __user *buffer, size_t *lenp, loff_t *ppos); 38 void __user *buffer, size_t *lenp, loff_t *ppos);
41 39
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4836ba3c1cd8..e95372654f09 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns)
57} 57}
58 58
59struct seq_operations; 59struct seq_operations;
60extern struct seq_operations proc_uid_seq_operations; 60extern const struct seq_operations proc_uid_seq_operations;
61extern struct seq_operations proc_gid_seq_operations; 61extern const struct seq_operations proc_gid_seq_operations;
62extern struct seq_operations proc_projid_seq_operations; 62extern const struct seq_operations proc_projid_seq_operations;
63extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); 63extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
64extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); 64extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
65extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); 65extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 197abb2a54c5..92dbbd3f6c75 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -83,11 +83,11 @@ struct internal_state;
83 83
84typedef struct z_stream_s { 84typedef struct z_stream_s {
85 const Byte *next_in; /* next input byte */ 85 const Byte *next_in; /* next input byte */
86 uInt avail_in; /* number of bytes available at next_in */ 86 uLong avail_in; /* number of bytes available at next_in */
87 uLong total_in; /* total nb of input bytes read so far */ 87 uLong total_in; /* total nb of input bytes read so far */
88 88
89 Byte *next_out; /* next output byte should be put there */ 89 Byte *next_out; /* next output byte should be put there */
90 uInt avail_out; /* remaining free space at next_out */ 90 uLong avail_out; /* remaining free space at next_out */
91 uLong total_out; /* total nb of bytes output so far */ 91 uLong total_out; /* total nb of bytes output so far */
92 92
93 char *msg; /* last error message, NULL if no error */ 93 char *msg; /* last error message, NULL if no error */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index e6df23cae7be..261e708010da 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
31 * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit 31 * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
32 * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. 32 * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
33 */ 33 */
34#ifdef ARCH_HAS_SG_CHAIN 34#ifdef CONFIG_ARCH_HAS_SG_CHAIN
35#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 35#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
36#else 36#else
37#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS 37#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0..beed138bd359 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -28,6 +28,21 @@
28#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) 28#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
29 29
30/* 30/*
31 * Set/Get seals
32 */
33#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
34#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
35
36/*
37 * Types of seals
38 */
39#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
40#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
41#define F_SEAL_GROW 0x0004 /* prevent file from growing */
42#define F_SEAL_WRITE 0x0008 /* prevent writes */
43/* (1U << 31) is reserved for signed error codes */
44
45/*
31 * Types of directory notifications that may be requested. 46 * Types of directory notifications that may be requested.
32 */ 47 */
33#define DN_ACCESS 0x00000001 /* File accessed */ 48#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
13#define KEXEC_PRESERVE_CONTEXT 0x00000002 13#define KEXEC_PRESERVE_CONTEXT 0x00000002
14#define KEXEC_ARCH_MASK 0xffff0000 14#define KEXEC_ARCH_MASK 0xffff0000
15 15
16/*
17 * Kexec file load interface flags.
18 * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
19 * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
20 * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
21 * fd field.
22 */
23#define KEXEC_FILE_UNLOAD 0x00000001
24#define KEXEC_FILE_ON_CRASH 0x00000002
25#define KEXEC_FILE_NO_INITRAMFS 0x00000004
26
16/* These values match the ELF architecture values. 27/* These values match the ELF architecture values.
17 * Unless there is a good reason that should continue to be the case. 28 * Unless there is a good reason that should continue to be the case.
18 */ 29 */
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..534e364bda92
--- /dev/null
+++ b/include/uapi/linux/memfd.h
@@ -0,0 +1,8 @@
1#ifndef _UAPI_LINUX_MEMFD_H
2#define _UAPI_LINUX_MEMFD_H
3
4/* flags for memfd_create(2) (unsigned int) */
5#define MFD_CLOEXEC 0x0001U
6#define MFD_ALLOW_SEALING 0x0002U
7
8#endif /* _UAPI_LINUX_MEMFD_H */
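These MFD_* flags pair with the F_ADD_SEALS/F_GET_SEALS commands added to uapi/linux/fcntl.h earlier in this patch. A hedged userspace sketch of the intended flow; the __NR_memfd_create number and the F_SEAL_* constants may not yet be in older libc headers:

#include <fcntl.h>		/* F_ADD_SEALS, F_SEAL_* (uapi, added above) */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/memfd.h>	/* MFD_CLOEXEC, MFD_ALLOW_SEALING */

static int make_sealed_buf(void)
{
	int fd = syscall(__NR_memfd_create, "example",
			 MFD_CLOEXEC | MFD_ALLOW_SEALING);

	if (fd < 0)
		return -1;
	ftruncate(fd, 4096);		/* fix the size first... */
	fcntl(fd, F_ADD_SEALS,		/* ...then forbid resizing and new seals */
	      F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
	return fd;
}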
diff --git a/init/Kconfig b/init/Kconfig
index a291b7ef4738..44f9ed3dae22 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -783,8 +783,13 @@ endchoice
783 783
784endmenu # "RCU Subsystem" 784endmenu # "RCU Subsystem"
785 785
786config BUILD_BIN2C
787 bool
788 default n
789
786config IKCONFIG 790config IKCONFIG
787 tristate "Kernel .config support" 791 tristate "Kernel .config support"
792 select BUILD_BIN2C
788 ---help--- 793 ---help---
789 This option enables the complete Linux kernel ".config" file 794 This option enables the complete Linux kernel ".config" file
790 contents to be saved in the kernel. It provides documentation 795 contents to be saved in the kernel. It provides documentation
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 82f22885c87e..b6237c31b0e2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -539,12 +539,6 @@ void __init prepare_namespace(void)
539{ 539{
540 int is_floppy; 540 int is_floppy;
541 541
542 if (root_delay) {
543 printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
544 root_delay);
545 ssleep(root_delay);
546 }
547
548 /* 542 /*
549 * wait for the known devices to complete their probing 543 * wait for the known devices to complete their probing
550 * 544 *
@@ -571,6 +565,12 @@ void __init prepare_namespace(void)
571 if (initrd_load()) 565 if (initrd_load())
572 goto out; 566 goto out;
573 567
568 if (root_delay) {
569 pr_info("Waiting %d sec before mounting root device...\n",
570 root_delay);
571 ssleep(root_delay);
572 }
573
574 /* wait for any asynchronous scanning to complete */ 574 /* wait for any asynchronous scanning to complete */
575 if ((ROOT_DEV == 0) && root_wait) { 575 if ((ROOT_DEV == 0) && root_wait) {
576 printk(KERN_INFO "Waiting for root device %s...\n", 576 printk(KERN_INFO "Waiting for root device %s...\n",
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a0..e5d059e8aa11 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -311,9 +311,9 @@ static int exit_code;
311static int decompress_error; 311static int decompress_error;
312static int crd_infd, crd_outfd; 312static int crd_infd, crd_outfd;
313 313
314static int __init compr_fill(void *buf, unsigned int len) 314static long __init compr_fill(void *buf, unsigned long len)
315{ 315{
316 int r = sys_read(crd_infd, buf, len); 316 long r = sys_read(crd_infd, buf, len);
317 if (r < 0) 317 if (r < 0)
318 printk(KERN_ERR "RAMDISK: error while reading compressed data"); 318 printk(KERN_ERR "RAMDISK: error while reading compressed data");
319 else if (r == 0) 319 else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
321 return r; 321 return r;
322} 322}
323 323
324static int __init compr_flush(void *window, unsigned int outcnt) 324static long __init compr_flush(void *window, unsigned long outcnt)
325{ 325{
326 int written = sys_write(crd_outfd, window, outcnt); 326 long written = sys_write(crd_outfd, window, outcnt);
327 if (written != outcnt) { 327 if (written != outcnt) {
328 if (decompress_error == 0) 328 if (decompress_error == 0)
329 printk(KERN_ERR 329 printk(KERN_ERR
330 "RAMDISK: incomplete write (%d != %d)\n", 330 "RAMDISK: incomplete write (%ld != %ld)\n",
331 written, outcnt); 331 written, outcnt);
332 decompress_error = 1; 332 decompress_error = 1;
333 return -1; 333 return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index a8497fab1c3d..bece48c3461e 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -19,6 +19,29 @@
19#include <linux/syscalls.h> 19#include <linux/syscalls.h>
20#include <linux/utime.h> 20#include <linux/utime.h>
21 21
22static ssize_t __init xwrite(int fd, const char *p, size_t count)
23{
24 ssize_t out = 0;
25
26 /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
27 while (count) {
28 ssize_t rv = sys_write(fd, p, count);
29
30 if (rv < 0) {
31 if (rv == -EINTR || rv == -EAGAIN)
32 continue;
33 return out ? out : rv;
34 } else if (rv == 0)
35 break;
36
37 p += rv;
38 out += rv;
39 count -= rv;
40 }
41
42 return out;
43}
44
22static __initdata char *message; 45static __initdata char *message;
23static void __init error(char *x) 46static void __init error(char *x)
24{ 47{
@@ -174,7 +197,7 @@ static __initdata enum state {
174} state, next_state; 197} state, next_state;
175 198
176static __initdata char *victim; 199static __initdata char *victim;
177static __initdata unsigned count; 200static unsigned long count __initdata;
178static __initdata loff_t this_header, next_header; 201static __initdata loff_t this_header, next_header;
179 202
180static inline void __init eat(unsigned n) 203static inline void __init eat(unsigned n)
@@ -186,7 +209,7 @@ static inline void __init eat(unsigned n)
186 209
187static __initdata char *vcollected; 210static __initdata char *vcollected;
188static __initdata char *collected; 211static __initdata char *collected;
189static __initdata int remains; 212static long remains __initdata;
190static __initdata char *collect; 213static __initdata char *collect;
191 214
192static void __init read_into(char *buf, unsigned size, enum state next) 215static void __init read_into(char *buf, unsigned size, enum state next)
@@ -213,7 +236,7 @@ static int __init do_start(void)
213 236
214static int __init do_collect(void) 237static int __init do_collect(void)
215{ 238{
216 unsigned n = remains; 239 unsigned long n = remains;
217 if (count < n) 240 if (count < n)
218 n = count; 241 n = count;
219 memcpy(collect, victim, n); 242 memcpy(collect, victim, n);
@@ -346,7 +369,8 @@ static int __init do_name(void)
346static int __init do_copy(void) 369static int __init do_copy(void)
347{ 370{
348 if (count >= body_len) { 371 if (count >= body_len) {
349 sys_write(wfd, victim, body_len); 372 if (xwrite(wfd, victim, body_len) != body_len)
373 error("write error");
350 sys_close(wfd); 374 sys_close(wfd);
351 do_utime(vcollected, mtime); 375 do_utime(vcollected, mtime);
352 kfree(vcollected); 376 kfree(vcollected);
@@ -354,7 +378,8 @@ static int __init do_copy(void)
354 state = SkipIt; 378 state = SkipIt;
355 return 0; 379 return 0;
356 } else { 380 } else {
357 sys_write(wfd, victim, count); 381 if (xwrite(wfd, victim, count) != count)
382 error("write error");
358 body_len -= count; 383 body_len -= count;
359 eat(count); 384 eat(count);
360 return 1; 385 return 1;
@@ -384,7 +409,7 @@ static __initdata int (*actions[])(void) = {
384 [Reset] = do_reset, 409 [Reset] = do_reset,
385}; 410};
386 411
387static int __init write_buffer(char *buf, unsigned len) 412static long __init write_buffer(char *buf, unsigned long len)
388{ 413{
389 count = len; 414 count = len;
390 victim = buf; 415 victim = buf;
@@ -394,11 +419,11 @@ static int __init write_buffer(char *buf, unsigned len)
394 return len - count; 419 return len - count;
395} 420}
396 421
397static int __init flush_buffer(void *bufv, unsigned len) 422static long __init flush_buffer(void *bufv, unsigned long len)
398{ 423{
399 char *buf = (char *) bufv; 424 char *buf = (char *) bufv;
400 int written; 425 long written;
401 int origLen = len; 426 long origLen = len;
402 if (message) 427 if (message)
403 return -1; 428 return -1;
404 while ((written = write_buffer(buf, len)) < len && !message) { 429 while ((written = write_buffer(buf, len)) < len && !message) {
@@ -417,13 +442,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
417 return origLen; 442 return origLen;
418} 443}
419 444
420static unsigned my_inptr; /* index of next byte to be processed in inbuf */ 445static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
421 446
422#include <linux/decompress/generic.h> 447#include <linux/decompress/generic.h>
423 448
424static char * __init unpack_to_rootfs(char *buf, unsigned len) 449static char * __init unpack_to_rootfs(char *buf, unsigned long len)
425{ 450{
426 int written, res; 451 long written;
427 decompress_fn decompress; 452 decompress_fn decompress;
428 const char *compress_name; 453 const char *compress_name;
429 static __initdata char msg_buf[64]; 454 static __initdata char msg_buf[64];
@@ -457,7 +482,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
457 decompress = decompress_method(buf, len, &compress_name); 482 decompress = decompress_method(buf, len, &compress_name);
458 pr_debug("Detected %s compressed data\n", compress_name); 483 pr_debug("Detected %s compressed data\n", compress_name);
459 if (decompress) { 484 if (decompress) {
460 res = decompress(buf, len, NULL, flush_buffer, NULL, 485 int res = decompress(buf, len, NULL, flush_buffer, NULL,
461 &my_inptr, error); 486 &my_inptr, error);
462 if (res) 487 if (res)
463 error("decompressor failed"); 488 error("decompressor failed");
@@ -603,8 +628,13 @@ static int __init populate_rootfs(void)
603 fd = sys_open("/initrd.image", 628 fd = sys_open("/initrd.image",
604 O_WRONLY|O_CREAT, 0700); 629 O_WRONLY|O_CREAT, 0700);
605 if (fd >= 0) { 630 if (fd >= 0) {
606 sys_write(fd, (char *)initrd_start, 631 ssize_t written = xwrite(fd, (char *)initrd_start,
607 initrd_end - initrd_start); 632 initrd_end - initrd_start);
633
634 if (written != initrd_end - initrd_start)
635 pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
636 written, initrd_end - initrd_start);
637
608 sys_close(fd); 638 sys_close(fd);
609 free_initrd(); 639 free_initrd();
610 } 640 }
diff --git a/init/main.c b/init/main.c
index e8ae1fef0908..bb1aed928f21 100644
--- a/init/main.c
+++ b/init/main.c
@@ -6,7 +6,7 @@
6 * GK 2/5/95 - Changed to support mounting root fs via NFS 6 * GK 2/5/95 - Changed to support mounting root fs via NFS
7 * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 7 * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
8 * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 8 * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
9 * Simplified starting of init: Michael A. Griffith <grif@acm.org> 9 * Simplified starting of init: Michael A. Griffith <grif@acm.org>
10 */ 10 */
11 11
12#define DEBUG /* Enable initcall_debug */ 12#define DEBUG /* Enable initcall_debug */
@@ -136,7 +136,7 @@ static char *ramdisk_execute_command;
136 * Used to generate warnings if static_key manipulation functions are used 136 * Used to generate warnings if static_key manipulation functions are used
137 * before jump_label_init is called. 137 * before jump_label_init is called.
138 */ 138 */
139bool static_key_initialized __read_mostly = false; 139bool static_key_initialized __read_mostly;
140EXPORT_SYMBOL_GPL(static_key_initialized); 140EXPORT_SYMBOL_GPL(static_key_initialized);
141 141
142/* 142/*
@@ -159,8 +159,8 @@ static int __init set_reset_devices(char *str)
159 159
160__setup("reset_devices", set_reset_devices); 160__setup("reset_devices", set_reset_devices);
161 161
162static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; 162static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
163const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; 163const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
164static const char *panic_later, *panic_param; 164static const char *panic_later, *panic_param;
165 165
166extern const struct obs_kernel_param __setup_start[], __setup_end[]; 166extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -199,7 +199,6 @@ static int __init obsolete_checksetup(char *line)
199 * still work even if initially too large, it will just take slightly longer 199 * still work even if initially too large, it will just take slightly longer
200 */ 200 */
201unsigned long loops_per_jiffy = (1<<12); 201unsigned long loops_per_jiffy = (1<<12);
202
203EXPORT_SYMBOL(loops_per_jiffy); 202EXPORT_SYMBOL(loops_per_jiffy);
204 203
205static int __init debug_kernel(char *str) 204static int __init debug_kernel(char *str)
@@ -376,8 +375,8 @@ static void __init setup_command_line(char *command_line)
376 initcall_command_line = 375 initcall_command_line =
377 memblock_virt_alloc(strlen(boot_command_line) + 1, 0); 376 memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
378 static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); 377 static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
379 strcpy (saved_command_line, boot_command_line); 378 strcpy(saved_command_line, boot_command_line);
380 strcpy (static_command_line, command_line); 379 strcpy(static_command_line, command_line);
381} 380}
382 381
383/* 382/*
@@ -445,8 +444,8 @@ void __init parse_early_options(char *cmdline)
445/* Arch code calls this early on, or if not, just before other parsing. */ 444/* Arch code calls this early on, or if not, just before other parsing. */
446void __init parse_early_param(void) 445void __init parse_early_param(void)
447{ 446{
448 static __initdata int done = 0; 447 static int done __initdata;
449 static __initdata char tmp_cmdline[COMMAND_LINE_SIZE]; 448 static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
450 449
451 if (done) 450 if (done)
452 return; 451 return;
@@ -500,7 +499,8 @@ static void __init mm_init(void)
500 499
501asmlinkage __visible void __init start_kernel(void) 500asmlinkage __visible void __init start_kernel(void)
502{ 501{
503 char * command_line, *after_dashes; 502 char *command_line;
503 char *after_dashes;
504 extern const struct kernel_param __start___param[], __stop___param[]; 504 extern const struct kernel_param __start___param[], __stop___param[];
505 505
506 /* 506 /*
@@ -572,7 +572,8 @@ asmlinkage __visible void __init start_kernel(void)
572 * fragile until we cpu_idle() for the first time. 572 * fragile until we cpu_idle() for the first time.
573 */ 573 */
574 preempt_disable(); 574 preempt_disable();
575 if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) 575 if (WARN(!irqs_disabled(),
576 "Interrupts were enabled *very* early, fixing it\n"))
576 local_irq_disable(); 577 local_irq_disable();
577 idr_init_cache(); 578 idr_init_cache();
578 rcu_init(); 579 rcu_init();
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb..7fc9f9f3a26b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
178 178
179static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 179static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
180{ 180{
181 list_del(&s->shm_clist);
181 ipc_rmid(&shm_ids(ns), &s->shm_perm); 182 ipc_rmid(&shm_ids(ns), &s->shm_perm);
182} 183}
183 184
@@ -268,37 +269,6 @@ static void shm_close(struct vm_area_struct *vma)
268} 269}
269 270
270/* Called with ns->shm_ids(ns).rwsem locked */ 271/* Called with ns->shm_ids(ns).rwsem locked */
271static int shm_try_destroy_current(int id, void *p, void *data)
272{
273 struct ipc_namespace *ns = data;
274 struct kern_ipc_perm *ipcp = p;
275 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
276
277 if (shp->shm_creator != current)
278 return 0;
279
280 /*
281 * Mark it as orphaned to destroy the segment when
282 * kernel.shm_rmid_forced is changed.
283 * It is noop if the following shm_may_destroy() returns true.
284 */
285 shp->shm_creator = NULL;
286
287 /*
288 * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
289 * is not set, it shouldn't be deleted here.
290 */
291 if (!ns->shm_rmid_forced)
292 return 0;
293
294 if (shm_may_destroy(ns, shp)) {
295 shm_lock_by_ptr(shp);
296 shm_destroy(ns, shp);
297 }
298 return 0;
299}
300
301/* Called with ns->shm_ids(ns).rwsem locked */
302static int shm_try_destroy_orphaned(int id, void *p, void *data) 272static int shm_try_destroy_orphaned(int id, void *p, void *data)
303{ 273{
304 struct ipc_namespace *ns = data; 274 struct ipc_namespace *ns = data;
@@ -329,18 +299,50 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
329 up_write(&shm_ids(ns).rwsem); 299 up_write(&shm_ids(ns).rwsem);
330} 300}
331 301
332 302/* Locking assumes this will only be called with task == current */
333void exit_shm(struct task_struct *task) 303void exit_shm(struct task_struct *task)
334{ 304{
335 struct ipc_namespace *ns = task->nsproxy->ipc_ns; 305 struct ipc_namespace *ns = task->nsproxy->ipc_ns;
306 struct shmid_kernel *shp, *n;
336 307
337 if (shm_ids(ns).in_use == 0) 308 if (list_empty(&task->sysvshm.shm_clist))
338 return; 309 return;
339 310
340 /* Destroy all already created segments, but not mapped yet */ 311 /*
312 * If kernel.shm_rmid_forced is not set then only keep track of
313 * which shmids are orphaned, so that a later set of the sysctl
314 * can clean them up.
315 */
316 if (!ns->shm_rmid_forced) {
317 down_read(&shm_ids(ns).rwsem);
318 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
319 shp->shm_creator = NULL;
320 /*
321 * Only under read lock but we are only called on current
322 * so no entry on the list will be shared.
323 */
324 list_del(&task->sysvshm.shm_clist);
325 up_read(&shm_ids(ns).rwsem);
326 return;
327 }
328
329 /*
330 * Destroy all already created segments, that were not yet mapped,
331 * and mark any mapped as orphan to cover the sysctl toggling.
332 * Destroy is skipped if shm_may_destroy() returns false.
333 */
341 down_write(&shm_ids(ns).rwsem); 334 down_write(&shm_ids(ns).rwsem);
342 if (shm_ids(ns).in_use) 335 list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
343 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); 336 shp->shm_creator = NULL;
337
338 if (shm_may_destroy(ns, shp)) {
339 shm_lock_by_ptr(shp);
340 shm_destroy(ns, shp);
341 }
342 }
343
344 /* Remove the list head from any segments still attached. */
345 list_del(&task->sysvshm.shm_clist);
344 up_write(&shm_ids(ns).rwsem); 346 up_write(&shm_ids(ns).rwsem);
345} 347}
346 348
@@ -561,6 +563,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
561 shp->shm_nattch = 0; 563 shp->shm_nattch = 0;
562 shp->shm_file = file; 564 shp->shm_file = file;
563 shp->shm_creator = current; 565 shp->shm_creator = current;
566 list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
564 567
565 /* 568 /*
566 * shmid gets reported as "inode#" in /proc/pid/maps. 569 * shmid gets reported as "inode#" in /proc/pid/maps.
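Net effect of the conversion above: exit_shm() now walks only the segments the exiting task created, where it previously scanned every segment in the namespace. Side by side, from the removed and added lines:

	/* before: O(segments in the namespace) on every task exit */
	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);

	/* after: O(segments this task created) */
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist)
		/* mark orphaned, destroy if shm_may_destroy() allows */;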
diff --git a/kernel/Makefile b/kernel/Makefile
index 0026cf531769..dc5c77544fd6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -105,7 +105,7 @@ targets += config_data.gz
105$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE 105$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
106 $(call if_changed,gzip) 106 $(call if_changed,gzip)
107 107
108 filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") 108 filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;")
109targets += config_data.h 109targets += config_data.h
110$(obj)/config_data.h: $(obj)/config_data.gz FORCE 110$(obj)/config_data.h: $(obj)/config_data.gz FORCE
111 $(call filechk,ikconfiggz) 111 $(call filechk,ikconfiggz)
diff --git a/kernel/acct.c b/kernel/acct.c
index a1844f14c6d6..51793520566f 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -141,12 +141,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
141 if (acct->active) { 141 if (acct->active) {
142 if (act < 0) { 142 if (act < 0) {
143 acct->active = 0; 143 acct->active = 0;
144 printk(KERN_INFO "Process accounting paused\n"); 144 pr_info("Process accounting paused\n");
145 } 145 }
146 } else { 146 } else {
147 if (act > 0) { 147 if (act > 0) {
148 acct->active = 1; 148 acct->active = 1;
149 printk(KERN_INFO "Process accounting resumed\n"); 149 pr_info("Process accounting resumed\n");
150 } 150 }
151 } 151 }
152 152
@@ -261,6 +261,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
261 261
262 if (name) { 262 if (name) {
263 struct filename *tmp = getname(name); 263 struct filename *tmp = getname(name);
264
264 if (IS_ERR(tmp)) 265 if (IS_ERR(tmp))
265 return PTR_ERR(tmp); 266 return PTR_ERR(tmp);
266 error = acct_on(tmp); 267 error = acct_on(tmp);
@@ -376,7 +377,7 @@ static comp_t encode_comp_t(unsigned long value)
376 return exp; 377 return exp;
377} 378}
378 379
379#if ACCT_VERSION==1 || ACCT_VERSION==2 380#if ACCT_VERSION == 1 || ACCT_VERSION == 2
380/* 381/*
381 * encode an u64 into a comp2_t (24 bits) 382 * encode an u64 into a comp2_t (24 bits)
382 * 383 *
@@ -389,7 +390,7 @@ static comp_t encode_comp_t(unsigned long value)
389#define MANTSIZE2 20 /* 20 bit mantissa. */ 390#define MANTSIZE2 20 /* 20 bit mantissa. */
390#define EXPSIZE2 5 /* 5 bit base 2 exponent. */ 391#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
391#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */ 392#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
392#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */ 393#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
393 394
394static comp2_t encode_comp2_t(u64 value) 395static comp2_t encode_comp2_t(u64 value)
395{ 396{
@@ -420,7 +421,7 @@ static comp2_t encode_comp2_t(u64 value)
420} 421}
421#endif 422#endif
422 423
423#if ACCT_VERSION==3 424#if ACCT_VERSION == 3
424/* 425/*
425 * encode an u64 into a 32 bit IEEE float 426 * encode an u64 into a 32 bit IEEE float
426 */ 427 */
@@ -429,8 +430,9 @@ static u32 encode_float(u64 value)
429 unsigned exp = 190; 430 unsigned exp = 190;
430 unsigned u; 431 unsigned u;
431 432
432 if (value==0) return 0; 433 if (value == 0)
433 while ((s64)value > 0){ 434 return 0;
435 while ((s64)value > 0) {
434 value <<= 1; 436 value <<= 1;
435 exp--; 437 exp--;
436 } 438 }
@@ -486,16 +488,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
486 run_time -= current->group_leader->start_time; 488 run_time -= current->group_leader->start_time;
487 /* convert nsec -> AHZ */ 489 /* convert nsec -> AHZ */
488 elapsed = nsec_to_AHZ(run_time); 490 elapsed = nsec_to_AHZ(run_time);
489#if ACCT_VERSION==3 491#if ACCT_VERSION == 3
490 ac.ac_etime = encode_float(elapsed); 492 ac.ac_etime = encode_float(elapsed);
491#else 493#else
492 ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? 494 ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
493 (unsigned long) elapsed : (unsigned long) -1l); 495 (unsigned long) elapsed : (unsigned long) -1l);
494#endif 496#endif
495#if ACCT_VERSION==1 || ACCT_VERSION==2 497#if ACCT_VERSION == 1 || ACCT_VERSION == 2
496 { 498 {
497 /* new enlarged etime field */ 499 /* new enlarged etime field */
498 comp2_t etime = encode_comp2_t(elapsed); 500 comp2_t etime = encode_comp2_t(elapsed);
501
499 ac.ac_etime_hi = etime >> 16; 502 ac.ac_etime_hi = etime >> 16;
500 ac.ac_etime_lo = (u16) etime; 503 ac.ac_etime_lo = (u16) etime;
501 } 504 }
@@ -505,15 +508,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
505 /* we really need to bite the bullet and change layout */ 508 /* we really need to bite the bullet and change layout */
506 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); 509 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
507 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); 510 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
508#if ACCT_VERSION==2 511#if ACCT_VERSION == 2
509 ac.ac_ahz = AHZ; 512 ac.ac_ahz = AHZ;
510#endif 513#endif
511#if ACCT_VERSION==1 || ACCT_VERSION==2 514#if ACCT_VERSION == 1 || ACCT_VERSION == 2
512 /* backward-compatible 16 bit fields */ 515 /* backward-compatible 16 bit fields */
513 ac.ac_uid16 = ac.ac_uid; 516 ac.ac_uid16 = ac.ac_uid;
514 ac.ac_gid16 = ac.ac_gid; 517 ac.ac_gid16 = ac.ac_gid;
515#endif 518#endif
516#if ACCT_VERSION==3 519#if ACCT_VERSION == 3
517 ac.ac_pid = task_tgid_nr_ns(current, ns); 520 ac.ac_pid = task_tgid_nr_ns(current, ns);
518 rcu_read_lock(); 521 rcu_read_lock();
519 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); 522 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
@@ -574,6 +577,7 @@ void acct_collect(long exitcode, int group_dead)
574 577
575 if (group_dead && current->mm) { 578 if (group_dead && current->mm) {
576 struct vm_area_struct *vma; 579 struct vm_area_struct *vma;
580
577 down_read(&current->mm->mmap_sem); 581 down_read(&current->mm->mmap_sem);
578 vma = current->mm->mmap; 582 vma = current->mm->mmap;
579 while (vma) { 583 while (vma) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
9#include <linux/page-flags.h> 9#include <linux/page-flags.h>
10#include <linux/mmzone.h> 10#include <linux/mmzone.h>
11#include <linux/kbuild.h> 11#include <linux/kbuild.h>
12#include <linux/page_cgroup.h>
13#include <linux/log2.h> 12#include <linux/log2.h>
14#include <linux/spinlock_types.h> 13#include <linux/spinlock_types.h>
15 14
@@ -18,7 +17,6 @@ void foo(void)
18 /* The enum constants to put into include/generated/bounds.h */ 17 /* The enum constants to put into include/generated/bounds.h */
19 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); 18 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
20 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); 19 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
21 DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
22#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
23 DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); 21 DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
24#endif 22#endif
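
For context on what bounds.c does: DEFINE() from include/linux/kbuild.h plants each constant as a marker in the compiled assembly, and the build scrapes those markers into include/generated/bounds.h, so dropping the NR_PCG_FLAGS line simply removes one generated #define. The mechanism, roughly:

/* include/linux/kbuild.h */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

/*
 * The build compiles bounds.c to assembly, greps the "->" markers, and
 * turns each one into a line of include/generated/bounds.h, e.g.
 * (value illustrative):
 *
 *	#define NR_PAGEFLAGS 25 // __NR_PAGEFLAGS
 */
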
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
167 /* For mmu_notifiers */ 167 /* For mmu_notifiers */
168 const unsigned long mmun_start = addr; 168 const unsigned long mmun_start = addr;
169 const unsigned long mmun_end = addr + PAGE_SIZE; 169 const unsigned long mmun_end = addr + PAGE_SIZE;
170 struct mem_cgroup *memcg;
171
172 err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
173 if (err)
174 return err;
170 175
171 /* For try_to_free_swap() and munlock_vma_page() below */ 176 /* For try_to_free_swap() and munlock_vma_page() below */
172 lock_page(page); 177 lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
179 184
180 get_page(kpage); 185 get_page(kpage);
181 page_add_new_anon_rmap(kpage, vma, addr); 186 page_add_new_anon_rmap(kpage, vma, addr);
187 mem_cgroup_commit_charge(kpage, memcg, false);
188 lru_cache_add_active_or_unevictable(kpage, vma);
182 189
183 if (!PageAnon(page)) { 190 if (!PageAnon(page)) {
184 dec_mm_counter(mm, MM_FILEPAGES); 191 dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
200 207
201 err = 0; 208 err = 0;
202 unlock: 209 unlock:
210 mem_cgroup_cancel_charge(kpage, memcg);
203 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 211 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
204 unlock_page(page); 212 unlock_page(page);
205 return err; 213 return err;
@@ -315,18 +323,11 @@ retry:
315 if (!new_page) 323 if (!new_page)
316 goto put_old; 324 goto put_old;
317 325
318 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
319 goto put_new;
320
321 __SetPageUptodate(new_page); 326 __SetPageUptodate(new_page);
322 copy_highpage(new_page, old_page); 327 copy_highpage(new_page, old_page);
323 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); 328 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
324 329
325 ret = __replace_page(vma, vaddr, old_page, new_page); 330 ret = __replace_page(vma, vaddr, old_page, new_page);
326 if (ret)
327 mem_cgroup_uncharge_page(new_page);
328
329put_new:
330 page_cache_release(new_page); 331 page_cache_release(new_page);
331put_old: 332put_old:
332 put_page(old_page); 333 put_page(old_page);
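
The uprobes change above is one instance of the reworked memcg charging API: try the charge before any locks are taken, commit it once the page is actually mapped, and cancel it on failure, exactly once each. A minimal kernel-context sketch of that lifecycle (map_new_page_at() is a hypothetical placeholder for the pte setup the real caller performs):

#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/swap.h>

static int charge_and_map(struct page *page, struct vm_area_struct *vma,
			  unsigned long addr)
{
	struct mem_cgroup *memcg;
	int err;

	err = mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg);
	if (err)
		return err;			/* nothing to undo yet */

	err = map_new_page_at(vma, addr, page);	/* hypothetical pte setup */
	if (err) {
		mem_cgroup_cancel_charge(page, memcg);	/* charge never committed */
		return err;
	}

	page_add_new_anon_rmap(page, vma, addr);
	mem_cgroup_commit_charge(page, memcg, false);	/* false: page not on LRU yet */
	lru_cache_add_active_or_unevictable(page, vma);
	return 0;
}
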
diff --git a/kernel/exit.c b/kernel/exit.c
index 88c6b3e42583..32c58f7433a3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,7 +59,7 @@
59#include <asm/pgtable.h> 59#include <asm/pgtable.h>
60#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
61 61
62static void exit_mm(struct task_struct * tsk); 62static void exit_mm(struct task_struct *tsk);
63 63
64static void __unhash_process(struct task_struct *p, bool group_dead) 64static void __unhash_process(struct task_struct *p, bool group_dead)
65{ 65{
@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk)
151 spin_unlock(&sighand->siglock); 151 spin_unlock(&sighand->siglock);
152 152
153 __cleanup_sighand(sighand); 153 __cleanup_sighand(sighand);
154 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 154 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
155 if (group_dead) { 155 if (group_dead) {
156 flush_sigqueue(&sig->shared_pending); 156 flush_sigqueue(&sig->shared_pending);
157 tty_kref_put(tty); 157 tty_kref_put(tty);
@@ -168,7 +168,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
168} 168}
169 169
170 170
171void release_task(struct task_struct * p) 171void release_task(struct task_struct *p)
172{ 172{
173 struct task_struct *leader; 173 struct task_struct *leader;
174 int zap_leader; 174 int zap_leader;
@@ -192,7 +192,8 @@ repeat:
192 */ 192 */
193 zap_leader = 0; 193 zap_leader = 0;
194 leader = p->group_leader; 194 leader = p->group_leader;
195 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { 195 if (leader != p && thread_group_empty(leader)
196 && leader->exit_state == EXIT_ZOMBIE) {
196 /* 197 /*
197 * If we were the last child thread and the leader has 198 * If we were the last child thread and the leader has
198 * exited already, and the leader's parent ignores SIGCHLD, 199 * exited already, and the leader's parent ignores SIGCHLD,
@@ -241,7 +242,8 @@ struct pid *session_of_pgrp(struct pid *pgrp)
241 * 242 *
242 * "I ask you, have you ever known what it is to be an orphan?" 243 * "I ask you, have you ever known what it is to be an orphan?"
243 */ 244 */
244static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) 245static int will_become_orphaned_pgrp(struct pid *pgrp,
246 struct task_struct *ignored_task)
245{ 247{
246 struct task_struct *p; 248 struct task_struct *p;
247 249
@@ -294,9 +296,9 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
294 struct task_struct *ignored_task = tsk; 296 struct task_struct *ignored_task = tsk;
295 297
296 if (!parent) 298 if (!parent)
297 /* exit: our father is in a different pgrp than 299 /* exit: our father is in a different pgrp than
298 * we are and we were the only connection outside. 300 * we are and we were the only connection outside.
299 */ 301 */
300 parent = tsk->real_parent; 302 parent = tsk->real_parent;
301 else 303 else
302 /* reparent: our child is in a different pgrp than 304 /* reparent: our child is in a different pgrp than
@@ -405,7 +407,7 @@ assign_new_owner:
405 * Turn us into a lazy TLB process if we 407 * Turn us into a lazy TLB process if we
406 * aren't already.. 408 * aren't already..
407 */ 409 */
408static void exit_mm(struct task_struct * tsk) 410static void exit_mm(struct task_struct *tsk)
409{ 411{
410 struct mm_struct *mm = tsk->mm; 412 struct mm_struct *mm = tsk->mm;
411 struct core_state *core_state; 413 struct core_state *core_state;
@@ -425,6 +427,7 @@ static void exit_mm(struct task_struct * tsk)
425 core_state = mm->core_state; 427 core_state = mm->core_state;
426 if (core_state) { 428 if (core_state) {
427 struct core_thread self; 429 struct core_thread self;
430
428 up_read(&mm->mmap_sem); 431 up_read(&mm->mmap_sem);
429 432
430 self.task = tsk; 433 self.task = tsk;
@@ -566,6 +569,7 @@ static void forget_original_parent(struct task_struct *father)
566 569
567 list_for_each_entry_safe(p, n, &father->children, sibling) { 570 list_for_each_entry_safe(p, n, &father->children, sibling) {
568 struct task_struct *t = p; 571 struct task_struct *t = p;
572
569 do { 573 do {
570 t->real_parent = reaper; 574 t->real_parent = reaper;
571 if (t->parent == father) { 575 if (t->parent == father) {
@@ -599,7 +603,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
599 /* 603 /*
600 * This does two things: 604 * This does two things:
601 * 605 *
602 * A. Make init inherit all the child processes 606 * A. Make init inherit all the child processes
603 * B. Check to see if any process groups have become orphaned 607 * B. Check to see if any process groups have become orphaned
604 * as a result of our exiting, and if they have any stopped 608 * as a result of our exiting, and if they have any stopped
605 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 609 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -649,9 +653,8 @@ static void check_stack_usage(void)
649 653
650 spin_lock(&low_water_lock); 654 spin_lock(&low_water_lock);
651 if (free < lowest_to_date) { 655 if (free < lowest_to_date) {
652 printk(KERN_WARNING "%s (%d) used greatest stack depth: " 656 pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n",
653 "%lu bytes left\n", 657 current->comm, task_pid_nr(current), free);
654 current->comm, task_pid_nr(current), free);
655 lowest_to_date = free; 658 lowest_to_date = free;
656 } 659 }
657 spin_unlock(&low_water_lock); 660 spin_unlock(&low_water_lock);
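
The conversions from printk(KERN_*) to pr_*() above are purely mechanical; pr_warn() is essentially defined in include/linux/printk.h as:

#define pr_warn(fmt, ...) \
	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
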
@@ -692,8 +695,7 @@ void do_exit(long code)
692 * leave this task alone and wait for reboot. 695 * leave this task alone and wait for reboot.
693 */ 696 */
694 if (unlikely(tsk->flags & PF_EXITING)) { 697 if (unlikely(tsk->flags & PF_EXITING)) {
695 printk(KERN_ALERT 698 pr_alert("Fixing recursive fault but reboot is needed!\n");
696 "Fixing recursive fault but reboot is needed!\n");
697 /* 699 /*
698 * We can do this unlocked here. The futex code uses 700 * We can do this unlocked here. The futex code uses
699 * this flag just to verify whether the pi state 701 * this flag just to verify whether the pi state
@@ -717,9 +719,9 @@ void do_exit(long code)
717 raw_spin_unlock_wait(&tsk->pi_lock); 719 raw_spin_unlock_wait(&tsk->pi_lock);
718 720
719 if (unlikely(in_atomic())) 721 if (unlikely(in_atomic()))
720 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 722 pr_info("note: %s[%d] exited with preempt_count %d\n",
721 current->comm, task_pid_nr(current), 723 current->comm, task_pid_nr(current),
722 preempt_count()); 724 preempt_count());
723 725
724 acct_update_integrals(tsk); 726 acct_update_integrals(tsk);
725 /* sync mm's RSS info before statistics gathering */ 727 /* sync mm's RSS info before statistics gathering */
@@ -837,7 +839,6 @@ void do_exit(long code)
837 for (;;) 839 for (;;)
838 cpu_relax(); /* For when BUG is null */ 840 cpu_relax(); /* For when BUG is null */
839} 841}
840
841EXPORT_SYMBOL_GPL(do_exit); 842EXPORT_SYMBOL_GPL(do_exit);
842 843
843void complete_and_exit(struct completion *comp, long code) 844void complete_and_exit(struct completion *comp, long code)
@@ -847,7 +848,6 @@ void complete_and_exit(struct completion *comp, long code)
847 848
848 do_exit(code); 849 do_exit(code);
849} 850}
850
851EXPORT_SYMBOL(complete_and_exit); 851EXPORT_SYMBOL(complete_and_exit);
852 852
853SYSCALL_DEFINE1(exit, int, error_code) 853SYSCALL_DEFINE1(exit, int, error_code)
@@ -870,6 +870,7 @@ do_group_exit(int exit_code)
870 exit_code = sig->group_exit_code; 870 exit_code = sig->group_exit_code;
871 else if (!thread_group_empty(current)) { 871 else if (!thread_group_empty(current)) {
872 struct sighand_struct *const sighand = current->sighand; 872 struct sighand_struct *const sighand = current->sighand;
873
873 spin_lock_irq(&sighand->siglock); 874 spin_lock_irq(&sighand->siglock);
874 if (signal_group_exit(sig)) 875 if (signal_group_exit(sig))
875 /* Another thread got here before we took the lock. */ 876 /* Another thread got here before we took the lock. */
@@ -1034,9 +1035,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1034 * as other threads in the parent group can be right 1035 * as other threads in the parent group can be right
1035 * here reaping other children at the same time. 1036 * here reaping other children at the same time.
1036 * 1037 *
1037 * We use thread_group_cputime_adjusted() to get times for the thread 1038 * We use thread_group_cputime_adjusted() to get times for
1038 * group, which consolidates times for all threads in the 1039 * the thread group, which consolidates times for all threads
1039 * group including the group leader. 1040 * in the group including the group leader.
1040 */ 1041 */
1041 thread_group_cputime_adjusted(p, &tgutime, &tgstime); 1042 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1042 spin_lock_irq(&p->real_parent->sighand->siglock); 1043 spin_lock_irq(&p->real_parent->sighand->siglock);
@@ -1418,6 +1419,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1418 1419
1419 list_for_each_entry(p, &tsk->children, sibling) { 1420 list_for_each_entry(p, &tsk->children, sibling) {
1420 int ret = wait_consider_task(wo, 0, p); 1421 int ret = wait_consider_task(wo, 0, p);
1422
1421 if (ret) 1423 if (ret)
1422 return ret; 1424 return ret;
1423 } 1425 }
@@ -1431,6 +1433,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1431 1433
1432 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1434 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1433 int ret = wait_consider_task(wo, 1, p); 1435 int ret = wait_consider_task(wo, 1, p);
1436
1434 if (ret) 1437 if (ret)
1435 return ret; 1438 return ret;
1436 } 1439 }
diff --git a/kernel/fork.c b/kernel/fork.c
index fbd3497b221f..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -374,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
374 */ 374 */
375 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); 375 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
376 376
377 mm->locked_vm = 0; 377 mm->total_vm = oldmm->total_vm;
378 mm->mmap = NULL; 378 mm->shared_vm = oldmm->shared_vm;
379 mm->vmacache_seqnum = 0; 379 mm->exec_vm = oldmm->exec_vm;
380 mm->map_count = 0; 380 mm->stack_vm = oldmm->stack_vm;
381 cpumask_clear(mm_cpumask(mm)); 381
382 mm->mm_rb = RB_ROOT;
383 rb_link = &mm->mm_rb.rb_node; 382 rb_link = &mm->mm_rb.rb_node;
384 rb_parent = NULL; 383 rb_parent = NULL;
385 pprev = &mm->mmap; 384 pprev = &mm->mmap;
@@ -430,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
430 atomic_dec(&inode->i_writecount); 429 atomic_dec(&inode->i_writecount);
431 mutex_lock(&mapping->i_mmap_mutex); 430 mutex_lock(&mapping->i_mmap_mutex);
432 if (tmp->vm_flags & VM_SHARED) 431 if (tmp->vm_flags & VM_SHARED)
433 mapping->i_mmap_writable++; 432 atomic_inc(&mapping->i_mmap_writable);
434 flush_dcache_mmap_lock(mapping); 433 flush_dcache_mmap_lock(mapping);
435 /* insert tmp into the share list, just after mpnt */ 434 /* insert tmp into the share list, just after mpnt */
436 if (unlikely(tmp->vm_flags & VM_NONLINEAR)) 435 if (unlikely(tmp->vm_flags & VM_NONLINEAR))
@@ -536,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
536#endif 535#endif
537} 536}
538 537
538static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
539{
540#ifdef CONFIG_MEMCG
541 mm->owner = p;
542#endif
543}
544
539static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) 545static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
540{ 546{
547 mm->mmap = NULL;
548 mm->mm_rb = RB_ROOT;
549 mm->vmacache_seqnum = 0;
541 atomic_set(&mm->mm_users, 1); 550 atomic_set(&mm->mm_users, 1);
542 atomic_set(&mm->mm_count, 1); 551 atomic_set(&mm->mm_count, 1);
543 init_rwsem(&mm->mmap_sem); 552 init_rwsem(&mm->mmap_sem);
544 INIT_LIST_HEAD(&mm->mmlist); 553 INIT_LIST_HEAD(&mm->mmlist);
545 mm->core_state = NULL; 554 mm->core_state = NULL;
546 atomic_long_set(&mm->nr_ptes, 0); 555 atomic_long_set(&mm->nr_ptes, 0);
556 mm->map_count = 0;
557 mm->locked_vm = 0;
558 mm->pinned_vm = 0;
547 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 559 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
548 spin_lock_init(&mm->page_table_lock); 560 spin_lock_init(&mm->page_table_lock);
561 mm_init_cpumask(mm);
549 mm_init_aio(mm); 562 mm_init_aio(mm);
550 mm_init_owner(mm, p); 563 mm_init_owner(mm, p);
564 mmu_notifier_mm_init(mm);
551 clear_tlb_flush_pending(mm); 565 clear_tlb_flush_pending(mm);
566#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
567 mm->pmd_huge_pte = NULL;
568#endif
552 569
553 if (current->mm) { 570 if (current->mm) {
554 mm->flags = current->mm->flags & MMF_INIT_MASK; 571 mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -558,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
558 mm->def_flags = 0; 575 mm->def_flags = 0;
559 } 576 }
560 577
561 if (likely(!mm_alloc_pgd(mm))) { 578 if (mm_alloc_pgd(mm))
562 mmu_notifier_mm_init(mm); 579 goto fail_nopgd;
563 return mm; 580
564 } 581 if (init_new_context(p, mm))
582 goto fail_nocontext;
583
584 return mm;
565 585
586fail_nocontext:
587 mm_free_pgd(mm);
588fail_nopgd:
566 free_mm(mm); 589 free_mm(mm);
567 return NULL; 590 return NULL;
568} 591}
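
The reworked mm_init() above is the standard kernel goto-unwind idiom: each failure label undoes only the steps that had already succeeded, in reverse order. In miniature, with hypothetical names (step_a, step_b, undo_a are placeholders):

#include <linux/slab.h>

struct foo *foo_create(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	if (step_a(f))		/* hypothetical first init step */
		goto fail_a;
	if (step_b(f))		/* hypothetical second init step */
		goto fail_b;
	return f;

fail_b:
	undo_a(f);		/* unwind only what succeeded */
fail_a:
	kfree(f);
	return NULL;
}
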
@@ -596,7 +619,6 @@ struct mm_struct *mm_alloc(void)
596 return NULL; 619 return NULL;
597 620
598 memset(mm, 0, sizeof(*mm)); 621 memset(mm, 0, sizeof(*mm));
599 mm_init_cpumask(mm);
600 return mm_init(mm, current); 622 return mm_init(mm, current);
601} 623}
602 624
@@ -828,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
828 goto fail_nomem; 850 goto fail_nomem;
829 851
830 memcpy(mm, oldmm, sizeof(*mm)); 852 memcpy(mm, oldmm, sizeof(*mm));
831 mm_init_cpumask(mm);
832 853
833#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
834 mm->pmd_huge_pte = NULL;
835#endif
836 if (!mm_init(mm, tsk)) 854 if (!mm_init(mm, tsk))
837 goto fail_nomem; 855 goto fail_nomem;
838 856
839 if (init_new_context(tsk, mm))
840 goto fail_nocontext;
841
842 dup_mm_exe_file(oldmm, mm); 857 dup_mm_exe_file(oldmm, mm);
843 858
844 err = dup_mmap(mm, oldmm); 859 err = dup_mmap(mm, oldmm);
@@ -860,15 +875,6 @@ free_pt:
860 875
861fail_nomem: 876fail_nomem:
862 return NULL; 877 return NULL;
863
864fail_nocontext:
865 /*
866 * If init_new_context() failed, we cannot use mmput() to free the mm
867 * because it calls destroy_context()
868 */
869 mm_free_pgd(mm);
870 free_mm(mm);
871 return NULL;
872} 878}
873 879
874static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) 880static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1140,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p)
1140#endif 1146#endif
1141} 1147}
1142 1148
1143#ifdef CONFIG_MEMCG
1144void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
1145{
1146 mm->owner = p;
1147}
1148#endif /* CONFIG_MEMCG */
1149
1150/* 1149/*
1151 * Initialize POSIX timer handling for a single task. 1150 * Initialize POSIX timer handling for a single task.
1152 */ 1151 */
@@ -1346,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1346#ifdef CONFIG_DEBUG_MUTEXES 1345#ifdef CONFIG_DEBUG_MUTEXES
1347 p->blocked_on = NULL; /* not blocked yet */ 1346 p->blocked_on = NULL; /* not blocked yet */
1348#endif 1347#endif
1349#ifdef CONFIG_MEMCG
1350 p->memcg_batch.do_batch = 0;
1351 p->memcg_batch.memcg = NULL;
1352#endif
1353#ifdef CONFIG_BCACHE 1348#ifdef CONFIG_BCACHE
1354 p->sequential_io = 0; 1349 p->sequential_io = 0;
1355 p->sequential_io_avg = 0; 1350 p->sequential_io_avg = 0;
@@ -1367,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1367 if (retval) 1362 if (retval)
1368 goto bad_fork_cleanup_policy; 1363 goto bad_fork_cleanup_policy;
1369 /* copy all the process information */ 1364 /* copy all the process information */
1365 shm_init_task(p);
1370 retval = copy_semundo(clone_flags, p); 1366 retval = copy_semundo(clone_flags, p);
1371 if (retval) 1367 if (retval)
1372 goto bad_fork_cleanup_audit; 1368 goto bad_fork_cleanup_audit;
@@ -1918,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1918 */ 1914 */
1919 exit_sem(current); 1915 exit_sem(current);
1920 } 1916 }
1917 if (unshare_flags & CLONE_NEWIPC) {
1918 /* Orphan segments in old ns (see sem above). */
1919 exit_shm(current);
1920 shm_init_task(current);
1921 }
1921 1922
1922 if (new_nsproxy) 1923 if (new_nsproxy)
1923 switch_task_namespaces(current, new_nsproxy); 1924 switch_task_namespaces(current, new_nsproxy);
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 15ff01a76379..edf67c493a8e 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -784,8 +784,7 @@ static __init int gcov_fs_init(void)
784 784
785err_remove: 785err_remove:
786 pr_err("init failed\n"); 786 pr_err("init failed\n");
787 if (root_node.dentry) 787 debugfs_remove(root_node.dentry);
788 debugfs_remove(root_node.dentry);
789 788
790 return rc; 789 return rc;
791} 790}
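
The NULL check could be dropped because debugfs_remove() already tolerates bad dentries; the function in fs/debugfs/inode.c opens with (paraphrased):

void debugfs_remove(struct dentry *dentry)
{
	if (IS_ERR_OR_NULL(dentry))	/* NULL and ERR_PTR cookies are no-ops */
		return;
	/* ... actual removal ... */
}
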
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cb0cf37dac3a..ae5167087845 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -364,7 +364,7 @@ static int __sprint_symbol(char *buffer, unsigned long address,
364 address += symbol_offset; 364 address += symbol_offset;
365 name = kallsyms_lookup(address, &size, &offset, &modname, buffer); 365 name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
366 if (!name) 366 if (!name)
367 return sprintf(buffer, "0x%lx", address); 367 return sprintf(buffer, "0x%lx", address - symbol_offset);
368 368
369 if (name != buffer) 369 if (name != buffer)
370 strcpy(buffer, name); 370 strcpy(buffer, name);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4b8f0c925884..0b49a0a58102 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9#define pr_fmt(fmt) "kexec: " fmt
10
9#include <linux/capability.h> 11#include <linux/capability.h>
10#include <linux/mm.h> 12#include <linux/mm.h>
11#include <linux/file.h> 13#include <linux/file.h>
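
With the pr_fmt() definition in place before any includes, every pr_*() call in this file picks up the prefix at preprocessing time. For example:

pr_err("Could not allocate control_code_buffer\n");
/* expands, essentially, to: */
printk(KERN_ERR "kexec: " "Could not allocate control_code_buffer\n");
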
@@ -40,6 +42,9 @@
40#include <asm/io.h> 42#include <asm/io.h>
41#include <asm/sections.h> 43#include <asm/sections.h>
42 44
45#include <crypto/hash.h>
46#include <crypto/sha.h>
47
43/* Per cpu memory for storing cpu states in case of system crash. */ 48/* Per cpu memory for storing cpu states in case of system crash. */
44note_buf_t __percpu *crash_notes; 49note_buf_t __percpu *crash_notes;
45 50
@@ -52,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
52/* Flag to indicate we are going to kexec a new kernel */ 57/* Flag to indicate we are going to kexec a new kernel */
53bool kexec_in_progress = false; 58bool kexec_in_progress = false;
54 59
60/*
 61 * Declare these symbols weak so that if the architecture provides a purgatory,
62 * these will be overridden.
63 */
64char __weak kexec_purgatory[0];
65size_t __weak kexec_purgatory_size = 0;
66
67static int kexec_calculate_store_digests(struct kimage *image);
68
55/* Location of the reserved area for the crash kernel */ 69/* Location of the reserved area for the crash kernel */
56struct resource crashk_res = { 70struct resource crashk_res = {
57 .name = "Crash kernel", 71 .name = "Crash kernel",
@@ -125,45 +139,27 @@ static struct page *kimage_alloc_page(struct kimage *image,
125 gfp_t gfp_mask, 139 gfp_t gfp_mask,
126 unsigned long dest); 140 unsigned long dest);
127 141
128static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, 142static int copy_user_segment_list(struct kimage *image,
129 unsigned long nr_segments, 143 unsigned long nr_segments,
130 struct kexec_segment __user *segments) 144 struct kexec_segment __user *segments)
131{ 145{
146 int ret;
132 size_t segment_bytes; 147 size_t segment_bytes;
133 struct kimage *image;
134 unsigned long i;
135 int result;
136
137 /* Allocate a controlling structure */
138 result = -ENOMEM;
139 image = kzalloc(sizeof(*image), GFP_KERNEL);
140 if (!image)
141 goto out;
142
143 image->head = 0;
144 image->entry = &image->head;
145 image->last_entry = &image->head;
146 image->control_page = ~0; /* By default this does not apply */
147 image->start = entry;
148 image->type = KEXEC_TYPE_DEFAULT;
149
150 /* Initialize the list of control pages */
151 INIT_LIST_HEAD(&image->control_pages);
152
153 /* Initialize the list of destination pages */
154 INIT_LIST_HEAD(&image->dest_pages);
155
156 /* Initialize the list of unusable pages */
157 INIT_LIST_HEAD(&image->unuseable_pages);
158 148
159 /* Read in the segments */ 149 /* Read in the segments */
160 image->nr_segments = nr_segments; 150 image->nr_segments = nr_segments;
161 segment_bytes = nr_segments * sizeof(*segments); 151 segment_bytes = nr_segments * sizeof(*segments);
162 result = copy_from_user(image->segment, segments, segment_bytes); 152 ret = copy_from_user(image->segment, segments, segment_bytes);
163 if (result) { 153 if (ret)
164 result = -EFAULT; 154 ret = -EFAULT;
165 goto out; 155
166 } 156 return ret;
157}
158
159static int sanity_check_segment_list(struct kimage *image)
160{
161 int result, i;
162 unsigned long nr_segments = image->nr_segments;
167 163
168 /* 164 /*
169 * Verify we have good destination addresses. The caller is 165 * Verify we have good destination addresses. The caller is
@@ -185,9 +181,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
185 mstart = image->segment[i].mem; 181 mstart = image->segment[i].mem;
186 mend = mstart + image->segment[i].memsz; 182 mend = mstart + image->segment[i].memsz;
187 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 183 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
188 goto out; 184 return result;
189 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 185 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
190 goto out; 186 return result;
191 } 187 }
192 188
193 /* Verify our destination addresses do not overlap. 189 /* Verify our destination addresses do not overlap.
@@ -208,7 +204,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
208 pend = pstart + image->segment[j].memsz; 204 pend = pstart + image->segment[j].memsz;
209 /* Do the segments overlap ? */ 205 /* Do the segments overlap ? */
210 if ((mend > pstart) && (mstart < pend)) 206 if ((mend > pstart) && (mstart < pend))
211 goto out; 207 return result;
212 } 208 }
213 } 209 }
214 210
@@ -220,130 +216,401 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
220 result = -EINVAL; 216 result = -EINVAL;
221 for (i = 0; i < nr_segments; i++) { 217 for (i = 0; i < nr_segments; i++) {
222 if (image->segment[i].bufsz > image->segment[i].memsz) 218 if (image->segment[i].bufsz > image->segment[i].memsz)
223 goto out; 219 return result;
224 } 220 }
225 221
226 result = 0; 222 /*
227out: 223 * Verify we have good destination addresses. Normally
228 if (result == 0) 224 * the caller is responsible for making certain we don't
229 *rimage = image; 225 * attempt to load the new image into invalid or reserved
230 else 226 * areas of RAM. But crash kernels are preloaded into a
231 kfree(image); 227 * reserved area of ram. We must ensure the addresses
228 * are in the reserved area otherwise preloading the
229 * kernel could corrupt things.
230 */
232 231
233 return result; 232 if (image->type == KEXEC_TYPE_CRASH) {
233 result = -EADDRNOTAVAIL;
234 for (i = 0; i < nr_segments; i++) {
235 unsigned long mstart, mend;
236
237 mstart = image->segment[i].mem;
238 mend = mstart + image->segment[i].memsz - 1;
239 /* Ensure we are within the crash kernel limits */
240 if ((mstart < crashk_res.start) ||
241 (mend > crashk_res.end))
242 return result;
243 }
244 }
234 245
246 return 0;
247}
248
249static struct kimage *do_kimage_alloc_init(void)
250{
251 struct kimage *image;
252
253 /* Allocate a controlling structure */
254 image = kzalloc(sizeof(*image), GFP_KERNEL);
255 if (!image)
256 return NULL;
257
258 image->head = 0;
259 image->entry = &image->head;
260 image->last_entry = &image->head;
261 image->control_page = ~0; /* By default this does not apply */
262 image->type = KEXEC_TYPE_DEFAULT;
263
264 /* Initialize the list of control pages */
265 INIT_LIST_HEAD(&image->control_pages);
266
267 /* Initialize the list of destination pages */
268 INIT_LIST_HEAD(&image->dest_pages);
269
270 /* Initialize the list of unusable pages */
271 INIT_LIST_HEAD(&image->unusable_pages);
272
273 return image;
235} 274}
236 275
237static void kimage_free_page_list(struct list_head *list); 276static void kimage_free_page_list(struct list_head *list);
238 277
239static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, 278static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
240 unsigned long nr_segments, 279 unsigned long nr_segments,
241 struct kexec_segment __user *segments) 280 struct kexec_segment __user *segments,
281 unsigned long flags)
242{ 282{
243 int result; 283 int ret;
244 struct kimage *image; 284 struct kimage *image;
285 bool kexec_on_panic = flags & KEXEC_ON_CRASH;
286
287 if (kexec_on_panic) {
288 /* Verify we have a valid entry point */
289 if ((entry < crashk_res.start) || (entry > crashk_res.end))
290 return -EADDRNOTAVAIL;
291 }
245 292
246 /* Allocate and initialize a controlling structure */ 293 /* Allocate and initialize a controlling structure */
247 image = NULL; 294 image = do_kimage_alloc_init();
248 result = do_kimage_alloc(&image, entry, nr_segments, segments); 295 if (!image)
249 if (result) 296 return -ENOMEM;
250 goto out; 297
298 image->start = entry;
299
300 ret = copy_user_segment_list(image, nr_segments, segments);
301 if (ret)
302 goto out_free_image;
303
304 ret = sanity_check_segment_list(image);
305 if (ret)
306 goto out_free_image;
307
308 /* Enable the special crash kernel control page allocation policy. */
309 if (kexec_on_panic) {
310 image->control_page = crashk_res.start;
311 image->type = KEXEC_TYPE_CRASH;
312 }
251 313
252 /* 314 /*
253 * Find a location for the control code buffer, and add it to     315 * Find a location for the control code buffer, and add it to
254 * the vector of segments so that its pages will also be          316 * the vector of segments so that its pages will also be
255 * counted as destination pages. 317 * counted as destination pages.
256 */ 318 */
257 result = -ENOMEM; 319 ret = -ENOMEM;
258 image->control_code_page = kimage_alloc_control_pages(image, 320 image->control_code_page = kimage_alloc_control_pages(image,
259 get_order(KEXEC_CONTROL_PAGE_SIZE)); 321 get_order(KEXEC_CONTROL_PAGE_SIZE));
260 if (!image->control_code_page) { 322 if (!image->control_code_page) {
261 pr_err("Could not allocate control_code_buffer\n"); 323 pr_err("Could not allocate control_code_buffer\n");
262 goto out_free; 324 goto out_free_image;
263 } 325 }
264 326
265 image->swap_page = kimage_alloc_control_pages(image, 0); 327 if (!kexec_on_panic) {
266 if (!image->swap_page) { 328 image->swap_page = kimage_alloc_control_pages(image, 0);
267 pr_err("Could not allocate swap buffer\n"); 329 if (!image->swap_page) {
268 goto out_free; 330 pr_err("Could not allocate swap buffer\n");
331 goto out_free_control_pages;
332 }
269 } 333 }
270 334
271 *rimage = image; 335 *rimage = image;
272 return 0; 336 return 0;
273 337out_free_control_pages:
274out_free:
275 kimage_free_page_list(&image->control_pages); 338 kimage_free_page_list(&image->control_pages);
339out_free_image:
276 kfree(image); 340 kfree(image);
277out: 341 return ret;
278 return result;
279} 342}
280 343
281static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, 344static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
282 unsigned long nr_segments,
283 struct kexec_segment __user *segments)
284{ 345{
285 int result; 346 struct fd f = fdget(fd);
286 struct kimage *image; 347 int ret;
287 unsigned long i; 348 struct kstat stat;
349 loff_t pos;
350 ssize_t bytes = 0;
288 351
289 image = NULL; 352 if (!f.file)
290 /* Verify we have a valid entry point */ 353 return -EBADF;
291 if ((entry < crashk_res.start) || (entry > crashk_res.end)) { 354
292 result = -EADDRNOTAVAIL; 355 ret = vfs_getattr(&f.file->f_path, &stat);
356 if (ret)
357 goto out;
358
359 if (stat.size > INT_MAX) {
360 ret = -EFBIG;
293 goto out; 361 goto out;
294 } 362 }
295 363
296 /* Allocate and initialize a controlling structure */ 364 /* Don't hand 0 to vmalloc, it whines. */
297 result = do_kimage_alloc(&image, entry, nr_segments, segments); 365 if (stat.size == 0) {
298 if (result) 366 ret = -EINVAL;
299 goto out; 367 goto out;
368 }
300 369
301 /* Enable the special crash kernel control page 370 *buf = vmalloc(stat.size);
302 * allocation policy. 371 if (!*buf) {
303 */ 372 ret = -ENOMEM;
304 image->control_page = crashk_res.start; 373 goto out;
305 image->type = KEXEC_TYPE_CRASH; 374 }
306 375
307 /* 376 pos = 0;
308 * Verify we have good destination addresses. Normally 377 while (pos < stat.size) {
309 * the caller is responsible for making certain we don't 378 bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
310 * attempt to load the new image into invalid or reserved 379 stat.size - pos);
311 * areas of RAM. But crash kernels are preloaded into a 380 if (bytes < 0) {
312 * reserved area of ram. We must ensure the addresses 381 vfree(*buf);
313 * are in the reserved area otherwise preloading the 382 ret = bytes;
314 * kernel could corrupt things. 383 goto out;
315 */ 384 }
316 result = -EADDRNOTAVAIL;
317 for (i = 0; i < nr_segments; i++) {
318 unsigned long mstart, mend;
319 385
320 mstart = image->segment[i].mem; 386 if (bytes == 0)
321 mend = mstart + image->segment[i].memsz - 1; 387 break;
322 /* Ensure we are within the crash kernel limits */ 388 pos += bytes;
323 if ((mstart < crashk_res.start) || (mend > crashk_res.end))
324 goto out_free;
325 } 389 }
326 390
391 if (pos != stat.size) {
392 ret = -EBADF;
393 vfree(*buf);
394 goto out;
395 }
396
397 *buf_len = pos;
398out:
399 fdput(f);
400 return ret;
401}
402
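
copy_file_from_fd() has to loop because kernel_read(), like read(2), may return short counts. A compilable userspace analogue of the same stat-allocate-loop pattern:

#include <fcntl.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

/* Userspace analogue of copy_file_from_fd(): size the buffer from
 * fstat(), then loop because read() may return short counts. */
static void *read_whole_file(const char *path, size_t *len)
{
	struct stat st;
	char *buf = NULL;
	size_t pos = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return NULL;
	if (fstat(fd, &st) < 0 || st.st_size == 0)
		goto fail;
	buf = malloc(st.st_size);
	if (!buf)
		goto fail;
	while (pos < (size_t)st.st_size) {
		ssize_t n = read(fd, buf + pos, st.st_size - pos);

		if (n < 0)
			goto fail;
		if (n == 0)
			break;			/* unexpected EOF */
		pos += n;
	}
	if (pos != (size_t)st.st_size)
		goto fail;
	close(fd);
	*len = pos;
	return buf;
fail:
	free(buf);				/* free(NULL) is fine */
	close(fd);
	return NULL;
}
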
403/* Architectures can provide this probe function */
404int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
405 unsigned long buf_len)
406{
407 return -ENOEXEC;
408}
409
410void * __weak arch_kexec_kernel_image_load(struct kimage *image)
411{
412 return ERR_PTR(-ENOEXEC);
413}
414
415void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
416{
417}
418
419int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
420 unsigned long buf_len)
421{
422 return -EKEYREJECTED;
423}
424
425/* Apply relocations of type RELA */
426int __weak
427arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
428 unsigned int relsec)
429{
430 pr_err("RELA relocation unsupported.\n");
431 return -ENOEXEC;
432}
433
434/* Apply relocations of type REL */
435int __weak
436arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
437 unsigned int relsec)
438{
439 pr_err("REL relocation unsupported.\n");
440 return -ENOEXEC;
441}
442
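
The __weak annotations rely on standard linker semantics: a strong definition anywhere in the link overrides the weak default. In miniature (arch_hook() is a made-up name, and the two definitions live in separate object files):

/* generic.c: weak default, used when no architecture provides the hook */
int __weak arch_hook(void)
{
	return -ENOEXEC;	/* "not supported" default */
}

/* arch/foo/hook.c: a plain (strong) definition; at link time it
 * replaces the weak default above */
int arch_hook(void)
{
	return 0;
}
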
443/*
444 * Free up memory used by the kernel, initrd, and command line. These are
445 * temporary allocations that are no longer needed once the buffers have
446 * been loaded into separate segments and copied elsewhere.
447 */
448static void kimage_file_post_load_cleanup(struct kimage *image)
449{
450 struct purgatory_info *pi = &image->purgatory_info;
451
452 vfree(image->kernel_buf);
453 image->kernel_buf = NULL;
454
455 vfree(image->initrd_buf);
456 image->initrd_buf = NULL;
457
458 kfree(image->cmdline_buf);
459 image->cmdline_buf = NULL;
460
461 vfree(pi->purgatory_buf);
462 pi->purgatory_buf = NULL;
463
464 vfree(pi->sechdrs);
465 pi->sechdrs = NULL;
466
467 /* See if architecture has anything to cleanup post load */
468 arch_kimage_file_post_load_cleanup(image);
469
327 /* 470 /*
328 * Find a location for the control code buffer, and add 471 * Above call should have called into bootloader to free up
329 * the vector of segments so that it's pages will also be 472 * any data stored in kimage->image_loader_data. It should
330 * counted as destination pages. 473 * be ok now to free it up.
331 */ 474 */
332 result = -ENOMEM; 475 kfree(image->image_loader_data);
476 image->image_loader_data = NULL;
477}
478
479/*
480 * In file mode list of segments is prepared by kernel. Copy relevant
481 * data from user space, do error checking, prepare segment list
482 */
483static int
484kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
485 const char __user *cmdline_ptr,
486 unsigned long cmdline_len, unsigned flags)
487{
488 int ret = 0;
489 void *ldata;
490
491 ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
492 &image->kernel_buf_len);
493 if (ret)
494 return ret;
495
496 /* Call arch image probe handlers */
497 ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
498 image->kernel_buf_len);
499
500 if (ret)
501 goto out;
502
503#ifdef CONFIG_KEXEC_VERIFY_SIG
504 ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
505 image->kernel_buf_len);
506 if (ret) {
507 pr_debug("kernel signature verification failed.\n");
508 goto out;
509 }
510 pr_debug("kernel signature verification successful.\n");
511#endif
512 /* It is possible that no initramfs is being loaded */
513 if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
514 ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
515 &image->initrd_buf_len);
516 if (ret)
517 goto out;
518 }
519
520 if (cmdline_len) {
521 image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
522 if (!image->cmdline_buf) {
523 ret = -ENOMEM;
524 goto out;
525 }
526
527 ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
528 cmdline_len);
529 if (ret) {
530 ret = -EFAULT;
531 goto out;
532 }
533
534 image->cmdline_buf_len = cmdline_len;
535
536 /* the command line should be a NUL-terminated string */
537 if (image->cmdline_buf[cmdline_len - 1] != '\0') {
538 ret = -EINVAL;
539 goto out;
540 }
541 }
542
543 /* Call arch image load handlers */
544 ldata = arch_kexec_kernel_image_load(image);
545
546 if (IS_ERR(ldata)) {
547 ret = PTR_ERR(ldata);
548 goto out;
549 }
550
551 image->image_loader_data = ldata;
552out:
553 /* In case of error, free up all allocated memory in this function */
554 if (ret)
555 kimage_file_post_load_cleanup(image);
556 return ret;
557}
558
559static int
560kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
561 int initrd_fd, const char __user *cmdline_ptr,
562 unsigned long cmdline_len, unsigned long flags)
563{
564 int ret;
565 struct kimage *image;
566 bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
567
568 image = do_kimage_alloc_init();
569 if (!image)
570 return -ENOMEM;
571
572 image->file_mode = 1;
573
574 if (kexec_on_panic) {
575 /* Enable special crash kernel control page alloc policy. */
576 image->control_page = crashk_res.start;
577 image->type = KEXEC_TYPE_CRASH;
578 }
579
580 ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
581 cmdline_ptr, cmdline_len, flags);
582 if (ret)
583 goto out_free_image;
584
585 ret = sanity_check_segment_list(image);
586 if (ret)
587 goto out_free_post_load_bufs;
588
589 ret = -ENOMEM;
333 image->control_code_page = kimage_alloc_control_pages(image, 590 image->control_code_page = kimage_alloc_control_pages(image,
334 get_order(KEXEC_CONTROL_PAGE_SIZE)); 591 get_order(KEXEC_CONTROL_PAGE_SIZE));
335 if (!image->control_code_page) { 592 if (!image->control_code_page) {
336 pr_err("Could not allocate control_code_buffer\n"); 593 pr_err("Could not allocate control_code_buffer\n");
337 goto out_free; 594 goto out_free_post_load_bufs;
595 }
596
597 if (!kexec_on_panic) {
598 image->swap_page = kimage_alloc_control_pages(image, 0);
599 if (!image->swap_page) {
600 pr_err("Could not allocate swap buffer\n");
601 goto out_free_control_pages;
602 }
338 } 603 }
339 604
340 *rimage = image; 605 *rimage = image;
341 return 0; 606 return 0;
342 607out_free_control_pages:
343out_free: 608 kimage_free_page_list(&image->control_pages);
609out_free_post_load_bufs:
610 kimage_file_post_load_cleanup(image);
611out_free_image:
344 kfree(image); 612 kfree(image);
345out: 613 return ret;
346 return result;
347} 614}
348 615
349static int kimage_is_destination_range(struct kimage *image, 616static int kimage_is_destination_range(struct kimage *image,
@@ -609,7 +876,7 @@ static void kimage_free_extra_pages(struct kimage *image)
609 kimage_free_page_list(&image->dest_pages); 876 kimage_free_page_list(&image->dest_pages);
610 877
611 /* Walk through and free any unusable pages I have cached */ 878 /* Walk through and free any unusable pages I have cached */
612 kimage_free_page_list(&image->unuseable_pages); 879 kimage_free_page_list(&image->unusable_pages);
613 880
614} 881}
615static void kimage_terminate(struct kimage *image) 882static void kimage_terminate(struct kimage *image)
@@ -663,6 +930,14 @@ static void kimage_free(struct kimage *image)
663 930
664 /* Free the kexec control pages... */ 931 /* Free the kexec control pages... */
665 kimage_free_page_list(&image->control_pages); 932 kimage_free_page_list(&image->control_pages);
933
934 /*
935 * Free up any temporary buffers allocated. This path might be hit
936 * if an error occurred long after buffer allocation.
937 */
938 if (image->file_mode)
939 kimage_file_post_load_cleanup(image);
940
666 kfree(image); 941 kfree(image);
667} 942}
668 943
@@ -732,7 +1007,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
732 /* If the page cannot be used file it away */ 1007 /* If the page cannot be used file it away */
733 if (page_to_pfn(page) > 1008 if (page_to_pfn(page) >
734 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 1009 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
735 list_add(&page->lru, &image->unuseable_pages); 1010 list_add(&page->lru, &image->unusable_pages);
736 continue; 1011 continue;
737 } 1012 }
738 addr = page_to_pfn(page) << PAGE_SHIFT; 1013 addr = page_to_pfn(page) << PAGE_SHIFT;
@@ -791,10 +1066,14 @@ static int kimage_load_normal_segment(struct kimage *image,
791 unsigned long maddr; 1066 unsigned long maddr;
792 size_t ubytes, mbytes; 1067 size_t ubytes, mbytes;
793 int result; 1068 int result;
794 unsigned char __user *buf; 1069 unsigned char __user *buf = NULL;
1070 unsigned char *kbuf = NULL;
795 1071
796 result = 0; 1072 result = 0;
797 buf = segment->buf; 1073 if (image->file_mode)
1074 kbuf = segment->kbuf;
1075 else
1076 buf = segment->buf;
798 ubytes = segment->bufsz; 1077 ubytes = segment->bufsz;
799 mbytes = segment->memsz; 1078 mbytes = segment->memsz;
800 maddr = segment->mem; 1079 maddr = segment->mem;
@@ -826,7 +1105,11 @@ static int kimage_load_normal_segment(struct kimage *image,
826 PAGE_SIZE - (maddr & ~PAGE_MASK)); 1105 PAGE_SIZE - (maddr & ~PAGE_MASK));
827 uchunk = min(ubytes, mchunk); 1106 uchunk = min(ubytes, mchunk);
828 1107
829 result = copy_from_user(ptr, buf, uchunk); 1108 /* For file based kexec, source pages are in kernel memory */
1109 if (image->file_mode)
1110 memcpy(ptr, kbuf, uchunk);
1111 else
1112 result = copy_from_user(ptr, buf, uchunk);
830 kunmap(page); 1113 kunmap(page);
831 if (result) { 1114 if (result) {
832 result = -EFAULT; 1115 result = -EFAULT;
@@ -834,7 +1117,10 @@ static int kimage_load_normal_segment(struct kimage *image,
834 } 1117 }
835 ubytes -= uchunk; 1118 ubytes -= uchunk;
836 maddr += mchunk; 1119 maddr += mchunk;
837 buf += mchunk; 1120 if (image->file_mode)
1121 kbuf += mchunk;
1122 else
1123 buf += mchunk;
838 mbytes -= mchunk; 1124 mbytes -= mchunk;
839 } 1125 }
840out: 1126out:
@@ -851,10 +1137,14 @@ static int kimage_load_crash_segment(struct kimage *image,
851 unsigned long maddr; 1137 unsigned long maddr;
852 size_t ubytes, mbytes; 1138 size_t ubytes, mbytes;
853 int result; 1139 int result;
854 unsigned char __user *buf; 1140 unsigned char __user *buf = NULL;
1141 unsigned char *kbuf = NULL;
855 1142
856 result = 0; 1143 result = 0;
857 buf = segment->buf; 1144 if (image->file_mode)
1145 kbuf = segment->kbuf;
1146 else
1147 buf = segment->buf;
858 ubytes = segment->bufsz; 1148 ubytes = segment->bufsz;
859 mbytes = segment->memsz; 1149 mbytes = segment->memsz;
860 maddr = segment->mem; 1150 maddr = segment->mem;
@@ -877,7 +1167,12 @@ static int kimage_load_crash_segment(struct kimage *image,
877 /* Zero the trailing part of the page */ 1167 /* Zero the trailing part of the page */
878 memset(ptr + uchunk, 0, mchunk - uchunk); 1168 memset(ptr + uchunk, 0, mchunk - uchunk);
879 } 1169 }
880 result = copy_from_user(ptr, buf, uchunk); 1170
1171 /* For file based kexec, source pages are in kernel memory */
1172 if (image->file_mode)
1173 memcpy(ptr, kbuf, uchunk);
1174 else
1175 result = copy_from_user(ptr, buf, uchunk);
881 kexec_flush_icache_page(page); 1176 kexec_flush_icache_page(page);
882 kunmap(page); 1177 kunmap(page);
883 if (result) { 1178 if (result) {
@@ -886,7 +1181,10 @@ static int kimage_load_crash_segment(struct kimage *image,
886 } 1181 }
887 ubytes -= uchunk; 1182 ubytes -= uchunk;
888 maddr += mchunk; 1183 maddr += mchunk;
889 buf += mchunk; 1184 if (image->file_mode)
1185 kbuf += mchunk;
1186 else
1187 buf += mchunk;
890 mbytes -= mchunk; 1188 mbytes -= mchunk;
891 } 1189 }
892out: 1190out:
@@ -986,16 +1284,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
986 1284
987 /* Loading another kernel to reboot into */ 1285 /* Loading another kernel to reboot into */
988 if ((flags & KEXEC_ON_CRASH) == 0) 1286 if ((flags & KEXEC_ON_CRASH) == 0)
989 result = kimage_normal_alloc(&image, entry, 1287 result = kimage_alloc_init(&image, entry, nr_segments,
990 nr_segments, segments); 1288 segments, flags);
991 /* Loading another kernel to switch to if this one crashes */ 1289 /* Loading another kernel to switch to if this one crashes */
992 else if (flags & KEXEC_ON_CRASH) { 1290 else if (flags & KEXEC_ON_CRASH) {
993 /* Free any current crash dump kernel before 1291 /* Free any current crash dump kernel before
994 * we corrupt it. 1292 * we corrupt it.
995 */ 1293 */
996 kimage_free(xchg(&kexec_crash_image, NULL)); 1294 kimage_free(xchg(&kexec_crash_image, NULL));
997 result = kimage_crash_alloc(&image, entry, 1295 result = kimage_alloc_init(&image, entry, nr_segments,
998 nr_segments, segments); 1296 segments, flags);
999 crash_map_reserved_pages(); 1297 crash_map_reserved_pages();
1000 } 1298 }
1001 if (result) 1299 if (result)
@@ -1077,6 +1375,82 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1077} 1375}
1078#endif 1376#endif
1079 1377
1378SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
1379 unsigned long, cmdline_len, const char __user *, cmdline_ptr,
1380 unsigned long, flags)
1381{
1382 int ret = 0, i;
1383 struct kimage **dest_image, *image;
1384
1385 /* We only trust the superuser with rebooting the system. */
1386 if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1387 return -EPERM;
1388
1389 /* Make sure we have a legal set of flags */
1390 if (flags != (flags & KEXEC_FILE_FLAGS))
1391 return -EINVAL;
1392
1393 image = NULL;
1394
1395 if (!mutex_trylock(&kexec_mutex))
1396 return -EBUSY;
1397
1398 dest_image = &kexec_image;
1399 if (flags & KEXEC_FILE_ON_CRASH)
1400 dest_image = &kexec_crash_image;
1401
1402 if (flags & KEXEC_FILE_UNLOAD)
1403 goto exchange;
1404
1405 /*
1406 * In case of crash, the new kernel gets loaded in a reserved region. It is
1407 * the same memory where an old crash kernel might be loaded. Free any
1408 * current crash dump kernel before we corrupt it.
1409 */
1410 if (flags & KEXEC_FILE_ON_CRASH)
1411 kimage_free(xchg(&kexec_crash_image, NULL));
1412
1413 ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
1414 cmdline_len, flags);
1415 if (ret)
1416 goto out;
1417
1418 ret = machine_kexec_prepare(image);
1419 if (ret)
1420 goto out;
1421
1422 ret = kexec_calculate_store_digests(image);
1423 if (ret)
1424 goto out;
1425
1426 for (i = 0; i < image->nr_segments; i++) {
1427 struct kexec_segment *ksegment;
1428
1429 ksegment = &image->segment[i];
1430 pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
1431 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
1432 ksegment->memsz);
1433
1434 ret = kimage_load_segment(image, &image->segment[i]);
1435 if (ret)
1436 goto out;
1437 }
1438
1439 kimage_terminate(image);
1440
1441 /*
1442 * Free up any temporary buffers allocated which are not needed
1443 * after image has been loaded
1444 */
1445 kimage_file_post_load_cleanup(image);
1446exchange:
1447 image = xchg(dest_image, image);
1448out:
1449 mutex_unlock(&kexec_mutex);
1450 kimage_free(image);
1451 return ret;
1452}
1453
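
From user space the new syscall is reached via syscall(2); glibc has no wrapper at this point. A hypothetical caller, with illustrative paths (on x86_64 the syscall number is 320, exposed as SYS_kexec_file_load by recent headers):

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	const char cmdline[] = "console=ttyS0";	/* NUL counted in the length */
	int kfd = open("/boot/vmlinuz", O_RDONLY);	/* illustrative paths */
	int ifd = open("/boot/initrd.img", O_RDONLY);

	if (kfd < 0 || ifd < 0)
		return 1;
	/* args: kernel_fd, initrd_fd, cmdline_len, cmdline_ptr, flags */
	if (syscall(SYS_kexec_file_load, kfd, ifd,
		    sizeof(cmdline), cmdline, 0UL) < 0) {
		perror("kexec_file_load");
		return 1;
	}
	return 0;
}
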
1080void crash_kexec(struct pt_regs *regs) 1454void crash_kexec(struct pt_regs *regs)
1081{ 1455{
1082 /* Take the kexec_mutex here to prevent sys_kexec_load 1456 /* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1632,6 +2006,683 @@ static int __init crash_save_vmcoreinfo_init(void)
1632 2006
1633subsys_initcall(crash_save_vmcoreinfo_init); 2007subsys_initcall(crash_save_vmcoreinfo_init);
1634 2008
2009static int __kexec_add_segment(struct kimage *image, char *buf,
2010 unsigned long bufsz, unsigned long mem,
2011 unsigned long memsz)
2012{
2013 struct kexec_segment *ksegment;
2014
2015 ksegment = &image->segment[image->nr_segments];
2016 ksegment->kbuf = buf;
2017 ksegment->bufsz = bufsz;
2018 ksegment->mem = mem;
2019 ksegment->memsz = memsz;
2020 image->nr_segments++;
2021
2022 return 0;
2023}
2024
2025static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
2026 struct kexec_buf *kbuf)
2027{
2028 struct kimage *image = kbuf->image;
2029 unsigned long temp_start, temp_end;
2030
2031 temp_end = min(end, kbuf->buf_max);
2032 temp_start = temp_end - kbuf->memsz;
2033
2034 do {
2035 /* align down start */
2036 temp_start = temp_start & (~(kbuf->buf_align - 1));
2037
2038 if (temp_start < start || temp_start < kbuf->buf_min)
2039 return 0;
2040
2041 temp_end = temp_start + kbuf->memsz - 1;
2042
2043 /*
2044 * Make sure this does not conflict with any of existing
2045 * segments
2046 */
2047 if (kimage_is_destination_range(image, temp_start, temp_end)) {
2048 temp_start = temp_start - PAGE_SIZE;
2049 continue;
2050 }
2051
2052 /* We found a suitable memory range */
2053 break;
2054 } while (1);
2055
2056 /* If we are here, we found a suitable memory range */
2057 __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2058 kbuf->memsz);
2059
2060 /* Success, stop navigating through remaining System RAM ranges */
2061 return 1;
2062}
2063
2064static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
2065 struct kexec_buf *kbuf)
2066{
2067 struct kimage *image = kbuf->image;
2068 unsigned long temp_start, temp_end;
2069
2070 temp_start = max(start, kbuf->buf_min);
2071
2072 do {
2073 temp_start = ALIGN(temp_start, kbuf->buf_align);
2074 temp_end = temp_start + kbuf->memsz - 1;
2075
2076 if (temp_end > end || temp_end > kbuf->buf_max)
2077 return 0;
2078 /*
2079 * Make sure this does not conflict with any of existing
2080 * segments
2081 */
2082 if (kimage_is_destination_range(image, temp_start, temp_end)) {
2083 temp_start = temp_start + PAGE_SIZE;
2084 continue;
2085 }
2086
2087 /* We found a suitable memory range */
2088 break;
2089 } while (1);
2090
2091 /* If we are here, we found a suitable memory range */
2092 __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2093 kbuf->memsz);
2094
2095 /* Success, stop navigating through remaining System RAM ranges */
2096 return 1;
2097}
2098
2099static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
2100{
2101 struct kexec_buf *kbuf = (struct kexec_buf *)arg;
2102 unsigned long sz = end - start + 1;
2103
2104 /* Returning 0 moves on to the next memory range */
2105 if (sz < kbuf->memsz)
2106 return 0;
2107
2108 if (end < kbuf->buf_min || start > kbuf->buf_max)
2109 return 0;
2110
2111 /*
2112 * Allocate memory top-down within the RAM range; otherwise allocate
2113 * bottom-up.
2114 */
2115 if (kbuf->top_down)
2116 return locate_mem_hole_top_down(start, end, kbuf);
2117 return locate_mem_hole_bottom_up(start, end, kbuf);
2118}
2119
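
The two search directions use opposite alignment: top-down rounds the candidate start down with a mask, bottom-up rounds it up with ALIGN(). Both assume a power-of-two alignment; a standalone check:

#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((unsigned long)(a) - 1))
#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long addr = 0x3500, align = 0x1000;	/* align: power of two */

	/* prints "down 0x3000 up 0x4000" */
	printf("down %#lx up %#lx\n",
	       ALIGN_DOWN(addr, align), ALIGN_UP(addr, align));
	return 0;
}
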
2120/*
2121 * Helper function for placing a buffer in a kexec segment. This assumes
2122 * that kexec_mutex is held.
2123 */
2124int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
2125 unsigned long memsz, unsigned long buf_align,
2126 unsigned long buf_min, unsigned long buf_max,
2127 bool top_down, unsigned long *load_addr)
2128{
2129
2130 struct kexec_segment *ksegment;
2131 struct kexec_buf buf, *kbuf;
2132 int ret;
2133
2134 /* Currently adding segment this way is allowed only in file mode */
2135 if (!image->file_mode)
2136 return -EINVAL;
2137
2138 if (image->nr_segments >= KEXEC_SEGMENT_MAX)
2139 return -EINVAL;
2140
2141 /*
2142 * Make sure we are not trying to add a buffer after allocating
2143 * control pages. All segments need to be placed before any
2144 * control pages are allocated, since the control page allocation
2145 * logic goes through the list of segments to make sure there are
2146 * no destination overlaps.
2147 */
2148 if (!list_empty(&image->control_pages)) {
2149 WARN_ON(1);
2150 return -EINVAL;
2151 }
2152
2153 memset(&buf, 0, sizeof(struct kexec_buf));
2154 kbuf = &buf;
2155 kbuf->image = image;
2156 kbuf->buffer = buffer;
2157 kbuf->bufsz = bufsz;
2158
2159 kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
2160 kbuf->buf_align = max(buf_align, PAGE_SIZE);
2161 kbuf->buf_min = buf_min;
2162 kbuf->buf_max = buf_max;
2163 kbuf->top_down = top_down;
2164
2165 /* Walk the RAM ranges and allocate a suitable range for the buffer */
2166 if (image->type == KEXEC_TYPE_CRASH)
2167 ret = walk_iomem_res("Crash kernel",
2168 IORESOURCE_MEM | IORESOURCE_BUSY,
2169 crashk_res.start, crashk_res.end, kbuf,
2170 locate_mem_hole_callback);
2171 else
2172 ret = walk_system_ram_res(0, -1, kbuf,
2173 locate_mem_hole_callback);
2174 if (ret != 1) {
2175 /* A suitable memory range could not be found for buffer */
2176 return -EADDRNOTAVAIL;
2177 }
2178
2179 /* Found a suitable memory range */
2180 ksegment = &image->segment[image->nr_segments - 1];
2181 *load_addr = ksegment->mem;
2182 return 0;
2183}
2184
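
kexec_add_buffer() is the entry point an architecture's image loader would use to place its blobs; the chosen load address comes back through the out parameter. A hedged kernel-context sketch with illustrative values:

static int place_blob(struct kimage *image, char *blob, unsigned long blobsz)
{
	unsigned long load_addr;
	int ret;

	ret = kexec_add_buffer(image, blob, blobsz,
			       blobsz,		/* memsz: no trailing zero fill */
			       16,		/* buf_align */
			       0x100000,	/* buf_min: stay above 1 MiB */
			       -1UL,		/* buf_max: no upper bound */
			       true,		/* search top-down */
			       &load_addr);
	if (ret)
		return ret;
	pr_debug("blob placed at 0x%lx\n", load_addr);
	return 0;
}
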
2185/* Calculate and store the digest of segments */
2186static int kexec_calculate_store_digests(struct kimage *image)
2187{
2188 struct crypto_shash *tfm;
2189 struct shash_desc *desc;
2190 int ret = 0, i, j, zero_buf_sz, sha_region_sz;
2191 size_t desc_size, nullsz;
2192 char *digest;
2193 void *zero_buf;
2194 struct kexec_sha_region *sha_regions;
2195 struct purgatory_info *pi = &image->purgatory_info;
2196
2197 zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
2198 zero_buf_sz = PAGE_SIZE;
2199
2200 tfm = crypto_alloc_shash("sha256", 0, 0);
2201 if (IS_ERR(tfm)) {
2202 ret = PTR_ERR(tfm);
2203 goto out;
2204 }
2205
2206 desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
2207 desc = kzalloc(desc_size, GFP_KERNEL);
2208 if (!desc) {
2209 ret = -ENOMEM;
2210 goto out_free_tfm;
2211 }
2212
2213 sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
2214 sha_regions = vzalloc(sha_region_sz);
2215 if (!sha_regions)
2216 goto out_free_desc;
2217
2218 desc->tfm = tfm;
2219 desc->flags = 0;
2220
2221 ret = crypto_shash_init(desc);
2222 if (ret < 0)
2223 goto out_free_sha_regions;
2224
2225 digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
2226 if (!digest) {
2227 ret = -ENOMEM;
2228 goto out_free_sha_regions;
2229 }
2230
2231 for (j = i = 0; i < image->nr_segments; i++) {
2232 struct kexec_segment *ksegment;
2233
2234 ksegment = &image->segment[i];
2235 /*
2236 * Skip purgatory as it will be modified once we put digest
2237 * info in purgatory.
2238 */
2239 if (ksegment->kbuf == pi->purgatory_buf)
2240 continue;
2241
2242 ret = crypto_shash_update(desc, ksegment->kbuf,
2243 ksegment->bufsz);
2244 if (ret)
2245 break;
2246
2247 /*
2248 * Assume rest of the buffer is filled with zero and
2249 * update digest accordingly.
2250 */
2251 nullsz = ksegment->memsz - ksegment->bufsz;
2252 while (nullsz) {
2253 unsigned long bytes = nullsz;
2254
2255 if (bytes > zero_buf_sz)
2256 bytes = zero_buf_sz;
2257 ret = crypto_shash_update(desc, zero_buf, bytes);
2258 if (ret)
2259 break;
2260 nullsz -= bytes;
2261 }
2262
2263 if (ret)
2264 break;
2265
2266 sha_regions[j].start = ksegment->mem;
2267 sha_regions[j].len = ksegment->memsz;
2268 j++;
2269 }
2270
2271 if (!ret) {
2272 ret = crypto_shash_final(desc, digest);
2273 if (ret)
2274 goto out_free_digest;
2275 ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
2276 sha_regions, sha_region_sz, 0);
2277 if (ret)
2278 goto out_free_digest;
2279
2280 ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
2281 digest, SHA256_DIGEST_SIZE, 0);
2282 if (ret)
2283 goto out_free_digest;
2284 }
2285
2286out_free_digest:
2287 kfree(digest);
2288out_free_sha_regions:
2289 vfree(sha_regions);
2290out_free_desc:
2291 kfree(desc);
2292out_free_tfm:
2293 crypto_free_shash(tfm);
2294out:
2295 return ret;
2296}
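
The zero-fill handling above generalizes to any digest API: hash the
backed bytes, then feed a shared zero page until memsz is covered. A
minimal sketch, with update() standing in for crypto_shash_update():

/*
 * Illustration: digest a region of memsz bytes of which only bufsz
 * are backed by data; the remainder is defined to be zero.
 */
static int digest_padded(const void *buf, size_t bufsz, size_t memsz,
			 const void *zero_page, size_t zero_sz,
			 int (*update)(const void *, size_t))
{
	size_t nullsz = memsz - bufsz;
	int ret;

	ret = update(buf, bufsz);
	while (!ret && nullsz) {
		size_t bytes = min(nullsz, zero_sz);

		/* reuse the shared zero page instead of allocating */
		ret = update(zero_page, bytes);
		nullsz -= bytes;
	}
	return ret;
}
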
2297
2298/* Actually load purgatory. A lot of this code is taken from kexec-tools */
2299static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
2300 unsigned long max, int top_down)
2301{
2302 struct purgatory_info *pi = &image->purgatory_info;
2303 unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
2304 unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
2305 unsigned char *buf_addr, *src;
2306 int i, ret = 0, entry_sidx = -1;
2307 const Elf_Shdr *sechdrs_c;
2308 Elf_Shdr *sechdrs = NULL;
2309 void *purgatory_buf = NULL;
2310
2311 /*
2312 * sechdrs_c points to the section headers in purgatory and is
2313 * read-only. No modifications allowed.
2314 */
2315 sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
2316
2317 /*
2318 * We cannot modify sechdrs_c[] and its fields; they are read-only.
2319 * Copy it over to a local copy where one can store some temporary
2320 * data and free it at the end. We need to modify ->sh_addr and
2321 * ->sh_offset fields to keep track of permanent and temporary
2322 * locations of sections.
2323 */
2324 sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2325 if (!sechdrs)
2326 return -ENOMEM;
2327
2328 memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2329
2330 /*
2331 * We end up with multiple copies of the sections. The first copy
2332 * is the one embedded in the kernel's read-only section. Some of
2333 * these sections are copied to a temporary buffer and relocated,
2334 * and finally copied to their destination at segment load time.
2335 *
2336 * Use ->sh_offset to track a section's current address in memory.
2337 * It points to the original read-only copy if the section is not
2338 * allocatable, otherwise to the temporary copy that will be
2339 * relocated.
2340 *
2341 * Use ->sh_addr to hold the final address of the section, i.e.
2342 * where it will live at execution time.
2343 */
2344 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2345 if (sechdrs[i].sh_type == SHT_NOBITS)
2346 continue;
2347
2348 sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
2349 sechdrs[i].sh_offset;
2350 }
2351
2352 /*
2353 * Identify entry point section and make entry relative to section
2354 * start.
2355 */
2356 entry = pi->ehdr->e_entry;
2357 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2358 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2359 continue;
2360
2361 if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
2362 continue;
2363
2364 /* Make entry section relative */
2365 if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
2366 ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
2367 pi->ehdr->e_entry)) {
2368 entry_sidx = i;
2369 entry -= sechdrs[i].sh_addr;
2370 break;
2371 }
2372 }
2373
2374 /* Determine how much memory is needed to load the relocatable object. */
2375 buf_align = 1;
2376 bss_align = 1;
2377 buf_sz = 0;
2378 bss_sz = 0;
2379
2380 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2381 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2382 continue;
2383
2384 align = sechdrs[i].sh_addralign;
2385 if (sechdrs[i].sh_type != SHT_NOBITS) {
2386 if (buf_align < align)
2387 buf_align = align;
2388 buf_sz = ALIGN(buf_sz, align);
2389 buf_sz += sechdrs[i].sh_size;
2390 } else {
2391 /* bss section */
2392 if (bss_align < align)
2393 bss_align = align;
2394 bss_sz = ALIGN(bss_sz, align);
2395 bss_sz += sechdrs[i].sh_size;
2396 }
2397 }
2398
2399 /* Determine the bss padding required to align bss properly */
2400 bss_pad = 0;
2401 if (buf_sz & (bss_align - 1))
2402 bss_pad = bss_align - (buf_sz & (bss_align - 1));
2403
2404 memsz = buf_sz + bss_pad + bss_sz;
2405
2406 /* Allocate buffer for purgatory */
2407 purgatory_buf = vzalloc(buf_sz);
2408 if (!purgatory_buf) {
2409 ret = -ENOMEM;
2410 goto out;
2411 }
2412
2413 if (buf_align < bss_align)
2414 buf_align = bss_align;
2415
2416 /* Add buffer to segment list */
2417 ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
2418 buf_align, min, max, top_down,
2419 &pi->purgatory_load_addr);
2420 if (ret)
2421 goto out;
2422
2423 /* Load SHF_ALLOC sections */
2424 buf_addr = purgatory_buf;
2425 load_addr = curr_load_addr = pi->purgatory_load_addr;
2426 bss_addr = load_addr + buf_sz + bss_pad;
2427
2428 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2429 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2430 continue;
2431
2432 align = sechdrs[i].sh_addralign;
2433 if (sechdrs[i].sh_type != SHT_NOBITS) {
2434 curr_load_addr = ALIGN(curr_load_addr, align);
2435 offset = curr_load_addr - load_addr;
2436 /* We already modified ->sh_offset to keep the src addr */
2437 src = (char *) sechdrs[i].sh_offset;
2438 memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
2439
2440 /* Store load address and source address of section */
2441 sechdrs[i].sh_addr = curr_load_addr;
2442
2443 /*
2444 * This section got copied to temporary buffer. Update
2445 * ->sh_offset accordingly.
2446 */
2447 sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
2448
2449 /* Advance to the next address */
2450 curr_load_addr += sechdrs[i].sh_size;
2451 } else {
2452 bss_addr = ALIGN(bss_addr, align);
2453 sechdrs[i].sh_addr = bss_addr;
2454 bss_addr += sechdrs[i].sh_size;
2455 }
2456 }
2457
2458 /* Update entry point based on load address of text section */
2459 if (entry_sidx >= 0)
2460 entry += sechdrs[entry_sidx].sh_addr;
2461
2462 /* Make kernel jump to purgatory after shutdown */
2463 image->start = entry;
2464
2465 /* Used later to get/set symbol values */
2466 pi->sechdrs = sechdrs;
2467
2468 /*
2469 * Used later to identify which segment holds purgatory and
2470 * exclude it from checksumming.
2471 */
2472 pi->purgatory_buf = purgatory_buf;
2473 return ret;
2474out:
2475 vfree(sechdrs);
2476 vfree(purgatory_buf);
2477 return ret;
2478}
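
A worked example of the sizing pass above, with invented section sizes
and alignments:

/*
 * .text size 0x1800, align 16 -> buf_sz = 0x1800
 * .data size 0x0210, align 32 -> buf_sz = ALIGN(0x1800, 32) + 0x210
 *                                       = 0x1a10
 * .bss  size 0x0400, align 64 -> bss_sz = 0x400, bss_align = 64
 * bss_pad = 64 - (0x1a10 & 63) = 64 - 16 = 0x30
 * memsz   = 0x1a10 + 0x30 + 0x400 = 0x1e40
 */
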
2479
2480static int kexec_apply_relocations(struct kimage *image)
2481{
2482 int i, ret;
2483 struct purgatory_info *pi = &image->purgatory_info;
2484 Elf_Shdr *sechdrs = pi->sechdrs;
2485
2486 /* Apply relocations */
2487 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2488 Elf_Shdr *section, *symtab;
2489
2490 if (sechdrs[i].sh_type != SHT_RELA &&
2491 sechdrs[i].sh_type != SHT_REL)
2492 continue;
2493
2494 /*
2495 * For a section of type SHT_RELA/SHT_REL, ->sh_link contains
2496 * the section header index of the associated symbol table, and
2497 * ->sh_info contains the section header index of the section
2498 * to which the relocations apply.
2499 */
2500 if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
2501 sechdrs[i].sh_link >= pi->ehdr->e_shnum)
2502 return -ENOEXEC;
2503
2504 section = &sechdrs[sechdrs[i].sh_info];
2505 symtab = &sechdrs[sechdrs[i].sh_link];
2506
2507 if (!(section->sh_flags & SHF_ALLOC))
2508 continue;
2509
2510 /*
2511 * symtab->sh_link contains the section header index of the
2512 * associated string table.
2513 */
2514 if (symtab->sh_link >= pi->ehdr->e_shnum)
2515 /* Invalid section number? */
2516 continue;
2517
2518 /*
2519 * The respective architecture needs to provide support for
2520 * applying relocations of type SHT_RELA/SHT_REL.
2521 */
2522 if (sechdrs[i].sh_type == SHT_RELA)
2523 ret = arch_kexec_apply_relocations_add(pi->ehdr,
2524 sechdrs, i);
2525 else if (sechdrs[i].sh_type == SHT_REL)
2526 ret = arch_kexec_apply_relocations(pi->ehdr,
2527 sechdrs, i);
2528 if (ret)
2529 return ret;
2530 }
2531
2532 return 0;
2533}
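
The arch hooks invoked above are the extension point. A sketch of a
fallback for an architecture without relocation support, assuming the
(ehdr, sechdrs, relsec) signature implied by the calls:

int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr,
					Elf_Shdr *sechdrs,
					unsigned int relsec)
{
	/* nothing applied; refuse to load rather than run broken code */
	pr_err("REL relocation unsupported.\n");
	return -ENOEXEC;
}
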
2534
2535/* Load relocatable purgatory object and relocate it appropriately */
2536int kexec_load_purgatory(struct kimage *image, unsigned long min,
2537 unsigned long max, int top_down,
2538 unsigned long *load_addr)
2539{
2540 struct purgatory_info *pi = &image->purgatory_info;
2541 int ret;
2542
2543 if (kexec_purgatory_size <= 0)
2544 return -EINVAL;
2545
2546 if (kexec_purgatory_size < sizeof(Elf_Ehdr))
2547 return -ENOEXEC;
2548
2549 pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
2550
2551 if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
2552 || pi->ehdr->e_type != ET_REL
2553 || !elf_check_arch(pi->ehdr)
2554 || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
2555 return -ENOEXEC;
2556
2557 if (pi->ehdr->e_shoff >= kexec_purgatory_size
2558 || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
2559 kexec_purgatory_size - pi->ehdr->e_shoff))
2560 return -ENOEXEC;
2561
2562 ret = __kexec_load_purgatory(image, min, max, top_down);
2563 if (ret)
2564 return ret;
2565
2566 ret = kexec_apply_relocations(image);
2567 if (ret)
2568 goto out;
2569
2570 *load_addr = pi->purgatory_load_addr;
2571 return 0;
2572out:
2573 vfree(pi->sechdrs);
2574 vfree(pi->purgatory_buf);
2575 return ret;
2576}
2577
2578static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
2579 const char *name)
2580{
2581 Elf_Sym *syms;
2582 Elf_Shdr *sechdrs;
2583 Elf_Ehdr *ehdr;
2584 int i, k;
2585 const char *strtab;
2586
2587 if (!pi->sechdrs || !pi->ehdr)
2588 return NULL;
2589
2590 sechdrs = pi->sechdrs;
2591 ehdr = pi->ehdr;
2592
2593 for (i = 0; i < ehdr->e_shnum; i++) {
2594 if (sechdrs[i].sh_type != SHT_SYMTAB)
2595 continue;
2596
2597 if (sechdrs[i].sh_link >= ehdr->e_shnum)
2598 /* Invalid strtab section number */
2599 continue;
2600 strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
2601 syms = (Elf_Sym *)sechdrs[i].sh_offset;
2602
2603 /* Go through symbols for a match */
2604 for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
2605 if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
2606 continue;
2607
2608 if (strcmp(strtab + syms[k].st_name, name) != 0)
2609 continue;
2610
2611 if (syms[k].st_shndx == SHN_UNDEF ||
2612 syms[k].st_shndx >= ehdr->e_shnum) {
2613 pr_debug("Symbol: %s has bad section index %d.\n",
2614 name, syms[k].st_shndx);
2615 return NULL;
2616 }
2617
2618 /* Found the symbol we are looking for */
2619 return &syms[k];
2620 }
2621 }
2622
2623 return NULL;
2624}
2625
2626void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
2627{
2628 struct purgatory_info *pi = &image->purgatory_info;
2629 Elf_Sym *sym;
2630 Elf_Shdr *sechdr;
2631
2632 sym = kexec_purgatory_find_symbol(pi, name);
2633 if (!sym)
2634 return ERR_PTR(-EINVAL);
2635
2636 sechdr = &pi->sechdrs[sym->st_shndx];
2637
2638 /*
2639 * Returns the address where the symbol will finally be loaded
2640 * after kexec_load_segment().
2641 */
2642 return (void *)(sechdr->sh_addr + sym->st_value);
2643}
2644
2645/*
2646 * Get or set the value of a symbol. If "get_value" is true, the symbol
2647 * value is returned in buf; otherwise the symbol value is set from buf.
2648 */
2649int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
2650 void *buf, unsigned int size, bool get_value)
2651{
2652 Elf_Sym *sym;
2653 Elf_Shdr *sechdrs;
2654 struct purgatory_info *pi = &image->purgatory_info;
2655 char *sym_buf;
2656
2657 sym = kexec_purgatory_find_symbol(pi, name);
2658 if (!sym)
2659 return -EINVAL;
2660
2661 if (sym->st_size != size) {
2662 pr_err("symbol %s size mismatch: expected %lu actual %u\n",
2663 name, (unsigned long)sym->st_size, size);
2664 return -EINVAL;
2665 }
2666
2667 sechdrs = pi->sechdrs;
2668
2669 if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
2670 pr_err("symbol %s is in a bss section. Cannot %s\n", name,
2671 get_value ? "get" : "set");
2672 return -EINVAL;
2673 }
2674
2675 sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
2676 sym->st_value;
2677
2678 if (get_value)
2679 memcpy((void *)buf, sym_buf, size);
2680 else
2681 memcpy((void *)sym_buf, buf, size);
2682
2683 return 0;
2684}
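
Typical usage mirrors the sha_regions and sha256_digest calls earlier
in this file. A sketch with a hypothetical purgatory global named
"backup_src":

static int example_set_backup_src(struct kimage *image, unsigned long src)
{
	/* get_value == 0: write &src into the symbol's storage */
	return kexec_purgatory_get_set_symbol(image, "backup_src",
					      &src, sizeof(src), 0);
}
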
2685
1635/* 2686/*
1636 * Move into place and start executing a preloaded standalone 2687 * Move into place and start executing a preloaded standalone
1637 * executable. If nothing was preloaded return an error. 2688 * executable. If nothing was preloaded return an error.
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
224 { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, 224 { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
225 { TAINT_OOT_MODULE, 'O', ' ' }, 225 { TAINT_OOT_MODULE, 'O', ' ' },
226 { TAINT_UNSIGNED_MODULE, 'E', ' ' }, 226 { TAINT_UNSIGNED_MODULE, 'E', ' ' },
227 { TAINT_SOFTLOCKUP, 'L', ' ' },
227}; 228};
228 229
229/** 230/**
diff --git a/kernel/resource.c b/kernel/resource.c
index 3c2237ac32db..da14b8d09296 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
59static struct resource *bootmem_resource_free; 59static struct resource *bootmem_resource_free;
60static DEFINE_SPINLOCK(bootmem_resource_lock); 60static DEFINE_SPINLOCK(bootmem_resource_lock);
61 61
62static void *r_next(struct seq_file *m, void *v, loff_t *pos) 62static struct resource *next_resource(struct resource *p, bool sibling_only)
63{ 63{
64 struct resource *p = v; 64 /* Caller wants to traverse through siblings only */
65 (*pos)++; 65 if (sibling_only)
66 return p->sibling;
67
66 if (p->child) 68 if (p->child)
67 return p->child; 69 return p->child;
68 while (!p->sibling && p->parent) 70 while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
70 return p->sibling; 72 return p->sibling;
71} 73}
72 74
75static void *r_next(struct seq_file *m, void *v, loff_t *pos)
76{
77 struct resource *p = v;
78 (*pos)++;
79 return (void *)next_resource(p, false);
80}
81
73#ifdef CONFIG_PROC_FS 82#ifdef CONFIG_PROC_FS
74 83
75enum { MAX_IORES_LEVEL = 5 }; 84enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
322 331
323EXPORT_SYMBOL(release_resource); 332EXPORT_SYMBOL(release_resource);
324 333
325#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
326/* 334/*
327 * Finds the lowest memory resource that exists within [res->start, res->end); 335 * Finds the lowest iomem resource that exists within [res->start, res->end);
328 * the caller must specify res->start, res->end, res->flags and "name". 336 * the caller must specify res->start, res->end, res->flags and "name".
329 * If found, returns 0 and res is overwritten; if not found, returns -1. 337 * If found, returns 0 and res is overwritten; if not found, returns -1.
338 * This walks through the whole tree, not just the first level of
339 * children, unless first_level_children_only is true.
330 */ 340 */
331static int find_next_system_ram(struct resource *res, char *name) 341static int find_next_iomem_res(struct resource *res, char *name,
342 bool first_level_children_only)
332{ 343{
333 resource_size_t start, end; 344 resource_size_t start, end;
334 struct resource *p; 345 struct resource *p;
346 bool sibling_only = false;
335 347
336 BUG_ON(!res); 348 BUG_ON(!res);
337 349
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
340 BUG_ON(start >= end); 352 BUG_ON(start >= end);
341 353
342 read_lock(&resource_lock); 354 read_lock(&resource_lock);
343 for (p = iomem_resource.child; p ; p = p->sibling) { 355
344 /* system ram is just marked as IORESOURCE_MEM */ 356 if (first_level_children_only) {
357 p = iomem_resource.child;
358 sibling_only = true;
359 } else
360 p = &iomem_resource;
361
362 while ((p = next_resource(p, sibling_only))) {
345 if (p->flags != res->flags) 363 if (p->flags != res->flags)
346 continue; 364 continue;
347 if (name && strcmp(p->name, name)) 365 if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
353 if ((p->end >= start) && (p->start < end)) 371 if ((p->end >= start) && (p->start < end))
354 break; 372 break;
355 } 373 }
374
356 read_unlock(&resource_lock); 375 read_unlock(&resource_lock);
357 if (!p) 376 if (!p)
358 return -1; 377 return -1;
@@ -365,6 +384,70 @@ static int find_next_system_ram(struct resource *res, char *name)
365} 384}
366 385
367/* 386/*
387 * Walks through iomem resources and calls func() with matching resource
388 * ranges. This walks through the whole tree, not just the first level
389 * of children. All the memory ranges that overlap [start, end] and
390 * also match the flags and name are valid candidates.
391 *
392 * @name: name of resource
393 * @flags: resource flags
394 * @start: start addr
395 * @end: end addr
396 */
397int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
398 void *arg, int (*func)(u64, u64, void *))
399{
400 struct resource res;
401 u64 orig_end;
402 int ret = -1;
403
404 res.start = start;
405 res.end = end;
406 res.flags = flags;
407 orig_end = res.end;
408 while ((res.start < res.end) &&
409 (!find_next_iomem_res(&res, name, false))) {
410 ret = (*func)(res.start, res.end, arg);
411 if (ret)
412 break;
413 res.start = res.end + 1;
414 res.end = orig_end;
415 }
416 return ret;
417}
418
419/*
420 * This function calls the callback against all "System RAM" ranges,
421 * i.e. ranges marked as both IORESOURCE_MEM and IORESOURCE_BUSY.
422 * For now this function is only for "System RAM". It deals with
423 * full ranges and not pfns: if resources are not pfn aligned,
424 * dealing with pfns can truncate ranges.
425 */
426int walk_system_ram_res(u64 start, u64 end, void *arg,
427 int (*func)(u64, u64, void *))
428{
429 struct resource res;
430 u64 orig_end;
431 int ret = -1;
432
433 res.start = start;
434 res.end = end;
435 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
436 orig_end = res.end;
437 while ((res.start < res.end) &&
438 (!find_next_iomem_res(&res, "System RAM", true))) {
439 ret = (*func)(res.start, res.end, arg);
440 if (ret)
441 break;
442 res.start = res.end + 1;
443 res.end = orig_end;
444 }
445 return ret;
446}
447
448#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
449
450/*
368 * This function calls callback against all memory range of "System RAM" 451 * This function calls callback against all memory range of "System RAM"
369 * which are marked as IORESOURCE_MEM and IORESOURCE_BUSY. 452 * which are marked as IORESOURCE_MEM and IORESOURCE_BUSY.
370 * Now, this function is only for "System RAM". 453 * Now, this function is only for "System RAM".
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
382 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; 465 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
383 orig_end = res.end; 466 orig_end = res.end;
384 while ((res.start < res.end) && 467 while ((res.start < res.end) &&
385 (find_next_system_ram(&res, "System RAM") >= 0)) { 468 (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
386 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; 469 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
387 end_pfn = (res.end + 1) >> PAGE_SHIFT; 470 end_pfn = (res.end + 1) >> PAGE_SHIFT;
388 if (end_pfn > pfn) 471 if (end_pfn > pfn)
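
A minimal sketch of a walk_system_ram_res() caller, using the
func(start, end, arg) callback contract shown above; note that
resource end addresses are inclusive:

static int count_ram_callback(u64 start, u64 end, void *arg)
{
	u64 *total = arg;

	*total += end - start + 1;	/* ends are inclusive */
	return 0;			/* non-zero would stop the walk */
}

static u64 total_system_ram(void)
{
	u64 total = 0;

	walk_system_ram_res(0, -1, &total, count_ram_callback);
	return total;
}
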
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..391d4ddb6f4b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
25cond_syscall(sys_swapoff); 25cond_syscall(sys_swapoff);
26cond_syscall(sys_kexec_load); 26cond_syscall(sys_kexec_load);
27cond_syscall(compat_sys_kexec_load); 27cond_syscall(compat_sys_kexec_load);
28cond_syscall(sys_kexec_file_load);
28cond_syscall(sys_init_module); 29cond_syscall(sys_init_module);
29cond_syscall(sys_finit_module); 30cond_syscall(sys_finit_module);
30cond_syscall(sys_delete_module); 31cond_syscall(sys_delete_module);
@@ -197,6 +198,7 @@ cond_syscall(compat_sys_timerfd_settime);
197cond_syscall(compat_sys_timerfd_gettime); 198cond_syscall(compat_sys_timerfd_gettime);
198cond_syscall(sys_eventfd); 199cond_syscall(sys_eventfd);
199cond_syscall(sys_eventfd2); 200cond_syscall(sys_eventfd2);
201cond_syscall(sys_memfd_create);
200 202
201/* performance counters: */ 203/* performance counters: */
202cond_syscall(sys_perf_event_open); 204cond_syscall(sys_perf_event_open);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 12d6ebbfdd83..0dbab6d1acb4 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -14,6 +14,8 @@
14 * the GNU General Public License for more details. 14 * the GNU General Public License for more details.
15 */ 15 */
16 16
17#define pr_fmt(fmt) "Kprobe smoke test: " fmt
18
17#include <linux/kernel.h> 19#include <linux/kernel.h>
18#include <linux/kprobes.h> 20#include <linux/kprobes.h>
19#include <linux/random.h> 21#include <linux/random.h>
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
41{ 43{
42 if (preh_val != (rand1 / div_factor)) { 44 if (preh_val != (rand1 / div_factor)) {
43 handler_errors++; 45 handler_errors++;
44 printk(KERN_ERR "Kprobe smoke test failed: " 46 pr_err("incorrect value in post_handler\n");
45 "incorrect value in post_handler\n");
46 } 47 }
47 posth_val = preh_val + div_factor; 48 posth_val = preh_val + div_factor;
48} 49}
@@ -59,8 +60,7 @@ static int test_kprobe(void)
59 60
60 ret = register_kprobe(&kp); 61 ret = register_kprobe(&kp);
61 if (ret < 0) { 62 if (ret < 0) {
62 printk(KERN_ERR "Kprobe smoke test failed: " 63 pr_err("register_kprobe returned %d\n", ret);
63 "register_kprobe returned %d\n", ret);
64 return ret; 64 return ret;
65 } 65 }
66 66
@@ -68,14 +68,12 @@ static int test_kprobe(void)
68 unregister_kprobe(&kp); 68 unregister_kprobe(&kp);
69 69
70 if (preh_val == 0) { 70 if (preh_val == 0) {
71 printk(KERN_ERR "Kprobe smoke test failed: " 71 pr_err("kprobe pre_handler not called\n");
72 "kprobe pre_handler not called\n");
73 handler_errors++; 72 handler_errors++;
74 } 73 }
75 74
76 if (posth_val == 0) { 75 if (posth_val == 0) {
77 printk(KERN_ERR "Kprobe smoke test failed: " 76 pr_err("kprobe post_handler not called\n");
78 "kprobe post_handler not called\n");
79 handler_errors++; 77 handler_errors++;
80 } 78 }
81 79
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
98{ 96{
99 if (preh_val != (rand1 / div_factor) + 1) { 97 if (preh_val != (rand1 / div_factor) + 1) {
100 handler_errors++; 98 handler_errors++;
101 printk(KERN_ERR "Kprobe smoke test failed: " 99 pr_err("incorrect value in post_handler2\n");
102 "incorrect value in post_handler2\n");
103 } 100 }
104 posth_val = preh_val + div_factor; 101 posth_val = preh_val + div_factor;
105} 102}
@@ -120,8 +117,7 @@ static int test_kprobes(void)
120 kp.flags = 0; 117 kp.flags = 0;
121 ret = register_kprobes(kps, 2); 118 ret = register_kprobes(kps, 2);
122 if (ret < 0) { 119 if (ret < 0) {
123 printk(KERN_ERR "Kprobe smoke test failed: " 120 pr_err("register_kprobes returned %d\n", ret);
124 "register_kprobes returned %d\n", ret);
125 return ret; 121 return ret;
126 } 122 }
127 123
@@ -130,14 +126,12 @@ static int test_kprobes(void)
130 ret = target(rand1); 126 ret = target(rand1);
131 127
132 if (preh_val == 0) { 128 if (preh_val == 0) {
133 printk(KERN_ERR "Kprobe smoke test failed: " 129 pr_err("kprobe pre_handler not called\n");
134 "kprobe pre_handler not called\n");
135 handler_errors++; 130 handler_errors++;
136 } 131 }
137 132
138 if (posth_val == 0) { 133 if (posth_val == 0) {
139 printk(KERN_ERR "Kprobe smoke test failed: " 134 pr_err("kprobe post_handler not called\n");
140 "kprobe post_handler not called\n");
141 handler_errors++; 135 handler_errors++;
142 } 136 }
143 137
@@ -146,14 +140,12 @@ static int test_kprobes(void)
146 ret = target2(rand1); 140 ret = target2(rand1);
147 141
148 if (preh_val == 0) { 142 if (preh_val == 0) {
149 printk(KERN_ERR "Kprobe smoke test failed: " 143 pr_err("kprobe pre_handler2 not called\n");
150 "kprobe pre_handler2 not called\n");
151 handler_errors++; 144 handler_errors++;
152 } 145 }
153 146
154 if (posth_val == 0) { 147 if (posth_val == 0) {
155 printk(KERN_ERR "Kprobe smoke test failed: " 148 pr_err("kprobe post_handler2 not called\n");
156 "kprobe post_handler2 not called\n");
157 handler_errors++; 149 handler_errors++;
158 } 150 }
159 151
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
166{ 158{
167 if (value != rand1) { 159 if (value != rand1) {
168 handler_errors++; 160 handler_errors++;
169 printk(KERN_ERR "Kprobe smoke test failed: " 161 pr_err("incorrect value in jprobe handler\n");
170 "incorrect value in jprobe handler\n");
171 } 162 }
172 163
173 jph_val = rand1; 164 jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
186 177
187 ret = register_jprobe(&jp); 178 ret = register_jprobe(&jp);
188 if (ret < 0) { 179 if (ret < 0) {
189 printk(KERN_ERR "Kprobe smoke test failed: " 180 pr_err("register_jprobe returned %d\n", ret);
190 "register_jprobe returned %d\n", ret);
191 return ret; 181 return ret;
192 } 182 }
193 183
194 ret = target(rand1); 184 ret = target(rand1);
195 unregister_jprobe(&jp); 185 unregister_jprobe(&jp);
196 if (jph_val == 0) { 186 if (jph_val == 0) {
197 printk(KERN_ERR "Kprobe smoke test failed: " 187 pr_err("jprobe handler not called\n");
198 "jprobe handler not called\n");
199 handler_errors++; 188 handler_errors++;
200 } 189 }
201 190
@@ -217,24 +206,21 @@ static int test_jprobes(void)
217 jp.kp.flags = 0; 206 jp.kp.flags = 0;
218 ret = register_jprobes(jps, 2); 207 ret = register_jprobes(jps, 2);
219 if (ret < 0) { 208 if (ret < 0) {
220 printk(KERN_ERR "Kprobe smoke test failed: " 209 pr_err("register_jprobes returned %d\n", ret);
221 "register_jprobes returned %d\n", ret);
222 return ret; 210 return ret;
223 } 211 }
224 212
225 jph_val = 0; 213 jph_val = 0;
226 ret = target(rand1); 214 ret = target(rand1);
227 if (jph_val == 0) { 215 if (jph_val == 0) {
228 printk(KERN_ERR "Kprobe smoke test failed: " 216 pr_err("jprobe handler not called\n");
229 "jprobe handler not called\n");
230 handler_errors++; 217 handler_errors++;
231 } 218 }
232 219
233 jph_val = 0; 220 jph_val = 0;
234 ret = target2(rand1); 221 ret = target2(rand1);
235 if (jph_val == 0) { 222 if (jph_val == 0) {
236 printk(KERN_ERR "Kprobe smoke test failed: " 223 pr_err("jprobe handler2 not called\n");
237 "jprobe handler2 not called\n");
238 handler_errors++; 224 handler_errors++;
239 } 225 }
240 unregister_jprobes(jps, 2); 226 unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
256 242
257 if (ret != (rand1 / div_factor)) { 243 if (ret != (rand1 / div_factor)) {
258 handler_errors++; 244 handler_errors++;
259 printk(KERN_ERR "Kprobe smoke test failed: " 245 pr_err("incorrect value in kretprobe handler\n");
260 "incorrect value in kretprobe handler\n");
261 } 246 }
262 if (krph_val == 0) { 247 if (krph_val == 0) {
263 handler_errors++; 248 handler_errors++;
264 printk(KERN_ERR "Kprobe smoke test failed: " 249 pr_err("call to kretprobe entry handler failed\n");
265 "call to kretprobe entry handler failed\n");
266 } 250 }
267 251
268 krph_val = rand1; 252 krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
281 265
282 ret = register_kretprobe(&rp); 266 ret = register_kretprobe(&rp);
283 if (ret < 0) { 267 if (ret < 0) {
284 printk(KERN_ERR "Kprobe smoke test failed: " 268 pr_err("register_kretprobe returned %d\n", ret);
285 "register_kretprobe returned %d\n", ret);
286 return ret; 269 return ret;
287 } 270 }
288 271
289 ret = target(rand1); 272 ret = target(rand1);
290 unregister_kretprobe(&rp); 273 unregister_kretprobe(&rp);
291 if (krph_val != rand1) { 274 if (krph_val != rand1) {
292 printk(KERN_ERR "Kprobe smoke test failed: " 275 pr_err("kretprobe handler not called\n");
293 "kretprobe handler not called\n");
294 handler_errors++; 276 handler_errors++;
295 } 277 }
296 278
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
303 285
304 if (ret != (rand1 / div_factor) + 1) { 286 if (ret != (rand1 / div_factor) + 1) {
305 handler_errors++; 287 handler_errors++;
306 printk(KERN_ERR "Kprobe smoke test failed: " 288 pr_err("incorrect value in kretprobe handler2\n");
307 "incorrect value in kretprobe handler2\n");
308 } 289 }
309 if (krph_val == 0) { 290 if (krph_val == 0) {
310 handler_errors++; 291 handler_errors++;
311 printk(KERN_ERR "Kprobe smoke test failed: " 292 pr_err("call to kretprobe entry handler failed\n");
312 "call to kretprobe entry handler failed\n");
313 } 293 }
314 294
315 krph_val = rand1; 295 krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
332 rp.kp.flags = 0; 312 rp.kp.flags = 0;
333 ret = register_kretprobes(rps, 2); 313 ret = register_kretprobes(rps, 2);
334 if (ret < 0) { 314 if (ret < 0) {
335 printk(KERN_ERR "Kprobe smoke test failed: " 315 pr_err("register_kretprobe returned %d\n", ret);
336 "register_kretprobe returned %d\n", ret);
337 return ret; 316 return ret;
338 } 317 }
339 318
340 krph_val = 0; 319 krph_val = 0;
341 ret = target(rand1); 320 ret = target(rand1);
342 if (krph_val != rand1) { 321 if (krph_val != rand1) {
343 printk(KERN_ERR "Kprobe smoke test failed: " 322 pr_err("kretprobe handler not called\n");
344 "kretprobe handler not called\n");
345 handler_errors++; 323 handler_errors++;
346 } 324 }
347 325
348 krph_val = 0; 326 krph_val = 0;
349 ret = target2(rand1); 327 ret = target2(rand1);
350 if (krph_val != rand1) { 328 if (krph_val != rand1) {
351 printk(KERN_ERR "Kprobe smoke test failed: " 329 pr_err("kretprobe handler2 not called\n");
352 "kretprobe handler2 not called\n");
353 handler_errors++; 330 handler_errors++;
354 } 331 }
355 unregister_kretprobes(rps, 2); 332 unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
368 rand1 = prandom_u32(); 345 rand1 = prandom_u32();
369 } while (rand1 <= div_factor); 346 } while (rand1 <= div_factor);
370 347
371 printk(KERN_INFO "Kprobe smoke test started\n"); 348 pr_info("started\n");
372 num_tests++; 349 num_tests++;
373 ret = test_kprobe(); 350 ret = test_kprobe();
374 if (ret < 0) 351 if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
402#endif /* CONFIG_KRETPROBES */ 379#endif /* CONFIG_KRETPROBES */
403 380
404 if (errors) 381 if (errors)
405 printk(KERN_ERR "BUG: Kprobe smoke test: %d out of " 382 pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
406 "%d tests failed\n", errors, num_tests);
407 else if (handler_errors) 383 else if (handler_errors)
408 printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) " 384 pr_err("BUG: %d error(s) running handlers\n", handler_errors);
409 "running handlers\n", handler_errors);
410 else 385 else
411 printk(KERN_INFO "Kprobe smoke test passed successfully\n"); 386 pr_info("passed successfully\n");
412 387
413 return 0; 388 return 0;
414} 389}
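
The conversion above relies on the standard pr_fmt mechanism: defining
pr_fmt before any includes gives every pr_*() call in the file a
common prefix. In isolation:

#define pr_fmt(fmt) "Kprobe smoke test: " fmt

#include <linux/printk.h>

static void example(void)
{
	pr_info("started\n");	/* prints "Kprobe smoke test: started" */
}
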
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc02560fd6b..aa312b0dc3ec 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
526 return; 526 return;
527} 527}
528 528
529struct seq_operations proc_uid_seq_operations = { 529const struct seq_operations proc_uid_seq_operations = {
530 .start = uid_m_start, 530 .start = uid_m_start,
531 .stop = m_stop, 531 .stop = m_stop,
532 .next = m_next, 532 .next = m_next,
533 .show = uid_m_show, 533 .show = uid_m_show,
534}; 534};
535 535
536struct seq_operations proc_gid_seq_operations = { 536const struct seq_operations proc_gid_seq_operations = {
537 .start = gid_m_start, 537 .start = gid_m_start,
538 .stop = m_stop, 538 .stop = m_stop,
539 .next = m_next, 539 .next = m_next,
540 .show = gid_m_show, 540 .show = gid_m_show,
541}; 541};
542 542
543struct seq_operations proc_projid_seq_operations = { 543const struct seq_operations proc_projid_seq_operations = {
544 .start = projid_m_start, 544 .start = projid_m_start,
545 .stop = m_stop, 545 .stop = m_stop,
546 .next = m_next, 546 .next = m_next,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51b29e9d2ba6..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
368 smp_mb__after_atomic(); 368 smp_mb__after_atomic();
369 } 369 }
370 370
371 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
371 if (softlockup_panic) 372 if (softlockup_panic)
372 panic("softlockup: hung tasks"); 373 panic("softlockup: hung tasks");
373 __this_cpu_write(soft_watchdog_warn, true); 374 __this_cpu_write(soft_watchdog_warn, true);
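
With the new flag wired into the tnts[] table in kernel/panic.c and
set here, other code can check whether a soft lockup has already
tainted this boot; a small sketch assuming the existing test_taint()
helper:

static void example_check(void)
{
	if (test_taint(TAINT_SOFTLOCKUP))
		pr_warn("a soft lockup was detected earlier this boot\n");
}
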
diff --git a/lib/Kconfig b/lib/Kconfig
index df872659ddd3..a5ce0c7f6c30 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -508,4 +508,11 @@ config UCS2_STRING
508 508
509source "lib/fonts/Kconfig" 509source "lib/fonts/Kconfig"
510 510
511#
512# sg chaining option
513#
514
515config ARCH_HAS_SG_CHAIN
516 def_bool n
517
511endmenu 518endmenu
diff --git a/lib/decompress.c b/lib/decompress.c
index 86069d74c062..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
54 { {0, 0}, NULL, NULL } 54 { {0, 0}, NULL, NULL }
55}; 55};
56 56
57decompress_fn __init decompress_method(const unsigned char *inbuf, int len, 57decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
58 const char **name) 58 const char **name)
59{ 59{
60 const struct compress_format *cf; 60 const struct compress_format *cf;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
92 /* State for interrupting output loop */ 92 /* State for interrupting output loop */
93 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; 93 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
94 /* I/O tracking data (file handles, buffers, positions, etc.) */ 94 /* I/O tracking data (file handles, buffers, positions, etc.) */
95 int (*fill)(void*, unsigned int); 95 long (*fill)(void*, unsigned long);
96 int inbufCount, inbufPos /*, outbufPos*/; 96 long inbufCount, inbufPos /*, outbufPos*/;
97 unsigned char *inbuf /*,*outbuf*/; 97 unsigned char *inbuf /*,*outbuf*/;
98 unsigned int inbufBitCount, inbufBits; 98 unsigned int inbufBitCount, inbufBits;
99 /* The CRC values stored in the block header and calculated from the 99 /* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
617 goto decode_next_byte; 617 goto decode_next_byte;
618} 618}
619 619
620static int INIT nofill(void *buf, unsigned int len) 620static long INIT nofill(void *buf, unsigned long len)
621{ 621{
622 return -1; 622 return -1;
623} 623}
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
625/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain 625/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
626 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are 626 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
627 ignored, and data is read from file handle into temporary buffer. */ 627 ignored, and data is read from file handle into temporary buffer. */
628static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, 628static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
629 int (*fill)(void*, unsigned int)) 629 long (*fill)(void*, unsigned long))
630{ 630{
631 struct bunzip_data *bd; 631 struct bunzip_data *bd;
632 unsigned int i, j, c; 632 unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
675 675
676/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, 676/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
677 not end of file.) */ 677 not end of file.) */
678STATIC int INIT bunzip2(unsigned char *buf, int len, 678STATIC int INIT bunzip2(unsigned char *buf, long len,
679 int(*fill)(void*, unsigned int), 679 long (*fill)(void*, unsigned long),
680 int(*flush)(void*, unsigned int), 680 long (*flush)(void*, unsigned long),
681 unsigned char *outbuf, 681 unsigned char *outbuf,
682 int *pos, 682 long *pos,
683 void(*error)(char *x)) 683 void(*error)(char *x))
684{ 684{
685 struct bunzip_data *bd; 685 struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
743} 743}
744 744
745#ifdef PREBOOT 745#ifdef PREBOOT
746STATIC int INIT decompress(unsigned char *buf, int len, 746STATIC int INIT decompress(unsigned char *buf, long len,
747 int(*fill)(void*, unsigned int), 747 long (*fill)(void*, unsigned long),
748 int(*flush)(void*, unsigned int), 748 long (*flush)(void*, unsigned long),
749 unsigned char *outbuf, 749 unsigned char *outbuf,
750 int *pos, 750 long *pos,
751 void(*error)(char *x)) 751 void(*error)(char *x))
752{ 752{
753 return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); 753 return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 0edfd742a154..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,17 +27,17 @@
27 27
28#define GZIP_IOBUF_SIZE (16*1024) 28#define GZIP_IOBUF_SIZE (16*1024)
29 29
30static int INIT nofill(void *buffer, unsigned int len) 30static long INIT nofill(void *buffer, unsigned long len)
31{ 31{
32 return -1; 32 return -1;
33} 33}
34 34
35/* Included from initramfs et al code */ 35/* Included from initramfs et al code */
36STATIC int INIT gunzip(unsigned char *buf, int len, 36STATIC int INIT gunzip(unsigned char *buf, long len,
37 int(*fill)(void*, unsigned int), 37 long (*fill)(void*, unsigned long),
38 int(*flush)(void*, unsigned int), 38 long (*flush)(void*, unsigned long),
39 unsigned char *out_buf, 39 unsigned char *out_buf,
40 int *pos, 40 long *pos,
41 void(*error)(char *x)) { 41 void(*error)(char *x)) {
42 u8 *zbuf; 42 u8 *zbuf;
43 struct z_stream_s *strm; 43 struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
142 142
143 /* Write any data generated */ 143 /* Write any data generated */
144 if (flush && strm->next_out > out_buf) { 144 if (flush && strm->next_out > out_buf) {
145 int l = strm->next_out - out_buf; 145 long l = strm->next_out - out_buf;
146 if (l != flush(out_buf, l)) { 146 if (l != flush(out_buf, l)) {
147 rc = -1; 147 rc = -1;
148 error("write error"); 148 error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 7d1e83caf8ad..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
31#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) 31#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
32#define ARCHIVE_MAGICNUMBER 0x184C2102 32#define ARCHIVE_MAGICNUMBER 0x184C2102
33 33
34STATIC inline int INIT unlz4(u8 *input, int in_len, 34STATIC inline int INIT unlz4(u8 *input, long in_len,
35 int (*fill) (void *, unsigned int), 35 long (*fill)(void *, unsigned long),
36 int (*flush) (void *, unsigned int), 36 long (*flush)(void *, unsigned long),
37 u8 *output, int *posp, 37 u8 *output, long *posp,
38 void (*error) (char *x)) 38 void (*error) (char *x))
39{ 39{
40 int ret = -1; 40 int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
43 u8 *inp; 43 u8 *inp;
44 u8 *inp_start; 44 u8 *inp_start;
45 u8 *outp; 45 u8 *outp;
46 int size = in_len; 46 long size = in_len;
47#ifdef PREBOOT 47#ifdef PREBOOT
48 size_t out_len = get_unaligned_le32(input + in_len); 48 size_t out_len = get_unaligned_le32(input + in_len);
49#endif 49#endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
83 if (posp) 83 if (posp)
84 *posp = 0; 84 *posp = 0;
85 85
86 if (fill) 86 if (fill) {
87 fill(inp, 4); 87 size = fill(inp, 4);
88 if (size < 4) {
89 error("data corrupted");
90 goto exit_2;
91 }
92 }
88 93
89 chunksize = get_unaligned_le32(inp); 94 chunksize = get_unaligned_le32(inp);
90 if (chunksize == ARCHIVE_MAGICNUMBER) { 95 if (chunksize == ARCHIVE_MAGICNUMBER) {
91 inp += 4; 96 if (!fill) {
92 size -= 4; 97 inp += 4;
98 size -= 4;
99 }
93 } else { 100 } else {
94 error("invalid header"); 101 error("invalid header");
95 goto exit_2; 102 goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
100 107
101 for (;;) { 108 for (;;) {
102 109
103 if (fill) 110 if (fill) {
104 fill(inp, 4); 111 size = fill(inp, 4);
112 if (size == 0)
113 break;
114 if (size < 4) {
115 error("data corrupted");
116 goto exit_2;
117 }
118 }
105 119
106 chunksize = get_unaligned_le32(inp); 120 chunksize = get_unaligned_le32(inp);
107 if (chunksize == ARCHIVE_MAGICNUMBER) { 121 if (chunksize == ARCHIVE_MAGICNUMBER) {
108 inp += 4; 122 if (!fill) {
109 size -= 4; 123 inp += 4;
124 size -= 4;
125 }
110 if (posp) 126 if (posp)
111 *posp += 4; 127 *posp += 4;
112 continue; 128 continue;
113 } 129 }
114 inp += 4; 130
115 size -= 4;
116 131
117 if (posp) 132 if (posp)
118 *posp += 4; 133 *posp += 4;
119 134
120 if (fill) { 135 if (!fill) {
136 inp += 4;
137 size -= 4;
138 } else {
121 if (chunksize > lz4_compressbound(uncomp_chunksize)) { 139 if (chunksize > lz4_compressbound(uncomp_chunksize)) {
122 error("chunk length is longer than allocated"); 140 error("chunk length is longer than allocated");
123 goto exit_2; 141 goto exit_2;
124 } 142 }
125 fill(inp, chunksize); 143 size = fill(inp, chunksize);
144 if (size < chunksize) {
145 error("data corrupted");
146 goto exit_2;
147 }
126 } 148 }
127#ifdef PREBOOT 149#ifdef PREBOOT
128 if (out_len >= uncomp_chunksize) { 150 if (out_len >= uncomp_chunksize) {
@@ -149,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
149 if (posp) 171 if (posp)
150 *posp += chunksize; 172 *posp += chunksize;
151 173
152 size -= chunksize; 174 if (!fill) {
175 size -= chunksize;
153 176
154 if (size == 0) 177 if (size == 0)
155 break; 178 break;
156 else if (size < 0) { 179 else if (size < 0) {
157 error("data corrupted"); 180 error("data corrupted");
158 goto exit_2; 181 goto exit_2;
182 }
183 inp += chunksize;
159 } 184 }
160
161 inp += chunksize;
162 if (fill)
163 inp = inp_start;
164 } 185 }
165 186
166 ret = 0; 187 ret = 0;
@@ -175,11 +196,11 @@ exit_0:
175} 196}
176 197
177#ifdef PREBOOT 198#ifdef PREBOOT
178STATIC int INIT decompress(unsigned char *buf, int in_len, 199STATIC int INIT decompress(unsigned char *buf, long in_len,
179 int(*fill)(void*, unsigned int), 200 long (*fill)(void*, unsigned long),
180 int(*flush)(void*, unsigned int), 201 long (*flush)(void*, unsigned long),
181 unsigned char *output, 202 unsigned char *output,
182 int *posp, 203 long *posp,
183 void(*error)(char *x) 204 void(*error)(char *x)
184 ) 205 )
185{ 206{
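
Under the widened prototypes, a fill() callback returns the number of
bytes produced, 0 on EOF, or a negative error; switching from int to
long lets lengths beyond 2 GiB be represented. A sketch, with
read_from_source() as a hypothetical data source:

/* hypothetical data source, e.g. a block-by-block loader */
extern long read_from_source(void *buf, unsigned long len);

static long example_fill(void *buf, unsigned long len)
{
	return read_from_source(buf, len);	/* >0, 0 on EOF, <0 on error */
}
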
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
65#define LZMA_IOBUF_SIZE 0x10000 65#define LZMA_IOBUF_SIZE 0x10000
66 66
67struct rc { 67struct rc {
68 int (*fill)(void*, unsigned int); 68 long (*fill)(void*, unsigned long);
69 uint8_t *ptr; 69 uint8_t *ptr;
70 uint8_t *buffer; 70 uint8_t *buffer;
71 uint8_t *buffer_end; 71 uint8_t *buffer_end;
72 int buffer_size; 72 long buffer_size;
73 uint32_t code; 73 uint32_t code;
74 uint32_t range; 74 uint32_t range;
75 uint32_t bound; 75 uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
82#define RC_MODEL_TOTAL_BITS 11 82#define RC_MODEL_TOTAL_BITS 11
83 83
84 84
85static int INIT nofill(void *buffer, unsigned int len) 85static long INIT nofill(void *buffer, unsigned long len)
86{ 86{
87 return -1; 87 return -1;
88} 88}
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
99 99
100/* Called once */ 100/* Called once */
101static inline void INIT rc_init(struct rc *rc, 101static inline void INIT rc_init(struct rc *rc,
102 int (*fill)(void*, unsigned int), 102 long (*fill)(void*, unsigned long),
103 char *buffer, int buffer_size) 103 char *buffer, long buffer_size)
104{ 104{
105 if (fill) 105 if (fill)
106 rc->fill = fill; 106 rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
280 size_t buffer_pos; 280 size_t buffer_pos;
281 int bufsize; 281 int bufsize;
282 size_t global_pos; 282 size_t global_pos;
283 int(*flush)(void*, unsigned int); 283 long (*flush)(void*, unsigned long);
284 struct lzma_header *header; 284 struct lzma_header *header;
285}; 285};
286 286
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
534 534
535 535
536 536
537STATIC inline int INIT unlzma(unsigned char *buf, int in_len, 537STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
538 int(*fill)(void*, unsigned int), 538 long (*fill)(void*, unsigned long),
539 int(*flush)(void*, unsigned int), 539 long (*flush)(void*, unsigned long),
540 unsigned char *output, 540 unsigned char *output,
541 int *posp, 541 long *posp,
542 void(*error)(char *x) 542 void(*error)(char *x)
543 ) 543 )
544{ 544{
@@ -667,11 +667,11 @@ exit_0:
667} 667}
668 668
669#ifdef PREBOOT 669#ifdef PREBOOT
670STATIC int INIT decompress(unsigned char *buf, int in_len, 670STATIC int INIT decompress(unsigned char *buf, long in_len,
671 int(*fill)(void*, unsigned int), 671 long (*fill)(void*, unsigned long),
672 int(*flush)(void*, unsigned int), 672 long (*flush)(void*, unsigned long),
673 unsigned char *output, 673 unsigned char *output,
674 int *posp, 674 long *posp,
675 void(*error)(char *x) 675 void(*error)(char *x)
676 ) 676 )
677{ 677{
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
51#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4) 51#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4)
52#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4) 52#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
53 53
54STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) 54STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
55{ 55{
56 int l; 56 int l;
57 u8 *parse = input; 57 u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
108 return 1; 108 return 1;
109} 109}
110 110
111STATIC inline int INIT unlzo(u8 *input, int in_len, 111STATIC int INIT unlzo(u8 *input, long in_len,
112 int (*fill) (void *, unsigned int), 112 long (*fill)(void *, unsigned long),
113 int (*flush) (void *, unsigned int), 113 long (*flush)(void *, unsigned long),
114 u8 *output, int *posp, 114 u8 *output, long *posp,
115 void (*error) (char *x)) 115 void (*error) (char *x))
116{ 116{
117 u8 r = 0; 117 u8 r = 0;
118 int skip = 0; 118 long skip = 0;
119 u32 src_len, dst_len; 119 u32 src_len, dst_len;
120 size_t tmp; 120 size_t tmp;
121 u8 *in_buf, *in_buf_save, *out_buf; 121 u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
248 * both input and output buffers are available as a single chunk, i.e. when 248 * both input and output buffers are available as a single chunk, i.e. when
249 * fill() and flush() won't be used. 249 * fill() and flush() won't be used.
250 */ 250 */
251STATIC int INIT unxz(unsigned char *in, int in_size, 251STATIC int INIT unxz(unsigned char *in, long in_size,
252 int (*fill)(void *dest, unsigned int size), 252 long (*fill)(void *dest, unsigned long size),
253 int (*flush)(void *src, unsigned int size), 253 long (*flush)(void *src, unsigned long size),
254 unsigned char *out, int *in_used, 254 unsigned char *out, long *in_used,
255 void (*error)(char *x)) 255 void (*error)(char *x))
256{ 256{
257 struct xz_buf b; 257 struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
329 * returned by xz_dec_run(), but probably 329 * returned by xz_dec_run(), but probably
330 * it's not too bad. 330 * it's not too bad.
331 */ 331 */
332 if (flush(b.out, b.out_pos) != (int)b.out_pos) 332 if (flush(b.out, b.out_pos) != (long)b.out_pos)
333 ret = XZ_BUF_ERROR; 333 ret = XZ_BUF_ERROR;
334 334
335 b.out_pos = 0; 335 b.out_pos = 0;
diff --git a/lib/idr.c b/lib/idr.c
index 39158abebad1..50be3fa9b657 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -590,26 +590,27 @@ static void __idr_remove_all(struct idr *idp)
590 struct idr_layer **paa = &pa[0]; 590 struct idr_layer **paa = &pa[0];
591 591
592 n = idp->layers * IDR_BITS; 592 n = idp->layers * IDR_BITS;
593 p = idp->top; 593 *paa = idp->top;
594 RCU_INIT_POINTER(idp->top, NULL); 594 RCU_INIT_POINTER(idp->top, NULL);
595 max = idr_max(idp->layers); 595 max = idr_max(idp->layers);
596 596
597 id = 0; 597 id = 0;
598 while (id >= 0 && id <= max) { 598 while (id >= 0 && id <= max) {
599 p = *paa;
599 while (n > IDR_BITS && p) { 600 while (n > IDR_BITS && p) {
600 n -= IDR_BITS; 601 n -= IDR_BITS;
601 *paa++ = p;
602 p = p->ary[(id >> n) & IDR_MASK]; 602 p = p->ary[(id >> n) & IDR_MASK];
603 *++paa = p;
603 } 604 }
604 605
605 bt_mask = id; 606 bt_mask = id;
606 id += 1 << n; 607 id += 1 << n;
607 /* Get the highest bit that the above add changed from 0->1. */ 608 /* Get the highest bit that the above add changed from 0->1. */
608 while (n < fls(id ^ bt_mask)) { 609 while (n < fls(id ^ bt_mask)) {
609 if (p) 610 if (*paa)
610 free_layer(idp, p); 611 free_layer(idp, *paa);
611 n += IDR_BITS; 612 n += IDR_BITS;
612 p = *--paa; 613 --paa;
613 } 614 }
614 } 615 }
615 idp->layers = 0; 616 idp->layers = 0;
@@ -692,15 +693,16 @@ int idr_for_each(struct idr *idp,
692 struct idr_layer **paa = &pa[0]; 693 struct idr_layer **paa = &pa[0];
693 694
694 n = idp->layers * IDR_BITS; 695 n = idp->layers * IDR_BITS;
695 p = rcu_dereference_raw(idp->top); 696 *paa = rcu_dereference_raw(idp->top);
696 max = idr_max(idp->layers); 697 max = idr_max(idp->layers);
697 698
698 id = 0; 699 id = 0;
699 while (id >= 0 && id <= max) { 700 while (id >= 0 && id <= max) {
701 p = *paa;
700 while (n > 0 && p) { 702 while (n > 0 && p) {
701 n -= IDR_BITS; 703 n -= IDR_BITS;
702 *paa++ = p;
703 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); 704 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
705 *++paa = p;
704 } 706 }
705 707
706 if (p) { 708 if (p) {
@@ -712,7 +714,7 @@ int idr_for_each(struct idr *idp,
712 id += 1 << n; 714 id += 1 << n;
713 while (n < fls(id)) { 715 while (n < fls(id)) {
714 n += IDR_BITS; 716 n += IDR_BITS;
715 p = *--paa; 717 --paa;
716 } 718 }
717 } 719 }
718 720
@@ -740,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
740 int n, max; 742 int n, max;
741 743
742 /* find first ent */ 744 /* find first ent */
743 p = rcu_dereference_raw(idp->top); 745 p = *paa = rcu_dereference_raw(idp->top);
744 if (!p) 746 if (!p)
745 return NULL; 747 return NULL;
746 n = (p->layer + 1) * IDR_BITS; 748 n = (p->layer + 1) * IDR_BITS;
747 max = idr_max(p->layer + 1); 749 max = idr_max(p->layer + 1);
748 750
749 while (id >= 0 && id <= max) { 751 while (id >= 0 && id <= max) {
752 p = *paa;
750 while (n > 0 && p) { 753 while (n > 0 && p) {
751 n -= IDR_BITS; 754 n -= IDR_BITS;
752 *paa++ = p;
753 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); 755 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
756 *++paa = p;
754 } 757 }
755 758
756 if (p) { 759 if (p) {
@@ -768,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
768 id = round_up(id + 1, 1 << n); 771 id = round_up(id + 1, 1 << n);
769 while (n < fls(id)) { 772 while (n < fls(id)) {
770 n += IDR_BITS; 773 n += IDR_BITS;
771 p = *--paa; 774 --paa;
772 } 775 }
773 } 776 }
774 return NULL; 777 return NULL;
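
The refactor above keeps one invariant: the top of the pa[] stack
(*paa) always names the current node, so descending pushes with
*++paa and backtracking is just --paa, with no separate cursor to
resync. A standalone sketch of the same pattern on a toy structure:

struct node { struct node *down; };

static void walk_down(struct node *root)
{
	struct node *pa[16], **paa = &pa[0];	/* depth assumed to fit */
	struct node *p;

	*paa = root;			/* stack top mirrors current node */
	while ((p = *paa)) {
		p = p->down;		/* descend one layer */
		*++paa = p;		/* push; top stays current */
	}
	--paa;				/* backtrack: pop the NULL */
}
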
diff --git a/lib/kfifo.c b/lib/kfifo.c
index d79b9d222065..90ba1eb1df06 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r);
561unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, 561unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
562 struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) 562 struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
563{ 563{
564 if (!nents) 564 BUG_ON(!nents);
565 BUG();
566 565
567 len = __kfifo_max_r(len, recsize); 566 len = __kfifo_max_r(len, recsize);
568 567
@@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
585unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, 584unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
586 struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) 585 struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
587{ 586{
588 if (!nents) 587 BUG_ON(!nents);
589 BUG();
590 588
591 len = __kfifo_max_r(len, recsize); 589 len = __kfifo_max_r(len, recsize);
592 590
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 65f4effd117f..c16c81a3d430 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -101,7 +101,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
101 * / \ / \ 101 * / \ / \
102 * p u --> P U 102 * p u --> P U
103 * / / 103 * / /
104 * n N 104 * n n
105 * 105 *
106 * However, since g's parent might be red, and 106 * However, since g's parent might be red, and
107 * 4) does not allow this, we need to recurse 107 * 4) does not allow this, we need to recurse
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index b4415fceb7e7..9cdf62f8accd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
73 **/ 73 **/
74struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) 74struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
75{ 75{
76#ifndef ARCH_HAS_SG_CHAIN 76#ifndef CONFIG_ARCH_HAS_SG_CHAIN
77 struct scatterlist *ret = &sgl[nents - 1]; 77 struct scatterlist *ret = &sgl[nents - 1];
78#else 78#else
79 struct scatterlist *sg, *ret = NULL; 79 struct scatterlist *sg, *ret = NULL;
@@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
255 255
256 if (nents == 0) 256 if (nents == 0)
257 return -EINVAL; 257 return -EINVAL;
258#ifndef ARCH_HAS_SG_CHAIN 258#ifndef CONFIG_ARCH_HAS_SG_CHAIN
259 if (WARN_ON_ONCE(nents > max_ents)) 259 if (WARN_ON_ONCE(nents > max_ents))
260 return -EINVAL; 260 return -EINVAL;
261#endif 261#endif
diff --git a/mm/filemap.c b/mm/filemap.c
index af19a6b079f5..f501b56ec2c6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/cpuset.h> 32#include <linux/cpuset.h>
33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 33#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
34#include <linux/hugetlb.h>
34#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
35#include <linux/cleancache.h> 36#include <linux/cleancache.h>
36#include <linux/rmap.h> 37#include <linux/rmap.h>
@@ -233,7 +234,6 @@ void delete_from_page_cache(struct page *page)
233 spin_lock_irq(&mapping->tree_lock); 234 spin_lock_irq(&mapping->tree_lock);
234 __delete_from_page_cache(page, NULL); 235 __delete_from_page_cache(page, NULL);
235 spin_unlock_irq(&mapping->tree_lock); 236 spin_unlock_irq(&mapping->tree_lock);
236 mem_cgroup_uncharge_cache_page(page);
237 237
238 if (freepage) 238 if (freepage)
239 freepage(page); 239 freepage(page);
@@ -489,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
489 if (PageSwapBacked(new)) 489 if (PageSwapBacked(new))
490 __inc_zone_page_state(new, NR_SHMEM); 490 __inc_zone_page_state(new, NR_SHMEM);
491 spin_unlock_irq(&mapping->tree_lock); 491 spin_unlock_irq(&mapping->tree_lock);
492 /* mem_cgroup codes must not be called under tree_lock */ 492 mem_cgroup_migrate(old, new, true);
493 mem_cgroup_replace_page_cache(old, new);
494 radix_tree_preload_end(); 493 radix_tree_preload_end();
495 if (freepage) 494 if (freepage)
496 freepage(old); 495 freepage(old);
@@ -548,19 +547,24 @@ static int __add_to_page_cache_locked(struct page *page,
548 pgoff_t offset, gfp_t gfp_mask, 547 pgoff_t offset, gfp_t gfp_mask,
549 void **shadowp) 548 void **shadowp)
550{ 549{
550 int huge = PageHuge(page);
551 struct mem_cgroup *memcg;
551 int error; 552 int error;
552 553
553 VM_BUG_ON_PAGE(!PageLocked(page), page); 554 VM_BUG_ON_PAGE(!PageLocked(page), page);
554 VM_BUG_ON_PAGE(PageSwapBacked(page), page); 555 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
555 556
556 error = mem_cgroup_charge_file(page, current->mm, 557 if (!huge) {
557 gfp_mask & GFP_RECLAIM_MASK); 558 error = mem_cgroup_try_charge(page, current->mm,
558 if (error) 559 gfp_mask, &memcg);
559 return error; 560 if (error)
561 return error;
562 }
560 563
561 error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); 564 error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
562 if (error) { 565 if (error) {
563 mem_cgroup_uncharge_cache_page(page); 566 if (!huge)
567 mem_cgroup_cancel_charge(page, memcg);
564 return error; 568 return error;
565 } 569 }
566 570
@@ -575,13 +579,16 @@ static int __add_to_page_cache_locked(struct page *page,
575 goto err_insert; 579 goto err_insert;
576 __inc_zone_page_state(page, NR_FILE_PAGES); 580 __inc_zone_page_state(page, NR_FILE_PAGES);
577 spin_unlock_irq(&mapping->tree_lock); 581 spin_unlock_irq(&mapping->tree_lock);
582 if (!huge)
583 mem_cgroup_commit_charge(page, memcg, false);
578 trace_mm_filemap_add_to_page_cache(page); 584 trace_mm_filemap_add_to_page_cache(page);
579 return 0; 585 return 0;
580err_insert: 586err_insert:
581 page->mapping = NULL; 587 page->mapping = NULL;
582 /* Leave page->index set: truncation relies upon it */ 588 /* Leave page->index set: truncation relies upon it */
583 spin_unlock_irq(&mapping->tree_lock); 589 spin_unlock_irq(&mapping->tree_lock);
584 mem_cgroup_uncharge_cache_page(page); 590 if (!huge)
591 mem_cgroup_cancel_charge(page, memcg);
585 page_cache_release(page); 592 page_cache_release(page);
586 return error; 593 return error;
587} 594}
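Taken together, the filemap.c hunks replace the old one-shot mem_cgroup_charge_file() with a three-step transaction. A hedged paraphrase of the resulting flow; insert_in_tree() is a hypothetical stand-in for the radix-tree insertion step:

#include <linux/hugetlb.h>
#include <linux/memcontrol.h>

static int add_locked_sketch(struct page *page, struct mm_struct *mm,
			     gfp_t gfp)
{
	int huge = PageHuge(page);	/* hugetlb is accounted separately */
	struct mem_cgroup *memcg;
	int error;

	if (!huge) {
		error = mem_cgroup_try_charge(page, mm, gfp, &memcg);
		if (error)
			return error;
	}

	error = insert_in_tree(page);	/* hypothetical insertion step */
	if (error) {
		if (!huge)
			mem_cgroup_cancel_charge(page, memcg);	/* roll back */
		return error;
	}

	if (!huge)
		mem_cgroup_commit_charge(page, memcg, false);	/* publish */
	return 0;
}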
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3630d577e987..d9a21d06b862 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
715 unsigned long haddr, pmd_t *pmd, 715 unsigned long haddr, pmd_t *pmd,
716 struct page *page) 716 struct page *page)
717{ 717{
718 struct mem_cgroup *memcg;
718 pgtable_t pgtable; 719 pgtable_t pgtable;
719 spinlock_t *ptl; 720 spinlock_t *ptl;
720 721
721 VM_BUG_ON_PAGE(!PageCompound(page), page); 722 VM_BUG_ON_PAGE(!PageCompound(page), page);
723
724 if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
725 return VM_FAULT_OOM;
726
722 pgtable = pte_alloc_one(mm, haddr); 727 pgtable = pte_alloc_one(mm, haddr);
723 if (unlikely(!pgtable)) 728 if (unlikely(!pgtable)) {
729 mem_cgroup_cancel_charge(page, memcg);
724 return VM_FAULT_OOM; 730 return VM_FAULT_OOM;
731 }
725 732
726 clear_huge_page(page, haddr, HPAGE_PMD_NR); 733 clear_huge_page(page, haddr, HPAGE_PMD_NR);
727 /* 734 /*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
734 ptl = pmd_lock(mm, pmd); 741 ptl = pmd_lock(mm, pmd);
735 if (unlikely(!pmd_none(*pmd))) { 742 if (unlikely(!pmd_none(*pmd))) {
736 spin_unlock(ptl); 743 spin_unlock(ptl);
737 mem_cgroup_uncharge_page(page); 744 mem_cgroup_cancel_charge(page, memcg);
738 put_page(page); 745 put_page(page);
739 pte_free(mm, pgtable); 746 pte_free(mm, pgtable);
740 } else { 747 } else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
742 entry = mk_huge_pmd(page, vma->vm_page_prot); 749 entry = mk_huge_pmd(page, vma->vm_page_prot);
743 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); 750 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
744 page_add_new_anon_rmap(page, vma, haddr); 751 page_add_new_anon_rmap(page, vma, haddr);
752 mem_cgroup_commit_charge(page, memcg, false);
753 lru_cache_add_active_or_unevictable(page, vma);
745 pgtable_trans_huge_deposit(mm, pmd, pgtable); 754 pgtable_trans_huge_deposit(mm, pmd, pgtable);
746 set_pmd_at(mm, haddr, pmd, entry); 755 set_pmd_at(mm, haddr, pmd, entry);
747 add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); 756 add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
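The ordering in the hunk above is deliberate and worth spelling out: the rmap is established first (plausibly so that the PageAnon() test in mem_cgroup_charge_statistics() sees the page's final state), the charge is committed next, and only then is the page put on the LRU where reclaim can find it. An annotated paraphrase of those three lines:

	page_add_new_anon_rmap(page, vma, haddr);	/* page becomes PageAnon */
	mem_cgroup_commit_charge(page, memcg, false);	/* charge is now final */
	lru_cache_add_active_or_unevictable(page, vma);	/* visible to reclaim */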
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
827 count_vm_event(THP_FAULT_FALLBACK); 836 count_vm_event(THP_FAULT_FALLBACK);
828 return VM_FAULT_FALLBACK; 837 return VM_FAULT_FALLBACK;
829 } 838 }
830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
831 put_page(page);
832 count_vm_event(THP_FAULT_FALLBACK);
833 return VM_FAULT_FALLBACK;
834 }
835 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { 839 if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
836 mem_cgroup_uncharge_page(page);
837 put_page(page); 840 put_page(page);
838 count_vm_event(THP_FAULT_FALLBACK); 841 count_vm_event(THP_FAULT_FALLBACK);
839 return VM_FAULT_FALLBACK; 842 return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
979 struct page *page, 982 struct page *page,
980 unsigned long haddr) 983 unsigned long haddr)
981{ 984{
985 struct mem_cgroup *memcg;
982 spinlock_t *ptl; 986 spinlock_t *ptl;
983 pgtable_t pgtable; 987 pgtable_t pgtable;
984 pmd_t _pmd; 988 pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
999 __GFP_OTHER_NODE, 1003 __GFP_OTHER_NODE,
1000 vma, address, page_to_nid(page)); 1004 vma, address, page_to_nid(page));
1001 if (unlikely(!pages[i] || 1005 if (unlikely(!pages[i] ||
1002 mem_cgroup_charge_anon(pages[i], mm, 1006 mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
1003 GFP_KERNEL))) { 1007 &memcg))) {
1004 if (pages[i]) 1008 if (pages[i])
1005 put_page(pages[i]); 1009 put_page(pages[i]);
1006 mem_cgroup_uncharge_start();
1007 while (--i >= 0) { 1010 while (--i >= 0) {
1008 mem_cgroup_uncharge_page(pages[i]); 1011 memcg = (void *)page_private(pages[i]);
1012 set_page_private(pages[i], 0);
1013 mem_cgroup_cancel_charge(pages[i], memcg);
1009 put_page(pages[i]); 1014 put_page(pages[i]);
1010 } 1015 }
1011 mem_cgroup_uncharge_end();
1012 kfree(pages); 1016 kfree(pages);
1013 ret |= VM_FAULT_OOM; 1017 ret |= VM_FAULT_OOM;
1014 goto out; 1018 goto out;
1015 } 1019 }
1020 set_page_private(pages[i], (unsigned long)memcg);
1016 } 1021 }
1017 1022
1018 for (i = 0; i < HPAGE_PMD_NR; i++) { 1023 for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
1041 pte_t *pte, entry; 1046 pte_t *pte, entry;
1042 entry = mk_pte(pages[i], vma->vm_page_prot); 1047 entry = mk_pte(pages[i], vma->vm_page_prot);
1043 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1048 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1049 memcg = (void *)page_private(pages[i]);
1050 set_page_private(pages[i], 0);
1044 page_add_new_anon_rmap(pages[i], vma, haddr); 1051 page_add_new_anon_rmap(pages[i], vma, haddr);
1052 mem_cgroup_commit_charge(pages[i], memcg, false);
1053 lru_cache_add_active_or_unevictable(pages[i], vma);
1045 pte = pte_offset_map(&_pmd, haddr); 1054 pte = pte_offset_map(&_pmd, haddr);
1046 VM_BUG_ON(!pte_none(*pte)); 1055 VM_BUG_ON(!pte_none(*pte));
1047 set_pte_at(mm, haddr, pte, entry); 1056 set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
1065out_free_pages: 1074out_free_pages:
1066 spin_unlock(ptl); 1075 spin_unlock(ptl);
1067 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 1076 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
1068 mem_cgroup_uncharge_start();
1069 for (i = 0; i < HPAGE_PMD_NR; i++) { 1077 for (i = 0; i < HPAGE_PMD_NR; i++) {
1070 mem_cgroup_uncharge_page(pages[i]); 1078 memcg = (void *)page_private(pages[i]);
1079 set_page_private(pages[i], 0);
1080 mem_cgroup_cancel_charge(pages[i], memcg);
1071 put_page(pages[i]); 1081 put_page(pages[i]);
1072 } 1082 }
1073 mem_cgroup_uncharge_end();
1074 kfree(pages); 1083 kfree(pages);
1075 goto out; 1084 goto out;
1076} 1085}
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1081 spinlock_t *ptl; 1090 spinlock_t *ptl;
1082 int ret = 0; 1091 int ret = 0;
1083 struct page *page = NULL, *new_page; 1092 struct page *page = NULL, *new_page;
1093 struct mem_cgroup *memcg;
1084 unsigned long haddr; 1094 unsigned long haddr;
1085 unsigned long mmun_start; /* For mmu_notifiers */ 1095 unsigned long mmun_start; /* For mmu_notifiers */
1086 unsigned long mmun_end; /* For mmu_notifiers */ 1096 unsigned long mmun_end; /* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
1132 goto out; 1142 goto out;
1133 } 1143 }
1134 1144
1135 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) { 1145 if (unlikely(mem_cgroup_try_charge(new_page, mm,
1146 GFP_TRANSHUGE, &memcg))) {
1136 put_page(new_page); 1147 put_page(new_page);
1137 if (page) { 1148 if (page) {
1138 split_huge_page(page); 1149 split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
1161 put_user_huge_page(page); 1172 put_user_huge_page(page);
1162 if (unlikely(!pmd_same(*pmd, orig_pmd))) { 1173 if (unlikely(!pmd_same(*pmd, orig_pmd))) {
1163 spin_unlock(ptl); 1174 spin_unlock(ptl);
1164 mem_cgroup_uncharge_page(new_page); 1175 mem_cgroup_cancel_charge(new_page, memcg);
1165 put_page(new_page); 1176 put_page(new_page);
1166 goto out_mn; 1177 goto out_mn;
1167 } else { 1178 } else {
@@ -1170,6 +1181,8 @@ alloc:
1170 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); 1181 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
1171 pmdp_clear_flush(vma, haddr, pmd); 1182 pmdp_clear_flush(vma, haddr, pmd);
1172 page_add_new_anon_rmap(new_page, vma, haddr); 1183 page_add_new_anon_rmap(new_page, vma, haddr);
1184 mem_cgroup_commit_charge(new_page, memcg, false);
1185 lru_cache_add_active_or_unevictable(new_page, vma);
1173 set_pmd_at(mm, haddr, pmd, entry); 1186 set_pmd_at(mm, haddr, pmd, entry);
1174 update_mmu_cache_pmd(vma, address, pmd); 1187 update_mmu_cache_pmd(vma, address, pmd);
1175 if (!page) { 1188 if (!page) {
@@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2413 spinlock_t *pmd_ptl, *pte_ptl; 2426 spinlock_t *pmd_ptl, *pte_ptl;
2414 int isolated; 2427 int isolated;
2415 unsigned long hstart, hend; 2428 unsigned long hstart, hend;
2429 struct mem_cgroup *memcg;
2416 unsigned long mmun_start; /* For mmu_notifiers */ 2430 unsigned long mmun_start; /* For mmu_notifiers */
2417 unsigned long mmun_end; /* For mmu_notifiers */ 2431 unsigned long mmun_end; /* For mmu_notifiers */
2418 2432
@@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm,
2423 if (!new_page) 2437 if (!new_page)
2424 return; 2438 return;
2425 2439
2426 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) 2440 if (unlikely(mem_cgroup_try_charge(new_page, mm,
2441 GFP_TRANSHUGE, &memcg)))
2427 return; 2442 return;
2428 2443
2429 /* 2444 /*
@@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm,
2510 spin_lock(pmd_ptl); 2525 spin_lock(pmd_ptl);
2511 BUG_ON(!pmd_none(*pmd)); 2526 BUG_ON(!pmd_none(*pmd));
2512 page_add_new_anon_rmap(new_page, vma, address); 2527 page_add_new_anon_rmap(new_page, vma, address);
2528 mem_cgroup_commit_charge(new_page, memcg, false);
2529 lru_cache_add_active_or_unevictable(new_page, vma);
2513 pgtable_trans_huge_deposit(mm, pmd, pgtable); 2530 pgtable_trans_huge_deposit(mm, pmd, pgtable);
2514 set_pmd_at(mm, address, pmd, _pmd); 2531 set_pmd_at(mm, address, pmd, _pmd);
2515 update_mmu_cache_pmd(vma, address, pmd); 2532 update_mmu_cache_pmd(vma, address, pmd);
@@ -2523,7 +2540,7 @@ out_up_write:
2523 return; 2540 return;
2524 2541
2525out: 2542out:
2526 mem_cgroup_uncharge_page(new_page); 2543 mem_cgroup_cancel_charge(new_page, memcg);
2527 goto out_up_write; 2544 goto out_up_write;
2528} 2545}
2529 2546
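The do_huge_pmd_wp_page_fallback() hunks above park each subpage's charged memcg in page->private between try_charge and the later commit or cancel. A minimal sketch of that bookkeeping, assuming only the generic set_page_private()/page_private() helpers:

#include <linux/mm.h>

static void park_memcg(struct page *page, struct mem_cgroup *memcg)
{
	set_page_private(page, (unsigned long)memcg);	/* remember the owner */
}

static struct mem_cgroup *unpark_memcg(struct page *page)
{
	struct mem_cgroup *memcg = (void *)page_private(page);

	set_page_private(page, 0);	/* clear before the page is reused */
	return memcg;
}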
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90dc501eaf3f..ec4dcf1b9562 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
754static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, 754static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
755 struct mem_cgroup_tree_per_zone *mctz) 755 struct mem_cgroup_tree_per_zone *mctz)
756{ 756{
757 spin_lock(&mctz->lock); 757 unsigned long flags;
758
759 spin_lock_irqsave(&mctz->lock, flags);
758 __mem_cgroup_remove_exceeded(mz, mctz); 760 __mem_cgroup_remove_exceeded(mz, mctz);
759 spin_unlock(&mctz->lock); 761 spin_unlock_irqrestore(&mctz->lock, flags);
760} 762}
761 763
762 764
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
779 * mem is over its softlimit. 781 * mem is over its softlimit.
780 */ 782 */
781 if (excess || mz->on_tree) { 783 if (excess || mz->on_tree) {
782 spin_lock(&mctz->lock); 784 unsigned long flags;
785
786 spin_lock_irqsave(&mctz->lock, flags);
783 /* if on-tree, remove it */ 787 /* if on-tree, remove it */
784 if (mz->on_tree) 788 if (mz->on_tree)
785 __mem_cgroup_remove_exceeded(mz, mctz); 789 __mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
788 * If excess is 0, no tree ops. 792 * If excess is 0, no tree ops.
789 */ 793 */
790 __mem_cgroup_insert_exceeded(mz, mctz, excess); 794 __mem_cgroup_insert_exceeded(mz, mctz, excess);
791 spin_unlock(&mctz->lock); 795 spin_unlock_irqrestore(&mctz->lock, flags);
792 } 796 }
793 } 797 }
794} 798}
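The memcontrol.c locking changes follow the standard conversion: where the caller's interrupt state is unknown, spin_lock() becomes spin_lock_irqsave(); where interrupts are known to be enabled, the cheaper spin_lock_irq() suffices (as in the next hunk). A hedged sketch of the two forms:

#include <linux/spinlock.h>

static void touch_tree_any_context(spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);		/* saves the current irq state */
	/* ... modify the soft-limit tree ... */
	spin_unlock_irqrestore(lock, flags);	/* restores it exactly */
}

static void touch_tree_process_context(spinlock_t *lock)
{
	spin_lock_irq(lock);			/* irqs known to be enabled */
	/* ... read the tree ... */
	spin_unlock_irq(lock);
}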
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
839{ 843{
840 struct mem_cgroup_per_zone *mz; 844 struct mem_cgroup_per_zone *mz;
841 845
842 spin_lock(&mctz->lock); 846 spin_lock_irq(&mctz->lock);
843 mz = __mem_cgroup_largest_soft_limit_node(mctz); 847 mz = __mem_cgroup_largest_soft_limit_node(mctz);
844 spin_unlock(&mctz->lock); 848 spin_unlock_irq(&mctz->lock);
845 return mz; 849 return mz;
846} 850}
847 851
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
882 return val; 886 return val;
883} 887}
884 888
885static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
886 bool charge)
887{
888 int val = (charge) ? 1 : -1;
889 this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
890}
891
892static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, 889static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
893 enum mem_cgroup_events_index idx) 890 enum mem_cgroup_events_index idx)
894{ 891{
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
909 906
910static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, 907static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
911 struct page *page, 908 struct page *page,
912 bool anon, int nr_pages) 909 int nr_pages)
913{ 910{
914 /* 911 /*
915 * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is 912 * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
916 * counted as CACHE even if it's on ANON LRU. 913 * counted as CACHE even if it's on ANON LRU.
917 */ 914 */
918 if (anon) 915 if (PageAnon(page))
919 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], 916 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
920 nr_pages); 917 nr_pages);
921 else 918 else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
1013 */ 1010 */
1014static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) 1011static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1015{ 1012{
1016 preempt_disable();
1017 /* threshold event is triggered in finer grain than soft limit */ 1013 /* threshold event is triggered in finer grain than soft limit */
1018 if (unlikely(mem_cgroup_event_ratelimit(memcg, 1014 if (unlikely(mem_cgroup_event_ratelimit(memcg,
1019 MEM_CGROUP_TARGET_THRESH))) { 1015 MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1026 do_numainfo = mem_cgroup_event_ratelimit(memcg, 1022 do_numainfo = mem_cgroup_event_ratelimit(memcg,
1027 MEM_CGROUP_TARGET_NUMAINFO); 1023 MEM_CGROUP_TARGET_NUMAINFO);
1028#endif 1024#endif
1029 preempt_enable();
1030
1031 mem_cgroup_threshold(memcg); 1025 mem_cgroup_threshold(memcg);
1032 if (unlikely(do_softlimit)) 1026 if (unlikely(do_softlimit))
1033 mem_cgroup_update_tree(memcg, page); 1027 mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1035 if (unlikely(do_numainfo)) 1029 if (unlikely(do_numainfo))
1036 atomic_inc(&memcg->numainfo_events); 1030 atomic_inc(&memcg->numainfo_events);
1037#endif 1031#endif
1038 } else 1032 }
1039 preempt_enable();
1040} 1033}
1041 1034
1042struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) 1035struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
1347 return lruvec; 1340 return lruvec;
1348} 1341}
1349 1342
1350/*
1351 * Following LRU functions are allowed to be used without PCG_LOCK.
1352 * Operations are called by routine of global LRU independently from memcg.
1353 * What we have to take care of here is validness of pc->mem_cgroup.
1354 *
1355 * Changes to pc->mem_cgroup happens when
1356 * 1. charge
1357 * 2. moving account
1358 * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
1359 * It is added to LRU before charge.
1360 * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
1361 * When moving account, the page is not on LRU. It's isolated.
1362 */
1363
1364/** 1343/**
1365 * mem_cgroup_page_lruvec - return lruvec for adding an lru page 1344 * mem_cgroup_page_lruvec - return lruvec for adding an lru page
1366 * @page: the page 1345 * @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
2261 * 2240 *
2262 * Notes: Race condition 2241 * Notes: Race condition
2263 * 2242 *
2264 * We usually use lock_page_cgroup() for accessing page_cgroup member but 2243 * Charging occurs during page instantiation, while the page is
2265 * it tends to be costly. But considering some conditions, we doesn't need 2244 * unmapped and locked in page migration, or while the page table is
2266 * to do so _always_. 2245 * locked in THP migration. No race is possible.
2267 *
2268 * Considering "charge", lock_page_cgroup() is not required because all
2269 * file-stat operations happen after a page is attached to radix-tree. There
2270 * are no race with "charge".
2271 * 2246 *
2272 * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup 2247 * Uncharge happens to pages with zero references, no race possible.
2273 * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
2274 * if there are race with "uncharge". Statistics itself is properly handled
2275 * by flags.
2276 * 2248 *
2277 * Considering "move", this is an only case we see a race. To make the race 2249 * Charge moving between groups is protected by checking mm->moving
2278 * small, we check memcg->moving_account and detect there are possibility 2250 * account and taking the move_lock in the slowpath.
2279 * of race or not. If there is, we take a lock.
2280 */ 2251 */
2281 2252
2282void __mem_cgroup_begin_update_page_stat(struct page *page, 2253void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2551,17 +2522,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
2551 return NOTIFY_OK; 2522 return NOTIFY_OK;
2552} 2523}
2553 2524
2554/** 2525static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2555 * mem_cgroup_try_charge - try charging a memcg 2526 unsigned int nr_pages)
2556 * @memcg: memcg to charge
2557 * @nr_pages: number of pages to charge
2558 *
2559 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2560 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2561 */
2562static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2563 gfp_t gfp_mask,
2564 unsigned int nr_pages)
2565{ 2527{
2566 unsigned int batch = max(CHARGE_BATCH, nr_pages); 2528 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2567 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 2529 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2660,41 +2622,7 @@ done:
2660 return ret; 2622 return ret;
2661} 2623}
2662 2624
2663/** 2625static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
2664 * mem_cgroup_try_charge_mm - try charging a mm
2665 * @mm: mm_struct to charge
2666 * @nr_pages: number of pages to charge
2667 * @oom: trigger OOM if reclaim fails
2668 *
2669 * Returns the charged mem_cgroup associated with the given mm_struct or
2670 * NULL the charge failed.
2671 */
2672static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2673 gfp_t gfp_mask,
2674 unsigned int nr_pages)
2675
2676{
2677 struct mem_cgroup *memcg;
2678 int ret;
2679
2680 memcg = get_mem_cgroup_from_mm(mm);
2681 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
2682 css_put(&memcg->css);
2683 if (ret == -EINTR)
2684 memcg = root_mem_cgroup;
2685 else if (ret)
2686 memcg = NULL;
2687
2688 return memcg;
2689}
2690
2691/*
2692 * Sometimes we have to undo a charge we got by try_charge().
2693 * This function does that uncharge and puts the css refcount
2694 * gotten by try_charge().
2695 */
2696static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2697 unsigned int nr_pages)
2698{ 2626{
2699 unsigned long bytes = nr_pages * PAGE_SIZE; 2627 unsigned long bytes = nr_pages * PAGE_SIZE;
2700 2628
@@ -2732,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2732 return mem_cgroup_from_id(id); 2660 return mem_cgroup_from_id(id);
2733} 2661}
2734 2662
2663/*
2664 * try_get_mem_cgroup_from_page - look up page's memcg association
2665 * @page: the page
2666 *
2667 * Look up, get a css reference, and return the memcg that owns @page.
2668 *
2669 * The page must be locked to prevent racing with swap-in and page
2670 * cache charges. If coming from an unlocked page table, the caller
2671 * must ensure the page is on the LRU or this can race with charging.
2672 */
2735struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) 2673struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2736{ 2674{
2737 struct mem_cgroup *memcg = NULL; 2675 struct mem_cgroup *memcg = NULL;
@@ -2742,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2742 VM_BUG_ON_PAGE(!PageLocked(page), page); 2680 VM_BUG_ON_PAGE(!PageLocked(page), page);
2743 2681
2744 pc = lookup_page_cgroup(page); 2682 pc = lookup_page_cgroup(page);
2745 lock_page_cgroup(pc);
2746 if (PageCgroupUsed(pc)) { 2683 if (PageCgroupUsed(pc)) {
2747 memcg = pc->mem_cgroup; 2684 memcg = pc->mem_cgroup;
2748 if (memcg && !css_tryget_online(&memcg->css)) 2685 if (memcg && !css_tryget_online(&memcg->css))
@@ -2756,23 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2756 memcg = NULL; 2693 memcg = NULL;
2757 rcu_read_unlock(); 2694 rcu_read_unlock();
2758 } 2695 }
2759 unlock_page_cgroup(pc);
2760 return memcg; 2696 return memcg;
2761} 2697}
2762 2698
2763static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, 2699static void lock_page_lru(struct page *page, int *isolated)
2764 struct page *page, 2700{
2765 unsigned int nr_pages, 2701 struct zone *zone = page_zone(page);
2766 enum charge_type ctype, 2702
2767 bool lrucare) 2703 spin_lock_irq(&zone->lru_lock);
2704 if (PageLRU(page)) {
2705 struct lruvec *lruvec;
2706
2707 lruvec = mem_cgroup_page_lruvec(page, zone);
2708 ClearPageLRU(page);
2709 del_page_from_lru_list(page, lruvec, page_lru(page));
2710 *isolated = 1;
2711 } else
2712 *isolated = 0;
2713}
2714
2715static void unlock_page_lru(struct page *page, int isolated)
2716{
2717 struct zone *zone = page_zone(page);
2718
2719 if (isolated) {
2720 struct lruvec *lruvec;
2721
2722 lruvec = mem_cgroup_page_lruvec(page, zone);
2723 VM_BUG_ON_PAGE(PageLRU(page), page);
2724 SetPageLRU(page);
2725 add_page_to_lru_list(page, lruvec, page_lru(page));
2726 }
2727 spin_unlock_irq(&zone->lru_lock);
2728}
2729
2730static void commit_charge(struct page *page, struct mem_cgroup *memcg,
2731 bool lrucare)
2768{ 2732{
2769 struct page_cgroup *pc = lookup_page_cgroup(page); 2733 struct page_cgroup *pc = lookup_page_cgroup(page);
2770 struct zone *uninitialized_var(zone); 2734 int isolated;
2771 struct lruvec *lruvec;
2772 bool was_on_lru = false;
2773 bool anon;
2774 2735
2775 lock_page_cgroup(pc);
2776 VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); 2736 VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
2777 /* 2737 /*
2778 * we don't need page_cgroup_lock for tail pages, because they are not 2738 * we don't need page_cgroup_lock for tail pages, because they are not
@@ -2783,44 +2743,28 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2783 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page 2743 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
2784 * may already be on some other mem_cgroup's LRU. Take care of it. 2744 * may already be on some other mem_cgroup's LRU. Take care of it.
2785 */ 2745 */
2786 if (lrucare) { 2746 if (lrucare)
2787 zone = page_zone(page); 2747 lock_page_lru(page, &isolated);
2788 spin_lock_irq(&zone->lru_lock);
2789 if (PageLRU(page)) {
2790 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2791 ClearPageLRU(page);
2792 del_page_from_lru_list(page, lruvec, page_lru(page));
2793 was_on_lru = true;
2794 }
2795 }
2796
2797 pc->mem_cgroup = memcg;
2798 SetPageCgroupUsed(pc);
2799
2800 if (lrucare) {
2801 if (was_on_lru) {
2802 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2803 VM_BUG_ON_PAGE(PageLRU(page), page);
2804 SetPageLRU(page);
2805 add_page_to_lru_list(page, lruvec, page_lru(page));
2806 }
2807 spin_unlock_irq(&zone->lru_lock);
2808 }
2809
2810 if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
2811 anon = true;
2812 else
2813 anon = false;
2814
2815 mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
2816 unlock_page_cgroup(pc);
2817 2748
2818 /* 2749 /*
2819 * "charge_statistics" updated event counter. Then, check it. 2750 * Nobody should be changing or seriously looking at
2820 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. 2751 * pc->mem_cgroup and pc->flags at this point:
2821 * if they exceeds softlimit. 2752 *
2753 * - the page is uncharged
2754 *
2755 * - the page is off-LRU
2756 *
2757 * - an anonymous fault has exclusive page access, except for
2758 * a locked page table
2759 *
2760 * - a page cache insertion, a swapin fault, or a migration
2761 * have the page locked
2822 */ 2762 */
2823 memcg_check_events(memcg, page); 2763 pc->mem_cgroup = memcg;
2764 pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
2765
2766 if (lrucare)
2767 unlock_page_lru(page, isolated);
2824} 2768}
2825 2769
2826static DEFINE_MUTEX(set_limit_mutex); 2770static DEFINE_MUTEX(set_limit_mutex);
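For readers following the new commit_charge(): with lrucare set, the helpers above bracket the ownership switch so the page is invisible to reclaim while pc->mem_cgroup changes. A hedged paraphrase of that protocol, using only identifiers from the patch:

static void commit_with_lrucare(struct page *page, struct page_cgroup *pc,
				struct mem_cgroup *memcg)
{
	int isolated;

	lock_page_lru(page, &isolated);		/* pull off the LRU if on it */
	pc->mem_cgroup = memcg;			/* nobody can see the page now */
	pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
	unlock_page_lru(page, isolated);	/* put back, drop zone lock */
}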
@@ -2882,21 +2826,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
2882 if (ret) 2826 if (ret)
2883 return ret; 2827 return ret;
2884 2828
2885 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT); 2829 ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
2886 if (ret == -EINTR) { 2830 if (ret == -EINTR) {
2887 /* 2831 /*
2888 * mem_cgroup_try_charge() chosed to bypass to root due to 2832 * try_charge() chose to bypass to root due to OOM kill or
2889 * OOM kill or fatal signal. Since our only options are to 2833 * fatal signal. Since our only options are to either fail
2890 * either fail the allocation or charge it to this cgroup, do 2834 * the allocation or charge it to this cgroup, do it as a
2891 * it as a temporary condition. But we can't fail. From a 2835 * temporary condition. But we can't fail. From a kmem/slab
2892 * kmem/slab perspective, the cache has already been selected, 2836 * perspective, the cache has already been selected, by
2893 * by mem_cgroup_kmem_get_cache(), so it is too late to change 2837 * mem_cgroup_kmem_get_cache(), so it is too late to change
2894 * our minds. 2838 * our minds.
2895 * 2839 *
2896 * This condition will only trigger if the task entered 2840 * This condition will only trigger if the task entered
2897 * memcg_charge_kmem in a sane state, but was OOM-killed during 2841 * memcg_charge_kmem in a sane state, but was OOM-killed
2898 * mem_cgroup_try_charge() above. Tasks that were already 2842 * during try_charge() above. Tasks that were already dying
2899 * dying when the allocation triggers should have been already 2843 * when the allocation triggers should have been already
2900 * directed to the root cgroup in memcontrol.h 2844 * directed to the root cgroup in memcontrol.h
2901 */ 2845 */
2902 res_counter_charge_nofail(&memcg->res, size, &fail_res); 2846 res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3447,7 +3391,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
3447 3391
3448#ifdef CONFIG_TRANSPARENT_HUGEPAGE 3392#ifdef CONFIG_TRANSPARENT_HUGEPAGE
3449 3393
3450#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
3451/* 3394/*
3452 * Because tail pages are not marked as "used", set it. We're under 3395 * Because tail pages are not marked as "used", set it. We're under
3453 * zone->lru_lock, 'splitting on pmd' and compound_lock. 3396 * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3468,7 +3411,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
3468 for (i = 1; i < HPAGE_PMD_NR; i++) { 3411 for (i = 1; i < HPAGE_PMD_NR; i++) {
3469 pc = head_pc + i; 3412 pc = head_pc + i;
3470 pc->mem_cgroup = memcg; 3413 pc->mem_cgroup = memcg;
3471 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; 3414 pc->flags = head_pc->flags;
3472 } 3415 }
3473 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], 3416 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
3474 HPAGE_PMD_NR); 3417 HPAGE_PMD_NR);
@@ -3498,7 +3441,6 @@ static int mem_cgroup_move_account(struct page *page,
3498{ 3441{
3499 unsigned long flags; 3442 unsigned long flags;
3500 int ret; 3443 int ret;
3501 bool anon = PageAnon(page);
3502 3444
3503 VM_BUG_ON(from == to); 3445 VM_BUG_ON(from == to);
3504 VM_BUG_ON_PAGE(PageLRU(page), page); 3446 VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3512,15 +3454,21 @@ static int mem_cgroup_move_account(struct page *page,
3512 if (nr_pages > 1 && !PageTransHuge(page)) 3454 if (nr_pages > 1 && !PageTransHuge(page))
3513 goto out; 3455 goto out;
3514 3456
3515 lock_page_cgroup(pc); 3457 /*
3458 * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
3459 * of its source page while we change it: page migration takes
3460 * both pages off the LRU, but page cache replacement doesn't.
3461 */
3462 if (!trylock_page(page))
3463 goto out;
3516 3464
3517 ret = -EINVAL; 3465 ret = -EINVAL;
3518 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) 3466 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
3519 goto unlock; 3467 goto out_unlock;
3520 3468
3521 move_lock_mem_cgroup(from, &flags); 3469 move_lock_mem_cgroup(from, &flags);
3522 3470
3523 if (!anon && page_mapped(page)) { 3471 if (!PageAnon(page) && page_mapped(page)) {
3524 __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], 3472 __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
3525 nr_pages); 3473 nr_pages);
3526 __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], 3474 __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3534,20 +3482,25 @@ static int mem_cgroup_move_account(struct page *page,
3534 nr_pages); 3482 nr_pages);
3535 } 3483 }
3536 3484
3537 mem_cgroup_charge_statistics(from, page, anon, -nr_pages); 3485 /*
3486 * It is safe to change pc->mem_cgroup here because the page
3487 * is referenced, charged, and isolated - we can't race with
3488 * uncharging, charging, migration, or LRU putback.
3489 */
3538 3490
3539 /* caller should have done css_get */ 3491 /* caller should have done css_get */
3540 pc->mem_cgroup = to; 3492 pc->mem_cgroup = to;
3541 mem_cgroup_charge_statistics(to, page, anon, nr_pages);
3542 move_unlock_mem_cgroup(from, &flags); 3493 move_unlock_mem_cgroup(from, &flags);
3543 ret = 0; 3494 ret = 0;
3544unlock: 3495
3545 unlock_page_cgroup(pc); 3496 local_irq_disable();
3546 /* 3497 mem_cgroup_charge_statistics(to, page, nr_pages);
3547 * check events
3548 */
3549 memcg_check_events(to, page); 3498 memcg_check_events(to, page);
3499 mem_cgroup_charge_statistics(from, page, -nr_pages);
3550 memcg_check_events(from, page); 3500 memcg_check_events(from, page);
3501 local_irq_enable();
3502out_unlock:
3503 unlock_page(page);
3551out: 3504out:
3552 return ret; 3505 return ret;
3553} 3506}
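The rewritten tail of mem_cgroup_move_account() is the other half of the same story: statistics for both groups and their event checks now run inside one interrupts-off window, which is what lets the old page_cgroup lock go away. A hedged paraphrase:

static void move_stats_window(struct mem_cgroup *from, struct mem_cgroup *to,
			      struct page *page, int nr_pages)
{
	local_irq_disable();	/* stats and event checks pair up atomically */
	mem_cgroup_charge_statistics(to, page, nr_pages);
	memcg_check_events(to, page);
	mem_cgroup_charge_statistics(from, page, -nr_pages);
	memcg_check_events(from, page);
	local_irq_enable();
}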
@@ -3618,449 +3571,12 @@ out:
3618 return ret; 3571 return ret;
3619} 3572}
3620 3573
3621int mem_cgroup_charge_anon(struct page *page,
3622 struct mm_struct *mm, gfp_t gfp_mask)
3623{
3624 unsigned int nr_pages = 1;
3625 struct mem_cgroup *memcg;
3626
3627 if (mem_cgroup_disabled())
3628 return 0;
3629
3630 VM_BUG_ON_PAGE(page_mapped(page), page);
3631 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
3632 VM_BUG_ON(!mm);
3633
3634 if (PageTransHuge(page)) {
3635 nr_pages <<= compound_order(page);
3636 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3637 }
3638
3639 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
3640 if (!memcg)
3641 return -ENOMEM;
3642 __mem_cgroup_commit_charge(memcg, page, nr_pages,
3643 MEM_CGROUP_CHARGE_TYPE_ANON, false);
3644 return 0;
3645}
3646
3647/*
3648 * While swap-in, try_charge -> commit or cancel, the page is locked.
3649 * And when try_charge() successfully returns, one refcnt to memcg without
3650 * struct page_cgroup is acquired. This refcnt will be consumed by
3651 * "commit()" or removed by "cancel()"
3652 */
3653static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3654 struct page *page,
3655 gfp_t mask,
3656 struct mem_cgroup **memcgp)
3657{
3658 struct mem_cgroup *memcg = NULL;
3659 struct page_cgroup *pc;
3660 int ret;
3661
3662 pc = lookup_page_cgroup(page);
3663 /*
3664 * Every swap fault against a single page tries to charge the
3665 * page, bail as early as possible. shmem_unuse() encounters
3666 * already charged pages, too. The USED bit is protected by
3667 * the page lock, which serializes swap cache removal, which
3668 * in turn serializes uncharging.
3669 */
3670 if (PageCgroupUsed(pc))
3671 goto out;
3672 if (do_swap_account)
3673 memcg = try_get_mem_cgroup_from_page(page);
3674 if (!memcg)
3675 memcg = get_mem_cgroup_from_mm(mm);
3676 ret = mem_cgroup_try_charge(memcg, mask, 1);
3677 css_put(&memcg->css);
3678 if (ret == -EINTR)
3679 memcg = root_mem_cgroup;
3680 else if (ret)
3681 return ret;
3682out:
3683 *memcgp = memcg;
3684 return 0;
3685}
3686
3687int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
3688 gfp_t gfp_mask, struct mem_cgroup **memcgp)
3689{
3690 if (mem_cgroup_disabled()) {
3691 *memcgp = NULL;
3692 return 0;
3693 }
3694 /*
3695 * A racing thread's fault, or swapoff, may have already
3696 * updated the pte, and even removed the page from swap cache: in
3697 * those cases unuse_pte()'s pte_same() test will fail; but
3698 * there's also a KSM case which does need to charge the page.
3699 */
3700 if (!PageSwapCache(page)) {
3701 struct mem_cgroup *memcg;
3702
3703 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3704 if (!memcg)
3705 return -ENOMEM;
3706 *memcgp = memcg;
3707 return 0;
3708 }
3709 return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
3710}
3711
3712void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
3713{
3714 if (mem_cgroup_disabled())
3715 return;
3716 if (!memcg)
3717 return;
3718 __mem_cgroup_cancel_charge(memcg, 1);
3719}
3720
3721static void
3722__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
3723 enum charge_type ctype)
3724{
3725 if (mem_cgroup_disabled())
3726 return;
3727 if (!memcg)
3728 return;
3729
3730 __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
3731 /*
3732 * Now the swap is in memory. This means this page may be
3733 * counted both as mem and as swap, i.e. double-counted.
3734 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
3735 * under lock_page(). But in do_swap_page() in memory.c, reuse_swap_page()
3736 * may call delete_from_swap_cache() before we reach here.
3737 */
3738 if (do_swap_account && PageSwapCache(page)) {
3739 swp_entry_t ent = {.val = page_private(page)};
3740 mem_cgroup_uncharge_swap(ent);
3741 }
3742}
3743
3744void mem_cgroup_commit_charge_swapin(struct page *page,
3745 struct mem_cgroup *memcg)
3746{
3747 __mem_cgroup_commit_charge_swapin(page, memcg,
3748 MEM_CGROUP_CHARGE_TYPE_ANON);
3749}
3750
3751int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
3752 gfp_t gfp_mask)
3753{
3754 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3755 struct mem_cgroup *memcg;
3756 int ret;
3757
3758 if (mem_cgroup_disabled())
3759 return 0;
3760 if (PageCompound(page))
3761 return 0;
3762
3763 if (PageSwapCache(page)) { /* shmem */
3764 ret = __mem_cgroup_try_charge_swapin(mm, page,
3765 gfp_mask, &memcg);
3766 if (ret)
3767 return ret;
3768 __mem_cgroup_commit_charge_swapin(page, memcg, type);
3769 return 0;
3770 }
3771
3772 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3773 if (!memcg)
3774 return -ENOMEM;
3775 __mem_cgroup_commit_charge(memcg, page, 1, type, false);
3776 return 0;
3777}
3778
3779static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
3780 unsigned int nr_pages,
3781 const enum charge_type ctype)
3782{
3783 struct memcg_batch_info *batch = NULL;
3784 bool uncharge_memsw = true;
3785
3786 /* If swapout, usage of swap doesn't decrease */
3787 if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
3788 uncharge_memsw = false;
3789
3790 batch = &current->memcg_batch;
3791 /*
3792 * Usually, we do css_get() when we remember the memcg pointer.
3793 * But in this case, we keep res->usage until the end of a series of
3794 * uncharges. Then it's OK to ignore the memcg's refcnt.
3795 */
3796 if (!batch->memcg)
3797 batch->memcg = memcg;
3798 /*
3799 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
3800 * In those cases, all pages freed continuously can be expected to be in
3801 * the same cgroup and we have a chance to coalesce uncharges.
3802 * But we do uncharge one by one if the task is killed by OOM (TIF_MEMDIE)
3803 * because we want to do the uncharge as soon as possible.
3804 */
3805
3806 if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
3807 goto direct_uncharge;
3808
3809 if (nr_pages > 1)
3810 goto direct_uncharge;
3811
3812 /*
3813 * In the typical case, batch->memcg == mem. This means we can
3814 * merge a series of uncharges into a single res_counter uncharge.
3815 * If not, we uncharge the res_counter one by one.
3816 */
3817 if (batch->memcg != memcg)
3818 goto direct_uncharge;
3819 /* remember freed charge and uncharge it later */
3820 batch->nr_pages++;
3821 if (uncharge_memsw)
3822 batch->memsw_nr_pages++;
3823 return;
3824direct_uncharge:
3825 res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
3826 if (uncharge_memsw)
3827 res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
3828 if (unlikely(batch->memcg != memcg))
3829 memcg_oom_recover(memcg);
3830}
3831
3832/*
3833 * uncharge if !page_mapped(page)
3834 */
3835static struct mem_cgroup *
3836__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
3837 bool end_migration)
3838{
3839 struct mem_cgroup *memcg = NULL;
3840 unsigned int nr_pages = 1;
3841 struct page_cgroup *pc;
3842 bool anon;
3843
3844 if (mem_cgroup_disabled())
3845 return NULL;
3846
3847 if (PageTransHuge(page)) {
3848 nr_pages <<= compound_order(page);
3849 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3850 }
3851 /*
3852 * Check if our page_cgroup is valid
3853 */
3854 pc = lookup_page_cgroup(page);
3855 if (unlikely(!PageCgroupUsed(pc)))
3856 return NULL;
3857
3858 lock_page_cgroup(pc);
3859
3860 memcg = pc->mem_cgroup;
3861
3862 if (!PageCgroupUsed(pc))
3863 goto unlock_out;
3864
3865 anon = PageAnon(page);
3866
3867 switch (ctype) {
3868 case MEM_CGROUP_CHARGE_TYPE_ANON:
3869 /*
3870 * Generally PageAnon tells whether the anon statistics should be
3871 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
3872 * used before the page has reached the stage of being marked PageAnon.
3873 */
3874 anon = true;
3875 /* fallthrough */
3876 case MEM_CGROUP_CHARGE_TYPE_DROP:
3877 /* See mem_cgroup_prepare_migration() */
3878 if (page_mapped(page))
3879 goto unlock_out;
3880 /*
3881 * Pages under migration may not be uncharged. But
3882 * end_migration() /must/ be the one uncharging the
3883 * unused post-migration page and so it has to call
3884 * here with the migration bit still set. See the
3885 * res_counter handling below.
3886 */
3887 if (!end_migration && PageCgroupMigration(pc))
3888 goto unlock_out;
3889 break;
3890 case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
3891 if (!PageAnon(page)) { /* Shared memory */
3892 if (page->mapping && !page_is_file_cache(page))
3893 goto unlock_out;
3894 } else if (page_mapped(page)) /* Anon */
3895 goto unlock_out;
3896 break;
3897 default:
3898 break;
3899 }
3900
3901 mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
3902
3903 ClearPageCgroupUsed(pc);
3904 /*
3905 * pc->mem_cgroup is not cleared here. It will be accessed when it's
3906 * freed from the LRU. This is safe because an uncharged page is expected
3907 * not to be reused (it is freed soon). The exception is SwapCache, which
3908 * is handled by special functions.
3909 */
3910
3911 unlock_page_cgroup(pc);
3912 /*
3913 * even after unlock, we have memcg->res.usage here and this memcg
3914 * will never be freed, so it's safe to call css_get().
3915 */
3916 memcg_check_events(memcg, page);
3917 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
3918 mem_cgroup_swap_statistics(memcg, true);
3919 css_get(&memcg->css);
3920 }
3921 /*
3922 * Migration does not charge the res_counter for the
3923 * replacement page, so leave it alone when phasing out the
3924 * page that is unused after the migration.
3925 */
3926 if (!end_migration)
3927 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3928
3929 return memcg;
3930
3931unlock_out:
3932 unlock_page_cgroup(pc);
3933 return NULL;
3934}
3935
3936void mem_cgroup_uncharge_page(struct page *page)
3937{
3938 /* early check. */
3939 if (page_mapped(page))
3940 return;
3941 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
3942 /*
3943 * If the page is in swap cache, uncharge should be deferred
3944 * to the swap path, which also properly accounts swap usage
3945 * and handles memcg lifetime.
3946 *
3947 * Note that this check is not stable and reclaim may add the
3948 * page to swap cache at any time after this. However, if the
3949 * page is not in swap cache by the time page->mapcount hits
3950 * 0, there won't be any page table references to the swap
3951 * slot, and reclaim will free it and not actually write the
3952 * page to disk.
3953 */
3954 if (PageSwapCache(page))
3955 return;
3956 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
3957}
3958
3959void mem_cgroup_uncharge_cache_page(struct page *page)
3960{
3961 VM_BUG_ON_PAGE(page_mapped(page), page);
3962 VM_BUG_ON_PAGE(page->mapping, page);
3963 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
3964}
3965
3966/*
3967 * Batch_start/batch_end is called in unmap_page_range/invalidate/truncate.
3968 * In those cases, pages are freed continuously and can be expected to
3969 * be in the same memcg. Each of these calls itself limits the number of
3970 * pages freed at once, so uncharge_start/end() is called properly.
3971 * This may be called multiple (nested) times in one context.
3972 */
3973
3974void mem_cgroup_uncharge_start(void)
3975{
3976 current->memcg_batch.do_batch++;
3977 /* We can do nest. */
3978 if (current->memcg_batch.do_batch == 1) {
3979 current->memcg_batch.memcg = NULL;
3980 current->memcg_batch.nr_pages = 0;
3981 current->memcg_batch.memsw_nr_pages = 0;
3982 }
3983}
3984
3985void mem_cgroup_uncharge_end(void)
3986{
3987 struct memcg_batch_info *batch = &current->memcg_batch;
3988
3989 if (!batch->do_batch)
3990 return;
3991
3992 batch->do_batch--;
3993 if (batch->do_batch) /* If stacked, do nothing. */
3994 return;
3995
3996 if (!batch->memcg)
3997 return;
3998 /*
3999 * This "batch->memcg" is valid without any css_get/put etc...
4000 * because we hide charges behind us.
4001 */
4002 if (batch->nr_pages)
4003 res_counter_uncharge(&batch->memcg->res,
4004 batch->nr_pages * PAGE_SIZE);
4005 if (batch->memsw_nr_pages)
4006 res_counter_uncharge(&batch->memcg->memsw,
4007 batch->memsw_nr_pages * PAGE_SIZE);
4008 memcg_oom_recover(batch->memcg);
4009 /* forget this pointer (for sanity check) */
4010 batch->memcg = NULL;
4011}
4012
4013#ifdef CONFIG_SWAP
4014/*
4015 * Called after __delete_from_swap_cache(); drops the "page" account.
4016 * memcg information is recorded in the swap_cgroup of "ent".
4017 */
4018void
4019mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
4020{
4021 struct mem_cgroup *memcg;
4022 int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
4023
4024 if (!swapout) /* this was a swap cache but the swap is unused ! */
4025 ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
4026
4027 memcg = __mem_cgroup_uncharge_common(page, ctype, false);
4028
4029 /*
4030 * Record memcg information. If swapout && memcg != NULL,
4031 * css_get() was called in uncharge().
4032 */
4033 if (do_swap_account && swapout && memcg)
4034 swap_cgroup_record(ent, mem_cgroup_id(memcg));
4035}
4036#endif
4037
4038#ifdef CONFIG_MEMCG_SWAP 3574#ifdef CONFIG_MEMCG_SWAP
4039/* 3575static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
4040 * called from swap_entry_free(). remove record in swap_cgroup and 3576 bool charge)
4041 * uncharge "memsw" account.
4042 */
4043void mem_cgroup_uncharge_swap(swp_entry_t ent)
4044{ 3577{
4045 struct mem_cgroup *memcg; 3578 int val = (charge) ? 1 : -1;
4046 unsigned short id; 3579 this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
4047
4048 if (!do_swap_account)
4049 return;
4050
4051 id = swap_cgroup_record(ent, 0);
4052 rcu_read_lock();
4053 memcg = mem_cgroup_lookup(id);
4054 if (memcg) {
4055 /*
4056 * We uncharge this because swap is freed. This memcg can
4057 * be obsolete one. We avoid calling css_tryget_online().
4058 */
4059 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
4060 mem_cgroup_swap_statistics(memcg, false);
4061 css_put(&memcg->css);
4062 }
4063 rcu_read_unlock();
4064} 3580}
4065 3581
4066/** 3582/**
@@ -4112,175 +3628,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
4112} 3628}
4113#endif 3629#endif
4114 3630
4115/*
4116 * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
4117 * page belongs to.
4118 */
4119void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
4120 struct mem_cgroup **memcgp)
4121{
4122 struct mem_cgroup *memcg = NULL;
4123 unsigned int nr_pages = 1;
4124 struct page_cgroup *pc;
4125 enum charge_type ctype;
4126
4127 *memcgp = NULL;
4128
4129 if (mem_cgroup_disabled())
4130 return;
4131
4132 if (PageTransHuge(page))
4133 nr_pages <<= compound_order(page);
4134
4135 pc = lookup_page_cgroup(page);
4136 lock_page_cgroup(pc);
4137 if (PageCgroupUsed(pc)) {
4138 memcg = pc->mem_cgroup;
4139 css_get(&memcg->css);
4140 /*
4141 * When migrating an anonymous page, its mapcount goes down
4142 * to 0 and uncharge() will be called. But even if it's fully
4143 * unmapped, migration may fail and the page has to be
4144 * charged again. We set the MIGRATION flag here and delay uncharge
4145 * until end_migration() is called.
4146 *
4147 * Corner Case Thinking
4148 * A)
4149 * When the old page was mapped as Anon and is unmapped and freed
4150 * while migration is ongoing:
4151 * If unmap finds the old page, its uncharge() will be delayed
4152 * until end_migration(). If unmap finds the new page, it's
4153 * uncharged when its mapcount goes from 1 to 0. If the unmap code
4154 * finds a swap migration entry, the new page will not be mapped
4155 * and end_migration() will find it (mapcount==0).
4156 *
4157 * B)
4158 * When the old page was mapped but migration fails, the kernel
4159 * remaps it. A charge for it is kept by the MIGRATION flag even
4160 * if mapcount goes down to 0. We can remap it successfully
4161 * without charging it again.
4162 *
4163 * C)
4164 * The "old" page is under lock_page() until the end of
4165 * migration, so the old page itself will not be swapped out.
4166 * If the new page is swapped out before end_migration, our
4167 * hook into the usual swap-out path will catch the event.
4168 */
4169 if (PageAnon(page))
4170 SetPageCgroupMigration(pc);
4171 }
4172 unlock_page_cgroup(pc);
4173 /*
4174 * If the page is not charged at this point,
4175 * we return here.
4176 */
4177 if (!memcg)
4178 return;
4179
4180 *memcgp = memcg;
4181 /*
4182 * We charge the new page before it's used/mapped. So, even if unlock_page()
4183 * is called before end_migration, we can catch all events on this new
4184 * page. In the case the new page is migrated but not remapped, the new
4185 * page's mapcount will finally be 0 and we call uncharge in end_migration().
4186 */
4187 if (PageAnon(page))
4188 ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
4189 else
4190 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
4191 /*
4192 * The page is committed to the memcg, but it's not actually
4193 * charged to the res_counter since we plan on replacing the
4194 * old one and only one page is going to be left afterwards.
4195 */
4196 __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
4197}
4198
4199/* remove redundant charge if migration failed*/
4200void mem_cgroup_end_migration(struct mem_cgroup *memcg,
4201 struct page *oldpage, struct page *newpage, bool migration_ok)
4202{
4203 struct page *used, *unused;
4204 struct page_cgroup *pc;
4205 bool anon;
4206
4207 if (!memcg)
4208 return;
4209
4210 if (!migration_ok) {
4211 used = oldpage;
4212 unused = newpage;
4213 } else {
4214 used = newpage;
4215 unused = oldpage;
4216 }
4217 anon = PageAnon(used);
4218 __mem_cgroup_uncharge_common(unused,
4219 anon ? MEM_CGROUP_CHARGE_TYPE_ANON
4220 : MEM_CGROUP_CHARGE_TYPE_CACHE,
4221 true);
4222 css_put(&memcg->css);
4223 /*
4224 * We disallowed uncharge of pages under migration because mapcount
4225 * of the page goes down to zero, temporarily.
4226 * Clear the flag and check whether the page should still be charged.
4227 */
4228 pc = lookup_page_cgroup(oldpage);
4229 lock_page_cgroup(pc);
4230 ClearPageCgroupMigration(pc);
4231 unlock_page_cgroup(pc);
4232
4233 /*
4234 * If a page is file cache, radix-tree replacement is atomic
4235 * and we can skip this check. When it was an Anon page, its mapcount
4236 * goes down to 0. But because we added the MIGRATION flag, it's not
4237 * uncharged yet. There are several cases, but the page->mapcount check
4238 * and the USED bit check in mem_cgroup_uncharge_page() will do enough
4239 * checking. (see prepare_charge() also)
4240 */
4241 if (anon)
4242 mem_cgroup_uncharge_page(used);
4243}
4244
4245/*
4246 * At page cache replacement, newpage is not under any memcg but it is on
4247 * the LRU. So this function doesn't touch the res_counter but handles the
4248 * LRU in the correct way. Both pages are locked, so we cannot race with uncharge.
4249 */
4250void mem_cgroup_replace_page_cache(struct page *oldpage,
4251 struct page *newpage)
4252{
4253 struct mem_cgroup *memcg = NULL;
4254 struct page_cgroup *pc;
4255 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
4256
4257 if (mem_cgroup_disabled())
4258 return;
4259
4260 pc = lookup_page_cgroup(oldpage);
4261 /* fix accounting on old pages */
4262 lock_page_cgroup(pc);
4263 if (PageCgroupUsed(pc)) {
4264 memcg = pc->mem_cgroup;
4265 mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
4266 ClearPageCgroupUsed(pc);
4267 }
4268 unlock_page_cgroup(pc);
4269
4270 /*
4271 * When called from shmem_replace_page(), in some cases the
4272 * oldpage has already been charged, and in some cases not.
4273 */
4274 if (!memcg)
4275 return;
4276 /*
4277 * Even if newpage->mapping was NULL before starting replacement,
4278 * the newpage may already be on the LRU (or a pagevec for the LRU). We
4279 * lock the LRU while we overwrite pc->mem_cgroup.
4280 */
4281 __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
4282}
4283
4284#ifdef CONFIG_DEBUG_VM 3631#ifdef CONFIG_DEBUG_VM
4285static struct page_cgroup *lookup_page_cgroup_used(struct page *page) 3632static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
4286{ 3633{
@@ -4479,7 +3826,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4479 gfp_mask, &nr_scanned); 3826 gfp_mask, &nr_scanned);
4480 nr_reclaimed += reclaimed; 3827 nr_reclaimed += reclaimed;
4481 *total_scanned += nr_scanned; 3828 *total_scanned += nr_scanned;
4482 spin_lock(&mctz->lock); 3829 spin_lock_irq(&mctz->lock);
4483 3830
4484 /* 3831 /*
4485 * If we failed to reclaim anything from this memory cgroup 3832 * If we failed to reclaim anything from this memory cgroup
@@ -4519,7 +3866,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4519 */ 3866 */
4520 /* If excess == 0, no tree ops */ 3867 /* If excess == 0, no tree ops */
4521 __mem_cgroup_insert_exceeded(mz, mctz, excess); 3868 __mem_cgroup_insert_exceeded(mz, mctz, excess);
4522 spin_unlock(&mctz->lock); 3869 spin_unlock_irq(&mctz->lock);
4523 css_put(&mz->memcg->css); 3870 css_put(&mz->memcg->css);
4524 loop++; 3871 loop++;
4525 /* 3872 /*
@@ -6319,20 +5666,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
6319 int ret; 5666 int ret;
6320 5667
6321 /* Try a single bulk charge without reclaim first */ 5668 /* Try a single bulk charge without reclaim first */
6322 ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); 5669 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
6323 if (!ret) { 5670 if (!ret) {
6324 mc.precharge += count; 5671 mc.precharge += count;
6325 return ret; 5672 return ret;
6326 } 5673 }
6327 if (ret == -EINTR) { 5674 if (ret == -EINTR) {
6328 __mem_cgroup_cancel_charge(root_mem_cgroup, count); 5675 cancel_charge(root_mem_cgroup, count);
6329 return ret; 5676 return ret;
6330 } 5677 }
6331 5678
6332 /* Try charges one by one with reclaim */ 5679 /* Try charges one by one with reclaim */
6333 while (count--) { 5680 while (count--) {
6334 ret = mem_cgroup_try_charge(mc.to, 5681 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
6335 GFP_KERNEL & ~__GFP_NORETRY, 1);
6336 /* 5682 /*
6337 * In case of failure, any residual charges against 5683 * In case of failure, any residual charges against
6338 * mc.to will be dropped by mem_cgroup_clear_mc() 5684 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6340,7 +5686,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
6340 * bypassed to root right away or they'll be lost. 5686 * bypassed to root right away or they'll be lost.
6341 */ 5687 */
6342 if (ret == -EINTR) 5688 if (ret == -EINTR)
6343 __mem_cgroup_cancel_charge(root_mem_cgroup, 1); 5689 cancel_charge(root_mem_cgroup, 1);
6344 if (ret) 5690 if (ret)
6345 return ret; 5691 return ret;
6346 mc.precharge++; 5692 mc.precharge++;
@@ -6482,9 +5828,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
6482 if (page) { 5828 if (page) {
6483 pc = lookup_page_cgroup(page); 5829 pc = lookup_page_cgroup(page);
6484 /* 5830 /*
6485 * Do only loose check w/o page_cgroup lock. 5831 * Do only loose check w/o serialization.
6486 * mem_cgroup_move_account() checks the pc is valid or not under 5832 * mem_cgroup_move_account() checks the pc is valid or
6487 * the lock. 5833 * not under LRU exclusion.
6488 */ 5834 */
6489 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { 5835 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
6490 ret = MC_TARGET_PAGE; 5836 ret = MC_TARGET_PAGE;
@@ -6609,7 +5955,7 @@ static void __mem_cgroup_clear_mc(void)
6609 5955
6610 /* we must uncharge all the leftover precharges from mc.to */ 5956 /* we must uncharge all the leftover precharges from mc.to */
6611 if (mc.precharge) { 5957 if (mc.precharge) {
6612 __mem_cgroup_cancel_charge(mc.to, mc.precharge); 5958 cancel_charge(mc.to, mc.precharge);
6613 mc.precharge = 0; 5959 mc.precharge = 0;
6614 } 5960 }
6615 /* 5961 /*
@@ -6617,7 +5963,7 @@ static void __mem_cgroup_clear_mc(void)
6617 * we must uncharge here. 5963 * we must uncharge here.
6618 */ 5964 */
6619 if (mc.moved_charge) { 5965 if (mc.moved_charge) {
6620 __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); 5966 cancel_charge(mc.from, mc.moved_charge);
6621 mc.moved_charge = 0; 5967 mc.moved_charge = 0;
6622 } 5968 }
6623 /* we must fixup refcnts and charges */ 5969 /* we must fixup refcnts and charges */
@@ -6946,6 +6292,398 @@ static void __init enable_swap_cgroup(void)
6946} 6292}
6947#endif 6293#endif
6948 6294
6295#ifdef CONFIG_MEMCG_SWAP
6296/**
6297 * mem_cgroup_swapout - transfer a memsw charge to swap
6298 * @page: page whose memsw charge to transfer
6299 * @entry: swap entry to move the charge to
6300 *
6301 * Transfer the memsw charge of @page to @entry.
6302 */
6303void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
6304{
6305 struct page_cgroup *pc;
6306 unsigned short oldid;
6307
6308 VM_BUG_ON_PAGE(PageLRU(page), page);
6309 VM_BUG_ON_PAGE(page_count(page), page);
6310
6311 if (!do_swap_account)
6312 return;
6313
6314 pc = lookup_page_cgroup(page);
6315
6316 /* Readahead page, never charged */
6317 if (!PageCgroupUsed(pc))
6318 return;
6319
6320 VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
6321
6322 oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
6323 VM_BUG_ON_PAGE(oldid, page);
6324
6325 pc->flags &= ~PCG_MEMSW;
6326 css_get(&pc->mem_cgroup->css);
6327 mem_cgroup_swap_statistics(pc->mem_cgroup, true);
6328}
6329
6330/**
6331 * mem_cgroup_uncharge_swap - uncharge a swap entry
6332 * @entry: swap entry to uncharge
6333 *
6334 * Drop the memsw charge associated with @entry.
6335 */
6336void mem_cgroup_uncharge_swap(swp_entry_t entry)
6337{
6338 struct mem_cgroup *memcg;
6339 unsigned short id;
6340
6341 if (!do_swap_account)
6342 return;
6343
6344 id = swap_cgroup_record(entry, 0);
6345 rcu_read_lock();
6346 memcg = mem_cgroup_lookup(id);
6347 if (memcg) {
6348 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
6349 mem_cgroup_swap_statistics(memcg, false);
6350 css_put(&memcg->css);
6351 }
6352 rcu_read_unlock();
6353}
6354#endif
6355
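A note on the pairing of these two helpers: swap_cgroup_record() stores the new
id for @entry and returns the id previously recorded there, which is what the
VM_BUG_ON_PAGE(oldid, page) in mem_cgroup_swapout() relies on. Recording 0 in
mem_cgroup_uncharge_swap() therefore clears the slot and, in the same step,
names the memcg that should receive the PAGE_SIZE memsw refund.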
6356/**
6357 * mem_cgroup_try_charge - try charging a page
6358 * @page: page to charge
6359 * @mm: mm context of the victim
6360 * @gfp_mask: reclaim mode
6361 * @memcgp: charged memcg return
6362 *
6363 * Try to charge @page to the memcg that @mm belongs to, reclaiming
6364 * pages according to @gfp_mask if necessary.
6365 *
6366 * Returns 0 on success, with *@memcgp pointing to the charged memcg.
6367 * Otherwise, an error code is returned.
6368 *
6369 * After page->mapping has been set up, the caller must finalize the
 6370 * charge with mem_cgroup_commit_charge(), or abort the transaction
6371 * with mem_cgroup_cancel_charge() in case page instantiation fails.
6372 */
6373int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
6374 gfp_t gfp_mask, struct mem_cgroup **memcgp)
6375{
6376 struct mem_cgroup *memcg = NULL;
6377 unsigned int nr_pages = 1;
6378 int ret = 0;
6379
6380 if (mem_cgroup_disabled())
6381 goto out;
6382
6383 if (PageSwapCache(page)) {
6384 struct page_cgroup *pc = lookup_page_cgroup(page);
6385 /*
6386 * Every swap fault against a single page tries to charge the
 6387 * page; bail as early as possible. shmem_unuse() encounters
6388 * already charged pages, too. The USED bit is protected by
6389 * the page lock, which serializes swap cache removal, which
6390 * in turn serializes uncharging.
6391 */
6392 if (PageCgroupUsed(pc))
6393 goto out;
6394 }
6395
6396 if (PageTransHuge(page)) {
6397 nr_pages <<= compound_order(page);
6398 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
6399 }
6400
6401 if (do_swap_account && PageSwapCache(page))
6402 memcg = try_get_mem_cgroup_from_page(page);
6403 if (!memcg)
6404 memcg = get_mem_cgroup_from_mm(mm);
6405
6406 ret = try_charge(memcg, gfp_mask, nr_pages);
6407
6408 css_put(&memcg->css);
6409
6410 if (ret == -EINTR) {
6411 memcg = root_mem_cgroup;
6412 ret = 0;
6413 }
6414out:
6415 *memcgp = memcg;
6416 return ret;
6417}
6418
6419/**
6420 * mem_cgroup_commit_charge - commit a page charge
6421 * @page: page to charge
6422 * @memcg: memcg to charge the page to
6423 * @lrucare: page might be on LRU already
6424 *
6425 * Finalize a charge transaction started by mem_cgroup_try_charge(),
6426 * after page->mapping has been set up. This must happen atomically
6427 * as part of the page instantiation, i.e. under the page table lock
6428 * for anonymous pages, under the page lock for page and swap cache.
6429 *
6430 * In addition, the page must not be on the LRU during the commit, to
6431 * prevent racing with task migration. If it might be, use @lrucare.
6432 *
6433 * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
6434 */
6435void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
6436 bool lrucare)
6437{
6438 unsigned int nr_pages = 1;
6439
6440 VM_BUG_ON_PAGE(!page->mapping, page);
6441 VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
6442
6443 if (mem_cgroup_disabled())
6444 return;
6445 /*
6446 * Swap faults will attempt to charge the same page multiple
6447 * times. But reuse_swap_page() might have removed the page
6448 * from swapcache already, so we can't check PageSwapCache().
6449 */
6450 if (!memcg)
6451 return;
6452
6453 commit_charge(page, memcg, lrucare);
6454
6455 if (PageTransHuge(page)) {
6456 nr_pages <<= compound_order(page);
6457 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
6458 }
6459
6460 local_irq_disable();
6461 mem_cgroup_charge_statistics(memcg, page, nr_pages);
6462 memcg_check_events(memcg, page);
6463 local_irq_enable();
6464
6465 if (do_swap_account && PageSwapCache(page)) {
6466 swp_entry_t entry = { .val = page_private(page) };
6467 /*
 6468 * The swap entry might not get freed for a long time;
6469 * let's not wait for it. The page already received a
6470 * memory+swap charge, drop the swap entry duplicate.
6471 */
6472 mem_cgroup_uncharge_swap(entry);
6473 }
6474}
6475
6476/**
6477 * mem_cgroup_cancel_charge - cancel a page charge
 6478 * @page: page whose charge to cancel
 6479 * @memcg: memcg the page was charged to
6480 *
6481 * Cancel a charge transaction started by mem_cgroup_try_charge().
6482 */
6483void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
6484{
6485 unsigned int nr_pages = 1;
6486
6487 if (mem_cgroup_disabled())
6488 return;
6489 /*
6490 * Swap faults will attempt to charge the same page multiple
6491 * times. But reuse_swap_page() might have removed the page
6492 * from swapcache already, so we can't check PageSwapCache().
6493 */
6494 if (!memcg)
6495 return;
6496
6497 if (PageTransHuge(page)) {
6498 nr_pages <<= compound_order(page);
6499 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
6500 }
6501
6502 cancel_charge(memcg, nr_pages);
6503}
6504
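Taken together, try/commit/cancel replace the old type-specific charge calls
with one explicit transaction. As a minimal sketch of a caller, modeled on the
do_anonymous_page() conversion in the mm/memory.c hunks further down (the
fault-handler plumbing is elided; only the charge-related calls are taken
verbatim from this series):

	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		goto oom_free_page;	/* reservation failed, nothing to undo */

	/* instantiate: set up rmap/page->mapping under the page table lock */
	page_add_new_anon_rmap(page, vma, address);
	mem_cgroup_commit_charge(page, memcg, false);	/* page not on LRU yet */
	lru_cache_add_active_or_unevictable(page, vma);

	/* on any failure between try and commit, roll the reservation back */
	mem_cgroup_cancel_charge(page, memcg);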
6505static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
6506 unsigned long nr_mem, unsigned long nr_memsw,
6507 unsigned long nr_anon, unsigned long nr_file,
6508 unsigned long nr_huge, struct page *dummy_page)
6509{
6510 unsigned long flags;
6511
6512 if (nr_mem)
6513 res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
6514 if (nr_memsw)
6515 res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
6516
6517 memcg_oom_recover(memcg);
6518
6519 local_irq_save(flags);
6520 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
6521 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
6522 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
6523 __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
6524 __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
6525 memcg_check_events(memcg, dummy_page);
6526 local_irq_restore(flags);
6527}
6528
6529static void uncharge_list(struct list_head *page_list)
6530{
6531 struct mem_cgroup *memcg = NULL;
6532 unsigned long nr_memsw = 0;
6533 unsigned long nr_anon = 0;
6534 unsigned long nr_file = 0;
6535 unsigned long nr_huge = 0;
6536 unsigned long pgpgout = 0;
6537 unsigned long nr_mem = 0;
6538 struct list_head *next;
6539 struct page *page;
6540
6541 next = page_list->next;
6542 do {
6543 unsigned int nr_pages = 1;
6544 struct page_cgroup *pc;
6545
6546 page = list_entry(next, struct page, lru);
6547 next = page->lru.next;
6548
6549 VM_BUG_ON_PAGE(PageLRU(page), page);
6550 VM_BUG_ON_PAGE(page_count(page), page);
6551
6552 pc = lookup_page_cgroup(page);
6553 if (!PageCgroupUsed(pc))
6554 continue;
6555
6556 /*
6557 * Nobody should be changing or seriously looking at
6558 * pc->mem_cgroup and pc->flags at this point, we have
6559 * fully exclusive access to the page.
6560 */
6561
6562 if (memcg != pc->mem_cgroup) {
6563 if (memcg) {
6564 uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
6565 nr_anon, nr_file, nr_huge, page);
6566 pgpgout = nr_mem = nr_memsw = 0;
6567 nr_anon = nr_file = nr_huge = 0;
6568 }
6569 memcg = pc->mem_cgroup;
6570 }
6571
6572 if (PageTransHuge(page)) {
6573 nr_pages <<= compound_order(page);
6574 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
6575 nr_huge += nr_pages;
6576 }
6577
6578 if (PageAnon(page))
6579 nr_anon += nr_pages;
6580 else
6581 nr_file += nr_pages;
6582
6583 if (pc->flags & PCG_MEM)
6584 nr_mem += nr_pages;
6585 if (pc->flags & PCG_MEMSW)
6586 nr_memsw += nr_pages;
6587 pc->flags = 0;
6588
6589 pgpgout++;
6590 } while (next != page_list);
6591
6592 if (memcg)
6593 uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
6594 nr_anon, nr_file, nr_huge, page);
6595}
6596
6597/**
6598 * mem_cgroup_uncharge - uncharge a page
6599 * @page: page to uncharge
6600 *
6601 * Uncharge a page previously charged with mem_cgroup_try_charge() and
6602 * mem_cgroup_commit_charge().
6603 */
6604void mem_cgroup_uncharge(struct page *page)
6605{
6606 struct page_cgroup *pc;
6607
6608 if (mem_cgroup_disabled())
6609 return;
6610
6611 /* Don't touch page->lru of any random page, pre-check: */
6612 pc = lookup_page_cgroup(page);
6613 if (!PageCgroupUsed(pc))
6614 return;
6615
6616 INIT_LIST_HEAD(&page->lru);
6617 uncharge_list(&page->lru);
6618}
6619
6620/**
 6621 * mem_cgroup_uncharge_list - uncharge a list of pages
6622 * @page_list: list of pages to uncharge
6623 *
6624 * Uncharge a list of pages previously charged with
6625 * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
6626 */
6627void mem_cgroup_uncharge_list(struct list_head *page_list)
6628{
6629 if (mem_cgroup_disabled())
6630 return;
6631
6632 if (!list_empty(page_list))
6633 uncharge_list(page_list);
6634}
6635
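Callers are expected to batch the uncharge right before freeing; the mm/swap.c
hunk below does exactly this in release_pages():

	mem_cgroup_uncharge_list(&pages_to_free);
	free_hot_cold_page_list(&pages_to_free, cold);

which lets uncharge_list() above flush one res_counter update per run of pages
belonging to the same memcg, instead of one update per page.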
6636/**
6637 * mem_cgroup_migrate - migrate a charge to another page
6638 * @oldpage: currently charged page
6639 * @newpage: page to transfer the charge to
6640 * @lrucare: both pages might be on the LRU already
6641 *
6642 * Migrate the charge from @oldpage to @newpage.
6643 *
6644 * Both pages must be locked, @newpage->mapping must be set up.
6645 */
6646void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
6647 bool lrucare)
6648{
6649 struct page_cgroup *pc;
6650 int isolated;
6651
6652 VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
6653 VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
6654 VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
6655 VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
6656 VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
6657 VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage),
6658 newpage);
6659
6660 if (mem_cgroup_disabled())
6661 return;
6662
6663 /* Page cache replacement: new page already charged? */
6664 pc = lookup_page_cgroup(newpage);
6665 if (PageCgroupUsed(pc))
6666 return;
6667
6668 /* Re-entrant migration: old page already uncharged? */
6669 pc = lookup_page_cgroup(oldpage);
6670 if (!PageCgroupUsed(pc))
6671 return;
6672
6673 VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
6674 VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
6675
6676 if (lrucare)
6677 lock_page_lru(oldpage, &isolated);
6678
6679 pc->flags = 0;
6680
6681 if (lrucare)
6682 unlock_page_lru(oldpage, isolated);
6683
6684 commit_charge(newpage, pc->mem_cgroup, lrucare);
6685}
6686
6949/* 6687/*
6950 * subsys_initcall() for memory controller. 6688 * subsys_initcall() for memory controller.
6951 * 6689 *
diff --git a/mm/memory.c b/mm/memory.c
index 5c55270729f7..ab3537bcfed2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
1292 details = NULL; 1292 details = NULL;
1293 1293
1294 BUG_ON(addr >= end); 1294 BUG_ON(addr >= end);
1295 mem_cgroup_uncharge_start();
1296 tlb_start_vma(tlb, vma); 1295 tlb_start_vma(tlb, vma);
1297 pgd = pgd_offset(vma->vm_mm, addr); 1296 pgd = pgd_offset(vma->vm_mm, addr);
1298 do { 1297 do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
1302 next = zap_pud_range(tlb, vma, pgd, addr, next, details); 1301 next = zap_pud_range(tlb, vma, pgd, addr, next, details);
1303 } while (pgd++, addr = next, addr != end); 1302 } while (pgd++, addr = next, addr != end);
1304 tlb_end_vma(tlb, vma); 1303 tlb_end_vma(tlb, vma);
1305 mem_cgroup_uncharge_end();
1306} 1304}
1307 1305
1308 1306
@@ -2049,6 +2047,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2049 struct page *dirty_page = NULL; 2047 struct page *dirty_page = NULL;
2050 unsigned long mmun_start = 0; /* For mmu_notifiers */ 2048 unsigned long mmun_start = 0; /* For mmu_notifiers */
2051 unsigned long mmun_end = 0; /* For mmu_notifiers */ 2049 unsigned long mmun_end = 0; /* For mmu_notifiers */
2050 struct mem_cgroup *memcg;
2052 2051
2053 old_page = vm_normal_page(vma, address, orig_pte); 2052 old_page = vm_normal_page(vma, address, orig_pte);
2054 if (!old_page) { 2053 if (!old_page) {
@@ -2204,7 +2203,7 @@ gotten:
2204 } 2203 }
2205 __SetPageUptodate(new_page); 2204 __SetPageUptodate(new_page);
2206 2205
2207 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) 2206 if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
2208 goto oom_free_new; 2207 goto oom_free_new;
2209 2208
2210 mmun_start = address & PAGE_MASK; 2209 mmun_start = address & PAGE_MASK;
@@ -2234,6 +2233,8 @@ gotten:
2234 */ 2233 */
2235 ptep_clear_flush(vma, address, page_table); 2234 ptep_clear_flush(vma, address, page_table);
2236 page_add_new_anon_rmap(new_page, vma, address); 2235 page_add_new_anon_rmap(new_page, vma, address);
2236 mem_cgroup_commit_charge(new_page, memcg, false);
2237 lru_cache_add_active_or_unevictable(new_page, vma);
2237 /* 2238 /*
2238 * We call the notify macro here because, when using secondary 2239 * We call the notify macro here because, when using secondary
2239 * mmu page tables (such as kvm shadow page tables), we want the 2240 * mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2272,7 @@ gotten:
2271 new_page = old_page; 2272 new_page = old_page;
2272 ret |= VM_FAULT_WRITE; 2273 ret |= VM_FAULT_WRITE;
2273 } else 2274 } else
2274 mem_cgroup_uncharge_page(new_page); 2275 mem_cgroup_cancel_charge(new_page, memcg);
2275 2276
2276 if (new_page) 2277 if (new_page)
2277 page_cache_release(new_page); 2278 page_cache_release(new_page);
@@ -2410,10 +2411,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2410{ 2411{
2411 spinlock_t *ptl; 2412 spinlock_t *ptl;
2412 struct page *page, *swapcache; 2413 struct page *page, *swapcache;
2414 struct mem_cgroup *memcg;
2413 swp_entry_t entry; 2415 swp_entry_t entry;
2414 pte_t pte; 2416 pte_t pte;
2415 int locked; 2417 int locked;
2416 struct mem_cgroup *ptr;
2417 int exclusive = 0; 2418 int exclusive = 0;
2418 int ret = 0; 2419 int ret = 0;
2419 2420
@@ -2489,7 +2490,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2489 goto out_page; 2490 goto out_page;
2490 } 2491 }
2491 2492
2492 if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { 2493 if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
2493 ret = VM_FAULT_OOM; 2494 ret = VM_FAULT_OOM;
2494 goto out_page; 2495 goto out_page;
2495 } 2496 }
@@ -2514,10 +2515,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2514 * while the page is counted on swap but not yet in mapcount i.e. 2515 * while the page is counted on swap but not yet in mapcount i.e.
2515 * before page_add_anon_rmap() and swap_free(); try_to_free_swap() 2516 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
2516 * must be called after the swap_free(), or it will never succeed. 2517 * must be called after the swap_free(), or it will never succeed.
2517 * Because delete_from_swap_page() may be called by reuse_swap_page(),
2518 * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
2519 * in page->private. In this case, a record in swap_cgroup is silently
2520 * discarded at swap_free().
2521 */ 2518 */
2522 2519
2523 inc_mm_counter_fast(mm, MM_ANONPAGES); 2520 inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2533,12 +2530,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2533 if (pte_swp_soft_dirty(orig_pte)) 2530 if (pte_swp_soft_dirty(orig_pte))
2534 pte = pte_mksoft_dirty(pte); 2531 pte = pte_mksoft_dirty(pte);
2535 set_pte_at(mm, address, page_table, pte); 2532 set_pte_at(mm, address, page_table, pte);
2536 if (page == swapcache) 2533 if (page == swapcache) {
2537 do_page_add_anon_rmap(page, vma, address, exclusive); 2534 do_page_add_anon_rmap(page, vma, address, exclusive);
2538 else /* ksm created a completely new copy */ 2535 mem_cgroup_commit_charge(page, memcg, true);
2536 } else { /* ksm created a completely new copy */
2539 page_add_new_anon_rmap(page, vma, address); 2537 page_add_new_anon_rmap(page, vma, address);
2540 /* It's better to call commit-charge after rmap is established */ 2538 mem_cgroup_commit_charge(page, memcg, false);
2541 mem_cgroup_commit_charge_swapin(page, ptr); 2539 lru_cache_add_active_or_unevictable(page, vma);
2540 }
2542 2541
2543 swap_free(entry); 2542 swap_free(entry);
2544 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) 2543 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2571,7 +2570,7 @@ unlock:
2571out: 2570out:
2572 return ret; 2571 return ret;
2573out_nomap: 2572out_nomap:
2574 mem_cgroup_cancel_charge_swapin(ptr); 2573 mem_cgroup_cancel_charge(page, memcg);
2575 pte_unmap_unlock(page_table, ptl); 2574 pte_unmap_unlock(page_table, ptl);
2576out_page: 2575out_page:
2577 unlock_page(page); 2576 unlock_page(page);
@@ -2627,6 +2626,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2627 unsigned long address, pte_t *page_table, pmd_t *pmd, 2626 unsigned long address, pte_t *page_table, pmd_t *pmd,
2628 unsigned int flags) 2627 unsigned int flags)
2629{ 2628{
2629 struct mem_cgroup *memcg;
2630 struct page *page; 2630 struct page *page;
2631 spinlock_t *ptl; 2631 spinlock_t *ptl;
2632 pte_t entry; 2632 pte_t entry;
@@ -2660,7 +2660,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2660 */ 2660 */
2661 __SetPageUptodate(page); 2661 __SetPageUptodate(page);
2662 2662
2663 if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) 2663 if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
2664 goto oom_free_page; 2664 goto oom_free_page;
2665 2665
2666 entry = mk_pte(page, vma->vm_page_prot); 2666 entry = mk_pte(page, vma->vm_page_prot);
@@ -2673,6 +2673,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2673 2673
2674 inc_mm_counter_fast(mm, MM_ANONPAGES); 2674 inc_mm_counter_fast(mm, MM_ANONPAGES);
2675 page_add_new_anon_rmap(page, vma, address); 2675 page_add_new_anon_rmap(page, vma, address);
2676 mem_cgroup_commit_charge(page, memcg, false);
2677 lru_cache_add_active_or_unevictable(page, vma);
2676setpte: 2678setpte:
2677 set_pte_at(mm, address, page_table, entry); 2679 set_pte_at(mm, address, page_table, entry);
2678 2680
@@ -2682,7 +2684,7 @@ unlock:
2682 pte_unmap_unlock(page_table, ptl); 2684 pte_unmap_unlock(page_table, ptl);
2683 return 0; 2685 return 0;
2684release: 2686release:
2685 mem_cgroup_uncharge_page(page); 2687 mem_cgroup_cancel_charge(page, memcg);
2686 page_cache_release(page); 2688 page_cache_release(page);
2687 goto unlock; 2689 goto unlock;
2688oom_free_page: 2690oom_free_page:
@@ -2919,6 +2921,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2919 pgoff_t pgoff, unsigned int flags, pte_t orig_pte) 2921 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2920{ 2922{
2921 struct page *fault_page, *new_page; 2923 struct page *fault_page, *new_page;
2924 struct mem_cgroup *memcg;
2922 spinlock_t *ptl; 2925 spinlock_t *ptl;
2923 pte_t *pte; 2926 pte_t *pte;
2924 int ret; 2927 int ret;
@@ -2930,7 +2933,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2930 if (!new_page) 2933 if (!new_page)
2931 return VM_FAULT_OOM; 2934 return VM_FAULT_OOM;
2932 2935
2933 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { 2936 if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
2934 page_cache_release(new_page); 2937 page_cache_release(new_page);
2935 return VM_FAULT_OOM; 2938 return VM_FAULT_OOM;
2936 } 2939 }
@@ -2950,12 +2953,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2950 goto uncharge_out; 2953 goto uncharge_out;
2951 } 2954 }
2952 do_set_pte(vma, address, new_page, pte, true, true); 2955 do_set_pte(vma, address, new_page, pte, true, true);
2956 mem_cgroup_commit_charge(new_page, memcg, false);
2957 lru_cache_add_active_or_unevictable(new_page, vma);
2953 pte_unmap_unlock(pte, ptl); 2958 pte_unmap_unlock(pte, ptl);
2954 unlock_page(fault_page); 2959 unlock_page(fault_page);
2955 page_cache_release(fault_page); 2960 page_cache_release(fault_page);
2956 return ret; 2961 return ret;
2957uncharge_out: 2962uncharge_out:
2958 mem_cgroup_uncharge_page(new_page); 2963 mem_cgroup_cancel_charge(new_page, memcg);
2959 page_cache_release(new_page); 2964 page_cache_release(new_page);
2960 return ret; 2965 return ret;
2961} 2966}
@@ -3425,44 +3430,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
3425} 3430}
3426#endif /* __PAGETABLE_PMD_FOLDED */ 3431#endif /* __PAGETABLE_PMD_FOLDED */
3427 3432
3428#if !defined(__HAVE_ARCH_GATE_AREA)
3429
3430#if defined(AT_SYSINFO_EHDR)
3431static struct vm_area_struct gate_vma;
3432
3433static int __init gate_vma_init(void)
3434{
3435 gate_vma.vm_mm = NULL;
3436 gate_vma.vm_start = FIXADDR_USER_START;
3437 gate_vma.vm_end = FIXADDR_USER_END;
3438 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
3439 gate_vma.vm_page_prot = __P101;
3440
3441 return 0;
3442}
3443__initcall(gate_vma_init);
3444#endif
3445
3446struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
3447{
3448#ifdef AT_SYSINFO_EHDR
3449 return &gate_vma;
3450#else
3451 return NULL;
3452#endif
3453}
3454
3455int in_gate_area_no_mm(unsigned long addr)
3456{
3457#ifdef AT_SYSINFO_EHDR
3458 if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
3459 return 1;
3460#endif
3461 return 0;
3462}
3463
3464#endif /* __HAVE_ARCH_GATE_AREA */
3465
3466static int __follow_pte(struct mm_struct *mm, unsigned long address, 3433static int __follow_pte(struct mm_struct *mm, unsigned long address,
3467 pte_t **ptepp, spinlock_t **ptlp) 3434 pte_t **ptepp, spinlock_t **ptlp)
3468{ 3435{
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf995c0c..f78ec9bd454d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
780 if (rc != MIGRATEPAGE_SUCCESS) { 780 if (rc != MIGRATEPAGE_SUCCESS) {
781 newpage->mapping = NULL; 781 newpage->mapping = NULL;
782 } else { 782 } else {
783 mem_cgroup_migrate(page, newpage, false);
783 if (remap_swapcache) 784 if (remap_swapcache)
784 remove_migration_ptes(page, newpage); 785 remove_migration_ptes(page, newpage);
785 page->mapping = NULL; 786 page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
795{ 796{
796 int rc = -EAGAIN; 797 int rc = -EAGAIN;
797 int remap_swapcache = 1; 798 int remap_swapcache = 1;
798 struct mem_cgroup *mem;
799 struct anon_vma *anon_vma = NULL; 799 struct anon_vma *anon_vma = NULL;
800 800
801 if (!trylock_page(page)) { 801 if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
821 lock_page(page); 821 lock_page(page);
822 } 822 }
823 823
824 /* charge against new page */
825 mem_cgroup_prepare_migration(page, newpage, &mem);
826
827 if (PageWriteback(page)) { 824 if (PageWriteback(page)) {
828 /* 825 /*
829 * Only in the case of a full synchronous migration is it 826 * Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
833 */ 830 */
834 if (mode != MIGRATE_SYNC) { 831 if (mode != MIGRATE_SYNC) {
835 rc = -EBUSY; 832 rc = -EBUSY;
836 goto uncharge; 833 goto out_unlock;
837 } 834 }
838 if (!force) 835 if (!force)
839 goto uncharge; 836 goto out_unlock;
840 wait_on_page_writeback(page); 837 wait_on_page_writeback(page);
841 } 838 }
842 /* 839 /*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
872 */ 869 */
873 remap_swapcache = 0; 870 remap_swapcache = 0;
874 } else { 871 } else {
875 goto uncharge; 872 goto out_unlock;
876 } 873 }
877 } 874 }
878 875
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 885 * the page migration right away (protected by page lock). 882 * the page migration right away (protected by page lock).
886 */ 883 */
887 rc = balloon_page_migrate(newpage, page, mode); 884 rc = balloon_page_migrate(newpage, page, mode);
888 goto uncharge; 885 goto out_unlock;
889 } 886 }
890 887
891 /* 888 /*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
904 VM_BUG_ON_PAGE(PageAnon(page), page); 901 VM_BUG_ON_PAGE(PageAnon(page), page);
905 if (page_has_private(page)) { 902 if (page_has_private(page)) {
906 try_to_free_buffers(page); 903 try_to_free_buffers(page);
907 goto uncharge; 904 goto out_unlock;
908 } 905 }
909 goto skip_unmap; 906 goto skip_unmap;
910 } 907 }
@@ -923,10 +920,7 @@ skip_unmap:
923 if (anon_vma) 920 if (anon_vma)
924 put_anon_vma(anon_vma); 921 put_anon_vma(anon_vma);
925 922
926uncharge: 923out_unlock:
927 mem_cgroup_end_migration(mem, page, newpage,
928 (rc == MIGRATEPAGE_SUCCESS ||
929 rc == MIGRATEPAGE_BALLOON_SUCCESS));
930 unlock_page(page); 924 unlock_page(page);
931out: 925out:
932 return rc; 926 return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1786 pg_data_t *pgdat = NODE_DATA(node); 1780 pg_data_t *pgdat = NODE_DATA(node);
1787 int isolated = 0; 1781 int isolated = 0;
1788 struct page *new_page = NULL; 1782 struct page *new_page = NULL;
1789 struct mem_cgroup *memcg = NULL;
1790 int page_lru = page_is_file_cache(page); 1783 int page_lru = page_is_file_cache(page);
1791 unsigned long mmun_start = address & HPAGE_PMD_MASK; 1784 unsigned long mmun_start = address & HPAGE_PMD_MASK;
1792 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; 1785 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
1852 goto out_unlock; 1845 goto out_unlock;
1853 } 1846 }
1854 1847
1855 /*
1856 * Traditional migration needs to prepare the memcg charge
1857 * transaction early to prevent the old page from being
1858 * uncharged when installing migration entries. Here we can
1859 * save the potential rollback and start the charge transfer
1860 * only when migration is already known to end successfully.
1861 */
1862 mem_cgroup_prepare_migration(page, new_page, &memcg);
1863
1864 orig_entry = *pmd; 1848 orig_entry = *pmd;
1865 entry = mk_pmd(new_page, vma->vm_page_prot); 1849 entry = mk_pmd(new_page, vma->vm_page_prot);
1866 entry = pmd_mkhuge(entry); 1850 entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
1888 goto fail_putback; 1872 goto fail_putback;
1889 } 1873 }
1890 1874
1875 mem_cgroup_migrate(page, new_page, false);
1876
1891 page_remove_rmap(page); 1877 page_remove_rmap(page);
1892 1878
1893 /*
1894 * Finish the charge transaction under the page table lock to
1895 * prevent split_huge_page() from dividing up the charge
1896 * before it's fully transferred to the new page.
1897 */
1898 mem_cgroup_end_migration(memcg, page, new_page, true);
1899 spin_unlock(ptl); 1879 spin_unlock(ptl);
1900 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 1880 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
1901 1881
diff --git a/mm/mmap.c b/mm/mmap.c
index 64c9d736155c..c1f2ea4a0b99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
221 if (vma->vm_flags & VM_DENYWRITE) 221 if (vma->vm_flags & VM_DENYWRITE)
222 atomic_inc(&file_inode(file)->i_writecount); 222 atomic_inc(&file_inode(file)->i_writecount);
223 if (vma->vm_flags & VM_SHARED) 223 if (vma->vm_flags & VM_SHARED)
224 mapping->i_mmap_writable--; 224 mapping_unmap_writable(mapping);
225 225
226 flush_dcache_mmap_lock(mapping); 226 flush_dcache_mmap_lock(mapping);
227 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 227 if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
622 if (vma->vm_flags & VM_DENYWRITE) 622 if (vma->vm_flags & VM_DENYWRITE)
623 atomic_dec(&file_inode(file)->i_writecount); 623 atomic_dec(&file_inode(file)->i_writecount);
624 if (vma->vm_flags & VM_SHARED) 624 if (vma->vm_flags & VM_SHARED)
625 mapping->i_mmap_writable++; 625 atomic_inc(&mapping->i_mmap_writable);
626 626
627 flush_dcache_mmap_lock(mapping); 627 flush_dcache_mmap_lock(mapping);
628 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 628 if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -1577,6 +1577,17 @@ munmap_back:
1577 if (error) 1577 if (error)
1578 goto free_vma; 1578 goto free_vma;
1579 } 1579 }
1580 if (vm_flags & VM_SHARED) {
1581 error = mapping_map_writable(file->f_mapping);
1582 if (error)
1583 goto allow_write_and_free_vma;
1584 }
1585
1586 /* ->mmap() can change vma->vm_file, but must guarantee that
1587 * vma_link() below can deny write-access if VM_DENYWRITE is set
1588 * and map writably if VM_SHARED is set. This usually means the
1589 * new file must not have been exposed to user-space, yet.
1590 */
1580 vma->vm_file = get_file(file); 1591 vma->vm_file = get_file(file);
1581 error = file->f_op->mmap(file, vma); 1592 error = file->f_op->mmap(file, vma);
1582 if (error) 1593 if (error)
@@ -1616,8 +1627,12 @@ munmap_back:
1616 1627
1617 vma_link(mm, vma, prev, rb_link, rb_parent); 1628 vma_link(mm, vma, prev, rb_link, rb_parent);
1618 /* Once vma denies write, undo our temporary denial count */ 1629 /* Once vma denies write, undo our temporary denial count */
1619 if (vm_flags & VM_DENYWRITE) 1630 if (file) {
1620 allow_write_access(file); 1631 if (vm_flags & VM_SHARED)
1632 mapping_unmap_writable(file->f_mapping);
1633 if (vm_flags & VM_DENYWRITE)
1634 allow_write_access(file);
1635 }
1621 file = vma->vm_file; 1636 file = vma->vm_file;
1622out: 1637out:
1623 perf_event_mmap(vma); 1638 perf_event_mmap(vma);
@@ -1646,14 +1661,17 @@ out:
1646 return addr; 1661 return addr;
1647 1662
1648unmap_and_free_vma: 1663unmap_and_free_vma:
1649 if (vm_flags & VM_DENYWRITE)
1650 allow_write_access(file);
1651 vma->vm_file = NULL; 1664 vma->vm_file = NULL;
1652 fput(file); 1665 fput(file);
1653 1666
1654 /* Undo any partial mapping done by a device driver. */ 1667 /* Undo any partial mapping done by a device driver. */
1655 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); 1668 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1656 charged = 0; 1669 charged = 0;
1670 if (vm_flags & VM_SHARED)
1671 mapping_unmap_writable(file->f_mapping);
1672allow_write_and_free_vma:
1673 if (vm_flags & VM_DENYWRITE)
1674 allow_write_access(file);
1657free_vma: 1675free_vma:
1658 kmem_cache_free(vm_area_cachep, vma); 1676 kmem_cache_free(vm_area_cachep, vma);
1659unacct_error: 1677unacct_error:
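These mm/mmap.c hunks depend on a small set of helpers around the now-atomic
i_mmap_writable counter. Their definitions live in include/linux/fs.h and are
not part of this excerpt; a sketch of the assumed semantics, where a positive
count tracks shared writable mappings and a negative count means new writable
mappings are currently blocked (which is what the memfd sealing code below
needs):

	static inline int mapping_map_writable(struct address_space *mapping)
	{
		/* fails once somebody has denied writable mappings */
		return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
			0 : -EPERM;
	}

	static inline void mapping_unmap_writable(struct address_space *mapping)
	{
		atomic_dec(&mapping->i_mmap_writable);
	}

	static inline int mapping_deny_writable(struct address_space *mapping)
	{
		/* fails while any shared writable mapping exists */
		return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
			0 : -EBUSY;
	}

	static inline void mapping_allow_writable(struct address_space *mapping)
	{
		atomic_inc(&mapping->i_mmap_writable);
	}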
diff --git a/mm/nommu.c b/mm/nommu.c
index 4a852f6c5709..a881d9673c6b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1981,11 +1981,6 @@ error:
1981 return -ENOMEM; 1981 return -ENOMEM;
1982} 1982}
1983 1983
1984int in_gate_area_no_mm(unsigned long addr)
1985{
1986 return 0;
1987}
1988
1989int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1984int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1990{ 1985{
1991 BUG(); 1986 BUG();
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a4a7699cdb..3e8491c504f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
1032 __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, 1032 __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
1033 hpage_nr_pages(page)); 1033 hpage_nr_pages(page));
1034 __page_set_anon_rmap(page, vma, address, 1); 1034 __page_set_anon_rmap(page, vma, address, 1);
1035
1036 VM_BUG_ON_PAGE(PageLRU(page), page);
1037 if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
1038 SetPageActive(page);
1039 lru_cache_add(page);
1040 return;
1041 }
1042
1043 if (!TestSetPageMlocked(page)) {
1044 /*
1045 * We use the irq-unsafe __mod_zone_page_stat because this
1046 * counter is not modified from interrupt context, and the pte
1047 * lock is held(spinlock), which implies preemption disabled.
1048 */
1049 __mod_zone_page_state(page_zone(page), NR_MLOCK,
1050 hpage_nr_pages(page));
1051 count_vm_event(UNEVICTABLE_PGMLOCKED);
1052 }
1053 add_page_to_unevictable_list(page);
1054} 1035}
1055 1036
1056/** 1037/**
@@ -1108,7 +1089,6 @@ void page_remove_rmap(struct page *page)
1108 if (unlikely(PageHuge(page))) 1089 if (unlikely(PageHuge(page)))
1109 goto out; 1090 goto out;
1110 if (anon) { 1091 if (anon) {
1111 mem_cgroup_uncharge_page(page);
1112 if (PageTransHuge(page)) 1092 if (PageTransHuge(page))
1113 __dec_zone_page_state(page, 1093 __dec_zone_page_state(page,
1114 NR_ANON_TRANSPARENT_HUGEPAGES); 1094 NR_ANON_TRANSPARENT_HUGEPAGES);
diff --git a/mm/shmem.c b/mm/shmem.c
index 302d1cf7ad07..a42add14331c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,9 @@ static struct vfsmount *shm_mnt;
66#include <linux/highmem.h> 66#include <linux/highmem.h>
67#include <linux/seq_file.h> 67#include <linux/seq_file.h>
68#include <linux/magic.h> 68#include <linux/magic.h>
69#include <linux/syscalls.h>
70#include <linux/fcntl.h>
71#include <uapi/linux/memfd.h>
69 72
70#include <asm/uaccess.h> 73#include <asm/uaccess.h>
71#include <asm/pgtable.h> 74#include <asm/pgtable.h>
@@ -419,7 +422,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
419 pvec.pages, indices); 422 pvec.pages, indices);
420 if (!pvec.nr) 423 if (!pvec.nr)
421 break; 424 break;
422 mem_cgroup_uncharge_start();
423 for (i = 0; i < pagevec_count(&pvec); i++) { 425 for (i = 0; i < pagevec_count(&pvec); i++) {
424 struct page *page = pvec.pages[i]; 426 struct page *page = pvec.pages[i];
425 427
@@ -447,7 +449,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
447 } 449 }
448 pagevec_remove_exceptionals(&pvec); 450 pagevec_remove_exceptionals(&pvec);
449 pagevec_release(&pvec); 451 pagevec_release(&pvec);
450 mem_cgroup_uncharge_end();
451 cond_resched(); 452 cond_resched();
452 index++; 453 index++;
453 } 454 }
@@ -495,7 +496,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
495 index = start; 496 index = start;
496 continue; 497 continue;
497 } 498 }
498 mem_cgroup_uncharge_start();
499 for (i = 0; i < pagevec_count(&pvec); i++) { 499 for (i = 0; i < pagevec_count(&pvec); i++) {
500 struct page *page = pvec.pages[i]; 500 struct page *page = pvec.pages[i];
501 501
@@ -531,7 +531,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
531 } 531 }
532 pagevec_remove_exceptionals(&pvec); 532 pagevec_remove_exceptionals(&pvec);
533 pagevec_release(&pvec); 533 pagevec_release(&pvec);
534 mem_cgroup_uncharge_end();
535 index++; 534 index++;
536 } 535 }
537 536
@@ -551,6 +550,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
551static int shmem_setattr(struct dentry *dentry, struct iattr *attr) 550static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
552{ 551{
553 struct inode *inode = dentry->d_inode; 552 struct inode *inode = dentry->d_inode;
553 struct shmem_inode_info *info = SHMEM_I(inode);
554 int error; 554 int error;
555 555
556 error = inode_change_ok(inode, attr); 556 error = inode_change_ok(inode, attr);
@@ -561,6 +561,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
561 loff_t oldsize = inode->i_size; 561 loff_t oldsize = inode->i_size;
562 loff_t newsize = attr->ia_size; 562 loff_t newsize = attr->ia_size;
563 563
564 /* protected by i_mutex */
565 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
566 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
567 return -EPERM;
568
564 if (newsize != oldsize) { 569 if (newsize != oldsize) {
565 error = shmem_reacct_size(SHMEM_I(inode)->flags, 570 error = shmem_reacct_size(SHMEM_I(inode)->flags,
566 oldsize, newsize); 571 oldsize, newsize);
@@ -621,7 +626,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
621 radswap = swp_to_radix_entry(swap); 626 radswap = swp_to_radix_entry(swap);
622 index = radix_tree_locate_item(&mapping->page_tree, radswap); 627 index = radix_tree_locate_item(&mapping->page_tree, radswap);
623 if (index == -1) 628 if (index == -1)
624 return 0; 629 return -EAGAIN; /* tell shmem_unuse we found nothing */
625 630
626 /* 631 /*
627 * Move _head_ to start search for next from here. 632 * Move _head_ to start search for next from here.
@@ -680,7 +685,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
680 spin_unlock(&info->lock); 685 spin_unlock(&info->lock);
681 swap_free(swap); 686 swap_free(swap);
682 } 687 }
683 error = 1; /* not an error, but entry was found */
684 } 688 }
685 return error; 689 return error;
686} 690}
@@ -692,7 +696,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
692{ 696{
693 struct list_head *this, *next; 697 struct list_head *this, *next;
694 struct shmem_inode_info *info; 698 struct shmem_inode_info *info;
695 int found = 0; 699 struct mem_cgroup *memcg;
696 int error = 0; 700 int error = 0;
697 701
698 /* 702 /*
@@ -707,26 +711,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
707 * the shmem_swaplist_mutex which might hold up shmem_writepage(). 711 * the shmem_swaplist_mutex which might hold up shmem_writepage().
708 * Charged back to the user (not to caller) when swap account is used. 712 * Charged back to the user (not to caller) when swap account is used.
709 */ 713 */
710 error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL); 714 error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
711 if (error) 715 if (error)
712 goto out; 716 goto out;
713 /* No radix_tree_preload: swap entry keeps a place for page in tree */ 717 /* No radix_tree_preload: swap entry keeps a place for page in tree */
718 error = -EAGAIN;
714 719
715 mutex_lock(&shmem_swaplist_mutex); 720 mutex_lock(&shmem_swaplist_mutex);
716 list_for_each_safe(this, next, &shmem_swaplist) { 721 list_for_each_safe(this, next, &shmem_swaplist) {
717 info = list_entry(this, struct shmem_inode_info, swaplist); 722 info = list_entry(this, struct shmem_inode_info, swaplist);
718 if (info->swapped) 723 if (info->swapped)
719 found = shmem_unuse_inode(info, swap, &page); 724 error = shmem_unuse_inode(info, swap, &page);
720 else 725 else
721 list_del_init(&info->swaplist); 726 list_del_init(&info->swaplist);
722 cond_resched(); 727 cond_resched();
723 if (found) 728 if (error != -EAGAIN)
724 break; 729 break;
 730 /* found nothing in this inode: move on to search the next */
725 } 731 }
726 mutex_unlock(&shmem_swaplist_mutex); 732 mutex_unlock(&shmem_swaplist_mutex);
727 733
728 if (found < 0) 734 if (error) {
729 error = found; 735 if (error != -ENOMEM)
736 error = 0;
737 mem_cgroup_cancel_charge(page, memcg);
738 } else
739 mem_cgroup_commit_charge(page, memcg, true);
730out: 740out:
731 unlock_page(page); 741 unlock_page(page);
732 page_cache_release(page); 742 page_cache_release(page);
@@ -830,7 +840,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
830 } 840 }
831 841
832 mutex_unlock(&shmem_swaplist_mutex); 842 mutex_unlock(&shmem_swaplist_mutex);
833 swapcache_free(swap, NULL); 843 swapcache_free(swap);
834redirty: 844redirty:
835 set_page_dirty(page); 845 set_page_dirty(page);
836 if (wbc->for_reclaim) 846 if (wbc->for_reclaim)
@@ -1003,7 +1013,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1003 */ 1013 */
1004 oldpage = newpage; 1014 oldpage = newpage;
1005 } else { 1015 } else {
1006 mem_cgroup_replace_page_cache(oldpage, newpage); 1016 mem_cgroup_migrate(oldpage, newpage, false);
1007 lru_cache_add_anon(newpage); 1017 lru_cache_add_anon(newpage);
1008 *pagep = newpage; 1018 *pagep = newpage;
1009 } 1019 }
@@ -1030,6 +1040,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1030 struct address_space *mapping = inode->i_mapping; 1040 struct address_space *mapping = inode->i_mapping;
1031 struct shmem_inode_info *info; 1041 struct shmem_inode_info *info;
1032 struct shmem_sb_info *sbinfo; 1042 struct shmem_sb_info *sbinfo;
1043 struct mem_cgroup *memcg;
1033 struct page *page; 1044 struct page *page;
1034 swp_entry_t swap; 1045 swp_entry_t swap;
1035 int error; 1046 int error;
@@ -1108,8 +1119,7 @@ repeat:
1108 goto failed; 1119 goto failed;
1109 } 1120 }
1110 1121
1111 error = mem_cgroup_charge_file(page, current->mm, 1122 error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
1112 gfp & GFP_RECLAIM_MASK);
1113 if (!error) { 1123 if (!error) {
1114 error = shmem_add_to_page_cache(page, mapping, index, 1124 error = shmem_add_to_page_cache(page, mapping, index,
1115 swp_to_radix_entry(swap)); 1125 swp_to_radix_entry(swap));
@@ -1125,12 +1135,16 @@ repeat:
1125 * Reset swap.val? No, leave it so "failed" goes back to 1135 * Reset swap.val? No, leave it so "failed" goes back to
1126 * "repeat": reading a hole and writing should succeed. 1136 * "repeat": reading a hole and writing should succeed.
1127 */ 1137 */
1128 if (error) 1138 if (error) {
1139 mem_cgroup_cancel_charge(page, memcg);
1129 delete_from_swap_cache(page); 1140 delete_from_swap_cache(page);
1141 }
1130 } 1142 }
1131 if (error) 1143 if (error)
1132 goto failed; 1144 goto failed;
1133 1145
1146 mem_cgroup_commit_charge(page, memcg, true);
1147
1134 spin_lock(&info->lock); 1148 spin_lock(&info->lock);
1135 info->swapped--; 1149 info->swapped--;
1136 shmem_recalc_inode(inode); 1150 shmem_recalc_inode(inode);
@@ -1168,8 +1182,7 @@ repeat:
1168 if (sgp == SGP_WRITE) 1182 if (sgp == SGP_WRITE)
1169 __SetPageReferenced(page); 1183 __SetPageReferenced(page);
1170 1184
1171 error = mem_cgroup_charge_file(page, current->mm, 1185 error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
1172 gfp & GFP_RECLAIM_MASK);
1173 if (error) 1186 if (error)
1174 goto decused; 1187 goto decused;
1175 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); 1188 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
@@ -1179,9 +1192,10 @@ repeat:
1179 radix_tree_preload_end(); 1192 radix_tree_preload_end();
1180 } 1193 }
1181 if (error) { 1194 if (error) {
1182 mem_cgroup_uncharge_cache_page(page); 1195 mem_cgroup_cancel_charge(page, memcg);
1183 goto decused; 1196 goto decused;
1184 } 1197 }
1198 mem_cgroup_commit_charge(page, memcg, false);
1185 lru_cache_add_anon(page); 1199 lru_cache_add_anon(page);
1186 1200
1187 spin_lock(&info->lock); 1201 spin_lock(&info->lock);
@@ -1407,6 +1421,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
1407 info = SHMEM_I(inode); 1421 info = SHMEM_I(inode);
1408 memset(info, 0, (char *)inode - (char *)info); 1422 memset(info, 0, (char *)inode - (char *)info);
1409 spin_lock_init(&info->lock); 1423 spin_lock_init(&info->lock);
1424 info->seals = F_SEAL_SEAL;
1410 info->flags = flags & VM_NORESERVE; 1425 info->flags = flags & VM_NORESERVE;
1411 INIT_LIST_HEAD(&info->swaplist); 1426 INIT_LIST_HEAD(&info->swaplist);
1412 simple_xattrs_init(&info->xattrs); 1427 simple_xattrs_init(&info->xattrs);
@@ -1465,7 +1480,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
1465 struct page **pagep, void **fsdata) 1480 struct page **pagep, void **fsdata)
1466{ 1481{
1467 struct inode *inode = mapping->host; 1482 struct inode *inode = mapping->host;
1483 struct shmem_inode_info *info = SHMEM_I(inode);
1468 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 1484 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1485
1486 /* i_mutex is held by caller */
1487 if (unlikely(info->seals)) {
1488 if (info->seals & F_SEAL_WRITE)
1489 return -EPERM;
1490 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1491 return -EPERM;
1492 }
1493
1469 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); 1494 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1470} 1495}
1471 1496
@@ -1803,11 +1828,233 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
1803 return offset; 1828 return offset;
1804} 1829}
1805 1830
1831/*
1832 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
1833 * so reuse a tag which we firmly believe is never set or cleared on shmem.
1834 */
1835#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
1836#define LAST_SCAN 4 /* about 150ms max */
1837
1838static void shmem_tag_pins(struct address_space *mapping)
1839{
1840 struct radix_tree_iter iter;
1841 void **slot;
1842 pgoff_t start;
1843 struct page *page;
1844
1845 lru_add_drain();
1846 start = 0;
1847 rcu_read_lock();
1848
1849restart:
1850 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1851 page = radix_tree_deref_slot(slot);
1852 if (!page || radix_tree_exception(page)) {
1853 if (radix_tree_deref_retry(page))
1854 goto restart;
1855 } else if (page_count(page) - page_mapcount(page) > 1) {
1856 spin_lock_irq(&mapping->tree_lock);
1857 radix_tree_tag_set(&mapping->page_tree, iter.index,
1858 SHMEM_TAG_PINNED);
1859 spin_unlock_irq(&mapping->tree_lock);
1860 }
1861
1862 if (need_resched()) {
1863 cond_resched_rcu();
1864 start = iter.index + 1;
1865 goto restart;
1866 }
1867 }
1868 rcu_read_unlock();
1869}
1870
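The pin test above relies on a counting identity: every user mapping of a
shmem page contributes one reference to both page_count() and page_mapcount(),
while the page cache's own reference shows up in page_count() alone, so an
unpinned page satisfies

	page_count(page) - page_mapcount(page) == 1

and any larger difference means an extra reference (a get_user_pages() pin,
in-flight direct I/O, and so on) that the SEAL_WRITE path has to wait out.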
1871/*
1872 * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
1873 * via get_user_pages(), drivers might have some pending I/O without any active
 1874 * user-space mappings (e.g., direct I/O, AIO). Therefore, we look at all pages
 1875 * and see whether they have an elevated ref-count. If so, we tag them and wait for
1876 * them to be dropped.
1877 * The caller must guarantee that no new user will acquire writable references
1878 * to those pages to avoid races.
1879 */
1880static int shmem_wait_for_pins(struct address_space *mapping)
1881{
1882 struct radix_tree_iter iter;
1883 void **slot;
1884 pgoff_t start;
1885 struct page *page;
1886 int error, scan;
1887
1888 shmem_tag_pins(mapping);
1889
1890 error = 0;
1891 for (scan = 0; scan <= LAST_SCAN; scan++) {
1892 if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
1893 break;
1894
1895 if (!scan)
1896 lru_add_drain_all();
1897 else if (schedule_timeout_killable((HZ << scan) / 200))
1898 scan = LAST_SCAN;
1899
1900 start = 0;
1901 rcu_read_lock();
1902restart:
1903 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
1904 start, SHMEM_TAG_PINNED) {
1905
1906 page = radix_tree_deref_slot(slot);
1907 if (radix_tree_exception(page)) {
1908 if (radix_tree_deref_retry(page))
1909 goto restart;
1910
1911 page = NULL;
1912 }
1913
1914 if (page &&
1915 page_count(page) - page_mapcount(page) != 1) {
1916 if (scan < LAST_SCAN)
1917 goto continue_resched;
1918
1919 /*
1920 * On the last scan, we clean up all those tags
1921 * we inserted; but make a note that we still
1922 * found pages pinned.
1923 */
1924 error = -EBUSY;
1925 }
1926
1927 spin_lock_irq(&mapping->tree_lock);
1928 radix_tree_tag_clear(&mapping->page_tree,
1929 iter.index, SHMEM_TAG_PINNED);
1930 spin_unlock_irq(&mapping->tree_lock);
1931continue_resched:
1932 if (need_resched()) {
1933 cond_resched_rcu();
1934 start = iter.index + 1;
1935 goto restart;
1936 }
1937 }
1938 rcu_read_unlock();
1939 }
1940
1941 return error;
1942}
1943
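The "about 150ms max" in the LAST_SCAN comment follows directly from the loop
above: scan 0 only drains the LRU pagevecs, and scans 1 through 4 each sleep
(HZ << scan) / 200 jiffies, so the total sleep is

	HZ * (2 + 4 + 8 + 16) / 200 = HZ * 30 / 200 = 0.15 * HZ

jiffies, i.e. 150ms, before the final pass clears the tags and reports -EBUSY.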
1944#define F_ALL_SEALS (F_SEAL_SEAL | \
1945 F_SEAL_SHRINK | \
1946 F_SEAL_GROW | \
1947 F_SEAL_WRITE)
1948
1949int shmem_add_seals(struct file *file, unsigned int seals)
1950{
1951 struct inode *inode = file_inode(file);
1952 struct shmem_inode_info *info = SHMEM_I(inode);
1953 int error;
1954
1955 /*
1956 * SEALING
1957 * Sealing allows multiple parties to share a shmem-file but restrict
1958 * access to a specific subset of file operations. Seals can only be
1959 * added, but never removed. This way, mutually untrusted parties can
1960 * share common memory regions with a well-defined policy. A malicious
1961 * peer can thus never perform unwanted operations on a shared object.
1962 *
1963 * Seals are only supported on special shmem-files and always affect
1964 * the whole underlying inode. Once a seal is set, it may prevent some
1965 * kinds of access to the file. Currently, the following seals are
1966 * defined:
1967 * SEAL_SEAL: Prevent further seals from being set on this file
1968 * SEAL_SHRINK: Prevent the file from shrinking
1969 * SEAL_GROW: Prevent the file from growing
1970 * SEAL_WRITE: Prevent write access to the file
1971 *
1972 * As we don't require any trust relationship between two parties, we
1973 * must prevent seals from being removed. Therefore, sealing a file
1974 * only adds a given set of seals to the file, it never touches
 1975 * existing seals. Furthermore, the "set seals" operation itself can be
 1976 * sealed, which prevents any further seal from being
 1977 * added.
1978 *
1979 * Semantics of sealing are only defined on volatile files. Only
1980 * anonymous shmem files support sealing. More importantly, seals are
1981 * never written to disk. Therefore, there's no plan to support it on
1982 * other file types.
1983 */
1984
1985 if (file->f_op != &shmem_file_operations)
1986 return -EINVAL;
1987 if (!(file->f_mode & FMODE_WRITE))
1988 return -EPERM;
1989 if (seals & ~(unsigned int)F_ALL_SEALS)
1990 return -EINVAL;
1991
1992 mutex_lock(&inode->i_mutex);
1993
1994 if (info->seals & F_SEAL_SEAL) {
1995 error = -EPERM;
1996 goto unlock;
1997 }
1998
1999 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
2000 error = mapping_deny_writable(file->f_mapping);
2001 if (error)
2002 goto unlock;
2003
2004 error = shmem_wait_for_pins(file->f_mapping);
2005 if (error) {
2006 mapping_allow_writable(file->f_mapping);
2007 goto unlock;
2008 }
2009 }
2010
2011 info->seals |= seals;
2012 error = 0;
2013
2014unlock:
2015 mutex_unlock(&inode->i_mutex);
2016 return error;
2017}
2018EXPORT_SYMBOL_GPL(shmem_add_seals);
2019
2020int shmem_get_seals(struct file *file)
2021{
2022 if (file->f_op != &shmem_file_operations)
2023 return -EINVAL;
2024
2025 return SHMEM_I(file_inode(file))->seals;
2026}
2027EXPORT_SYMBOL_GPL(shmem_get_seals);
2028
2029long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2030{
2031 long error;
2032
2033 switch (cmd) {
2034 case F_ADD_SEALS:
2035 /* disallow upper 32bit */
2036 if (arg > UINT_MAX)
2037 return -EINVAL;
2038
2039 error = shmem_add_seals(file, arg);
2040 break;
2041 case F_GET_SEALS:
2042 error = shmem_get_seals(file);
2043 break;
2044 default:
2045 error = -EINVAL;
2046 break;
2047 }
2048
2049 return error;
2050}
2051
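For the user-space side of the sealing semantics documented above, a sketch.
It assumes a sealable memfd, i.e. one created with MFD_ALLOW_SEALING (see
memfd_create() below); the F_ADD_SEALS/F_GET_SEALS commands and F_SEAL_*
values are taken from the series' uapi additions, with fallback defines for
userspace headers that do not carry them yet:

	#include <fcntl.h>
	#include <stdio.h>

	#ifndef F_ADD_SEALS		/* new with this series */
	#define F_ADD_SEALS	(1024 + 9)
	#define F_GET_SEALS	(1024 + 10)
	#define F_SEAL_SEAL	0x0001
	#define F_SEAL_SHRINK	0x0002
	#define F_SEAL_GROW	0x0004
	#define F_SEAL_WRITE	0x0008
	#endif

	static int seal_read_only(int fd)
	{
		/* freeze the size first, then forbid writes entirely */
		if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW) < 0)
			return -1;
		/* fails with EBUSY while writable mappings or pins exist */
		if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_SEAL) < 0)
			return -1;

		printf("seals now: 0x%x\n", fcntl(fd, F_GET_SEALS));
		/* write(), ftruncate() and shared-writable mmap() now fail
		 * with EPERM, as does any further F_ADD_SEALS */
		return 0;
	}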
1806static long shmem_fallocate(struct file *file, int mode, loff_t offset, 2052static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1807 loff_t len) 2053 loff_t len)
1808{ 2054{
1809 struct inode *inode = file_inode(file); 2055 struct inode *inode = file_inode(file);
1810 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 2056 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
2057 struct shmem_inode_info *info = SHMEM_I(inode);
1811 struct shmem_falloc shmem_falloc; 2058 struct shmem_falloc shmem_falloc;
1812 pgoff_t start, index, end; 2059 pgoff_t start, index, end;
1813 int error; 2060 int error;
@@ -1823,6 +2070,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1823 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 2070 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
1824 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 2071 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
1825 2072
2073 /* protected by i_mutex */
2074 if (info->seals & F_SEAL_WRITE) {
2075 error = -EPERM;
2076 goto out;
2077 }
2078
1826 shmem_falloc.waitq = &shmem_falloc_waitq; 2079 shmem_falloc.waitq = &shmem_falloc_waitq;
1827 shmem_falloc.start = unmap_start >> PAGE_SHIFT; 2080 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
1828 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 2081 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1849,6 +2102,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1849 if (error) 2102 if (error)
1850 goto out; 2103 goto out;
1851 2104
2105 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
2106 error = -EPERM;
2107 goto out;
2108 }
2109
1852 start = offset >> PAGE_CACHE_SHIFT; 2110 start = offset >> PAGE_CACHE_SHIFT;
1853 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2111 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1854 /* Try to avoid a swapstorm if len is impossible to satisfy */ 2112 /* Try to avoid a swapstorm if len is impossible to satisfy */
@@ -2584,6 +2842,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
2584 shmem_show_mpol(seq, sbinfo->mpol); 2842 shmem_show_mpol(seq, sbinfo->mpol);
2585 return 0; 2843 return 0;
2586} 2844}
2845
2846#define MFD_NAME_PREFIX "memfd:"
2847#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
2848#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
2849
2850#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
2851
2852SYSCALL_DEFINE2(memfd_create,
2853 const char __user *, uname,
2854 unsigned int, flags)
2855{
2856 struct shmem_inode_info *info;
2857 struct file *file;
2858 int fd, error;
2859 char *name;
2860 long len;
2861
2862 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
2863 return -EINVAL;
2864
2865 /* length includes terminating zero */
2866 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
2867 if (len <= 0)
2868 return -EFAULT;
2869 if (len > MFD_NAME_MAX_LEN + 1)
2870 return -EINVAL;
2871
2872 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
2873 if (!name)
2874 return -ENOMEM;
2875
2876 strcpy(name, MFD_NAME_PREFIX);
2877 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
2878 error = -EFAULT;
2879 goto err_name;
2880 }
2881
2882 /* terminating-zero may have changed after strnlen_user() returned */
2883 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
2884 error = -EFAULT;
2885 goto err_name;
2886 }
2887
2888 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
2889 if (fd < 0) {
2890 error = fd;
2891 goto err_name;
2892 }
2893
2894 file = shmem_file_setup(name, 0, VM_NORESERVE);
2895 if (IS_ERR(file)) {
2896 error = PTR_ERR(file);
2897 goto err_fd;
2898 }
2899 info = SHMEM_I(file_inode(file));
2900 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
2901 file->f_flags |= O_RDWR | O_LARGEFILE;
2902 if (flags & MFD_ALLOW_SEALING)
2903 info->seals &= ~F_SEAL_SEAL;
2904
2905 fd_install(fd, file);
2906 kfree(name);
2907 return fd;
2908
2909err_fd:
2910 put_unused_fd(fd);
2911err_name:
2912 kfree(name);
2913 return error;
2914}
2915
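A matching sketch for creating the memfd itself. There is no libc wrapper at
this point, so the example goes through syscall(2); __NR_memfd_create and the
MFD_* flags are assumed to come from freshly installed kernel headers:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/memfd.h>	/* MFD_CLOEXEC, MFD_ALLOW_SEALING */

	int main(void)
	{
		/* the name is informational only: it appears as "memfd:demo"
		 * in /proc/self/fd/ and is not a filesystem path */
		int fd = syscall(__NR_memfd_create, "demo",
				 MFD_CLOEXEC | MFD_ALLOW_SEALING);

		if (fd < 0) {
			perror("memfd_create");
			return 1;
		}
		if (ftruncate(fd, 4096) < 0 || write(fd, "hello", 5) != 5) {
			perror("ftruncate/write");
			return 1;
		}
		/* fd can now be handed around and sealed, see above */
		return 0;
	}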
2587#endif /* CONFIG_TMPFS */ 2916#endif /* CONFIG_TMPFS */
2588 2917
2589static void shmem_put_super(struct super_block *sb) 2918static void shmem_put_super(struct super_block *sb)
diff --git a/mm/slab.c b/mm/slab.c
index 2e60bf3dedbb..a467b308c682 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -470,6 +470,8 @@ static struct kmem_cache kmem_cache_boot = {
470 .name = "kmem_cache", 470 .name = "kmem_cache",
471}; 471};
472 472
473#define BAD_ALIEN_MAGIC 0x01020304ul
474
473static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 475static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
474 476
475static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 477static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -836,7 +838,7 @@ static int transfer_objects(struct array_cache *to,
836static inline struct alien_cache **alloc_alien_cache(int node, 838static inline struct alien_cache **alloc_alien_cache(int node,
837 int limit, gfp_t gfp) 839 int limit, gfp_t gfp)
838{ 840{
839 return NULL; 841 return (struct alien_cache **)BAD_ALIEN_MAGIC;
840} 842}
841 843
842static inline void free_alien_cache(struct alien_cache **ac_ptr) 844static inline void free_alien_cache(struct alien_cache **ac_ptr)
diff --git a/mm/swap.c b/mm/swap.c
index c789d01c9ec3..6b2dc3897cd5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
62 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 62 del_page_from_lru_list(page, lruvec, page_off_lru(page));
63 spin_unlock_irqrestore(&zone->lru_lock, flags); 63 spin_unlock_irqrestore(&zone->lru_lock, flags);
64 } 64 }
65 mem_cgroup_uncharge(page);
65} 66}
66 67
67static void __put_single_page(struct page *page) 68static void __put_single_page(struct page *page)
@@ -687,6 +688,40 @@ void add_page_to_unevictable_list(struct page *page)
687 spin_unlock_irq(&zone->lru_lock); 688 spin_unlock_irq(&zone->lru_lock);
688} 689}
689 690
691/**
692 * lru_cache_add_active_or_unevictable
693 * @page: the page to be added to LRU
694 * @vma: vma in which page is mapped for determining reclaimability
695 *
696 * Place @page on the active or unevictable LRU list, depending on its
697 * evictability. Note that if the page is not evictable, it goes
698 * directly back onto its zone's unevictable list; it does NOT use a
699 * per cpu pagevec.
700 */
701void lru_cache_add_active_or_unevictable(struct page *page,
702 struct vm_area_struct *vma)
703{
704 VM_BUG_ON_PAGE(PageLRU(page), page);
705
706 if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
707 SetPageActive(page);
708 lru_cache_add(page);
709 return;
710 }
711
712 if (!TestSetPageMlocked(page)) {
713 /*
714 * We use the irq-unsafe __mod_zone_page_state because this
715 * counter is not modified from interrupt context, and the pte
716 * lock is held (a spinlock), which implies preemption is disabled.
717 */
718 __mod_zone_page_state(page_zone(page), NR_MLOCK,
719 hpage_nr_pages(page));
720 count_vm_event(UNEVICTABLE_PGMLOCKED);
721 }
722 add_page_to_unevictable_list(page);
723}
724
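A runnable sketch of just the evictability predicate the new helper encodes; the flag values below are stand-ins, not the kernel's (in the kernel, VM_SPECIAL is a mask covering several VM_* bits):

	#include <stdio.h>

	#define VM_LOCKED	0x1
	#define VM_SPECIAL	0x2	/* stand-in for the VM_IO|VM_PFNMAP|... mask */

	static const char *lru_target(unsigned long vm_flags)
	{
		/* only a pure mlock()ed mapping makes the page unevictable */
		if ((vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)
			return "active LRU, via the per-cpu pagevec";
		return "unevictable list, added directly";
	}

	int main(void)
	{
		printf("plain:          %s\n", lru_target(0));
		printf("locked:         %s\n", lru_target(VM_LOCKED));
		printf("locked+special: %s\n", lru_target(VM_LOCKED | VM_SPECIAL));
		return 0;
	}
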
690/* 725/*
691 * If the page can not be invalidated, it is moved to the 726 * If the page can not be invalidated, it is moved to the
692 * inactive list to speed up its reclaim. It is moved to the 727 * inactive list to speed up its reclaim. It is moved to the
@@ -913,6 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
913 if (zone) 948 if (zone)
914 spin_unlock_irqrestore(&zone->lru_lock, flags); 949 spin_unlock_irqrestore(&zone->lru_lock, flags);
915 950
951 mem_cgroup_uncharge_list(&pages_to_free);
916 free_hot_cold_page_list(&pages_to_free, cold); 952 free_hot_cold_page_list(&pages_to_free, cold);
917} 953}
918EXPORT_SYMBOL(release_pages); 954EXPORT_SYMBOL(release_pages);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..3e0ec83d000c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
39struct address_space swapper_spaces[MAX_SWAPFILES] = { 39struct address_space swapper_spaces[MAX_SWAPFILES] = {
40 [0 ... MAX_SWAPFILES - 1] = { 40 [0 ... MAX_SWAPFILES - 1] = {
41 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), 41 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
42 .i_mmap_writable = ATOMIC_INIT(0),
42 .a_ops = &swap_aops, 43 .a_ops = &swap_aops,
43 .backing_dev_info = &swap_backing_dev_info, 44 .backing_dev_info = &swap_backing_dev_info,
44 } 45 }
@@ -176,7 +177,7 @@ int add_to_swap(struct page *page, struct list_head *list)
176 177
177 if (unlikely(PageTransHuge(page))) 178 if (unlikely(PageTransHuge(page)))
178 if (unlikely(split_huge_page_to_list(page, list))) { 179 if (unlikely(split_huge_page_to_list(page, list))) {
179 swapcache_free(entry, NULL); 180 swapcache_free(entry);
180 return 0; 181 return 0;
181 } 182 }
182 183
@@ -202,7 +203,7 @@ int add_to_swap(struct page *page, struct list_head *list)
202 * add_to_swap_cache() doesn't return -EEXIST, so we can safely 203 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
203 * clear SWAP_HAS_CACHE flag. 204 * clear SWAP_HAS_CACHE flag.
204 */ 205 */
205 swapcache_free(entry, NULL); 206 swapcache_free(entry);
206 return 0; 207 return 0;
207 } 208 }
208} 209}
@@ -225,7 +226,7 @@ void delete_from_swap_cache(struct page *page)
225 __delete_from_swap_cache(page); 226 __delete_from_swap_cache(page);
226 spin_unlock_irq(&address_space->tree_lock); 227 spin_unlock_irq(&address_space->tree_lock);
227 228
228 swapcache_free(entry, page); 229 swapcache_free(entry);
229 page_cache_release(page); 230 page_cache_release(page);
230} 231}
231 232
@@ -386,7 +387,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
386 * add_to_swap_cache() doesn't return -EEXIST, so we can safely 387 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
387 * clear SWAP_HAS_CACHE flag. 388 * clear SWAP_HAS_CACHE flag.
388 */ 389 */
389 swapcache_free(entry, NULL); 390 swapcache_free(entry);
390 } while (err != -ENOMEM); 391 } while (err != -ENOMEM);
391 392
392 if (new_page) 393 if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c524f7bd0bf..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
843/* 843/*
844 * Called after dropping swapcache to decrease refcnt to swap entries. 844 * Called after dropping swapcache to decrease refcnt to swap entries.
845 */ 845 */
846void swapcache_free(swp_entry_t entry, struct page *page) 846void swapcache_free(swp_entry_t entry)
847{ 847{
848 struct swap_info_struct *p; 848 struct swap_info_struct *p;
849 unsigned char count;
850 849
851 p = swap_info_get(entry); 850 p = swap_info_get(entry);
852 if (p) { 851 if (p) {
853 count = swap_entry_free(p, entry, SWAP_HAS_CACHE); 852 swap_entry_free(p, entry, SWAP_HAS_CACHE);
854 if (page)
855 mem_cgroup_uncharge_swapcache(page, entry, count != 0);
856 spin_unlock(&p->lock); 853 spin_unlock(&p->lock);
857 } 854 }
858} 855}
@@ -1106,15 +1103,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
1106 if (unlikely(!page)) 1103 if (unlikely(!page))
1107 return -ENOMEM; 1104 return -ENOMEM;
1108 1105
1109 if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, 1106 if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
1110 GFP_KERNEL, &memcg)) {
1111 ret = -ENOMEM; 1107 ret = -ENOMEM;
1112 goto out_nolock; 1108 goto out_nolock;
1113 } 1109 }
1114 1110
1115 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 1111 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
1116 if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { 1112 if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
1117 mem_cgroup_cancel_charge_swapin(memcg); 1113 mem_cgroup_cancel_charge(page, memcg);
1118 ret = 0; 1114 ret = 0;
1119 goto out; 1115 goto out;
1120 } 1116 }
@@ -1124,11 +1120,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
1124 get_page(page); 1120 get_page(page);
1125 set_pte_at(vma->vm_mm, addr, pte, 1121 set_pte_at(vma->vm_mm, addr, pte,
1126 pte_mkold(mk_pte(page, vma->vm_page_prot))); 1122 pte_mkold(mk_pte(page, vma->vm_page_prot)));
1127 if (page == swapcache) 1123 if (page == swapcache) {
1128 page_add_anon_rmap(page, vma, addr); 1124 page_add_anon_rmap(page, vma, addr);
1129 else /* ksm created a completely new copy */ 1125 mem_cgroup_commit_charge(page, memcg, true);
1126 } else { /* ksm created a completely new copy */
1130 page_add_new_anon_rmap(page, vma, addr); 1127 page_add_new_anon_rmap(page, vma, addr);
1131 mem_cgroup_commit_charge_swapin(page, memcg); 1128 mem_cgroup_commit_charge(page, memcg, false);
1129 lru_cache_add_active_or_unevictable(page, vma);
1130 }
1132 swap_free(entry); 1131 swap_free(entry);
1133 /* 1132 /*
1134 * Move the page to the active list so it is not 1133 * Move the page to the active list so it is not
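
The unuse_pte() conversion above follows the three-step memcg charging protocol used throughout this series: reserve with mem_cgroup_try_charge(), then either mem_cgroup_commit_charge() once the page is really mapped, or mem_cgroup_cancel_charge() on a lost race. A hedged userspace model of that protocol (the function names mirror the kernel calls; the bookkeeping is invented for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	static long charged;	/* stand-in for the memcg page counter */

	static bool try_charge(void)
	{
		charged++;		/* reserve; the kernel may fail with -ENOMEM */
		return true;
	}

	static void commit_charge(void)
	{
		/* the reservation becomes a committed charge; nothing to undo */
	}

	static void cancel_charge(void)
	{
		charged--;		/* roll the reservation back */
	}

	/* models unuse_pte(): charge, re-check the pte, then commit or cancel */
	static int map_swapped_page(bool pte_still_matches)
	{
		if (!try_charge())
			return -1;
		if (!pte_still_matches) {	/* raced: someone else mapped it */
			cancel_charge();
			return 0;
		}
		commit_charge();
		return 1;
	}

	int main(void)
	{
		int r;

		r = map_swapped_page(true);
		printf("mapped=%d charged=%ld\n", r, charged);
		r = map_swapped_page(false);
		printf("mapped=%d charged=%ld\n", r, charged);
		return 0;
	}
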
diff --git a/mm/truncate.c b/mm/truncate.c
index eda247307164..96d167372d89 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
281 while (index < end && pagevec_lookup_entries(&pvec, mapping, index, 281 while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
282 min(end - index, (pgoff_t)PAGEVEC_SIZE), 282 min(end - index, (pgoff_t)PAGEVEC_SIZE),
283 indices)) { 283 indices)) {
284 mem_cgroup_uncharge_start();
285 for (i = 0; i < pagevec_count(&pvec); i++) { 284 for (i = 0; i < pagevec_count(&pvec); i++) {
286 struct page *page = pvec.pages[i]; 285 struct page *page = pvec.pages[i];
287 286
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
307 } 306 }
308 pagevec_remove_exceptionals(&pvec); 307 pagevec_remove_exceptionals(&pvec);
309 pagevec_release(&pvec); 308 pagevec_release(&pvec);
310 mem_cgroup_uncharge_end();
311 cond_resched(); 309 cond_resched();
312 index++; 310 index++;
313 } 311 }
@@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
369 pagevec_release(&pvec); 367 pagevec_release(&pvec);
370 break; 368 break;
371 } 369 }
372 mem_cgroup_uncharge_start();
373 for (i = 0; i < pagevec_count(&pvec); i++) { 370 for (i = 0; i < pagevec_count(&pvec); i++) {
374 struct page *page = pvec.pages[i]; 371 struct page *page = pvec.pages[i];
375 372
@@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
394 } 391 }
395 pagevec_remove_exceptionals(&pvec); 392 pagevec_remove_exceptionals(&pvec);
396 pagevec_release(&pvec); 393 pagevec_release(&pvec);
397 mem_cgroup_uncharge_end();
398 index++; 394 index++;
399 } 395 }
400 cleancache_invalidate_inode(mapping); 396 cleancache_invalidate_inode(mapping);
@@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
493 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, 489 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
494 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 490 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
495 indices)) { 491 indices)) {
496 mem_cgroup_uncharge_start();
497 for (i = 0; i < pagevec_count(&pvec); i++) { 492 for (i = 0; i < pagevec_count(&pvec); i++) {
498 struct page *page = pvec.pages[i]; 493 struct page *page = pvec.pages[i];
499 494
@@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
522 } 517 }
523 pagevec_remove_exceptionals(&pvec); 518 pagevec_remove_exceptionals(&pvec);
524 pagevec_release(&pvec); 519 pagevec_release(&pvec);
525 mem_cgroup_uncharge_end();
526 cond_resched(); 520 cond_resched();
527 index++; 521 index++;
528 } 522 }
@@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
553 BUG_ON(page_has_private(page)); 547 BUG_ON(page_has_private(page));
554 __delete_from_page_cache(page, NULL); 548 __delete_from_page_cache(page, NULL);
555 spin_unlock_irq(&mapping->tree_lock); 549 spin_unlock_irq(&mapping->tree_lock);
556 mem_cgroup_uncharge_cache_page(page);
557 550
558 if (mapping->a_ops->freepage) 551 if (mapping->a_ops->freepage)
559 mapping->a_ops->freepage(page); 552 mapping->a_ops->freepage(page);
@@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
602 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, 595 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
603 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 596 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
604 indices)) { 597 indices)) {
605 mem_cgroup_uncharge_start();
606 for (i = 0; i < pagevec_count(&pvec); i++) { 598 for (i = 0; i < pagevec_count(&pvec); i++) {
607 struct page *page = pvec.pages[i]; 599 struct page *page = pvec.pages[i];
608 600
@@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
655 } 647 }
656 pagevec_remove_exceptionals(&pvec); 648 pagevec_remove_exceptionals(&pvec);
657 pagevec_release(&pvec); 649 pagevec_release(&pvec);
658 mem_cgroup_uncharge_end();
659 cond_resched(); 650 cond_resched();
660 index++; 651 index++;
661 } 652 }
diff --git a/mm/util.c b/mm/util.c
index 7b6608df2ee8..093c973f1697 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -183,17 +183,14 @@ pid_t vm_is_stack(struct task_struct *task,
183 183
184 if (in_group) { 184 if (in_group) {
185 struct task_struct *t; 185 struct task_struct *t;
186 rcu_read_lock();
187 if (!pid_alive(task))
188 goto done;
189 186
190 t = task; 187 rcu_read_lock();
191 do { 188 for_each_thread(task, t) {
192 if (vm_is_stack_for_task(t, vma)) { 189 if (vm_is_stack_for_task(t, vma)) {
193 ret = t->pid; 190 ret = t->pid;
194 goto done; 191 goto done;
195 } 192 }
196 } while_each_thread(task, t); 193 }
197done: 194done:
198 rcu_read_unlock(); 195 rcu_read_unlock();
199 } 196 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d2f65c856350..2836b5373b2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
577 577
578 if (PageSwapCache(page)) { 578 if (PageSwapCache(page)) {
579 swp_entry_t swap = { .val = page_private(page) }; 579 swp_entry_t swap = { .val = page_private(page) };
580 mem_cgroup_swapout(page, swap);
580 __delete_from_swap_cache(page); 581 __delete_from_swap_cache(page);
581 spin_unlock_irq(&mapping->tree_lock); 582 spin_unlock_irq(&mapping->tree_lock);
582 swapcache_free(swap, page); 583 swapcache_free(swap);
583 } else { 584 } else {
584 void (*freepage)(struct page *); 585 void (*freepage)(struct page *);
585 void *shadow = NULL; 586 void *shadow = NULL;
@@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
600 shadow = workingset_eviction(mapping, page); 601 shadow = workingset_eviction(mapping, page);
601 __delete_from_page_cache(page, shadow); 602 __delete_from_page_cache(page, shadow);
602 spin_unlock_irq(&mapping->tree_lock); 603 spin_unlock_irq(&mapping->tree_lock);
603 mem_cgroup_uncharge_cache_page(page);
604 604
605 if (freepage != NULL) 605 if (freepage != NULL)
606 freepage(page); 606 freepage(page);
@@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
822 822
823 cond_resched(); 823 cond_resched();
824 824
825 mem_cgroup_uncharge_start();
826 while (!list_empty(page_list)) { 825 while (!list_empty(page_list)) {
827 struct address_space *mapping; 826 struct address_space *mapping;
828 struct page *page; 827 struct page *page;
@@ -1133,11 +1132,12 @@ keep:
1133 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); 1132 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
1134 } 1133 }
1135 1134
1135 mem_cgroup_uncharge_list(&free_pages);
1136 free_hot_cold_page_list(&free_pages, true); 1136 free_hot_cold_page_list(&free_pages, true);
1137 1137
1138 list_splice(&ret_pages, page_list); 1138 list_splice(&ret_pages, page_list);
1139 count_vm_events(PGACTIVATE, pgactivate); 1139 count_vm_events(PGACTIVATE, pgactivate);
1140 mem_cgroup_uncharge_end(); 1140
1141 *ret_nr_dirty += nr_dirty; 1141 *ret_nr_dirty += nr_dirty;
1142 *ret_nr_congested += nr_congested; 1142 *ret_nr_congested += nr_congested;
1143 *ret_nr_unqueued_dirty += nr_unqueued_dirty; 1143 *ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1437,6 +1437,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1437 1437
1438 if (unlikely(PageCompound(page))) { 1438 if (unlikely(PageCompound(page))) {
1439 spin_unlock_irq(&zone->lru_lock); 1439 spin_unlock_irq(&zone->lru_lock);
1440 mem_cgroup_uncharge(page);
1440 (*get_compound_page_dtor(page))(page); 1441 (*get_compound_page_dtor(page))(page);
1441 spin_lock_irq(&zone->lru_lock); 1442 spin_lock_irq(&zone->lru_lock);
1442 } else 1443 } else
@@ -1544,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1544 1545
1545 spin_unlock_irq(&zone->lru_lock); 1546 spin_unlock_irq(&zone->lru_lock);
1546 1547
1548 mem_cgroup_uncharge_list(&page_list);
1547 free_hot_cold_page_list(&page_list, true); 1549 free_hot_cold_page_list(&page_list, true);
1548 1550
1549 /* 1551 /*
@@ -1658,6 +1660,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
1658 1660
1659 if (unlikely(PageCompound(page))) { 1661 if (unlikely(PageCompound(page))) {
1660 spin_unlock_irq(&zone->lru_lock); 1662 spin_unlock_irq(&zone->lru_lock);
1663 mem_cgroup_uncharge(page);
1661 (*get_compound_page_dtor(page))(page); 1664 (*get_compound_page_dtor(page))(page);
1662 spin_lock_irq(&zone->lru_lock); 1665 spin_lock_irq(&zone->lru_lock);
1663 } else 1666 } else
@@ -1765,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1765 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); 1768 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1766 spin_unlock_irq(&zone->lru_lock); 1769 spin_unlock_irq(&zone->lru_lock);
1767 1770
1771 mem_cgroup_uncharge_list(&l_hold);
1768 free_hot_cold_page_list(&l_hold, true); 1772 free_hot_cold_page_list(&l_hold, true);
1769} 1773}
1770 1774
diff --git a/mm/zswap.c b/mm/zswap.c
index 032c21eeab2b..ea064c1a09ba 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -212,7 +212,7 @@ static int zswap_entry_cache_create(void)
212 return zswap_entry_cache == NULL; 212 return zswap_entry_cache == NULL;
213} 213}
214 214
215static void zswap_entry_cache_destory(void) 215static void __init zswap_entry_cache_destroy(void)
216{ 216{
217 kmem_cache_destroy(zswap_entry_cache); 217 kmem_cache_destroy(zswap_entry_cache);
218} 218}
@@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
507 * add_to_swap_cache() doesn't return -EEXIST, so we can safely 507 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
508 * clear SWAP_HAS_CACHE flag. 508 * clear SWAP_HAS_CACHE flag.
509 */ 509 */
510 swapcache_free(entry, NULL); 510 swapcache_free(entry);
511 } while (err != -ENOMEM); 511 } while (err != -ENOMEM);
512 512
513 if (new_page) 513 if (new_page)
@@ -941,7 +941,7 @@ static int __init init_zswap(void)
941pcpufail: 941pcpufail:
942 zswap_comp_exit(); 942 zswap_comp_exit();
943compfail: 943compfail:
944 zswap_entry_cache_destory(); 944 zswap_entry_cache_destroy();
945cachefail: 945cachefail:
946 zpool_destroy_pool(zswap_pool); 946 zpool_destroy_pool(zswap_pool);
947error: 947error:
diff --git a/scripts/.gitignore b/scripts/.gitignore
index fb070fa1038f..5ecfe93f2028 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -4,7 +4,6 @@
4conmakehash 4conmakehash
5kallsyms 5kallsyms
6pnmtologo 6pnmtologo
7bin2c
8unifdef 7unifdef
9ihex2fw 8ihex2fw
10recordmcount 9recordmcount
diff --git a/scripts/Makefile b/scripts/Makefile
index 890df5c6adfb..72902b5f2721 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -13,7 +13,6 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
13hostprogs-$(CONFIG_KALLSYMS) += kallsyms 13hostprogs-$(CONFIG_KALLSYMS) += kallsyms
14hostprogs-$(CONFIG_LOGO) += pnmtologo 14hostprogs-$(CONFIG_LOGO) += pnmtologo
15hostprogs-$(CONFIG_VT) += conmakehash 15hostprogs-$(CONFIG_VT) += conmakehash
16hostprogs-$(CONFIG_IKCONFIG) += bin2c
17hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount 16hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
18hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable 17hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
19hostprogs-$(CONFIG_ASN1) += asn1_compiler 18hostprogs-$(CONFIG_ASN1) += asn1_compiler
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index a776371a3502..9528ec9e5adc 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1 +1,2 @@
1fixdep 1fixdep
2bin2c
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 4fcef87bb875..ec10d9345bc2 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,6 +9,7 @@
9# fixdep: Used to generate dependency information during build process 9# fixdep: Used to generate dependency information during build process
10 10
11hostprogs-y := fixdep 11hostprogs-y := fixdep
12hostprogs-$(CONFIG_BUILD_BIN2C) += bin2c
12always := $(hostprogs-y) 13always := $(hostprogs-y)
13 14
14# fixdep is needed to compile other host programs 15# fixdep is needed to compile other host programs
diff --git a/scripts/bin2c.c b/scripts/basic/bin2c.c
index 96dd2bcbb407..af187e695345 100644
--- a/scripts/bin2c.c
+++ b/scripts/basic/bin2c.c
@@ -11,7 +11,7 @@
11 11
12int main(int argc, char *argv[]) 12int main(int argc, char *argv[])
13{ 13{
14 int ch, total=0; 14 int ch, total = 0;
15 15
16 if (argc > 1) 16 if (argc > 1)
17 printf("const char %s[] %s=\n", 17 printf("const char %s[] %s=\n",
@@ -19,10 +19,9 @@ int main(int argc, char *argv[])
19 19
20 do { 20 do {
21 printf("\t\""); 21 printf("\t\"");
22 while ((ch = getchar()) != EOF) 22 while ((ch = getchar()) != EOF) {
23 {
24 total++; 23 total++;
25 printf("\\x%02x",ch); 24 printf("\\x%02x", ch);
26 if (total % 16 == 0) 25 if (total % 16 == 0)
27 break; 26 break;
28 } 27 }
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index c05d586b1fee..899b4230320e 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -52,14 +52,12 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
52 #8000008a: 20 1d sub sp,4 52 #8000008a: 20 1d sub sp,4
53 #80000ca8: fa cd 05 b0 sub sp,sp,1456 53 #80000ca8: fa cd 05 b0 sub sp,sp,1456
54 $re = qr/^.*sub.*sp.*,([0-9]{1,8})/o; 54 $re = qr/^.*sub.*sp.*,([0-9]{1,8})/o;
55 } elsif ($arch =~ /^i[3456]86$/) { 55 } elsif ($arch =~ /^x86(_64)?$/ || $arch =~ /^i[3456]86$/) {
56 #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp 56 #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
57 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o; 57 # or
58 $dre = qr/^.*[as][du][db] (%.*),\%esp$/o; 58 # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
59 } elsif ($arch eq 'x86_64') { 59 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%(e|r)sp$/o;
60 # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp 60 $dre = qr/^.*[as][du][db] (%.*),\%(e|r)sp$/o;
61 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
62 $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
63 } elsif ($arch eq 'ia64') { 61 } elsif ($arch eq 'ia64') {
64 #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12 62 #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
65 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o; 63 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
diff --git a/scripts/coccinelle/free/ifnullfree.cocci b/scripts/coccinelle/free/ifnullfree.cocci
new file mode 100644
index 000000000000..a42d70bf88b3
--- /dev/null
+++ b/scripts/coccinelle/free/ifnullfree.cocci
@@ -0,0 +1,53 @@
1/// NULL check before some freeing functions is not needed.
2///
3/// Based on checkpatch warning
4/// "kfree(NULL) is safe this check is probably not required"
5/// and kfreeaddr.cocci by Julia Lawall.
6///
7// Copyright: (C) 2014 Fabian Frederick. GPLv2.
8// Comments: -
9// Options: --no-includes --include-headers
10
11virtual patch
12virtual org
13virtual report
14virtual context
15
16@r2 depends on patch@
17expression E;
18@@
19- if (E)
20(
21- kfree(E);
22+ kfree(E);
23|
24- debugfs_remove(E);
25+ debugfs_remove(E);
26|
27- debugfs_remove_recursive(E);
28+ debugfs_remove_recursive(E);
29|
30- usb_free_urb(E);
31+ usb_free_urb(E);
32)
33
34@r depends on context || report || org @
35expression E;
36position p;
37@@
38
39* if (E)
40* \(kfree@p\|debugfs_remove@p\|debugfs_remove_recursive@p\|usb_free_urb\)(E);
41
42@script:python depends on org@
43p << r.p;
44@@
45
46cocci.print_main("NULL check before that freeing function is not needed", p)
47
48@script:python depends on report@
49p << r.p;
50@@
51
52msg = "WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values."
53coccilib.report.print_report(p[0], msg)
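The rewrite the patch rule performs, on an illustrative kernel-style fragment (not compilable on its own; kfree() and the other listed functions are documented no-ops when passed NULL, which is why the guard is redundant):

	/* before */
	if (dev->buf)
		kfree(dev->buf);

	/* after applying ifnullfree.cocci in patch mode */
	kfree(dev->buf);
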
diff --git a/scripts/tags.sh b/scripts/tags.sh
index e6b011fe1d0d..cbfd269a6011 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -168,6 +168,7 @@ exuberant()
168 --extra=+f --c-kinds=+px \ 168 --extra=+f --c-kinds=+px \
169 --regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/' \ 169 --regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/' \
170 --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \ 170 --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
171 --regex-c='/^COMPAT_SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/compat_sys_\1/' \
171 --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \ 172 --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \
172 --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' \ 173 --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' \
173 --regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/' \ 174 --regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/' \
@@ -231,6 +232,7 @@ emacs()
231 all_target_sources | xargs $1 -a \ 232 all_target_sources | xargs $1 -a \
232 --regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \ 233 --regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \
233 --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \ 234 --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \
235 --regex='/^COMPAT_SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/compat_sys_\1/' \
234 --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \ 236 --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \
235 --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \ 237 --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \
236 --regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \ 238 --regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d10f95ce2ea4..6fd2a4402069 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -2,6 +2,7 @@ TARGETS = breakpoints
2TARGETS += cpu-hotplug 2TARGETS += cpu-hotplug
3TARGETS += efivarfs 3TARGETS += efivarfs
4TARGETS += kcmp 4TARGETS += kcmp
5TARGETS += memfd
5TARGETS += memory-hotplug 6TARGETS += memory-hotplug
6TARGETS += mqueue 7TARGETS += mqueue
7TARGETS += net 8TARGETS += net
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644
index 000000000000..afe87c40ac80
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
1fuse_mnt
2fuse_test
3memfd_test
4memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644
index 000000000000..6816c491c5ff
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,41 @@
1uname_M := $(shell uname -m 2>/dev/null || echo not)
2ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
3ifeq ($(ARCH),i386)
4 ARCH := X86
5endif
6ifeq ($(ARCH),x86_64)
7 ARCH := X86
8endif
9
10CFLAGS += -D_FILE_OFFSET_BITS=64
11CFLAGS += -I../../../../arch/x86/include/generated/uapi/
12CFLAGS += -I../../../../arch/x86/include/uapi/
13CFLAGS += -I../../../../include/uapi/
14CFLAGS += -I../../../../include/
15
16all:
17ifeq ($(ARCH),X86)
18 gcc $(CFLAGS) memfd_test.c -o memfd_test
19else
20 echo "Not an x86 target, can't build memfd selftest"
21endif
22
23run_tests: all
24ifeq ($(ARCH),X86)
25 gcc $(CFLAGS) memfd_test.c -o memfd_test
26endif
27 @./memfd_test || echo "memfd_test: [FAIL]"
28
29build_fuse:
30ifeq ($(ARCH),X86)
31 gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
32 gcc $(CFLAGS) fuse_test.c -o fuse_test
33else
34 echo "Not an x86 target, can't build memfd selftest"
35endif
36
37run_fuse: build_fuse
38 @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
39
40clean:
41	$(RM) memfd_test fuse_test fuse_mnt
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644
index 000000000000..feacf1280fcd
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,110 @@
1/*
2 * memfd test file-system
3 * This file uses FUSE to create a dummy file-system with only one file /memfd.
4 * This file is read-only and takes 1s per read.
5 *
6 * This file-system is used by the memfd test-cases to force the kernel to pin
7 * pages during read() calls. Due to the 1s delay of this file-system, this is a
8 * nice way to test race-conditions against get_user_pages() in the kernel.
9 *
10 * We use direct_io==1 to force the kernel to use direct-IO for this
11 * file-system.
12 */
13
14#define FUSE_USE_VERSION 26
15
16#include <fuse.h>
17#include <stdio.h>
18#include <string.h>
19#include <errno.h>
20#include <fcntl.h>
21#include <unistd.h>
22
23static const char memfd_content[] = "memfd-example-content";
24static const char memfd_path[] = "/memfd";
25
26static int memfd_getattr(const char *path, struct stat *st)
27{
28 memset(st, 0, sizeof(*st));
29
30 if (!strcmp(path, "/")) {
31 st->st_mode = S_IFDIR | 0755;
32 st->st_nlink = 2;
33 } else if (!strcmp(path, memfd_path)) {
34 st->st_mode = S_IFREG | 0444;
35 st->st_nlink = 1;
36 st->st_size = strlen(memfd_content);
37 } else {
38 return -ENOENT;
39 }
40
41 return 0;
42}
43
44static int memfd_readdir(const char *path,
45 void *buf,
46 fuse_fill_dir_t filler,
47 off_t offset,
48 struct fuse_file_info *fi)
49{
50 if (strcmp(path, "/"))
51 return -ENOENT;
52
53 filler(buf, ".", NULL, 0);
54 filler(buf, "..", NULL, 0);
55 filler(buf, memfd_path + 1, NULL, 0);
56
57 return 0;
58}
59
60static int memfd_open(const char *path, struct fuse_file_info *fi)
61{
62 if (strcmp(path, memfd_path))
63 return -ENOENT;
64
65	if ((fi->flags & O_ACCMODE) != O_RDONLY)
66 return -EACCES;
67
68 /* force direct-IO */
69 fi->direct_io = 1;
70
71 return 0;
72}
73
74static int memfd_read(const char *path,
75 char *buf,
76 size_t size,
77 off_t offset,
78 struct fuse_file_info *fi)
79{
80 size_t len;
81
82 if (strcmp(path, memfd_path) != 0)
83 return -ENOENT;
84
85 sleep(1);
86
87 len = strlen(memfd_content);
88 if (offset < len) {
89 if (offset + size > len)
90 size = len - offset;
91
92 memcpy(buf, memfd_content + offset, size);
93 } else {
94 size = 0;
95 }
96
97 return size;
98}
99
100static struct fuse_operations memfd_ops = {
101 .getattr = memfd_getattr,
102 .readdir = memfd_readdir,
103 .open = memfd_open,
104 .read = memfd_read,
105};
106
107int main(int argc, char *argv[])
108{
109 return fuse_main(argc, argv, &memfd_ops, NULL);
110}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644
index 000000000000..67908b18f035
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,311 @@
1/*
2 * memfd GUP test-case
3 * This tests memfd interactions with get_user_pages(). We require the
4 * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
5 * file-system delays _all_ reads by 1s and forces direct-IO. This means any
6 * read() on files in that file-system will pin the receive-buffer pages for at
7 * least 1s via get_user_pages().
8 *
9 * We use this trick to race ADD_SEALS against a write on a memfd object. The
10 * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
11 * the read() syscall with our memory-mapped memfd object as receive buffer to
12 * force the kernel to write into our memfd object.
13 */
14
15#define _GNU_SOURCE
16#define __EXPORTED_HEADERS__
17
18#include <errno.h>
19#include <inttypes.h>
20#include <limits.h>
21#include <linux/falloc.h>
22#include <linux/fcntl.h>
23#include <linux/memfd.h>
24#include <sched.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <signal.h>
28#include <string.h>
29#include <sys/mman.h>
30#include <sys/stat.h>
31#include <sys/syscall.h>
32#include <sys/wait.h>
33#include <unistd.h>
34
35#define MFD_DEF_SIZE 8192
36#define STACK_SIZE 65535
37
38static int sys_memfd_create(const char *name,
39 unsigned int flags)
40{
41 return syscall(__NR_memfd_create, name, flags);
42}
43
44static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
45{
46 int r, fd;
47
48 fd = sys_memfd_create(name, flags);
49 if (fd < 0) {
50 printf("memfd_create(\"%s\", %u) failed: %m\n",
51 name, flags);
52 abort();
53 }
54
55 r = ftruncate(fd, sz);
56 if (r < 0) {
57 printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
58 abort();
59 }
60
61 return fd;
62}
63
64static __u64 mfd_assert_get_seals(int fd)
65{
66 long r;
67
68 r = fcntl(fd, F_GET_SEALS);
69 if (r < 0) {
70 printf("GET_SEALS(%d) failed: %m\n", fd);
71 abort();
72 }
73
74 return r;
75}
76
77static void mfd_assert_has_seals(int fd, __u64 seals)
78{
79 __u64 s;
80
81 s = mfd_assert_get_seals(fd);
82 if (s != seals) {
83 printf("%llu != %llu = GET_SEALS(%d)\n",
84 (unsigned long long)seals, (unsigned long long)s, fd);
85 abort();
86 }
87}
88
89static void mfd_assert_add_seals(int fd, __u64 seals)
90{
91 long r;
92 __u64 s;
93
94 s = mfd_assert_get_seals(fd);
95 r = fcntl(fd, F_ADD_SEALS, seals);
96 if (r < 0) {
97 printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
98 fd, (unsigned long long)s, (unsigned long long)seals);
99 abort();
100 }
101}
102
103static int mfd_busy_add_seals(int fd, __u64 seals)
104{
105 long r;
106 __u64 s;
107
108 r = fcntl(fd, F_GET_SEALS);
109 if (r < 0)
110 s = 0;
111 else
112 s = r;
113
114 r = fcntl(fd, F_ADD_SEALS, seals);
115 if (r < 0 && errno != EBUSY) {
116 printf("ADD_SEALS(%d, %llu -> %llu) failed, but not with the expected EBUSY: %m\n",
117 fd, (unsigned long long)s, (unsigned long long)seals);
118 abort();
119 }
120
121 return r;
122}
123
124static void *mfd_assert_mmap_shared(int fd)
125{
126 void *p;
127
128 p = mmap(NULL,
129 MFD_DEF_SIZE,
130 PROT_READ | PROT_WRITE,
131 MAP_SHARED,
132 fd,
133 0);
134 if (p == MAP_FAILED) {
135 printf("mmap() failed: %m\n");
136 abort();
137 }
138
139 return p;
140}
141
142static void *mfd_assert_mmap_private(int fd)
143{
144 void *p;
145
146 p = mmap(NULL,
147 MFD_DEF_SIZE,
148 PROT_READ | PROT_WRITE,
149 MAP_PRIVATE,
150 fd,
151 0);
152 if (p == MAP_FAILED) {
153 printf("mmap() failed: %m\n");
154 abort();
155 }
156
157 return p;
158}
159
160static int global_mfd = -1;
161static void *global_p = NULL;
162
163static int sealing_thread_fn(void *arg)
164{
165 int r;
166
167 /*
168 * This thread first waits 200ms so any pending operation in the parent
169 * is correctly started. After that, it tries to seal @global_mfd as
170 * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
171 * that memory mapped object still ongoing.
172 * We then wait one more second and try sealing again. This time it
173 * must succeed as there shouldn't be anyone else pinning the pages.
174 */
175
176 /* wait 200ms for FUSE-request to be active */
177 usleep(200000);
178
179 /* unmap the mapping before sealing to avoid i_mmap_writable failures */
180 munmap(global_p, MFD_DEF_SIZE);
181
182 /* Try sealing the global file; expect EBUSY or success. Current
183 * kernels will never succeed, but in the future, kernels might
184 * implement page-replacements or other fancy ways to avoid racing
185 * writes. */
186 r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
187 if (r >= 0) {
188 printf("HURRAY! This kernel fixed GUP races!\n");
189 } else {
190 /* wait 1s more so the FUSE-request is done */
191 sleep(1);
192
193 /* try sealing the global file again */
194 mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
195 }
196
197 return 0;
198}
199
200static pid_t spawn_sealing_thread(void)
201{
202 uint8_t *stack;
203 pid_t pid;
204
205 stack = malloc(STACK_SIZE);
206 if (!stack) {
207 printf("malloc(STACK_SIZE) failed: %m\n");
208 abort();
209 }
210
211 pid = clone(sealing_thread_fn,
212 stack + STACK_SIZE,
213 SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
214 NULL);
215 if (pid < 0) {
216 printf("clone() failed: %m\n");
217 abort();
218 }
219
220 return pid;
221}
222
223static void join_sealing_thread(pid_t pid)
224{
225 waitpid(pid, NULL, 0);
226}
227
228int main(int argc, char **argv)
229{
230 static const char zero[MFD_DEF_SIZE];
231 int fd, mfd, r;
232 void *p;
233 int was_sealed;
234 pid_t pid;
235
236 if (argc < 2) {
237 printf("error: please pass path to file in fuse_mnt mount-point\n");
238 abort();
239 }
240
241 /* open FUSE memfd file for GUP testing */
242 printf("opening: %s\n", argv[1]);
243 fd = open(argv[1], O_RDONLY | O_CLOEXEC);
244 if (fd < 0) {
245 printf("cannot open(\"%s\"): %m\n", argv[1]);
246 abort();
247 }
248
249 /* create new memfd-object */
250 mfd = mfd_assert_new("kern_memfd_fuse",
251 MFD_DEF_SIZE,
252 MFD_CLOEXEC | MFD_ALLOW_SEALING);
253
254 /* mmap memfd-object for writing */
255 p = mfd_assert_mmap_shared(mfd);
256
257 /* pass mfd+mapping to a separate sealing-thread which tries to seal
258 * the memfd object with SEAL_WRITE while we write into it */
259 global_mfd = mfd;
260 global_p = p;
261 pid = spawn_sealing_thread();
262
263 /* Use read() on the FUSE file to read into our memory-mapped memfd
264 * object. This races the other thread which tries to seal the
265 * memfd-object.
266 * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
267 * This guarantees that the receive-buffer is pinned for 1s until the
268 * data is written into it. The racing ADD_SEALS should thus fail as
269 * the pages are still pinned. */
270 r = read(fd, p, MFD_DEF_SIZE);
271 if (r < 0) {
272 printf("read() failed: %m\n");
273 abort();
274 } else if (!r) {
275 printf("unexpected EOF on read()\n");
276 abort();
277 }
278
279 was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
280
281 /* Wait for sealing-thread to finish and verify that it
282 * successfully sealed the file after the second try. */
283 join_sealing_thread(pid);
284 mfd_assert_has_seals(mfd, F_SEAL_WRITE);
285
286 /* *IF* the memfd-object was sealed at the time our read() returned,
287 * then the kernel did a page-replacement or canceled the read() (or
288 * whatever magic it did..). In that case, the memfd object is still
289 * all zero.
290 * In case the memfd-object was *not* sealed, the read() was successful
291 * and the memfd object must *not* be all zero.
292 * Note that in real scenarios, there might be a mixture of both, but
293 * in this test case, we have explicit 200ms delays which should be
294 * enough to avoid any in-flight writes. */
295
296 p = mfd_assert_mmap_private(mfd);
297 if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
298 printf("memfd sealed during read() but data not discarded\n");
299 abort();
300 } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
301 printf("memfd sealed after read() but data discarded\n");
302 abort();
303 }
304
305 close(mfd);
306 close(fd);
307
308 printf("fuse: DONE\n");
309
310 return 0;
311}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644
index 000000000000..3634c909b1b0
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,913 @@
1#define _GNU_SOURCE
2#define __EXPORTED_HEADERS__
3
4#include <errno.h>
5#include <inttypes.h>
6#include <limits.h>
7#include <linux/falloc.h>
8#include <linux/fcntl.h>
9#include <linux/memfd.h>
10#include <sched.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <signal.h>
14#include <string.h>
15#include <sys/mman.h>
16#include <sys/stat.h>
17#include <sys/syscall.h>
18#include <unistd.h>
19
20#define MFD_DEF_SIZE 8192
21#define STACK_SIZE 65535
22
23static int sys_memfd_create(const char *name,
24 unsigned int flags)
25{
26 return syscall(__NR_memfd_create, name, flags);
27}
28
29static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
30{
31 int r, fd;
32
33 fd = sys_memfd_create(name, flags);
34 if (fd < 0) {
35 printf("memfd_create(\"%s\", %u) failed: %m\n",
36 name, flags);
37 abort();
38 }
39
40 r = ftruncate(fd, sz);
41 if (r < 0) {
42 printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
43 abort();
44 }
45
46 return fd;
47}
48
49static void mfd_fail_new(const char *name, unsigned int flags)
50{
51 int r;
52
53 r = sys_memfd_create(name, flags);
54 if (r >= 0) {
55 printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
56 name, flags);
57 close(r);
58 abort();
59 }
60}
61
62static __u64 mfd_assert_get_seals(int fd)
63{
64 long r;
65
66 r = fcntl(fd, F_GET_SEALS);
67 if (r < 0) {
68 printf("GET_SEALS(%d) failed: %m\n", fd);
69 abort();
70 }
71
72 return r;
73}
74
75static void mfd_assert_has_seals(int fd, __u64 seals)
76{
77 __u64 s;
78
79 s = mfd_assert_get_seals(fd);
80 if (s != seals) {
81 printf("%llu != %llu = GET_SEALS(%d)\n",
82 (unsigned long long)seals, (unsigned long long)s, fd);
83 abort();
84 }
85}
86
87static void mfd_assert_add_seals(int fd, __u64 seals)
88{
89 long r;
90 __u64 s;
91
92 s = mfd_assert_get_seals(fd);
93 r = fcntl(fd, F_ADD_SEALS, seals);
94 if (r < 0) {
95 printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
96 fd, (unsigned long long)s, (unsigned long long)seals);
97 abort();
98 }
99}
100
101static void mfd_fail_add_seals(int fd, __u64 seals)
102{
103 long r;
104 __u64 s;
105
106 r = fcntl(fd, F_GET_SEALS);
107 if (r < 0)
108 s = 0;
109 else
110 s = r;
111
112 r = fcntl(fd, F_ADD_SEALS, seals);
113 if (r >= 0) {
114 printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected\n",
115 fd, (unsigned long long)s, (unsigned long long)seals);
116 abort();
117 }
118}
119
120static void mfd_assert_size(int fd, size_t size)
121{
122 struct stat st;
123 int r;
124
125 r = fstat(fd, &st);
126 if (r < 0) {
127 printf("fstat(%d) failed: %m\n", fd);
128 abort();
129 } else if (st.st_size != size) {
130 printf("wrong file size %lld, but expected %lld\n",
131 (long long)st.st_size, (long long)size);
132 abort();
133 }
134}
135
136static int mfd_assert_dup(int fd)
137{
138 int r;
139
140 r = dup(fd);
141 if (r < 0) {
142 printf("dup(%d) failed: %m\n", fd);
143 abort();
144 }
145
146 return r;
147}
148
149static void *mfd_assert_mmap_shared(int fd)
150{
151 void *p;
152
153 p = mmap(NULL,
154 MFD_DEF_SIZE,
155 PROT_READ | PROT_WRITE,
156 MAP_SHARED,
157 fd,
158 0);
159 if (p == MAP_FAILED) {
160 printf("mmap() failed: %m\n");
161 abort();
162 }
163
164 return p;
165}
166
167static void *mfd_assert_mmap_private(int fd)
168{
169 void *p;
170
171 p = mmap(NULL,
172 MFD_DEF_SIZE,
173 PROT_READ,
174 MAP_PRIVATE,
175 fd,
176 0);
177 if (p == MAP_FAILED) {
178 printf("mmap() failed: %m\n");
179 abort();
180 }
181
182 return p;
183}
184
185static int mfd_assert_open(int fd, int flags, mode_t mode)
186{
187 char buf[512];
188 int r;
189
190 sprintf(buf, "/proc/self/fd/%d", fd);
191 r = open(buf, flags, mode);
192 if (r < 0) {
193 printf("open(%s) failed: %m\n", buf);
194 abort();
195 }
196
197 return r;
198}
199
200static void mfd_fail_open(int fd, int flags, mode_t mode)
201{
202 char buf[512];
203 int r;
204
205 sprintf(buf, "/proc/self/fd/%d", fd);
206 r = open(buf, flags, mode);
207 if (r >= 0) {
208 printf("open(%s) didn't fail as expected\n", buf);
209 abort();
210 }
211}
212
213static void mfd_assert_read(int fd)
214{
215 char buf[16];
216 void *p;
217 ssize_t l;
218
219 l = read(fd, buf, sizeof(buf));
220 if (l != sizeof(buf)) {
221 printf("read() failed: %m\n");
222 abort();
223 }
224
225 /* verify PROT_READ *is* allowed */
226 p = mmap(NULL,
227 MFD_DEF_SIZE,
228 PROT_READ,
229 MAP_PRIVATE,
230 fd,
231 0);
232 if (p == MAP_FAILED) {
233 printf("mmap() failed: %m\n");
234 abort();
235 }
236 munmap(p, MFD_DEF_SIZE);
237
238 /* verify MAP_PRIVATE is *always* allowed (even writable) */
239 p = mmap(NULL,
240 MFD_DEF_SIZE,
241 PROT_READ | PROT_WRITE,
242 MAP_PRIVATE,
243 fd,
244 0);
245 if (p == MAP_FAILED) {
246 printf("mmap() failed: %m\n");
247 abort();
248 }
249 munmap(p, MFD_DEF_SIZE);
250}
251
252static void mfd_assert_write(int fd)
253{
254 ssize_t l;
255 void *p;
256 int r;
257
258 /* verify write() succeeds */
259 l = write(fd, "\0\0\0\0", 4);
260 if (l != 4) {
261 printf("write() failed: %m\n");
262 abort();
263 }
264
265 /* verify PROT_READ | PROT_WRITE is allowed */
266 p = mmap(NULL,
267 MFD_DEF_SIZE,
268 PROT_READ | PROT_WRITE,
269 MAP_SHARED,
270 fd,
271 0);
272 if (p == MAP_FAILED) {
273 printf("mmap() failed: %m\n");
274 abort();
275 }
276 *(char *)p = 0;
277 munmap(p, MFD_DEF_SIZE);
278
279 /* verify PROT_WRITE is allowed */
280 p = mmap(NULL,
281 MFD_DEF_SIZE,
282 PROT_WRITE,
283 MAP_SHARED,
284 fd,
285 0);
286 if (p == MAP_FAILED) {
287 printf("mmap() failed: %m\n");
288 abort();
289 }
290 *(char *)p = 0;
291 munmap(p, MFD_DEF_SIZE);
292
293 /* verify PROT_READ with MAP_SHARED is allowed and a following
294 * mprotect(PROT_WRITE) allows writing */
295 p = mmap(NULL,
296 MFD_DEF_SIZE,
297 PROT_READ,
298 MAP_SHARED,
299 fd,
300 0);
301 if (p == MAP_FAILED) {
302 printf("mmap() failed: %m\n");
303 abort();
304 }
305
306 r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
307 if (r < 0) {
308 printf("mprotect() failed: %m\n");
309 abort();
310 }
311
312 *(char *)p = 0;
313 munmap(p, MFD_DEF_SIZE);
314
315 /* verify PUNCH_HOLE works */
316 r = fallocate(fd,
317 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
318 0,
319 MFD_DEF_SIZE);
320 if (r < 0) {
321 printf("fallocate(PUNCH_HOLE) failed: %m\n");
322 abort();
323 }
324}
325
326static void mfd_fail_write(int fd)
327{
328 ssize_t l;
329 void *p;
330 int r;
331
332 /* verify write() fails */
333 l = write(fd, "data", 4);
334 if (l != -EPERM) {
335 printf("expected EPERM on write(), but got %d: %m\n", (int)l);
336 abort();
337 }
338
339 /* verify PROT_READ | PROT_WRITE is not allowed */
340 p = mmap(NULL,
341 MFD_DEF_SIZE,
342 PROT_READ | PROT_WRITE,
343 MAP_SHARED,
344 fd,
345 0);
346 if (p != MAP_FAILED) {
347 printf("mmap() didn't fail as expected\n");
348 abort();
349 }
350
351 /* verify PROT_WRITE is not allowed */
352 p = mmap(NULL,
353 MFD_DEF_SIZE,
354 PROT_WRITE,
355 MAP_SHARED,
356 fd,
357 0);
358 if (p != MAP_FAILED) {
359 printf("mmap() didn't fail as expected\n");
360 abort();
361 }
362
363 /* Verify that PROT_READ with MAP_SHARED followed by an mprotect() to
364 * PROT_WRITE is not allowed. For r/w the kernel already prevents the mmap. */
365 p = mmap(NULL,
366 MFD_DEF_SIZE,
367 PROT_READ,
368 MAP_SHARED,
369 fd,
370 0);
371 if (p != MAP_FAILED) {
372 r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
373 if (r >= 0) {
374 printf("mmap()+mprotect() didn't fail as expected\n");
375 abort();
376 }
377 }
378
379 /* verify PUNCH_HOLE fails */
380 r = fallocate(fd,
381 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
382 0,
383 MFD_DEF_SIZE);
384 if (r >= 0) {
385 printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
386 abort();
387 }
388}
389
390static void mfd_assert_shrink(int fd)
391{
392 int r, fd2;
393
394 r = ftruncate(fd, MFD_DEF_SIZE / 2);
395 if (r < 0) {
396 printf("ftruncate(SHRINK) failed: %m\n");
397 abort();
398 }
399
400 mfd_assert_size(fd, MFD_DEF_SIZE / 2);
401
402 fd2 = mfd_assert_open(fd,
403 O_RDWR | O_CREAT | O_TRUNC,
404 S_IRUSR | S_IWUSR);
405 close(fd2);
406
407 mfd_assert_size(fd, 0);
408}
409
410static void mfd_fail_shrink(int fd)
411{
412 int r;
413
414 r = ftruncate(fd, MFD_DEF_SIZE / 2);
415 if (r >= 0) {
416 printf("ftruncate(SHRINK) didn't fail as expected\n");
417 abort();
418 }
419
420 mfd_fail_open(fd,
421 O_RDWR | O_CREAT | O_TRUNC,
422 S_IRUSR | S_IWUSR);
423}
424
425static void mfd_assert_grow(int fd)
426{
427 int r;
428
429 r = ftruncate(fd, MFD_DEF_SIZE * 2);
430 if (r < 0) {
431 printf("ftruncate(GROW) failed: %m\n");
432 abort();
433 }
434
435 mfd_assert_size(fd, MFD_DEF_SIZE * 2);
436
437 r = fallocate(fd,
438 0,
439 0,
440 MFD_DEF_SIZE * 4);
441 if (r < 0) {
442 printf("fallocate(ALLOC) failed: %m\n");
443 abort();
444 }
445
446 mfd_assert_size(fd, MFD_DEF_SIZE * 4);
447}
448
449static void mfd_fail_grow(int fd)
450{
451 int r;
452
453 r = ftruncate(fd, MFD_DEF_SIZE * 2);
454 if (r >= 0) {
455 printf("ftruncate(GROW) didn't fail as expected\n");
456 abort();
457 }
458
459 r = fallocate(fd,
460 0,
461 0,
462 MFD_DEF_SIZE * 4);
463 if (r >= 0) {
464 printf("fallocate(ALLOC) didn't fail as expected\n");
465 abort();
466 }
467}
468
469static void mfd_assert_grow_write(int fd)
470{
471 static char buf[MFD_DEF_SIZE * 8];
472 ssize_t l;
473
474 l = pwrite(fd, buf, sizeof(buf), 0);
475 if (l != sizeof(buf)) {
476 printf("pwrite() failed: %m\n");
477 abort();
478 }
479
480 mfd_assert_size(fd, MFD_DEF_SIZE * 8);
481}
482
483static void mfd_fail_grow_write(int fd)
484{
485 static char buf[MFD_DEF_SIZE * 8];
486 ssize_t l;
487
488 l = pwrite(fd, buf, sizeof(buf), 0);
489 if (l == sizeof(buf)) {
490 printf("pwrite() didn't fail as expected\n");
491 abort();
492 }
493}
494
495static int idle_thread_fn(void *arg)
496{
497 sigset_t set;
498 int sig;
499
500 /* dummy waiter; SIGTERM terminates us anyway */
501 sigemptyset(&set);
502 sigaddset(&set, SIGTERM);
503 sigwait(&set, &sig);
504
505 return 0;
506}
507
508static pid_t spawn_idle_thread(unsigned int flags)
509{
510 uint8_t *stack;
511 pid_t pid;
512
513 stack = malloc(STACK_SIZE);
514 if (!stack) {
515 printf("malloc(STACK_SIZE) failed: %m\n");
516 abort();
517 }
518
519 pid = clone(idle_thread_fn,
520 stack + STACK_SIZE,
521 SIGCHLD | flags,
522 NULL);
523 if (pid < 0) {
524 printf("clone() failed: %m\n");
525 abort();
526 }
527
528 return pid;
529}
530
531static void join_idle_thread(pid_t pid)
532{
533 kill(pid, SIGTERM);
534 waitpid(pid, NULL, 0);
535}
536
537/*
538 * Test memfd_create() syscall
539 * Verify syscall-argument validation, including name checks, flag validation
540 * and more.
541 */
542static void test_create(void)
543{
544 char buf[2048];
545 int fd;
546
547 /* test NULL name */
548 mfd_fail_new(NULL, 0);
549
550 /* test over-long name (not zero-terminated) */
551 memset(buf, 0xff, sizeof(buf));
552 mfd_fail_new(buf, 0);
553
554 /* test over-long zero-terminated name */
555 memset(buf, 0xff, sizeof(buf));
556 buf[sizeof(buf) - 1] = 0;
557 mfd_fail_new(buf, 0);
558
559 /* verify "" is a valid name */
560 fd = mfd_assert_new("", 0, 0);
561 close(fd);
562
563 /* verify invalid MFD_* flag combinations are rejected */
564 mfd_fail_new("", 0x0100);
565 mfd_fail_new("", ~MFD_CLOEXEC);
566 mfd_fail_new("", ~MFD_ALLOW_SEALING);
567 mfd_fail_new("", ~0);
568 mfd_fail_new("", 0x80000000U);
569
570 /* verify MFD_CLOEXEC is allowed */
571 fd = mfd_assert_new("", 0, MFD_CLOEXEC);
572 close(fd);
573
574 /* verify MFD_ALLOW_SEALING is allowed */
575 fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
576 close(fd);
577
578 /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
579 fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
580 close(fd);
581}
582
583/*
584 * Test basic sealing
585 * A very basic sealing test to see whether setting/retrieving seals works.
586 */
587static void test_basic(void)
588{
589 int fd;
590
591 fd = mfd_assert_new("kern_memfd_basic",
592 MFD_DEF_SIZE,
593 MFD_CLOEXEC | MFD_ALLOW_SEALING);
594
595 /* add basic seals */
596 mfd_assert_has_seals(fd, 0);
597 mfd_assert_add_seals(fd, F_SEAL_SHRINK |
598 F_SEAL_WRITE);
599 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
600 F_SEAL_WRITE);
601
602 /* add them again */
603 mfd_assert_add_seals(fd, F_SEAL_SHRINK |
604 F_SEAL_WRITE);
605 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
606 F_SEAL_WRITE);
607
608 /* add more seals and seal against sealing */
609 mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
610 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
611 F_SEAL_GROW |
612 F_SEAL_WRITE |
613 F_SEAL_SEAL);
614
615 /* verify that sealing no longer works */
616 mfd_fail_add_seals(fd, F_SEAL_GROW);
617 mfd_fail_add_seals(fd, 0);
618
619 close(fd);
620
621 /* verify sealing does not work without MFD_ALLOW_SEALING */
622 fd = mfd_assert_new("kern_memfd_basic",
623 MFD_DEF_SIZE,
624 MFD_CLOEXEC);
625 mfd_assert_has_seals(fd, F_SEAL_SEAL);
626 mfd_fail_add_seals(fd, F_SEAL_SHRINK |
627 F_SEAL_GROW |
628 F_SEAL_WRITE);
629 mfd_assert_has_seals(fd, F_SEAL_SEAL);
630 close(fd);
631}
632
633/*
634 * Test SEAL_WRITE
635 * Test whether SEAL_WRITE actually prevents modifications.
636 */
637static void test_seal_write(void)
638{
639 int fd;
640
641 fd = mfd_assert_new("kern_memfd_seal_write",
642 MFD_DEF_SIZE,
643 MFD_CLOEXEC | MFD_ALLOW_SEALING);
644 mfd_assert_has_seals(fd, 0);
645 mfd_assert_add_seals(fd, F_SEAL_WRITE);
646 mfd_assert_has_seals(fd, F_SEAL_WRITE);
647
648 mfd_assert_read(fd);
649 mfd_fail_write(fd);
650 mfd_assert_shrink(fd);
651 mfd_assert_grow(fd);
652 mfd_fail_grow_write(fd);
653
654 close(fd);
655}
656
657/*
658 * Test SEAL_SHRINK
659 * Test whether SEAL_SHRINK actually prevents shrinking
660 */
661static void test_seal_shrink(void)
662{
663 int fd;
664
665 fd = mfd_assert_new("kern_memfd_seal_shrink",
666 MFD_DEF_SIZE,
667 MFD_CLOEXEC | MFD_ALLOW_SEALING);
668 mfd_assert_has_seals(fd, 0);
669 mfd_assert_add_seals(fd, F_SEAL_SHRINK);
670 mfd_assert_has_seals(fd, F_SEAL_SHRINK);
671
672 mfd_assert_read(fd);
673 mfd_assert_write(fd);
674 mfd_fail_shrink(fd);
675 mfd_assert_grow(fd);
676 mfd_assert_grow_write(fd);
677
678 close(fd);
679}
680
681/*
682 * Test SEAL_GROW
683 * Test whether SEAL_GROW actually prevents growing
684 */
685static void test_seal_grow(void)
686{
687 int fd;
688
689 fd = mfd_assert_new("kern_memfd_seal_grow",
690 MFD_DEF_SIZE,
691 MFD_CLOEXEC | MFD_ALLOW_SEALING);
692 mfd_assert_has_seals(fd, 0);
693 mfd_assert_add_seals(fd, F_SEAL_GROW);
694 mfd_assert_has_seals(fd, F_SEAL_GROW);
695
696 mfd_assert_read(fd);
697 mfd_assert_write(fd);
698 mfd_assert_shrink(fd);
699 mfd_fail_grow(fd);
700 mfd_fail_grow_write(fd);
701
702 close(fd);
703}
704
705/*
706 * Test SEAL_SHRINK | SEAL_GROW
707 * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
708 */
709static void test_seal_resize(void)
710{
711 int fd;
712
713 fd = mfd_assert_new("kern_memfd_seal_resize",
714 MFD_DEF_SIZE,
715 MFD_CLOEXEC | MFD_ALLOW_SEALING);
716 mfd_assert_has_seals(fd, 0);
717 mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
718 mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
719
720 mfd_assert_read(fd);
721 mfd_assert_write(fd);
722 mfd_fail_shrink(fd);
723 mfd_fail_grow(fd);
724 mfd_fail_grow_write(fd);
725
726 close(fd);
727}
728
729/*
730 * Test sharing via dup()
731 * Test that seals are shared between dupped FDs and they're all equal.
732 */
733static void test_share_dup(void)
734{
735 int fd, fd2;
736
737 fd = mfd_assert_new("kern_memfd_share_dup",
738 MFD_DEF_SIZE,
739 MFD_CLOEXEC | MFD_ALLOW_SEALING);
740 mfd_assert_has_seals(fd, 0);
741
742 fd2 = mfd_assert_dup(fd);
743 mfd_assert_has_seals(fd2, 0);
744
745 mfd_assert_add_seals(fd, F_SEAL_WRITE);
746 mfd_assert_has_seals(fd, F_SEAL_WRITE);
747 mfd_assert_has_seals(fd2, F_SEAL_WRITE);
748
749 mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
750 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
751 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
752
753 mfd_assert_add_seals(fd, F_SEAL_SEAL);
754 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
755 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
756
757 mfd_fail_add_seals(fd, F_SEAL_GROW);
758 mfd_fail_add_seals(fd2, F_SEAL_GROW);
759 mfd_fail_add_seals(fd, F_SEAL_SEAL);
760 mfd_fail_add_seals(fd2, F_SEAL_SEAL);
761
762 close(fd2);
763
764 mfd_fail_add_seals(fd, F_SEAL_GROW);
765 close(fd);
766}
767
768/*
769 * Test sealing with active mmap()s
770 * Modifying seals is only allowed if no other mmap() refs exist.
771 */
772static void test_share_mmap(void)
773{
774 int fd;
775 void *p;
776
777 fd = mfd_assert_new("kern_memfd_share_mmap",
778 MFD_DEF_SIZE,
779 MFD_CLOEXEC | MFD_ALLOW_SEALING);
780 mfd_assert_has_seals(fd, 0);
781
782 /* shared/writable ref prevents sealing WRITE, but allows others */
783 p = mfd_assert_mmap_shared(fd);
784 mfd_fail_add_seals(fd, F_SEAL_WRITE);
785 mfd_assert_has_seals(fd, 0);
786 mfd_assert_add_seals(fd, F_SEAL_SHRINK);
787 mfd_assert_has_seals(fd, F_SEAL_SHRINK);
788 munmap(p, MFD_DEF_SIZE);
789
790 /* readable ref allows sealing */
791 p = mfd_assert_mmap_private(fd);
792 mfd_assert_add_seals(fd, F_SEAL_WRITE);
793 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
794 munmap(p, MFD_DEF_SIZE);
795
796 close(fd);
797}
798
799/*
800 * Test sealing with open(/proc/self/fd/%d)
801 * Via /proc we can get access to a separate file-context for the same memfd.
802 * This is *not* like dup(), but like a real separate open(). Make sure the
803 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
804 */
805static void test_share_open(void)
806{
807 int fd, fd2;
808
809 fd = mfd_assert_new("kern_memfd_share_open",
810 MFD_DEF_SIZE,
811 MFD_CLOEXEC | MFD_ALLOW_SEALING);
812 mfd_assert_has_seals(fd, 0);
813
814 fd2 = mfd_assert_open(fd, O_RDWR, 0);
815 mfd_assert_add_seals(fd, F_SEAL_WRITE);
816 mfd_assert_has_seals(fd, F_SEAL_WRITE);
817 mfd_assert_has_seals(fd2, F_SEAL_WRITE);
818
819 mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
820 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
821 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
822
823 close(fd);
824 fd = mfd_assert_open(fd2, O_RDONLY, 0);
825
826 mfd_fail_add_seals(fd, F_SEAL_SEAL);
827 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
828 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
829
830 close(fd2);
831 fd2 = mfd_assert_open(fd, O_RDWR, 0);
832
833 mfd_assert_add_seals(fd2, F_SEAL_SEAL);
834 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
835 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
836
837 close(fd2);
838 close(fd);
839}
840
841/*
842 * Test sharing via fork()
843 * Test whether seal modifications work as expected with forked children.
844 */
845static void test_share_fork(void)
846{
847 int fd;
848 pid_t pid;
849
850 fd = mfd_assert_new("kern_memfd_share_fork",
851 MFD_DEF_SIZE,
852 MFD_CLOEXEC | MFD_ALLOW_SEALING);
853 mfd_assert_has_seals(fd, 0);
854
855 pid = spawn_idle_thread(0);
856 mfd_assert_add_seals(fd, F_SEAL_SEAL);
857 mfd_assert_has_seals(fd, F_SEAL_SEAL);
858
859 mfd_fail_add_seals(fd, F_SEAL_WRITE);
860 mfd_assert_has_seals(fd, F_SEAL_SEAL);
861
862 join_idle_thread(pid);
863
864 mfd_fail_add_seals(fd, F_SEAL_WRITE);
865 mfd_assert_has_seals(fd, F_SEAL_SEAL);
866
867 close(fd);
868}
869
870int main(int argc, char **argv)
871{
872 pid_t pid;
873
874 printf("memfd: CREATE\n");
875 test_create();
876 printf("memfd: BASIC\n");
877 test_basic();
878
879 printf("memfd: SEAL-WRITE\n");
880 test_seal_write();
881 printf("memfd: SEAL-SHRINK\n");
882 test_seal_shrink();
883 printf("memfd: SEAL-GROW\n");
884 test_seal_grow();
885 printf("memfd: SEAL-RESIZE\n");
886 test_seal_resize();
887
888 printf("memfd: SHARE-DUP\n");
889 test_share_dup();
890 printf("memfd: SHARE-MMAP\n");
891 test_share_mmap();
892 printf("memfd: SHARE-OPEN\n");
893 test_share_open();
894 printf("memfd: SHARE-FORK\n");
895 test_share_fork();
896
897 /* Run test-suite in a multi-threaded environment with a shared
898 * file-table. */
899 pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
900 printf("memfd: SHARE-DUP (shared file-table)\n");
901 test_share_dup();
902 printf("memfd: SHARE-MMAP (shared file-table)\n");
903 test_share_mmap();
904 printf("memfd: SHARE-OPEN (shared file-table)\n");
905 test_share_open();
906 printf("memfd: SHARE-FORK (shared file-table)\n");
907 test_share_fork();
908 join_idle_thread(pid);
909
910 printf("memfd: DONE\n");
911
912 return 0;
913}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100644
index 000000000000..69b930e1e041
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,14 @@
1#!/bin/sh
2
3if test -d "./mnt" ; then
4 fusermount -u ./mnt
5 rmdir ./mnt
6fi
7
8set -e
9
10mkdir mnt
11./fuse_mnt ./mnt
12./fuse_test ./mnt/memfd
13fusermount -u ./mnt
14rmdir ./mnt
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
index d46558b1f58d..c34cd8ac8aaa 100644
--- a/tools/testing/selftests/ptrace/peeksiginfo.c
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -31,6 +31,10 @@ static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
31#define TEST_SICODE_PRIV -1 31#define TEST_SICODE_PRIV -1
32#define TEST_SICODE_SHARE -2 32#define TEST_SICODE_SHARE -2
33 33
34#ifndef PAGE_SIZE
35#define PAGE_SIZE sysconf(_SC_PAGESIZE)
36#endif
37
34#define err(fmt, ...) \ 38#define err(fmt, ...) \
35 fprintf(stderr, \ 39 fprintf(stderr, \
36 "Error (%s:%d): " fmt, \ 40 "Error (%s:%d): " fmt, \